A moment, please

- Implement mboit on indirect
- Has issues when many high alpha fragments are stacked on top of each
  other. The transmittance function explodes to zero, and we end up just
  writing out black in the second pass.
- Other than that, I think the approach is very sound but hopefully a
  solution can be found
- Needs some clean up work with instancing, and in fact I think mboit
  can be implemented on GL3.2, whereas wboit relied on extensions or
  GL4.0
This commit is contained in:
Jozufozu 2025-02-16 18:16:10 -08:00
parent 5d16ebda60
commit cb87961b19
17 changed files with 1517 additions and 204 deletions

View file

@ -10,4 +10,7 @@ public class Samplers {
public static final GlTextureUnit INSTANCE_BUFFER = GlTextureUnit.T4;
public static final GlTextureUnit LIGHT_LUT = GlTextureUnit.T5;
public static final GlTextureUnit LIGHT_SECTIONS = GlTextureUnit.T6;
public static final GlTextureUnit ZEROTH_MOMENT = GlTextureUnit.T7;
public static final GlTextureUnit MOMENTS = GlTextureUnit.T8;
}

View file

@ -164,7 +164,7 @@ public class IndirectPrograms extends AtomicReferenceCounted {
setInstance(null);
}
public GlProgram getIndirectProgram(InstanceType<?> instanceType, ContextShader contextShader, Material material, boolean oit) {
public GlProgram getIndirectProgram(InstanceType<?> instanceType, ContextShader contextShader, Material material, PipelineCompiler.OitMode oit) {
return pipeline.get(instanceType, contextShader, material, oit);
}

View file

@ -70,7 +70,7 @@ public class InstancingPrograms extends AtomicReferenceCounted {
}
public GlProgram get(InstanceType<?> instanceType, ContextShader contextShader, Material material) {
return pipeline.get(instanceType, contextShader, material, false);
return pipeline.get(instanceType, contextShader, material, PipelineCompiler.OitMode.OFF);
}
@Override

View file

@ -50,7 +50,7 @@ public final class PipelineCompiler {
ALL.add(this);
}
public GlProgram get(InstanceType<?> instanceType, ContextShader contextShader, Material material, boolean oit) {
public GlProgram get(InstanceType<?> instanceType, ContextShader contextShader, Material material, OitMode oit) {
var light = material.light();
var cutout = material.cutout();
var shaders = material.shaders();
@ -128,7 +128,7 @@ public final class PipelineCompiler {
.source());
var debug = key.debugEnabled() ? "_debug" : "";
var cutout = key.useCutout() ? "_cutout" : "";
var oit = key.oit() ? "_oit" : "";
var oit = key.oit().name;
return "pipeline/" + pipeline.compilerMarker() + "/frag/" + material + "/" + light + "_" + context + cutout + debug + oit;
})
.requireExtensions(extensions)
@ -148,8 +148,9 @@ public final class PipelineCompiler {
}
})
.onCompile((key, comp) -> {
if (key.oit()) {
if (key.oit() != OitMode.OFF) {
comp.define("_FLW_OIT");
comp.define(key.oit().define);
}
})
.withResource(API_IMPL_FRAG)
@ -224,6 +225,21 @@ public final class PipelineCompiler {
*/
public record PipelineProgramKey(InstanceType<?> instanceType, ContextShader contextShader, LightShader light,
MaterialShaders materialShaders, boolean useCutout, boolean debugEnabled,
boolean oit) {
OitMode oit) {
}
public enum OitMode {
OFF("", ""),
GENERATE("_FLW_GENERATE_MOMENTS", "_generate"),
RESOLVE("_FLW_RESOLVE_MOMENTS", "_resolve"),
;
public final String define;
public final String name;
OitMode(String define, String name) {
this.define = define;
this.name = name;
}
}
}

View file

@ -17,6 +17,7 @@ import dev.engine_room.flywheel.api.material.Transparency;
import dev.engine_room.flywheel.api.model.Model;
import dev.engine_room.flywheel.backend.compile.ContextShader;
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.compile.PipelineCompiler;
import dev.engine_room.flywheel.backend.engine.InstancerKey;
import dev.engine_room.flywheel.backend.engine.MaterialRenderState;
import dev.engine_room.flywheel.backend.engine.MeshPool;
@ -188,7 +189,7 @@ public class IndirectCullingGroup<I extends Instance> {
GlProgram lastProgram = null;
for (var multiDraw : multiDraws) {
var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, false);
var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, PipelineCompiler.OitMode.OFF);
if (drawProgram != lastProgram) {
lastProgram = drawProgram;
@ -202,7 +203,7 @@ public class IndirectCullingGroup<I extends Instance> {
}
}
public void submitTransparent() {
public void submitTransparent(PipelineCompiler.OitMode oit) {
if (nothingToDo()) {
return;
}
@ -214,7 +215,7 @@ public class IndirectCullingGroup<I extends Instance> {
GlProgram lastProgram = null;
for (var multiDraw : transparentDraws) {
var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, true);
var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, oit);
if (drawProgram != lastProgram) {
lastProgram = drawProgram;
@ -229,7 +230,7 @@ public class IndirectCullingGroup<I extends Instance> {
}
public void bindForCrumbling(Material material) {
var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material, false);
var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material, PipelineCompiler.OitMode.OFF);
program.bind();

View file

@ -17,6 +17,7 @@ import dev.engine_room.flywheel.api.instance.Instance;
import dev.engine_room.flywheel.api.instance.InstanceType;
import dev.engine_room.flywheel.backend.Samplers;
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.compile.PipelineCompiler;
import dev.engine_room.flywheel.backend.engine.AbstractInstancer;
import dev.engine_room.flywheel.backend.engine.CommonCrumbling;
import dev.engine_room.flywheel.backend.engine.DrawManager;
@ -48,7 +49,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
private final DepthPyramid depthPyramid;
private final WboitFrameBuffer wboitFrameBuffer;
private final MboitFramebuffer wboitFrameBuffer;
public IndirectDrawManager(IndirectPrograms programs) {
this.programs = programs;
@ -65,7 +66,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
depthPyramid = new DepthPyramid(programs);
wboitFrameBuffer = new WboitFrameBuffer(programs);
wboitFrameBuffer = new MboitFramebuffer(programs);
}
@Override
@ -145,10 +146,16 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
group.submitSolid();
}
wboitFrameBuffer.setup();
wboitFrameBuffer.generateMoments();
for (var group : cullingGroups.values()) {
group.submitTransparent();
group.submitTransparent(PipelineCompiler.OitMode.GENERATE);
}
wboitFrameBuffer.resolveMoments();
for (var group : cullingGroups.values()) {
group.submitTransparent(PipelineCompiler.OitMode.RESOLVE);
}
wboitFrameBuffer.composite();

View file

@ -0,0 +1,142 @@
package dev.engine_room.flywheel.backend.engine.indirect;
import org.lwjgl.opengl.GL32;
import org.lwjgl.opengl.GL46;
import com.mojang.blaze3d.platform.GlStateManager;
import com.mojang.blaze3d.systems.RenderSystem;
import dev.engine_room.flywheel.backend.Samplers;
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import net.minecraft.client.Minecraft;
public class MboitFramebuffer {
public final int fbo;
private final IndirectPrograms programs;
private final int vao;
public int zerothMoment;
public int moments;
public int accumulate;
private int lastWidth = -1;
private int lastHeight = -1;
public MboitFramebuffer(IndirectPrograms programs) {
this.programs = programs;
fbo = GL46.glCreateFramebuffers();
vao = GL46.glCreateVertexArrays();
}
public void generateMoments() {
var mainRenderTarget = Minecraft.getInstance()
.getMainRenderTarget();
createTextures(mainRenderTarget.width, mainRenderTarget.height);
// No depth writes, but we'll still use the depth test
RenderSystem.depthMask(false);
RenderSystem.enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE);
RenderSystem.blendEquation(GL46.GL_FUNC_ADD);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0);
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0});
GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo);
}
public void resolveMoments() {
// No depth writes, but we'll still use the depth test
RenderSystem.depthMask(false);
RenderSystem.enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE);
RenderSystem.blendEquation(GL46.GL_FUNC_ADD);
Samplers.ZEROTH_MOMENT.makeActive();
GlStateManager._bindTexture(zerothMoment);
Samplers.MOMENTS.makeActive();
GlStateManager._bindTexture(moments);
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT2});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0});
GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo);
}
public void composite() {
var mainRenderTarget = Minecraft.getInstance()
.getMainRenderTarget();
mainRenderTarget.bindWrite(false);
var oitCompositeProgram = programs.getOitCompositeProgram();
GlStateManager._depthMask(false);
GlStateManager._depthFunc(GL46.GL_ALWAYS);
GlStateManager._enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA);
oitCompositeProgram.bind();
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(zerothMoment);
GlTextureUnit.T1.makeActive();
GlStateManager._bindTexture(accumulate);
// Empty VAO, the actual full screen triangle is generated in the vertex shader
GlStateManager._glBindVertexArray(vao);
GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3);
}
public void delete() {
GL46.glDeleteTextures(zerothMoment);
GL46.glDeleteTextures(moments);
GL46.glDeleteTextures(accumulate);
GL46.glDeleteFramebuffers(fbo);
GL46.glDeleteVertexArrays(vao);
}
private void createTextures(int width, int height) {
if (lastWidth == width && lastHeight == height) {
return;
}
lastWidth = width;
lastHeight = height;
GL46.glDeleteTextures(zerothMoment);
GL46.glDeleteTextures(moments);
GL46.glDeleteTextures(accumulate);
zerothMoment = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
moments = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
GL46.glTextureStorage2D(zerothMoment, 1, GL32.GL_R16F, width, height);
GL46.glTextureStorage2D(moments, 1, GL32.GL_RGBA16F, width, height);
GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height);
// for (int tex : new int[]{zerothMoment, moments, composite}) {
// GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
// GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
// GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE);
// GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
// GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
// }
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, zerothMoment, 0);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, moments, 0);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT2, accumulate, 0);
}
}

View file

@ -1,117 +0,0 @@
package dev.engine_room.flywheel.backend.engine.indirect;
import org.lwjgl.opengl.ARBDrawBuffersBlend;
import org.lwjgl.opengl.GL32;
import org.lwjgl.opengl.GL46;
import com.mojang.blaze3d.platform.GlStateManager;
import com.mojang.blaze3d.systems.RenderSystem;
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import net.minecraft.client.Minecraft;
public class WboitFrameBuffer {
public final int fbo;
private final IndirectPrograms programs;
private final int vao;
public int accum;
public int reveal;
private int lastWidth = -1;
private int lastHeight = -1;
public WboitFrameBuffer(IndirectPrograms programs) {
this.programs = programs;
fbo = GL46.glCreateFramebuffers();
vao = GL46.glCreateVertexArrays();
}
public void setup() {
var mainRenderTarget = Minecraft.getInstance()
.getMainRenderTarget();
createTextures(mainRenderTarget.width, mainRenderTarget.height);
// No depth writes, but we'll still use the depth test
GlStateManager._depthMask(false);
GlStateManager._enableBlend();
ARBDrawBuffersBlend.glBlendFunciARB(0, GL46.GL_ONE, GL46.GL_ONE); // accumulation blend target
ARBDrawBuffersBlend.glBlendFunciARB(1, GL46.GL_ZERO, GL46.GL_ONE_MINUS_SRC_COLOR); // revealage blend target
GlStateManager._blendEquation(GL46.GL_FUNC_ADD);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0);
GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo);
GL46.glClearBufferfv(GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0});
GL46.glClearBufferfv(GL46.GL_COLOR, 1, new float[]{1, 1, 1, 1});
}
public void composite() {
var mainRenderTarget = Minecraft.getInstance()
.getMainRenderTarget();
mainRenderTarget.bindWrite(false);
var oitCompositeProgram = programs.getOitCompositeProgram();
GlStateManager._depthMask(false);
GlStateManager._depthFunc(GL46.GL_ALWAYS);
GlStateManager._enableBlend();
RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.SRC_ALPHA, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA);
oitCompositeProgram.bind();
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(accum);
GlTextureUnit.T1.makeActive();
GlStateManager._bindTexture(reveal);
// Empty VAO, the actual full screen triangle is generated in the vertex shader
GlStateManager._glBindVertexArray(vao);
GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3);
}
public void delete() {
GL46.glDeleteTextures(accum);
GL46.glDeleteTextures(reveal);
GL46.glDeleteFramebuffers(fbo);
GL46.glDeleteVertexArrays(vao);
}
private void createTextures(int width, int height) {
if (lastWidth == width && lastHeight == height) {
return;
}
lastWidth = width;
lastHeight = height;
GL46.glDeleteTextures(accum);
GL46.glDeleteTextures(reveal);
accum = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
reveal = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1});
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, accum, 0);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, reveal, 0);
GL46.glTextureStorage2D(accum, 1, GL32.GL_RGBA32F, width, height);
GL46.glTextureStorage2D(reveal, 1, GL32.GL_R8, width, height);
for (int tex : new int[]{accum, reveal}) {
GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE);
GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
}
}
}

View file

@ -1,7 +1,5 @@
package dev.engine_room.flywheel.backend.gl;
import java.nio.ByteBuffer;
import org.jetbrains.annotations.UnknownNullability;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opengl.GL;
@ -13,6 +11,7 @@ import org.lwjgl.opengl.GL43;
import org.lwjgl.opengl.GLCapabilities;
import org.lwjgl.opengl.KHRShaderSubgroup;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.MemoryUtil;
import dev.engine_room.flywheel.backend.FlwBackend;
import dev.engine_room.flywheel.backend.compile.core.Compilation;
@ -67,10 +66,11 @@ public final class GlCompat {
*/
public static void safeShaderSource(int glId, CharSequence source) {
try (MemoryStack stack = MemoryStack.stackPush()) {
final ByteBuffer sourceBuffer = stack.UTF8(source, true);
var sourceBuffer = MemoryUtil.memUTF8(source, true);
final PointerBuffer pointers = stack.mallocPointer(1);
pointers.put(sourceBuffer);
GL20C.nglShaderSource(glId, 1, pointers.address0(), 0);
MemoryUtil.memFree(sourceBuffer);
}
}

View file

@ -1,6 +1,5 @@
package dev.engine_room.flywheel.backend.glsl.parse;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jetbrains.annotations.Nullable;
@ -34,20 +33,21 @@ public class ShaderField {
* Scan the source for function definitions and "parse" them into objects that contain properties of the function.
*/
public static ImmutableMap<String, ShaderField> parseFields(SourceLines source) {
Matcher matcher = PATTERN.matcher(source);
// Matcher matcher = PATTERN.matcher(source);
//
// ImmutableMap.Builder<String, ShaderField> fields = ImmutableMap.builder();
// while (matcher.find()) {
// Span self = Span.fromMatcher(source, matcher);
// Span location = Span.fromMatcher(source, matcher, 1);
// Span decoration = Span.fromMatcher(source, matcher, 2);
// Span type = Span.fromMatcher(source, matcher, 3);
// Span name = Span.fromMatcher(source, matcher, 4);
//
// fields.put(location.get(), new ShaderField(self, location, decoration, type, name));
// }
ImmutableMap.Builder<String, ShaderField> fields = ImmutableMap.builder();
while (matcher.find()) {
Span self = Span.fromMatcher(source, matcher);
Span location = Span.fromMatcher(source, matcher, 1);
Span decoration = Span.fromMatcher(source, matcher, 2);
Span type = Span.fromMatcher(source, matcher, 3);
Span name = Span.fromMatcher(source, matcher, 4);
fields.put(location.get(), new ShaderField(self, location, decoration, type, name));
}
return fields.build();
return ImmutableMap.<String, ShaderField>builder()
.build();
}
public enum Qualifier {

View file

@ -1,6 +1,7 @@
#include "flywheel:internal/packed_material.glsl"
#include "flywheel:internal/diffuse.glsl"
#include "flywheel:internal/colorizer.glsl"
#include "flywheel:internal/mboit/moment_oit.glsl"
// optimize discard usage
#if defined(GL_ARB_conservative_depth) && defined(_FLW_USE_DISCARD)
@ -18,13 +19,13 @@ flat in uvec2 _flw_ids;
#endif
#ifdef _FLW_OIT
// your first render target which is used to accumulate pre-multiplied color values
layout (location = 0) out vec4 accum;
// your second render target which is used to store pixel revealage
layout (location = 1) out float reveal;
#ifdef _FLW_GENERATE_MOMENTS
layout (location = 0) out float _flw_zerothMoment_out;
layout (location = 1) out vec4 _flw_moments_out;
#endif
#ifdef _FLW_RESOLVE_MOMENTS
layout (location = 0) out vec4 _flw_accumulate_out;
#endif
#else
out vec4 _flw_outputColor;
@ -118,26 +119,40 @@ void _flw_main() {
color = flw_fogFilter(color);
color.a = 0.9;
#ifdef _FLW_OIT
float linearDepth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar);
float depth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar);
float lnNear = log(_flw_cullData.znear);
float lnFar = log(_flw_cullData.zfar);
// insert your favorite weighting function here. the color-based factor
// avoids color pollution from the edges of wispy clouds. the z-based
// factor gives precedence to nearer surfaces
//float weight = clamp(pow(min(1.0, color.a * 10.0) + 0.01, 3.0) * 1e8 * pow(1.0 - gl_FragCoord.z * 0.9, 3.0), 1e-2, 3e3);
float weight = max(min(1.0, max(max(color.r, color.g), color.b) * color.a), color.a) *
clamp(0.03 / (1e-5 + pow(depth / 200, 4.0)), 1e-2, 3e3);
float depth = (log(linearDepth) - lnNear);
// blend func: GL_ONE, GL_ONE
// switch to pre-multiplied alpha and weight
accum = vec4(color.rgb * color.a, color.a) * weight;
depth /= lnFar - lnNear;
// blend func: GL_ZERO, GL_ONE_MINUS_SRC_ALPHA
reveal = color.a;
depth = clamp(depth * 2. - 1., -1., 1.);
#ifdef _FLW_GENERATE_MOMENTS
generateMoments(depth, 1 - color.a, vec4(0), _flw_zerothMoment_out, _flw_moments_out);
#endif
#ifdef _FLW_RESOLVE_MOMENTS
float tt;
float td;
resolveMoments(td, tt, depth, gl_FragCoord.xy);
if (abs(td) < 1e-5) {
discard;
}
color.rgb *= color.a;
color *= td;
_flw_accumulate_out = color;
#endif
#else
_flw_outputColor = color;

View file

@ -1,48 +1,20 @@
// shader outputs
layout (location = 0) out vec4 frag;
// color accumulation buffer
layout (binding = 0) uniform sampler2D accum;
// revealage threshold buffer
layout (binding = 1) uniform sampler2D reveal;
// epsilon number
const float EPSILON = 0.00001f;
// calculate floating point numbers equality accurately
bool isApproximatelyEqual(float a, float b) {
return abs(a - b) <= (abs(a) < abs(b) ? abs(b) : abs(a)) * EPSILON;
}
// get the max value between three values
float max3(vec3 v) {
return max(max(v.x, v.y), v.z);
}
layout (binding = 0) uniform sampler2D zerothMoment;
layout (binding = 1) uniform sampler2D accumulate;
void main() {
// fragment coordination
ivec2 coords = ivec2(gl_FragCoord.xy);
// fragment revealage
float revealage = texelFetch(reveal, coords, 0).r;
float b0 = texelFetch(zerothMoment, coords, 0).r;
// save the blending and color texture fetch cost if there is not a transparent fragment
if (isApproximatelyEqual(revealage, 1.0f)) {
if (b0 < 1e-5) {
discard;
}
// fragment color
vec4 accumulation = texelFetch(accum, coords, 0);
vec4 accumulation = texelFetch(accumulate, coords, 0);
// suppress overflow
if (isinf(max3(abs(accumulation.rgb)))) {
accumulation.rgb = vec3(accumulation.a);
}
vec3 normalizedAccumulation = accumulation.rgb / max(accumulation.a, 1e-5);
// prevent floating point precision bug
vec3 average_color = accumulation.rgb / max(accumulation.a, EPSILON);
// blend pixels
frag = vec4(average_color, 1.0f - revealage);
frag = vec4(normalizedAccumulation, exp(-b0));
}

View file

@ -0,0 +1,203 @@
/*! \file
This header defines utility functions to deal with complex numbers and
complex polynomials.*/
/*! Returns the complex conjugate of the given complex number (i.e. it changes
the sign of the y-component).*/
vec2 Conjugate(vec2 Z){
return vec2(Z.x, -Z.y);
}
/*! This function implements complex multiplication.*/
vec2 Multiply(vec2 LHS, vec2 RHS){
return vec2(LHS.x*RHS.x-LHS.y*RHS.y, LHS.x*RHS.y+LHS.y*RHS.x);
}
/*! This function computes the magnitude of the given complex number.*/
float Magnitude(vec2 Z){
return sqrt(dot(Z, Z));
}
/*! This function computes the quotient of two complex numbers. The denominator
must not be zero.*/
vec2 Divide(vec2 Numerator, vec2 Denominator){
return vec2(Numerator.x*Denominator.x+Numerator.y*Denominator.y, -Numerator.x*Denominator.y+Numerator.y*Denominator.x)/dot(Denominator, Denominator);
}
/*! This function divides a real number by a complex number. The denominator
must not be zero.*/
vec2 Divide(float Numerator, vec2 Denominator){
return vec2(Numerator*Denominator.x, -Numerator*Denominator.y)/dot(Denominator, Denominator);
}
/*! This function implements computation of the reciprocal of the given non-
zero complex number.*/
vec2 Reciprocal(vec2 Z){
return vec2(Z.x, -Z.y)/dot(Z, Z);
}
/*! This utility function implements complex squaring.*/
vec2 Square(vec2 Z){
return vec2(Z.x*Z.x-Z.y*Z.y, 2.0f*Z.x*Z.y);
}
/*! This utility function implements complex computation of the third power.*/
vec2 Cube(vec2 Z){
return Multiply(Square(Z), Z);
}
/*! This utility function computes one square root of the given complex value.
The other one can be found using the unary minus operator.
\warning This function is continuous but not defined on the negative real
axis (and cannot be continued continuously there).
\sa SquareRoot() */
vec2 SquareRootUnsafe(vec2 Z){
float ZLengthSq=dot(Z, Z);
float ZLengthInv=rsqrt(ZLengthSq);
vec2 UnnormalizedRoot=Z*ZLengthInv+vec2(1.0f, 0.0f);
float UnnormalizedRootLengthSq=dot(UnnormalizedRoot, UnnormalizedRoot);
float NormalizationFactorInvSq=UnnormalizedRootLengthSq*ZLengthInv;
float NormalizationFactor=rsqrt(NormalizationFactorInvSq);
return NormalizationFactor*UnnormalizedRoot;
}
/*! This utility function computes one square root of the given complex value.
The other one can be found using the unary minus operator.
\note This function has discontinuities for values with real part zero.
\sa SquareRootUnsafe() */
vec2 SquareRoot(vec2 Z){
vec2 ZPositiveRealPart=vec2(abs(Z.x), Z.y);
vec2 ComputedRoot=SquareRootUnsafe(ZPositiveRealPart);
return (Z.x>=0.0)?ComputedRoot:ComputedRoot.yx;
}
/*! This utility function computes one cubic root of the given complex value. The
other roots can be found by multiplication by cubic roots of unity.
\note This function has various discontinuities.*/
vec2 CubicRoot(vec2 Z){
float Argument=atan2(Z.y, Z.x);
float NewArgument=Argument/3.0f;
vec2 NormalizedRoot;
sincos(NewArgument, NormalizedRoot.y, NormalizedRoot.x);
return NormalizedRoot*pow(dot(Z, Z), 1.0f/6.0f);
}
/*! @{
Returns the complex conjugate of the given complex vector (i.e. it changes the
second column resp the y-component).*/
mat2x2 Conjugate(mat2x2 Vector){
return mat2x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y);
}
mat3x2 Conjugate(mat3x2 Vector){
return mat3x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y);
}
mat4x2 Conjugate(mat4x2 Vector){
return mat4x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y, Vector[3].x, -Vector[3].y);
}
void Conjugate(out vec2 OutConjugateVector[5], vec2 Vector[5]){
for (int i=0;i!=5;++i){
OutConjugateVector[i]=vec2(Vector[i].x, -Vector[i].x);
}
}
//!@}
/*! Returns the real part of a complex number as real.*/
float RealPart(vec2 Z){
return Z.x;
}
/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function
outputs its two complex roots.*/
void SolveQuadratic(out vec2 pOutRoot[2], vec2 A, vec2 B, vec2 C)
{
// Normalize the coefficients
vec2 InvA=Reciprocal(A);
B=Multiply(B, InvA);
C=Multiply(C, InvA);
// Divide the middle coefficient by two
B*=0.5f;
// Apply the quadratic formula
vec2 DiscriminantRoot=SquareRoot(Square(B)-C);
pOutRoot[0]=-B-DiscriminantRoot;
pOutRoot[1]=-B+DiscriminantRoot;
}
/*! Given coefficients of a cubic polynomial A*x^3+B*x^2+C*x+D, this function
outputs its three complex roots.*/
void SolveCubicBlinn(out vec2 pOutRoot[3], vec2 A, vec2 B, vec2 C, vec2 D)
{
// Normalize the polynomial
vec2 InvA=Reciprocal(A);
B=Multiply(B, InvA);
C=Multiply(C, InvA);
D=Multiply(D, InvA);
// Divide middle coefficients by three
B/=3.0f;
C/=3.0f;
// Compute the Hessian and the discriminant
vec2 Delta00=-Square(B)+C;
vec2 Delta01=-Multiply(C, B)+D;
vec2 Delta11=Multiply(B, D)-Square(C);
vec2 Discriminant=4.0f*Multiply(Delta00, Delta11)-Square(Delta01);
// Compute coefficients of the depressed cubic
// (third is zero, fourth is one)
vec2 DepressedD=-2.0f*Multiply(B, Delta00)+Delta01;
vec2 DepressedC=Delta00;
// Take the cubic root of a complex number avoiding cancellation
vec2 DiscriminantRoot=SquareRoot(-Discriminant);
DiscriminantRoot=faceforward(DiscriminantRoot, DiscriminantRoot, DepressedD);
vec2 CubedRoot=DiscriminantRoot-DepressedD;
vec2 FirstRoot=CubicRoot(0.5f*CubedRoot);
vec2 pCubicRoot[3]={
FirstRoot,
Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), FirstRoot),
Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), FirstRoot)
};
// Also compute the reciprocal cubic roots
vec2 InvFirstRoot=Reciprocal(FirstRoot);
vec2 pInvCubicRoot[3]={
InvFirstRoot,
Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), InvFirstRoot),
Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), InvFirstRoot)
};
// Turn them into roots of the depressed cubic and revert the depression
// transform
for (int i=0;i!=3;++i)
{
pOutRoot[i]=pCubicRoot[i]-Multiply(DepressedC, pInvCubicRoot[i])-B;
}
}
/*! Given coefficients of a quartic polynomial A*x^4+B*x^3+C*x^2+D*x+E, this
function outputs its four complex roots.*/
void SolveQuarticNeumark(out vec2 pOutRoot[4], vec2 A, vec2 B, vec2 C, vec2 D, vec2 E)
{
// Normalize the polynomial
vec2 InvA=Reciprocal(A);
B=Multiply(B, InvA);
C=Multiply(C, InvA);
D=Multiply(D, InvA);
E=Multiply(E, InvA);
// Construct a normalized cubic
vec2 P=-2.0f*C;
vec2 Q=Square(C)+Multiply(B, D)-4.0f*E;
vec2 R=Square(D)+Multiply(Square(B), E)-Multiply(Multiply(B, C), D);
// Compute a root that is not the smallest of the cubic
vec2 pCubicRoot[3];
SolveCubicBlinn(pCubicRoot, vec2(1.0f, 0.0f), P, Q, R);
vec2 y=(dot(pCubicRoot[1], pCubicRoot[1])>dot(pCubicRoot[0], pCubicRoot[0]))?pCubicRoot[1]:pCubicRoot[0];
// Solve a quadratic to obtain linear coefficients for quadratic polynomials
vec2 BB=Square(B);
vec2 fy=4.0f*y;
vec2 BB_fy=BB-fy;
vec2 tmp=SquareRoot(BB_fy);
vec2 G=(B+tmp)*0.5f;
vec2 g=(B-tmp)*0.5f;
// Construct the corresponding constant coefficients
vec2 Z=C-y;
tmp=Divide(0.5f*Multiply(B, Z)-D, tmp);
vec2 H=Z*0.5f+tmp;
vec2 h=Z*0.5f-tmp;
// Compute the roots
vec2 pQuadraticRoot[2];
SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), G, H);
pOutRoot[0]=pQuadraticRoot[0];
pOutRoot[1]=pQuadraticRoot[1];
SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), g, h);
pOutRoot[2]=pQuadraticRoot[0];
pOutRoot[3]=pQuadraticRoot[1];
}

View file

@ -0,0 +1,500 @@
/*! \file
This header provides utility functions to reconstruct the transmittance
from a given vector of power moments (4, 6 or 8 power moments) at a
specified depth. As prerequisite, utility functions for computing the real
roots of polynomials up to degree four are defined.
*/
//#include "flywheel:internal/mboit/trigonometric_moment_math.glsl"
void sincos(float theta, out float s, out float c) {
s = sin(theta);
c = cos(theta);
}
float saturate(float a) {
return clamp(a, 0., 1.);
}
/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function
outputs its two real roots.*/
vec2 solveQuadratic(vec3 coeffs)
{
coeffs[1] *= 0.5;
float x1, x2, tmp;
tmp = (coeffs[1] * coeffs[1] - coeffs[0] * coeffs[2]);
if (coeffs[1] >= 0) {
tmp = sqrt(tmp);
x1 = (-coeffs[2]) / (coeffs[1] + tmp);
x2 = (-coeffs[1] - tmp) / coeffs[0];
} else {
tmp = sqrt(tmp);
x1 = (-coeffs[1] + tmp) / coeffs[0];
x2 = coeffs[2] / (-coeffs[1] + tmp);
}
return vec2(x1, x2);
}
/*! Code taken from the blog "Moments in Graphics" by Christoph Peters.
http://momentsingraphics.de/?p=105
This function computes the three real roots of a cubic polynomial
Coefficient[0]+Coefficient[1]*x+Coefficient[2]*x^2+Coefficient[3]*x^3.*/
vec3 SolveCubic(vec4 Coefficient) {
// Normalize the polynomial
Coefficient.xyz /= Coefficient.w;
// Divide middle coefficients by three
Coefficient.yz /= 3.0f;
// Compute the Hessian and the discrimant
vec3 Delta = vec3(
fma(-Coefficient.z, Coefficient.z, Coefficient.y),
fma(-Coefficient.y, Coefficient.z, Coefficient.x),
dot(vec2(Coefficient.z, -Coefficient.y), Coefficient.xy)
);
float Discriminant = dot(vec2(4.0f*Delta.x, -Delta.y), Delta.zy);
// Compute coefficients of the depressed cubic
// (third is zero, fourth is one)
vec2 Depressed = vec2(
fma(-2.0f*Coefficient.z, Delta.x, Delta.y),
Delta.x
);
// Take the cubic root of a normalized complex number
float Theta = atan(sqrt(Discriminant), -Depressed.x) / 3.0f;
vec2 CubicRoot;
sincos(Theta, CubicRoot.y, CubicRoot.x);
// Compute the three roots, scale appropriately and
// revert the depression transform
vec3 Root = vec3(
CubicRoot.x,
dot(vec2(-0.5f, -0.5f*sqrt(3.0f)), CubicRoot),
dot(vec2(-0.5f, 0.5f*sqrt(3.0f)), CubicRoot)
);
Root = fma(vec3(2.0f*sqrt(-Depressed.y)), Root, vec3(-Coefficient.z));
return Root;
}
/*! Given coefficients of a cubic polynomial
coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3 with three real roots,
this function returns the root of least magnitude.*/
float solveCubicBlinnSmallest(vec4 coeffs)
{
coeffs.xyz /= coeffs.w;
coeffs.yz /= 3.0;
vec3 delta = vec3(fma(-coeffs.z, coeffs.z, coeffs.y), fma(-coeffs.z, coeffs.y, coeffs.x), coeffs.z * coeffs.x - coeffs.y * coeffs.y);
float discriminant = 4.0 * delta.x * delta.z - delta.y * delta.y;
vec2 depressed = vec2(delta.z, -coeffs.x * delta.y + 2.0 * coeffs.y * delta.z);
float theta = abs(atan(coeffs.x * sqrt(discriminant), -depressed.y)) / 3.0;
vec2 sin_cos;
sincos(theta, sin_cos.x, sin_cos.y);
float tmp = 2.0 * sqrt(-depressed.x);
vec2 x = vec2(tmp * sin_cos.y, tmp * (-0.5 * sin_cos.y - 0.5 * sqrt(3.0) * sin_cos.x));
vec2 s = (x.x + x.y < 2.0 * coeffs.y) ? vec2(-coeffs.x, x.x + coeffs.y) : vec2(-coeffs.x, x.y + coeffs.y);
return s.x / s.y;
}
/*! Given coefficients of a quartic polynomial
coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3+coeffs[4]*x^4 with four
real roots, this function returns all roots.*/
vec4 solveQuarticNeumark(float coeffs[5])
{
// Normalization
float B = coeffs[3] / coeffs[4];
float C = coeffs[2] / coeffs[4];
float D = coeffs[1] / coeffs[4];
float E = coeffs[0] / coeffs[4];
// Compute coefficients of the cubic resolvent
float P = -2.0*C;
float Q = C*C + B*D - 4.0*E;
float R = D*D + B*B*E -B*C*D;
// Obtain the smallest cubic root
float y = solveCubicBlinnSmallest(vec4(R, Q, P, 1.0));
float BB = B*B;
float fy = 4.0 * y;
float BB_fy = BB - fy;
float Z = C - y;
float ZZ = Z*Z;
float fE = 4.0 * E;
float ZZ_fE = ZZ - fE;
float G, g, H, h;
// Compute the coefficients of the quadratics adaptively using the two
// proposed factorizations by Neumark. Choose the appropriate
// factorizations using the heuristic proposed by Herbison-Evans.
if (y < 0 || (ZZ + fE) * BB_fy > ZZ_fE * (BB + fy)) {
float tmp = sqrt(BB_fy);
G = (B + tmp) * 0.5;
g = (B - tmp) * 0.5;
tmp = (B*Z - 2.0*D) / (2.0*tmp);
H = fma(Z, 0.5, tmp);
h = fma(Z, 0.5, -tmp);
} else {
float tmp = sqrt(ZZ_fE);
H = (Z + tmp) * 0.5;
h = (Z - tmp) * 0.5;
tmp = (B*Z - 2.0*D) / (2.0*tmp);
G = fma(B, 0.5, tmp);
g = fma(B, 0.5, -tmp);
}
// Solve the quadratics
return vec4(solveQuadratic(vec3(1.0, G, H)), solveQuadratic(vec3(1.0, g, h)));
}
/*! Definition of utility functions for quantization and dequantization of
power moments stored in 16 bits per moment. */
void offsetMoments(inout vec2 b_even, inout vec2 b_odd, float sign)
{
b_odd += 0.5 * sign;
}
void quantizeMoments(out vec2 b_even_q, out vec2 b_odd_q, vec2 b_even, vec2 b_odd)
{
b_odd_q = b_odd * mat2x2(1.5f, sqrt(3.0f)*0.5f, -2.0f, -sqrt(3.0f)*2.0f / 9.0f);
b_even_q = b_even * mat2x2(4.0f, 0.5f, -4.0f, 0.5f);
}
void offsetAndDequantizeMoments(out vec2 b_even, out vec2 b_odd, vec2 b_even_q, vec2 b_odd_q)
{
offsetMoments(b_even_q, b_odd_q, -1.0);
b_odd = b_odd_q * mat2x2(-1.0f / 3.0f, -0.75f, sqrt(3.0f), 0.75f*sqrt(3.0f));
b_even = b_even_q * mat2x2(0.125f, -0.125f, 1.0f, 1.0f);
}
void offsetMoments(inout vec3 b_even, inout vec3 b_odd, float sign)
{
b_odd += 0.5 * sign;
b_even.z += 0.018888946f * sign;
}
void quantizeMoments(out vec3 b_even_q, out vec3 b_odd_q, vec3 b_even, vec3 b_odd)
{
const mat3x3 QuantizationMatrixOdd = mat3x3(
2.5f, -1.87499864450f, 1.26583039016f,
-10.0f, 4.20757543111f, -1.47644882902f,
8.0f, -1.83257678661f, 0.71061660238f);
const mat3x3 QuantizationMatrixEven = mat3x3(
4.0f, 9.0f, -0.57759806484f,
-4.0f, -24.0f, 4.61936647543f,
0.0f, 16.0f, -3.07953906655f);
b_odd_q = b_odd * QuantizationMatrixOdd;
b_even_q = b_even * QuantizationMatrixEven;
}
void offsetAndDequantizeMoments(out vec3 b_even, out vec3 b_odd, vec3 b_even_q, vec3 b_odd_q)
{
const mat3x3 QuantizationMatrixOdd = mat3x3(
-0.02877789192f, 0.09995235706f, 0.25893353755f,
0.47635550422f, 0.84532580931f, 0.90779616657f,
1.55242808973f, 1.05472570761f, 0.83327335647f);
const mat3x3 QuantizationMatrixEven = mat3x3(
0.00001253044f, -0.24998746956f, -0.37498825271f,
0.16668494186f, 0.16668494186f, 0.21876713299f,
0.86602540579f, 0.86602540579f, 0.81189881793f);
offsetMoments(b_even_q, b_odd_q, -1.0);
b_odd = b_odd_q * QuantizationMatrixOdd;
b_even = b_even_q * QuantizationMatrixEven;
}
void offsetMoments(inout vec4 b_even, inout vec4 b_odd, float sign)
{
b_odd += 0.5 * sign;
b_even += vec4(0.972481993925964, 1.0, 0.999179192513328, 0.991778293073131) * sign;
}
void quantizeMoments(out vec4 b_even_q, out vec4 b_odd_q, vec4 b_even, vec4 b_odd)
{
const mat4x4 mat_odd = mat4x4(3.48044635732474, -27.5760737514826, 55.1267384344761, -31.5311110403183,
1.26797185782836, -0.928755808743913, -2.07520453231032, 1.23598848322588,
-2.1671560004294, 6.17950199592966, -0.276515571579297, -4.23583042392097,
0.974332879165755, -0.443426830933027, -0.360491648368785, 0.310149466050223);
const mat4x4 mat_even = mat4x4(0.280504133158527, -0.757633844606942, 0.392179589334688, -0.887531871812237,
-2.01362265883247, 0.221551373038988, -1.06107954265125, 2.83887201588367,
-7.31010494985321, 13.9855979699139, -0.114305766176437, -7.4361899359832,
-15.8954215629556, 79.6186327084103, -127.457278992502, 63.7349456687829);
b_odd_q = mat_odd * b_odd;
b_even_q = mat_even * b_even;
}
void offsetAndDequantizeMoments(out vec4 b_even, out vec4 b_odd, vec4 b_even_q, vec4 b_odd_q)
{
const mat4x4 mat_odd = mat4x4(-0.00482399708502382, -0.423201508674231, 0.0348312382605129, 1.67179208266592,
-0.0233402218644408, -0.832829097046478, 0.0193406040499625, 1.21021509068975,
-0.010888537031885, -0.926393772997063, -0.11723394414779, 0.983723301818275,
-0.0308713357806732, -0.937989172670245, -0.218033377677099, 0.845991731322996);
const mat4x4 mat_even = mat4x4(-0.976220278891035, -0.456139260269401, -0.0504335521016742, 0.000838800390651085,
-1.04828341778299, -0.229726640510149, 0.0259608334616091, -0.00133632693205861,
-1.03115268628604, -0.077844420809897, 0.00443408851014257, -0.0103744938457406,
-0.996038443434636, 0.0175438624416783, -0.0361414253243963, -0.00317839994022725);
offsetMoments(b_even_q, b_odd_q, -1.0);
b_odd = mat_odd * b_odd_q;
b_even = mat_even * b_even_q;
}
/*! This function reconstructs the transmittance at the given depth from four
normalized power moments and the given zeroth moment.*/
float computeTransmittanceAtDepthFrom4PowerMoments(float b_0, vec2 b_even, vec2 b_odd, float depth, float bias, float overestimation, vec4 bias_vector)
{
vec4 b = vec4(b_odd.x, b_even.x, b_odd.y, b_even.y);
// Bias input data to avoid artifacts
b = mix(b, bias_vector, bias);
vec3 z;
z[0] = depth;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-
// trivial entries or related products
float L21D11=fma(-b[0], b[1], b[2]);
float D11=fma(-b[0], b[0], b[1]);
float InvD11=1.0f/D11;
float L21=L21D11*InvD11;
float SquaredDepthVariance=fma(-b[1], b[1], b[3]);
float D22=fma(-L21D11, L21, SquaredDepthVariance);
// Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T
vec3 c=vec3(1.0f, z[0], z[0]*z[0]);
// Forward substitution to solve L*c1=bz
c[1]-=b.x;
c[2]-=b.y+L21*c[1];
// Scaling to solve D*c2=c1
c[1]*=InvD11;
c[2]/=D22;
// Backward substitution to solve L^T*c3=c2
c[1]-=L21*c[2];
c[0]-=dot(c.yz, b.xy);
// Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions
// z[1] and z[2]
float InvC2=1.0f/c[2];
float p=c[1]*InvC2;
float q=c[0]*InvC2;
float D=(p*p*0.25f)-q;
float r=sqrt(D);
z[1]=-p*0.5f-r;
z[2]=-p*0.5f+r;
// Compute the absorbance by summing the appropriate weights
vec3 polynomial;
vec3 weight_factor = vec3(overestimation, (z[1] < z[0])?1.0f:0.0f, (z[2] < z[0])?1.0f:0.0f);
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
float f01=(f1-f0)/(z[1]-z[0]);
float f12=(f2-f1)/(z[2]-z[1]);
float f012=(f12-f01)/(z[2]-z[0]);
polynomial[0]=f012;
polynomial[1]=polynomial[0];
polynomial[0]=f01-polynomial[0]*z[1];
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-polynomial[1]*z[0];
polynomial[0]=f0-polynomial[0]*z[0];
float absorbance = polynomial[0] + dot(b.xy, polynomial.yz);;
// Turn the normalized absorbance into transmittance
return saturate(exp(-b_0 * absorbance));
}
/*! This function reconstructs the transmittance at the given depth from six
normalized power moments and the given zeroth moment.*/
float computeTransmittanceAtDepthFrom6PowerMoments(float b_0, vec3 b_even, vec3 b_odd, float depth, float bias, float overestimation, float bias_vector[6])
{
float b[6] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z };
// Bias input data to avoid artifacts
for (int i = 0; i != 6; ++i) {
b[i] = mix(b[i], bias_vector[i], bias);
}
vec4 z;
z[0] = depth;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-
// trivial entries or related products
float InvD11 = 1.0f / fma(-b[0], b[0], b[1]);
float L21D11 = fma(-b[0], b[1], b[2]);
float L21 = L21D11*InvD11;
float D22 = fma(-L21D11, L21, fma(-b[1], b[1], b[3]));
float L31D11 = fma(-b[0], b[2], b[3]);
float L31 = L31D11*InvD11;
float InvD22 = 1.0f / D22;
float L32D22 = fma(-L21D11, L31, fma(-b[1], b[2], b[4]));
float L32 = L32D22*InvD22;
float D33 = fma(-b[2], b[2], b[5]) - dot(vec2(L31D11, L32D22), vec2(L31, L32));
float InvD33 = 1.0f / D33;
// Construct the polynomial whose roots have to be points of support of the
// canonical distribution: bz=(1,z[0],z[0]*z[0],z[0]*z[0]*z[0])^T
vec4 c;
c[0] = 1.0f;
c[1] = z[0];
c[2] = c[1] * z[0];
c[3] = c[2] * z[0];
// Forward substitution to solve L*c1=bz
c[1] -= b[0];
c[2] -= fma(L21, c[1], b[1]);
c[3] -= b[2] + dot(vec2(L31, L32), c.yz);
// Scaling to solve D*c2=c1
c.yzw *= vec3(InvD11, InvD22, InvD33);
// Backward substitution to solve L^T*c3=c2
c[2] -= L32*c[3];
c[1] -= dot(vec2(L21, L31), c.zw);
c[0] -= dot(vec3(b[0], b[1], b[2]), c.yzw);
// Solve the cubic equation
z.yzw = SolveCubic(c);
// Compute the absorbance by summing the appropriate weights
vec4 weigth_factor;
weigth_factor[0] = overestimation;
weigth_factor.yzw = vec3(greaterThan(z.yzw, z.xxx));
// Construct an interpolation polynomial
float f0 = weigth_factor[0];
float f1 = weigth_factor[1];
float f2 = weigth_factor[2];
float f3 = weigth_factor[3];
float f01 = (f1 - f0) / (z[1] - z[0]);
float f12 = (f2 - f1) / (z[2] - z[1]);
float f23 = (f3 - f2) / (z[3] - z[2]);
float f012 = (f12 - f01) / (z[2] - z[0]);
float f123 = (f23 - f12) / (z[3] - z[1]);
float f0123 = (f123 - f012) / (z[3] - z[0]);
vec4 polynomial;
// f012+f0123 *(z-z2)
polynomial[0] = fma(-f0123, z[2], f012);
polynomial[1] = f0123;
// *(z-z1) +f01
polynomial[2] = polynomial[1];
polynomial[1] = fma(polynomial[1], -z[1], polynomial[0]);
polynomial[0] = fma(polynomial[0], -z[1], f01);
// *(z-z0) +f0
polynomial[3] = polynomial[2];
polynomial[2] = fma(polynomial[2], -z[0], polynomial[1]);
polynomial[1] = fma(polynomial[1], -z[0], polynomial[0]);
polynomial[0] = fma(polynomial[0], -z[0], f0);
float absorbance = dot(polynomial, vec4 (1.0, b[0], b[1], b[2]));
// Turn the normalized absorbance into transmittance
return saturate(exp(-b_0 * absorbance));
}
/*! This function reconstructs the transmittance at the given depth from eight
normalized power moments and the given zeroth moment.*/
float computeTransmittanceAtDepthFrom8PowerMoments(float b_0, vec4 b_even, vec4 b_odd, float depth, float bias, float overestimation, float bias_vector[8])
{
float b[8] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z, b_odd.w, b_even.w };
// Bias input data to avoid artifacts
for (int i = 0; i != 8; ++i) {
b[i] = mix(b[i], bias_vector[i], bias);
}
float z[5];
z[0] = depth;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-trivial entries or related products
float D22 = fma(-b[0], b[0], b[1]);
float InvD22 = 1.0 / D22;
float L32D22 = fma(-b[1], b[0], b[2]);
float L32 = L32D22 * InvD22;
float L42D22 = fma(-b[2], b[0], b[3]);
float L42 = L42D22 * InvD22;
float L52D22 = fma(-b[3], b[0], b[4]);
float L52 = L52D22 * InvD22;
float D33 = fma(-L32, L32D22, fma(-b[1], b[1], b[3]));
float InvD33 = 1.0 / D33;
float L43D33 = fma(-L42, L32D22, fma(-b[2], b[1], b[4]));
float L43 = L43D33 * InvD33;
float L53D33 = fma(-L52, L32D22, fma(-b[3], b[1], b[5]));
float L53 = L53D33 * InvD33;
float D44 = fma(-b[2], b[2], b[5]) - dot(vec2(L42, L43), vec2(L42D22, L43D33));
float InvD44 = 1.0 / D44;
float L54D44 = fma(-b[3], b[2], b[6]) - dot(vec2(L52, L53), vec2(L42D22, L43D33));
float L54 = L54D44 * InvD44;
float D55 = fma(-b[3], b[3], b[7]) - dot(vec3(L52, L53, L54), vec3(L52D22, L53D33, L54D44));
float InvD55 = 1.0 / D55;
// Construct the polynomial whose roots have to be points of support of the
// Canonical distribution:
// bz = (1,z[0],z[0]^2,z[0]^3,z[0]^4)^T
float c[5];
c[0] = 1.0;
c[1] = z[0];
c[2] = c[1] * z[0];
c[3] = c[2] * z[0];
c[4] = c[3] * z[0];
// Forward substitution to solve L*c1 = bz
c[1] -= b[0];
c[2] -= fma(L32, c[1], b[1]);
c[3] -= b[2] + dot(vec2(L42, L43), vec2(c[1], c[2]));
c[4] -= b[3] + dot(vec3(L52, L53, L54), vec3(c[1], c[2], c[3]));
// Scaling to solve D*c2 = c1
//c = c .*[1, InvD22, InvD33, InvD44, InvD55];
c[1] *= InvD22;
c[2] *= InvD33;
c[3] *= InvD44;
c[4] *= InvD55;
// Backward substitution to solve L^T*c3 = c2
c[3] -= L54 * c[4];
c[2] -= dot(vec2(L53, L43), vec2(c[4], c[3]));
c[1] -= dot(vec3(L52, L42, L32), vec3(c[4], c[3], c[2]));
c[0] -= dot(vec4(b[3], b[2], b[1], b[0]), vec4(c[4], c[3], c[2], c[1]));
// Solve the quartic equation
vec4 zz = solveQuarticNeumark(c);
z[1] = zz[0];
z[2] = zz[1];
z[3] = zz[2];
z[4] = zz[3];
// Compute the absorbance by summing the appropriate weights
vec4 weigth_factor = vec4(lessThanEqual(vec4(z[1], z[2], z[3], z[4]), z[0].xxxx));
// Construct an interpolation polynomial
float f0 = overestimation;
float f1 = weigth_factor[0];
float f2 = weigth_factor[1];
float f3 = weigth_factor[2];
float f4 = weigth_factor[3];
float f01 = (f1 - f0) / (z[1] - z[0]);
float f12 = (f2 - f1) / (z[2] - z[1]);
float f23 = (f3 - f2) / (z[3] - z[2]);
float f34 = (f4 - f3) / (z[4] - z[3]);
float f012 = (f12 - f01) / (z[2] - z[0]);
float f123 = (f23 - f12) / (z[3] - z[1]);
float f234 = (f34 - f23) / (z[4] - z[2]);
float f0123 = (f123 - f012) / (z[3] - z[0]);
float f1234 = (f234 - f123) / (z[4] - z[1]);
float f01234 = (f1234 - f0123) / (z[4] - z[0]);
float Polynomial_0;
vec4 Polynomial;
// f0123 + f01234 * (z - z3)
Polynomial_0 = fma(-f01234, z[3], f0123);
Polynomial[0] = f01234;
// * (z - z2) + f012
Polynomial[1] = Polynomial[0];
Polynomial[0] = fma(-Polynomial[0], z[2], Polynomial_0);
Polynomial_0 = fma(-Polynomial_0, z[2], f012);
// * (z - z1) + f01
Polynomial[2] = Polynomial[1];
Polynomial[1] = fma(-Polynomial[1], z[1], Polynomial[0]);
Polynomial[0] = fma(-Polynomial[0], z[1], Polynomial_0);
Polynomial_0 = fma(-Polynomial_0, z[1], f01);
// * (z - z0) + f1
Polynomial[3] = Polynomial[2];
Polynomial[2] = fma(-Polynomial[2], z[0], Polynomial[1]);
Polynomial[1] = fma(-Polynomial[1], z[0], Polynomial[0]);
Polynomial[0] = fma(-Polynomial[0], z[0], Polynomial_0);
Polynomial_0 = fma(-Polynomial_0, z[0], f0);
float absorbance = Polynomial_0 + dot(Polynomial, vec4(b[0], b[1], b[2], b[3]));
// Turn the normalized absorbance into transmittance
return saturate(exp(-b_0 * absorbance));
}

View file

@ -0,0 +1,253 @@
/*! \file
This header provides the functionality to create the vectors of moments and
to blend surfaces together with an appropriately reconstructed
transmittance. It is needed for both additive passes of moment-based OIT.
*/
//cbuffer MomentOIT
//{
// struct {
// vec4 wrapping_zone_parameters;
// float overestimation;
// float moment_bias;
// }MomentOIT;
//};
#include "flywheel:internal/mboit/moment_math.glsl"
const float moment_bias = 0.25;
const float overestimation = 0.25;
const vec4 wrapping_zone_parameters = vec4(0.);
void clip(float a) {
if (a < 0.) {
discard;
}
}
// jozu: The trigonometric moments and higher order power moments rely on a second render target
// which the java side is not set up to support. Trying to enable them as is will cause compile errors also.
#define NUM_MOMENTS 4
#define SINGLE_PRECISION 1
#ifdef _FLW_GENERATE_MOMENTS
/*! Generation of moments in case that rasterizer ordered views are used.
This includes the case if moments are stored in 16 bits. */
/*! This functions relies on fixed function additive blending to compute the
vector of moments.moment vector. The shader that calls this function must
provide the required render targets.*/
#if NUM_MOMENTS == 4
void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b)
#elif NUM_MOMENTS == 6
#if USE_R_RG_RBBA_FOR_MBOIT6
void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec4 b_3456)
#else
void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec2 b_34, out vec2 b_56)
#endif
#elif NUM_MOMENTS == 8
void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b_even, out vec4 b_odd)
#endif
{
transmittance = max(transmittance, 0.000001);
float absorbance = -log(transmittance);
b_0 = absorbance;
#if TRIGONOMETRIC
float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point = vec2(sin(phas), cos(phase));
vec2 circle_point_pow2 = Multiply(circle_point, circle_point);
#if NUM_MOMENTS == 4
b = vec4(circle_point, circle_point_pow2) * absorbance;
#elif NUM_MOMENTS == 6
b_12 = circle_point * absorbance;
#if USE_R_RG_RBBA_FOR_MBOIT6
b_3456 = vec4(circle_point_pow2, Multiply(circle_point, circle_point_pow2)) * absorbance;
#else
b_34 = circle_point_pow2 * absorbance;
b_56 = Multiply(circle_point, circle_point_pow2) * absorbance;
#endif
#elif NUM_MOMENTS == 8
b_even = vec4(circle_point_pow2, Multiply(circle_point_pow2, circle_point_pow2)) * absorbance;
b_odd = vec4(circle_point, Multiply(circle_point, circle_point_pow2)) * absorbance;
#endif
#else
float depth_pow2 = depth * depth;
float depth_pow4 = depth_pow2 * depth_pow2;
#if NUM_MOMENTS == 4
b = vec4(depth, depth_pow2, depth_pow2 * depth, depth_pow4) * absorbance;
#elif NUM_MOMENTS == 6
b_12 = vec2(depth, depth_pow2) * absorbance;
#if USE_R_RG_RBBA_FOR_MBOIT6
b_3456 = vec4(depth_pow2 * depth, depth_pow4, depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance;
#else
b_34 = vec2(depth_pow2 * depth, depth_pow4) * absorbance;
b_56 = vec2(depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance;
#endif
#elif NUM_MOMENTS == 8
float depth_pow6 = depth_pow4 * depth_pow2;
b_even = vec4(depth_pow2, depth_pow4, depth_pow6, depth_pow6 * depth_pow2) * absorbance;
b_odd = vec4(depth, depth_pow2 * depth, depth_pow4 * depth, depth_pow6 * depth) * absorbance;
#endif
#endif
}
#else//MOMENT_GENERATION is disabled
layout (binding = 7) uniform sampler2D _flw_zeroth_moment_sampler;
layout (binding = 8) uniform sampler2D _flw_moments_sampler;
#if USE_R_RG_RBBA_FOR_MBOIT6
uniform sampler2D extra_moments;
#endif
/*! This function is to be called from the shader that composites the
transparent fragments. It reads the moments and calls the appropriate
function to reconstruct the transmittance at the specified depth.*/
void resolveMoments(out float transmittance_at_depth, out float total_transmittance, float depth, vec2 sv_pos)
{
ivec2 idx0 = ivec2(sv_pos);
ivec2 idx1 = idx0;
transmittance_at_depth = 1;
total_transmittance = 1;
float b_0 = texelFetch(_flw_zeroth_moment_sampler, idx0, 0).x;
clip(b_0 - 0.00100050033f);
total_transmittance = exp(-b_0);
#if NUM_MOMENTS == 4
#if TRIGONOMETRIC
vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0);
vec2 trig_b[2];
trig_b[0] = b_tmp.xy;
trig_b[1] = b_tmp.zw;
#if SINGLE_PRECISION
trig_b[0] /= b_0;
trig_b[1] /= b_0;
#else
trig_b[0] = fma(trig_b[0], 2.0, -1.0);
trig_b[1] = fma(trig_b[1], 2.0, -1.0);
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom2TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters);
#else
vec4 b_1234 = texelFetch(_flw_moments_sampler, idx0, 0).xyzw;
#if SINGLE_PRECISION
vec2 b_even = b_1234.yw;
vec2 b_odd = b_1234.xz;
b_even /= b_0;
b_odd /= b_0;
const vec4 bias_vector = vec4(0, 0.375, 0, 0.375);
#else
vec2 b_even_q = b_1234.yw;
vec2 b_odd_q = b_1234.xz;
// Dequantize the moments
vec2 b_even;
vec2 b_odd;
offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q);
const vec4 bias_vector = vec4(0, 0.628, 0, 0.628);
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom4PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector);
#endif
#elif NUM_MOMENTS == 6
ivec2 idx2 = idx0;
#if TRIGONOMETRIC
vec2 trig_b[3];
trig_b[0] = texelFetch(_flw_moments_sampler, idx0, 0).xy;
#if USE_R_RG_RBBA_FOR_MBOIT6
vec4 tmp = texelFetch(extra_moments, idx0, 0);
trig_b[1] = tmp.xy;
trig_b[2] = tmp.zw;
#else
trig_b[1] = texelFetch(_flw_moments_sampler, idx1, 0).xy;
trig_b[2] = texelFetch(_flw_moments_sampler, idx2, 0).xy;
#endif
#if SINGLE_PRECISION
trig_b[0] /= b_0;
trig_b[1] /= b_0;
trig_b[2] /= b_0;
#else
trig_b[0] = fma(trig_b[0], 2.0, -1.0);
trig_b[1] = fma(trig_b[1], 2.0, -1.0);
trig_b[2] = fma(trig_b[2], 2.0, -1.0);
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom3TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters);
#else
vec2 b_12 = texelFetch(_flw_moments_sampler, idx0, 0).xy;
#if USE_R_RG_RBBA_FOR_MBOIT6
vec4 tmp = texelFetch(extra_moments, idx0, 0);
vec2 b_34 = tmp.xy;
vec2 b_56 = tmp.zw;
#else
vec2 b_34 = texelFetch(_flw_moments_sampler, idx1, 0).xy;
vec2 b_56 = texelFetch(_flw_moments_sampler, idx2, 0).xy;
#endif
#if SINGLE_PRECISION
vec3 b_even = vec3(b_12.y, b_34.y, b_56.y);
vec3 b_odd = vec3(b_12.x, b_34.x, b_56.x);
b_even /= b_0;
b_odd /= b_0;
const float bias_vector[6] = { 0, 0.48, 0, 0.451, 0, 0.45 };
#else
vec3 b_even_q = vec3(b_12.y, b_34.y, b_56.y);
vec3 b_odd_q = vec3(b_12.x, b_34.x, b_56.x);
// Dequantize b_0 and the other moments
vec3 b_even;
vec3 b_odd;
offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q);
const float bias_vector[6] = { 0, 0.5566, 0, 0.489, 0, 0.47869382 };
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom6PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector);
#endif
#elif NUM_MOMENTS == 8
#if TRIGONOMETRIC
vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0);
vec4 b_tmp2 = texelFetch(_flw_moments_sampler, idx1, 0);
#if SINGLE_PRECISION
vec2 trig_b[4] = {
b_tmp2.xy / b_0,
b_tmp.xy / b_0,
b_tmp2.zw / b_0,
b_tmp.zw / b_0
};
#else
vec2 trig_b[4] = {
fma(b_tmp2.xy, 2.0, -1.0),
fma(b_tmp.xy, 2.0, -1.0),
fma(b_tmp2.zw, 2.0, -1.0),
fma(b_tmp.zw, 2.0, -1.0)
};
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom4TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters);
#else
#if SINGLE_PRECISION
vec4 b_even = texelFetch(_flw_moments_sampler, idx0, 0);
vec4 b_odd = texelFetch(_flw_moments_sampler, idx1, 0);
b_even /= b_0;
b_odd /= b_0;
const float bias_vector[8] = { 0, 0.75, 0, 0.67666666666666664, 0, 0.63, 0, 0.60030303030303034 };
#else
vec4 b_even_q = texelFetch(_flw_moments_sampler, idx0, 0);
vec4 b_odd_q = texelFetch(_flw_moments_sampler, idx1, 0);
// Dequantize the moments
vec4 b_even;
vec4 b_odd;
offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q);
const float bias_vector[8] = { 0, 0.42474916387959866, 0, 0.22407802675585284, 0, 0.15369230769230768, 0, 0.12900440529089119 };
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom8PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector);
#endif
#endif
}
#endif

View file

@ -0,0 +1,311 @@
/*! \file
This header provides the utility functions to reconstruct the transmittance
from a given vector of trigonometric moments (2, 3 or 4 trigonometric
moments) at a specified depth.*/
#include "flywheel:internal/mboit/complex_algebra.glsl"
/*! This utility function turns a point on the unit circle into a scalar
parameter. It is guaranteed to grow monotonically for (cos(phi),sin(phi))
with phi in 0 to 2*pi. There are no other guarantees. In particular it is
not an arclength parametrization. If you change this function, you must
also change circleToParameter() in MomentOIT.cpp.*/
float circleToParameter(vec2 circle_point){
float result=abs(circle_point.y)-abs(circle_point.x);
result=(circle_point.x<0.0f)?(2.0f-result):result;
return (circle_point.y<0.0f)?(6.0f-result):result;
}
/*! This utility function returns the appropriate weight factor for a root at
the given location. Both inputs are supposed to be unit vectors. If a
circular arc going counter clockwise from (1.0,0.0) meets root first, it
returns 1.0, otherwise 0.0 or a linear ramp in the wrapping zone.*/
float getRootWeightFactor(float reference_parameter, float root_parameter, vec4 wrapping_zone_parameters){
float binary_weight_factor=(root_parameter<reference_parameter)?1.0f:0.0f;
float linear_weight_factor=saturate(mad(root_parameter, wrapping_zone_parameters.z, wrapping_zone_parameters.w));
return binary_weight_factor+linear_weight_factor;
}
/*! This function reconstructs the transmittance at the given depth from two
normalized trigonometric moments.*/
float computeTransmittanceAtDepthFrom2TrigonometricMoments(float b_0, vec2 trig_b[2], float depth, float bias, float overestimation, vec4 wrapping_zone_parameters)
{
// Apply biasing and reformat the inputs a little bit
float moment_scale = 1.0f - bias;
vec2 b[3] = {
vec2(1.0f, 0.0f),
trig_b[0] * moment_scale,
trig_b[1] * moment_scale
};
// Compute a Cholesky factorization of the Toeplitz matrix
float D00=RealPart(b[0]);
float InvD00=1.0f/D00;
vec2 L10=(b[1])*InvD00;
float D11=RealPart(b[0]-D00*Multiply(L10, Conjugate(L10)));
float InvD11=1.0f/D11;
vec2 L20=(b[2])*InvD00;
vec2 L21=(b[1]-D00*Multiply(L20, Conjugate(L10)))*InvD11;
float D22=RealPart(b[0]-D00*Multiply(L20, Conjugate(L20))-D11*Multiply(L21, Conjugate(L21)));
float InvD22=1.0f/D22;
// Solve a linear system to get the relevant polynomial
float phase = mad(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point;
sincos(phase, circle_point.y, circle_point.x);
vec2 c[3] = {
vec2(1.0f, 0.0f),
circle_point,
Multiply(circle_point, circle_point)
};
c[1]-=Multiply(L10, c[0]);
c[2]-=Multiply(L20, c[0])+Multiply(L21, c[1]);
c[0]*=InvD00;
c[1]*=InvD11;
c[2]*=InvD22;
c[1]-=Multiply(Conjugate(L21), c[2]);
c[0]-=Multiply(Conjugate(L10), c[1])+Multiply(Conjugate(L20), c[2]);
// Compute roots of the polynomial
vec2 pRoot[2];
SolveQuadratic(pRoot, Conjugate(c[2]), Conjugate(c[1]), Conjugate(c[0]));
// Figure out how to weight the weights
float depth_parameter = circleToParameter(circle_point);
vec3 weight_factor;
weight_factor[0] = overestimation;
for (int i = 0; i != 2; ++i)
{
float root_parameter = circleToParameter(pRoot[i]);
weight_factor[i+1] = getRootWeightFactor(depth_parameter, root_parameter, wrapping_zone_parameters);
}
// Compute the appropriate linear combination of weights
vec2 z[3] = { circle_point, pRoot[0], pRoot[1] };
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
vec2 f01=Divide(f1-f0, z[1]-z[0]);
vec2 f12=Divide(f2-f1, z[2]-z[1]);
vec2 f012=Divide(f12-f01, z[2]-z[0]);
vec2 polynomial[3];
polynomial[0]=f012;
polynomial[1]=polynomial[0];
polynomial[0]=f01-Multiply(polynomial[0], z[1]);
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[0]);
polynomial[0]=f0-Multiply(polynomial[0], z[0]);
float weight_sum=0.0f;
weight_sum+=RealPart(Multiply(b[0], polynomial[0]));
weight_sum+=RealPart(Multiply(b[1], polynomial[1]));
weight_sum+=RealPart(Multiply(b[2], polynomial[2]));
// Turn the normalized absorbance into transmittance
return exp(-b_0 * weight_sum);
}
/*! This function reconstructs the transmittance at the given depth from three
normalized trigonometric moments. */
float computeTransmittanceAtDepthFrom3TrigonometricMoments(float b_0, vec2 trig_b[3], float depth, float bias, float overestimation, vec4 wrapping_zone_parameters)
{
// Apply biasing and reformat the inputs a little bit
float moment_scale = 1.0f - bias;
vec2 b[4] = {
vec2(1.0f, 0.0f),
trig_b[0] * moment_scale,
trig_b[1] * moment_scale,
trig_b[2] * moment_scale
};
// Compute a Cholesky factorization of the Toeplitz matrix
float D00=RealPart(b[0]);
float InvD00=1.0f/D00;
vec2 L10=(b[1])*InvD00;
float D11=RealPart(b[0]-D00*Multiply(L10, Conjugate(L10)));
float InvD11=1.0f/D11;
vec2 L20=(b[2])*InvD00;
vec2 L21=(b[1]-D00*Multiply(L20, Conjugate(L10)))*InvD11;
float D22=RealPart(b[0]-D00*Multiply(L20, Conjugate(L20))-D11*Multiply(L21, Conjugate(L21)));
float InvD22=1.0f/D22;
vec2 L30=(b[3])*InvD00;
vec2 L31=(b[2]-D00*Multiply(L30, Conjugate(L10)))*InvD11;
vec2 L32=(b[1]-D00*Multiply(L30, Conjugate(L20))-D11*Multiply(L31, Conjugate(L21)))*InvD22;
float D33=RealPart(b[0]-D00*Multiply(L30, Conjugate(L30))-D11*Multiply(L31, Conjugate(L31))-D22*Multiply(L32, Conjugate(L32)));
float InvD33=1.0f/D33;
// Solve a linear system to get the relevant polynomial
float phase = mad(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point;
sincos(phase, circle_point.y, circle_point.x);
vec2 circle_point_pow2 = Multiply(circle_point, circle_point);
vec2 c[4] = {
vec2(1.0f, 0.0f),
circle_point,
circle_point_pow2,
Multiply(circle_point, circle_point_pow2)
};
c[1]-=Multiply(L10, c[0]);
c[2]-=Multiply(L20, c[0])+Multiply(L21, c[1]);
c[3]-=Multiply(L30, c[0])+Multiply(L31, c[1])+Multiply(L32, c[2]);
c[0]*=InvD00;
c[1]*=InvD11;
c[2]*=InvD22;
c[3]*=InvD33;
c[2]-=Multiply(Conjugate(L32), c[3]);
c[1]-=Multiply(Conjugate(L21), c[2])+Multiply(Conjugate(L31), c[3]);
c[0]-=Multiply(Conjugate(L10), c[1])+Multiply(Conjugate(L20), c[2])+Multiply(Conjugate(L30), c[3]);
// Compute roots of the polynomial
vec2 pRoot[3];
SolveCubicBlinn(pRoot, Conjugate(c[3]), Conjugate(c[2]), Conjugate(c[1]), Conjugate(c[0]));
// The roots are known to be normalized but for reasons of numerical
// stability it can be better to enforce that
//pRoot[0]=normalize(pRoot[0]);
//pRoot[1]=normalize(pRoot[1]);
//pRoot[2]=normalize(pRoot[2]);
// Figure out how to weight the weights
float depth_parameter = circleToParameter(circle_point);
vec4 weight_factor;
weight_factor[0] = overestimation;
for (int i = 0; i != 3; ++i)
{
float root_parameter = circleToParameter(pRoot[i]);
weight_factor[i+1] = getRootWeightFactor(depth_parameter, root_parameter, wrapping_zone_parameters);
}
// Compute the appropriate linear combination of weights
vec2 z[4] = { circle_point, pRoot[0], pRoot[1], pRoot[2] };
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
float f3=weight_factor[3];
vec2 f01=Divide(f1-f0, z[1]-z[0]);
vec2 f12=Divide(f2-f1, z[2]-z[1]);
vec2 f23=Divide(f3-f2, z[3]-z[2]);
vec2 f012=Divide(f12-f01, z[2]-z[0]);
vec2 f123=Divide(f23-f12, z[3]-z[1]);
vec2 f0123=Divide(f123-f012, z[3]-z[0]);
vec2 polynomial[4];
polynomial[0]=f0123;
polynomial[1]=polynomial[0];
polynomial[0]=f012-Multiply(polynomial[0], z[2]);
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[1]);
polynomial[0]=f01-Multiply(polynomial[0], z[1]);
polynomial[3]=polynomial[2];
polynomial[2]=polynomial[1]-Multiply(polynomial[2], z[0]);
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[0]);
polynomial[0]=f0-Multiply(polynomial[0], z[0]);
float weight_sum=0;
weight_sum+=RealPart(Multiply(b[0], polynomial[0]));
weight_sum+=RealPart(Multiply(b[1], polynomial[1]));
weight_sum+=RealPart(Multiply(b[2], polynomial[2]));
weight_sum+=RealPart(Multiply(b[3], polynomial[3]));
// Turn the normalized absorbance into transmittance
return exp(-b_0 * weight_sum);
}
/*! This function reconstructs the transmittance at the given depth from four
normalized trigonometric moments.*/
float computeTransmittanceAtDepthFrom4TrigonometricMoments(float b_0, vec2 trig_b[4], float depth, float bias, float overestimation, vec4 wrapping_zone_parameters)
{
// Apply biasing and reformat the inputs a little bit
float moment_scale = 1.0f - bias;
vec2 b[5] = {
vec2(1.0f, 0.0f),
trig_b[0] * moment_scale,
trig_b[1] * moment_scale,
trig_b[2] * moment_scale,
trig_b[3] * moment_scale
};
// Compute a Cholesky factorization of the Toeplitz matrix
float D00=RealPart(b[0]);
float InvD00=1.0/D00;
vec2 L10=(b[1])*InvD00;
float D11=RealPart(b[0]-D00*Multiply(L10, Conjugate(L10)));
float InvD11=1.0/D11;
vec2 L20=(b[2])*InvD00;
vec2 L21=(b[1]-D00*Multiply(L20, Conjugate(L10)))*InvD11;
float D22=RealPart(b[0]-D00*Multiply(L20, Conjugate(L20))-D11*Multiply(L21, Conjugate(L21)));
float InvD22=1.0/D22;
vec2 L30=(b[3])*InvD00;
vec2 L31=(b[2]-D00*Multiply(L30, Conjugate(L10)))*InvD11;
vec2 L32=(b[1]-D00*Multiply(L30, Conjugate(L20))-D11*Multiply(L31, Conjugate(L21)))*InvD22;
float D33=RealPart(b[0]-D00*Multiply(L30, Conjugate(L30))-D11*Multiply(L31, Conjugate(L31))-D22*Multiply(L32, Conjugate(L32)));
float InvD33=1.0/D33;
vec2 L40=(b[4])*InvD00;
vec2 L41=(b[3]-D00*Multiply(L40, Conjugate(L10)))*InvD11;
vec2 L42=(b[2]-D00*Multiply(L40, Conjugate(L20))-D11*Multiply(L41, Conjugate(L21)))*InvD22;
vec2 L43=(b[1]-D00*Multiply(L40, Conjugate(L30))-D11*Multiply(L41, Conjugate(L31))-D22*Multiply(L42, Conjugate(L32)))*InvD33;
float D44=RealPart(b[0]-D00*Multiply(L40, Conjugate(L40))-D11*Multiply(L41, Conjugate(L41))-D22*Multiply(L42, Conjugate(L42))-D33*Multiply(L43, Conjugate(L43)));
float InvD44=1.0/D44;
// Solve a linear system to get the relevant polynomial
float phase = mad(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point;
sincos(phase, circle_point.y, circle_point.x);
vec2 circle_point_pow2 = Multiply(circle_point, circle_point);
vec2 c[5] = {
vec2(1.0f, 0.0f),
circle_point,
circle_point_pow2,
Multiply(circle_point, circle_point_pow2),
Multiply(circle_point_pow2, circle_point_pow2)
};
c[1]-=Multiply(L10, c[0]);
c[2]-=Multiply(L20, c[0])+Multiply(L21, c[1]);
c[3]-=Multiply(L30, c[0])+Multiply(L31, c[1])+Multiply(L32, c[2]);
c[4]-=Multiply(L40, c[0])+Multiply(L41, c[1])+Multiply(L42, c[2])+Multiply(L43, c[3]);
c[0]*=InvD00;
c[1]*=InvD11;
c[2]*=InvD22;
c[3]*=InvD33;
c[4]*=InvD44;
c[3]-=Multiply(Conjugate(L43), c[4]);
c[2]-=Multiply(Conjugate(L32), c[3])+Multiply(Conjugate(L42), c[4]);
c[1]-=Multiply(Conjugate(L21), c[2])+Multiply(Conjugate(L31), c[3])+Multiply(Conjugate(L41), c[4]);
c[0]-=Multiply(Conjugate(L10), c[1])+Multiply(Conjugate(L20), c[2])+Multiply(Conjugate(L30), c[3])+Multiply(Conjugate(L40), c[4]);
// Compute roots of the polynomial
vec2 pRoot[4];
SolveQuarticNeumark(pRoot, Conjugate(c[4]), Conjugate(c[3]), Conjugate(c[2]), Conjugate(c[1]), Conjugate(c[0]));
// Figure out how to weight the weights
float depth_parameter = circleToParameter(circle_point);
float weight_factor[5];
weight_factor[0] = overestimation;
for (int i = 0; i != 4; ++i)
{
float root_parameter = circleToParameter(pRoot[i]);
weight_factor[i+1] = getRootWeightFactor(depth_parameter, root_parameter, wrapping_zone_parameters);
}
// Compute the appropriate linear combination of weights
vec2 z[5] = { circle_point, pRoot[0], pRoot[1], pRoot[2], pRoot[3] };
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
float f3=weight_factor[3];
float f4=weight_factor[4];
vec2 f01=Divide(f1-f0, z[1]-z[0]);
vec2 f12=Divide(f2-f1, z[2]-z[1]);
vec2 f23=Divide(f3-f2, z[3]-z[2]);
vec2 f34=Divide(f4-f3, z[4]-z[3]);
vec2 f012=Divide(f12-f01, z[2]-z[0]);
vec2 f123=Divide(f23-f12, z[3]-z[1]);
vec2 f234=Divide(f34-f23, z[4]-z[2]);
vec2 f0123=Divide(f123-f012, z[3]-z[0]);
vec2 f1234=Divide(f234-f123, z[4]-z[1]);
vec2 f01234=Divide(f1234-f0123, z[4]-z[0]);
vec2 polynomial[5];
polynomial[0]=f01234;
polynomial[1]=polynomial[0];
polynomial[0]=f0123-Multiply(polynomial[0], z[3]);
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[2]);
polynomial[0]=f012-Multiply(polynomial[0], z[2]);
polynomial[3]=polynomial[2];
polynomial[2]=polynomial[1]-Multiply(polynomial[2], z[1]);
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[1]);
polynomial[0]=f01-Multiply(polynomial[0], z[1]);
polynomial[4]=polynomial[3];
polynomial[3]=polynomial[2]-Multiply(polynomial[3], z[0]);
polynomial[2]=polynomial[1]-Multiply(polynomial[2], z[0]);
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[0]);
polynomial[0]=f0-Multiply(polynomial[0], z[0]);
float weight_sum=0;
weight_sum+=RealPart(Multiply(b[0], polynomial[0]));
weight_sum+=RealPart(Multiply(b[1], polynomial[1]));
weight_sum+=RealPart(Multiply(b[2], polynomial[2]));
weight_sum+=RealPart(Multiply(b[3], polynomial[3]));
weight_sum+=RealPart(Multiply(b[4], polynomial[4]));
// Turn the normalized absorbance into transmittance
return exp(-b_0 * weight_sum);
}

View file

@ -52,6 +52,13 @@ public class ShulkerBoxVisual extends AbstractBlockEntityVisual<ShulkerBoxBlockE
instances = InstanceTree.create(instancerProvider(), ModelTrees.of(ModelLayers.SHULKER, PATHS_TO_PRUNE, texture, MATERIAL));
lid = instances.childOrThrow("lid");
lid.instance()
.color(255, 255, 255, 255);
instances.childOrThrow("base")
.instance()
.color(255, 255, 255, 255);
initialPose = createInitialPose();
applyTransform(partialTick);
}