diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java index 4441c6246..b6e40e291 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java @@ -10,4 +10,7 @@ public class Samplers { public static final GlTextureUnit INSTANCE_BUFFER = GlTextureUnit.T4; public static final GlTextureUnit LIGHT_LUT = GlTextureUnit.T5; public static final GlTextureUnit LIGHT_SECTIONS = GlTextureUnit.T6; + + public static final GlTextureUnit ZEROTH_MOMENT = GlTextureUnit.T7; + public static final GlTextureUnit MOMENTS = GlTextureUnit.T8; } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index 9d3ce83e0..cc985b278 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -164,7 +164,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { setInstance(null); } - public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader, Material material, boolean oit) { + public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader, Material material, PipelineCompiler.OitMode oit) { return pipeline.get(instanceType, contextShader, material, oit); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java index db3014402..392435849 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java @@ -70,7 +70,7 @@ public class InstancingPrograms extends AtomicReferenceCounted { } public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material) { - return pipeline.get(instanceType, contextShader, material, false); + return pipeline.get(instanceType, contextShader, material, PipelineCompiler.OitMode.OFF); } @Override diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java index e315dce35..f33486315 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java @@ -50,7 +50,7 @@ public final class PipelineCompiler { ALL.add(this); } - public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material, boolean oit) { + public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material, OitMode oit) { var light = material.light(); var cutout = material.cutout(); var shaders = material.shaders(); @@ -128,7 +128,7 @@ public final class PipelineCompiler { .source()); var debug = key.debugEnabled() ? "_debug" : ""; var cutout = key.useCutout() ? "_cutout" : ""; - var oit = key.oit() ? "_oit" : ""; + var oit = key.oit().name; return "pipeline/" + pipeline.compilerMarker() + "/frag/" + material + "/" + light + "_" + context + cutout + debug + oit; }) .requireExtensions(extensions) @@ -148,8 +148,9 @@ public final class PipelineCompiler { } }) .onCompile((key, comp) -> { - if (key.oit()) { + if (key.oit() != OitMode.OFF) { comp.define("_FLW_OIT"); + comp.define(key.oit().define); } }) .withResource(API_IMPL_FRAG) @@ -224,6 +225,21 @@ public final class PipelineCompiler { */ public record PipelineProgramKey(InstanceType instanceType, ContextShader contextShader, LightShader light, MaterialShaders materialShaders, boolean useCutout, boolean debugEnabled, - boolean oit) { + OitMode oit) { + } + + public enum OitMode { + OFF("", ""), + GENERATE("_FLW_GENERATE_MOMENTS", "_generate"), + RESOLVE("_FLW_RESOLVE_MOMENTS", "_resolve"), + ; + + public final String define; + public final String name; + + OitMode(String define, String name) { + this.define = define; + this.name = name; + } } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index aa605f017..25e38ad11 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -17,6 +17,7 @@ import dev.engine_room.flywheel.api.material.Transparency; import dev.engine_room.flywheel.api.model.Model; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.compile.PipelineCompiler; import dev.engine_room.flywheel.backend.engine.InstancerKey; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; @@ -188,7 +189,7 @@ public class IndirectCullingGroup { GlProgram lastProgram = null; for (var multiDraw : multiDraws) { - var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, false); + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, PipelineCompiler.OitMode.OFF); if (drawProgram != lastProgram) { lastProgram = drawProgram; @@ -202,7 +203,7 @@ public class IndirectCullingGroup { } } - public void submitTransparent() { + public void submitTransparent(PipelineCompiler.OitMode oit) { if (nothingToDo()) { return; } @@ -214,7 +215,7 @@ public class IndirectCullingGroup { GlProgram lastProgram = null; for (var multiDraw : transparentDraws) { - var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, true); + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, oit); if (drawProgram != lastProgram) { lastProgram = drawProgram; @@ -229,7 +230,7 @@ public class IndirectCullingGroup { } public void bindForCrumbling(Material material) { - var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material, false); + var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material, PipelineCompiler.OitMode.OFF); program.bind(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index a61677865..afc62d5af 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -17,6 +17,7 @@ import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.InstanceType; import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.compile.PipelineCompiler; import dev.engine_room.flywheel.backend.engine.AbstractInstancer; import dev.engine_room.flywheel.backend.engine.CommonCrumbling; import dev.engine_room.flywheel.backend.engine.DrawManager; @@ -48,7 +49,7 @@ public class IndirectDrawManager extends DrawManager> { private final DepthPyramid depthPyramid; - private final WboitFrameBuffer wboitFrameBuffer; + private final MboitFramebuffer wboitFrameBuffer; public IndirectDrawManager(IndirectPrograms programs) { this.programs = programs; @@ -65,7 +66,7 @@ public class IndirectDrawManager extends DrawManager> { depthPyramid = new DepthPyramid(programs); - wboitFrameBuffer = new WboitFrameBuffer(programs); + wboitFrameBuffer = new MboitFramebuffer(programs); } @Override @@ -145,10 +146,16 @@ public class IndirectDrawManager extends DrawManager> { group.submitSolid(); } - wboitFrameBuffer.setup(); + wboitFrameBuffer.generateMoments(); for (var group : cullingGroups.values()) { - group.submitTransparent(); + group.submitTransparent(PipelineCompiler.OitMode.GENERATE); + } + + wboitFrameBuffer.resolveMoments(); + + for (var group : cullingGroups.values()) { + group.submitTransparent(PipelineCompiler.OitMode.RESOLVE); } wboitFrameBuffer.composite(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java new file mode 100644 index 000000000..06d283f20 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java @@ -0,0 +1,142 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.GL32; +import org.lwjgl.opengl.GL46; + +import com.mojang.blaze3d.platform.GlStateManager; +import com.mojang.blaze3d.systems.RenderSystem; + +import dev.engine_room.flywheel.backend.Samplers; +import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; +import net.minecraft.client.Minecraft; + +public class MboitFramebuffer { + + public final int fbo; + private final IndirectPrograms programs; + private final int vao; + + public int zerothMoment; + public int moments; + public int accumulate; + + private int lastWidth = -1; + private int lastHeight = -1; + + public MboitFramebuffer(IndirectPrograms programs) { + this.programs = programs; + fbo = GL46.glCreateFramebuffers(); + vao = GL46.glCreateVertexArrays(); + } + + public void generateMoments() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + createTextures(mainRenderTarget.width, mainRenderTarget.height); + + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(false); + RenderSystem.enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); + RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + + GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1}); + + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0}); + + GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); + } + + public void resolveMoments() { + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(false); + RenderSystem.enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); + RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + + Samplers.ZEROTH_MOMENT.makeActive(); + GlStateManager._bindTexture(zerothMoment); + + Samplers.MOMENTS.makeActive(); + GlStateManager._bindTexture(moments); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT2}); + + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); + + GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); + } + + public void composite() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + mainRenderTarget.bindWrite(false); + + var oitCompositeProgram = programs.getOitCompositeProgram(); + + GlStateManager._depthMask(false); + GlStateManager._depthFunc(GL46.GL_ALWAYS); + GlStateManager._enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA); + + oitCompositeProgram.bind(); + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(zerothMoment); + + GlTextureUnit.T1.makeActive(); + GlStateManager._bindTexture(accumulate); + + // Empty VAO, the actual full screen triangle is generated in the vertex shader + GlStateManager._glBindVertexArray(vao); + + GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); + } + + public void delete() { + GL46.glDeleteTextures(zerothMoment); + GL46.glDeleteTextures(moments); + GL46.glDeleteTextures(accumulate); + GL46.glDeleteFramebuffers(fbo); + GL46.glDeleteVertexArrays(vao); + } + + private void createTextures(int width, int height) { + if (lastWidth == width && lastHeight == height) { + return; + } + + lastWidth = width; + lastHeight = height; + + GL46.glDeleteTextures(zerothMoment); + GL46.glDeleteTextures(moments); + GL46.glDeleteTextures(accumulate); + + zerothMoment = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + moments = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + + GL46.glTextureStorage2D(zerothMoment, 1, GL32.GL_R16F, width, height); + GL46.glTextureStorage2D(moments, 1, GL32.GL_RGBA16F, width, height); + GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height); + + // for (int tex : new int[]{zerothMoment, moments, composite}) { + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + // } + + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, zerothMoment, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, moments, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT2, accumulate, 0); + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java deleted file mode 100644 index c76133709..000000000 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java +++ /dev/null @@ -1,117 +0,0 @@ -package dev.engine_room.flywheel.backend.engine.indirect; - -import org.lwjgl.opengl.ARBDrawBuffersBlend; -import org.lwjgl.opengl.GL32; -import org.lwjgl.opengl.GL46; - -import com.mojang.blaze3d.platform.GlStateManager; -import com.mojang.blaze3d.systems.RenderSystem; - -import dev.engine_room.flywheel.backend.compile.IndirectPrograms; -import dev.engine_room.flywheel.backend.gl.GlTextureUnit; -import net.minecraft.client.Minecraft; - -public class WboitFrameBuffer { - - public final int fbo; - private final IndirectPrograms programs; - private final int vao; - - public int accum; - public int reveal; - - private int lastWidth = -1; - private int lastHeight = -1; - - public WboitFrameBuffer(IndirectPrograms programs) { - this.programs = programs; - fbo = GL46.glCreateFramebuffers(); - vao = GL46.glCreateVertexArrays(); - } - - public void setup() { - var mainRenderTarget = Minecraft.getInstance() - .getMainRenderTarget(); - - createTextures(mainRenderTarget.width, mainRenderTarget.height); - - // No depth writes, but we'll still use the depth test - GlStateManager._depthMask(false); - GlStateManager._enableBlend(); - ARBDrawBuffersBlend.glBlendFunciARB(0, GL46.GL_ONE, GL46.GL_ONE); // accumulation blend target - ARBDrawBuffersBlend.glBlendFunciARB(1, GL46.GL_ZERO, GL46.GL_ONE_MINUS_SRC_COLOR); // revealage blend target - GlStateManager._blendEquation(GL46.GL_FUNC_ADD); - - GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); - - GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); - - GL46.glClearBufferfv(GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); - GL46.glClearBufferfv(GL46.GL_COLOR, 1, new float[]{1, 1, 1, 1}); - } - - public void composite() { - var mainRenderTarget = Minecraft.getInstance() - .getMainRenderTarget(); - - mainRenderTarget.bindWrite(false); - - var oitCompositeProgram = programs.getOitCompositeProgram(); - - GlStateManager._depthMask(false); - GlStateManager._depthFunc(GL46.GL_ALWAYS); - GlStateManager._enableBlend(); - RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.SRC_ALPHA, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA); - - oitCompositeProgram.bind(); - - GlTextureUnit.T0.makeActive(); - GlStateManager._bindTexture(accum); - - GlTextureUnit.T1.makeActive(); - GlStateManager._bindTexture(reveal); - - // Empty VAO, the actual full screen triangle is generated in the vertex shader - GlStateManager._glBindVertexArray(vao); - - GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); - } - - public void delete() { - GL46.glDeleteTextures(accum); - GL46.glDeleteTextures(reveal); - GL46.glDeleteFramebuffers(fbo); - GL46.glDeleteVertexArrays(vao); - } - - private void createTextures(int width, int height) { - if (lastWidth == width && lastHeight == height) { - return; - } - - lastWidth = width; - lastHeight = height; - - GL46.glDeleteTextures(accum); - GL46.glDeleteTextures(reveal); - - accum = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - reveal = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1}); - - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, accum, 0); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, reveal, 0); - - GL46.glTextureStorage2D(accum, 1, GL32.GL_RGBA32F, width, height); - GL46.glTextureStorage2D(reveal, 1, GL32.GL_R8, width, height); - - for (int tex : new int[]{accum, reveal}) { - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); - } - } -} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java index 636b9ac4c..b07070bc7 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java @@ -1,7 +1,5 @@ package dev.engine_room.flywheel.backend.gl; -import java.nio.ByteBuffer; - import org.jetbrains.annotations.UnknownNullability; import org.lwjgl.PointerBuffer; import org.lwjgl.opengl.GL; @@ -13,6 +11,7 @@ import org.lwjgl.opengl.GL43; import org.lwjgl.opengl.GLCapabilities; import org.lwjgl.opengl.KHRShaderSubgroup; import org.lwjgl.system.MemoryStack; +import org.lwjgl.system.MemoryUtil; import dev.engine_room.flywheel.backend.FlwBackend; import dev.engine_room.flywheel.backend.compile.core.Compilation; @@ -67,10 +66,11 @@ public final class GlCompat { */ public static void safeShaderSource(int glId, CharSequence source) { try (MemoryStack stack = MemoryStack.stackPush()) { - final ByteBuffer sourceBuffer = stack.UTF8(source, true); + var sourceBuffer = MemoryUtil.memUTF8(source, true); final PointerBuffer pointers = stack.mallocPointer(1); pointers.put(sourceBuffer); GL20C.nglShaderSource(glId, 1, pointers.address0(), 0); + MemoryUtil.memFree(sourceBuffer); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java b/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java index 3f7871d43..c36bf4f7f 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java @@ -1,6 +1,5 @@ package dev.engine_room.flywheel.backend.glsl.parse; -import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jetbrains.annotations.Nullable; @@ -34,20 +33,21 @@ public class ShaderField { * Scan the source for function definitions and "parse" them into objects that contain properties of the function. */ public static ImmutableMap parseFields(SourceLines source) { - Matcher matcher = PATTERN.matcher(source); + // Matcher matcher = PATTERN.matcher(source); + // + // ImmutableMap.Builder fields = ImmutableMap.builder(); + // while (matcher.find()) { + // Span self = Span.fromMatcher(source, matcher); + // Span location = Span.fromMatcher(source, matcher, 1); + // Span decoration = Span.fromMatcher(source, matcher, 2); + // Span type = Span.fromMatcher(source, matcher, 3); + // Span name = Span.fromMatcher(source, matcher, 4); + // + // fields.put(location.get(), new ShaderField(self, location, decoration, type, name)); + // } - ImmutableMap.Builder fields = ImmutableMap.builder(); - while (matcher.find()) { - Span self = Span.fromMatcher(source, matcher); - Span location = Span.fromMatcher(source, matcher, 1); - Span decoration = Span.fromMatcher(source, matcher, 2); - Span type = Span.fromMatcher(source, matcher, 3); - Span name = Span.fromMatcher(source, matcher, 4); - - fields.put(location.get(), new ShaderField(self, location, decoration, type, name)); - } - - return fields.build(); + return ImmutableMap.builder() + .build(); } public enum Qualifier { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index 7bc4ec54d..afd8ae647 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -1,6 +1,7 @@ #include "flywheel:internal/packed_material.glsl" #include "flywheel:internal/diffuse.glsl" #include "flywheel:internal/colorizer.glsl" +#include "flywheel:internal/mboit/moment_oit.glsl" // optimize discard usage #if defined(GL_ARB_conservative_depth) && defined(_FLW_USE_DISCARD) @@ -18,13 +19,13 @@ flat in uvec2 _flw_ids; #endif #ifdef _FLW_OIT - -// your first render target which is used to accumulate pre-multiplied color values -layout (location = 0) out vec4 accum; - -// your second render target which is used to store pixel revealage -layout (location = 1) out float reveal; - +#ifdef _FLW_GENERATE_MOMENTS +layout (location = 0) out float _flw_zerothMoment_out; +layout (location = 1) out vec4 _flw_moments_out; +#endif +#ifdef _FLW_RESOLVE_MOMENTS +layout (location = 0) out vec4 _flw_accumulate_out; +#endif #else out vec4 _flw_outputColor; @@ -118,26 +119,40 @@ void _flw_main() { color = flw_fogFilter(color); - color.a = 0.9; - #ifdef _FLW_OIT + float linearDepth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); - float depth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); + float lnNear = log(_flw_cullData.znear); + float lnFar = log(_flw_cullData.zfar); - // insert your favorite weighting function here. the color-based factor - // avoids color pollution from the edges of wispy clouds. the z-based - // factor gives precedence to nearer surfaces - //float weight = clamp(pow(min(1.0, color.a * 10.0) + 0.01, 3.0) * 1e8 * pow(1.0 - gl_FragCoord.z * 0.9, 3.0), 1e-2, 3e3); - float weight = max(min(1.0, max(max(color.r, color.g), color.b) * color.a), color.a) * - clamp(0.03 / (1e-5 + pow(depth / 200, 4.0)), 1e-2, 3e3); + float depth = (log(linearDepth) - lnNear); - // blend func: GL_ONE, GL_ONE - // switch to pre-multiplied alpha and weight - accum = vec4(color.rgb * color.a, color.a) * weight; + depth /= lnFar - lnNear; - // blend func: GL_ZERO, GL_ONE_MINUS_SRC_ALPHA - reveal = color.a; + depth = clamp(depth * 2. - 1., -1., 1.); + #ifdef _FLW_GENERATE_MOMENTS + + generateMoments(depth, 1 - color.a, vec4(0), _flw_zerothMoment_out, _flw_moments_out); + + #endif + #ifdef _FLW_RESOLVE_MOMENTS + + float tt; + float td; + resolveMoments(td, tt, depth, gl_FragCoord.xy); + + if (abs(td) < 1e-5) { + discard; + } + + color.rgb *= color.a; + + color *= td; + + _flw_accumulate_out = color; + + #endif #else _flw_outputColor = color; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag index 6c5a67419..3afe92839 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag @@ -1,48 +1,20 @@ -// shader outputs layout (location = 0) out vec4 frag; -// color accumulation buffer -layout (binding = 0) uniform sampler2D accum; - -// revealage threshold buffer -layout (binding = 1) uniform sampler2D reveal; - -// epsilon number -const float EPSILON = 0.00001f; - -// calculate floating point numbers equality accurately -bool isApproximatelyEqual(float a, float b) { - return abs(a - b) <= (abs(a) < abs(b) ? abs(b) : abs(a)) * EPSILON; -} - -// get the max value between three values -float max3(vec3 v) { - return max(max(v.x, v.y), v.z); -} +layout (binding = 0) uniform sampler2D zerothMoment; +layout (binding = 1) uniform sampler2D accumulate; void main() { - // fragment coordination ivec2 coords = ivec2(gl_FragCoord.xy); - // fragment revealage - float revealage = texelFetch(reveal, coords, 0).r; + float b0 = texelFetch(zerothMoment, coords, 0).r; - // save the blending and color texture fetch cost if there is not a transparent fragment - if (isApproximatelyEqual(revealage, 1.0f)) { + if (b0 < 1e-5) { discard; } - // fragment color - vec4 accumulation = texelFetch(accum, coords, 0); + vec4 accumulation = texelFetch(accumulate, coords, 0); - // suppress overflow - if (isinf(max3(abs(accumulation.rgb)))) { - accumulation.rgb = vec3(accumulation.a); - } + vec3 normalizedAccumulation = accumulation.rgb / max(accumulation.a, 1e-5); - // prevent floating point precision bug - vec3 average_color = accumulation.rgb / max(accumulation.a, EPSILON); - - // blend pixels - frag = vec4(average_color, 1.0f - revealage); + frag = vec4(normalizedAccumulation, exp(-b0)); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl new file mode 100644 index 000000000..b2cf49485 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl @@ -0,0 +1,203 @@ +/*! \file + This header defines utility functions to deal with complex numbers and + complex polynomials.*/ + +/*! Returns the complex conjugate of the given complex number (i.e. it changes + the sign of the y-component).*/ +vec2 Conjugate(vec2 Z){ + return vec2(Z.x, -Z.y); +} +/*! This function implements complex multiplication.*/ +vec2 Multiply(vec2 LHS, vec2 RHS){ + return vec2(LHS.x*RHS.x-LHS.y*RHS.y, LHS.x*RHS.y+LHS.y*RHS.x); +} +/*! This function computes the magnitude of the given complex number.*/ +float Magnitude(vec2 Z){ + return sqrt(dot(Z, Z)); +} +/*! This function computes the quotient of two complex numbers. The denominator + must not be zero.*/ +vec2 Divide(vec2 Numerator, vec2 Denominator){ + return vec2(Numerator.x*Denominator.x+Numerator.y*Denominator.y, -Numerator.x*Denominator.y+Numerator.y*Denominator.x)/dot(Denominator, Denominator); +} +/*! This function divides a real number by a complex number. The denominator + must not be zero.*/ +vec2 Divide(float Numerator, vec2 Denominator){ + return vec2(Numerator*Denominator.x, -Numerator*Denominator.y)/dot(Denominator, Denominator); +} +/*! This function implements computation of the reciprocal of the given non- + zero complex number.*/ +vec2 Reciprocal(vec2 Z){ + return vec2(Z.x, -Z.y)/dot(Z, Z); +} +/*! This utility function implements complex squaring.*/ +vec2 Square(vec2 Z){ + return vec2(Z.x*Z.x-Z.y*Z.y, 2.0f*Z.x*Z.y); +} +/*! This utility function implements complex computation of the third power.*/ +vec2 Cube(vec2 Z){ + return Multiply(Square(Z), Z); +} +/*! This utility function computes one square root of the given complex value. + The other one can be found using the unary minus operator. + \warning This function is continuous but not defined on the negative real + axis (and cannot be continued continuously there). + \sa SquareRoot() */ +vec2 SquareRootUnsafe(vec2 Z){ + float ZLengthSq=dot(Z, Z); + float ZLengthInv=rsqrt(ZLengthSq); + vec2 UnnormalizedRoot=Z*ZLengthInv+vec2(1.0f, 0.0f); + float UnnormalizedRootLengthSq=dot(UnnormalizedRoot, UnnormalizedRoot); + float NormalizationFactorInvSq=UnnormalizedRootLengthSq*ZLengthInv; + float NormalizationFactor=rsqrt(NormalizationFactorInvSq); + return NormalizationFactor*UnnormalizedRoot; +} +/*! This utility function computes one square root of the given complex value. + The other one can be found using the unary minus operator. + \note This function has discontinuities for values with real part zero. + \sa SquareRootUnsafe() */ +vec2 SquareRoot(vec2 Z){ + vec2 ZPositiveRealPart=vec2(abs(Z.x), Z.y); + vec2 ComputedRoot=SquareRootUnsafe(ZPositiveRealPart); + return (Z.x>=0.0)?ComputedRoot:ComputedRoot.yx; +} +/*! This utility function computes one cubic root of the given complex value. The + other roots can be found by multiplication by cubic roots of unity. + \note This function has various discontinuities.*/ +vec2 CubicRoot(vec2 Z){ + float Argument=atan2(Z.y, Z.x); + float NewArgument=Argument/3.0f; + vec2 NormalizedRoot; + sincos(NewArgument, NormalizedRoot.y, NormalizedRoot.x); + return NormalizedRoot*pow(dot(Z, Z), 1.0f/6.0f); +} + +/*! @{ + Returns the complex conjugate of the given complex vector (i.e. it changes the + second column resp the y-component).*/ +mat2x2 Conjugate(mat2x2 Vector){ + return mat2x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y); +} +mat3x2 Conjugate(mat3x2 Vector){ + return mat3x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y); +} +mat4x2 Conjugate(mat4x2 Vector){ + return mat4x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y, Vector[3].x, -Vector[3].y); +} +void Conjugate(out vec2 OutConjugateVector[5], vec2 Vector[5]){ + for (int i=0;i!=5;++i){ + OutConjugateVector[i]=vec2(Vector[i].x, -Vector[i].x); + } +} +//!@} + +/*! Returns the real part of a complex number as real.*/ +float RealPart(vec2 Z){ + return Z.x; +} + +/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function + outputs its two complex roots.*/ +void SolveQuadratic(out vec2 pOutRoot[2], vec2 A, vec2 B, vec2 C) +{ + // Normalize the coefficients + vec2 InvA=Reciprocal(A); + B=Multiply(B, InvA); + C=Multiply(C, InvA); + // Divide the middle coefficient by two + B*=0.5f; + // Apply the quadratic formula + vec2 DiscriminantRoot=SquareRoot(Square(B)-C); + pOutRoot[0]=-B-DiscriminantRoot; + pOutRoot[1]=-B+DiscriminantRoot; +} + +/*! Given coefficients of a cubic polynomial A*x^3+B*x^2+C*x+D, this function + outputs its three complex roots.*/ +void SolveCubicBlinn(out vec2 pOutRoot[3], vec2 A, vec2 B, vec2 C, vec2 D) +{ + // Normalize the polynomial + vec2 InvA=Reciprocal(A); + B=Multiply(B, InvA); + C=Multiply(C, InvA); + D=Multiply(D, InvA); + // Divide middle coefficients by three + B/=3.0f; + C/=3.0f; + // Compute the Hessian and the discriminant + vec2 Delta00=-Square(B)+C; + vec2 Delta01=-Multiply(C, B)+D; + vec2 Delta11=Multiply(B, D)-Square(C); + vec2 Discriminant=4.0f*Multiply(Delta00, Delta11)-Square(Delta01); + // Compute coefficients of the depressed cubic + // (third is zero, fourth is one) + vec2 DepressedD=-2.0f*Multiply(B, Delta00)+Delta01; + vec2 DepressedC=Delta00; + // Take the cubic root of a complex number avoiding cancellation + vec2 DiscriminantRoot=SquareRoot(-Discriminant); + DiscriminantRoot=faceforward(DiscriminantRoot, DiscriminantRoot, DepressedD); + vec2 CubedRoot=DiscriminantRoot-DepressedD; + vec2 FirstRoot=CubicRoot(0.5f*CubedRoot); + vec2 pCubicRoot[3]={ + FirstRoot, + Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), FirstRoot), + Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), FirstRoot) + }; + // Also compute the reciprocal cubic roots + vec2 InvFirstRoot=Reciprocal(FirstRoot); + vec2 pInvCubicRoot[3]={ + InvFirstRoot, + Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), InvFirstRoot), + Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), InvFirstRoot) + }; + // Turn them into roots of the depressed cubic and revert the depression + // transform + + for (int i=0;i!=3;++i) + { + pOutRoot[i]=pCubicRoot[i]-Multiply(DepressedC, pInvCubicRoot[i])-B; + } +} + + +/*! Given coefficients of a quartic polynomial A*x^4+B*x^3+C*x^2+D*x+E, this + function outputs its four complex roots.*/ +void SolveQuarticNeumark(out vec2 pOutRoot[4], vec2 A, vec2 B, vec2 C, vec2 D, vec2 E) +{ + // Normalize the polynomial + vec2 InvA=Reciprocal(A); + B=Multiply(B, InvA); + C=Multiply(C, InvA); + D=Multiply(D, InvA); + E=Multiply(E, InvA); + // Construct a normalized cubic + vec2 P=-2.0f*C; + vec2 Q=Square(C)+Multiply(B, D)-4.0f*E; + vec2 R=Square(D)+Multiply(Square(B), E)-Multiply(Multiply(B, C), D); + // Compute a root that is not the smallest of the cubic + vec2 pCubicRoot[3]; + SolveCubicBlinn(pCubicRoot, vec2(1.0f, 0.0f), P, Q, R); + vec2 y=(dot(pCubicRoot[1], pCubicRoot[1])>dot(pCubicRoot[0], pCubicRoot[0]))?pCubicRoot[1]:pCubicRoot[0]; + + // Solve a quadratic to obtain linear coefficients for quadratic polynomials + vec2 BB=Square(B); + vec2 fy=4.0f*y; + vec2 BB_fy=BB-fy; + vec2 tmp=SquareRoot(BB_fy); + vec2 G=(B+tmp)*0.5f; + vec2 g=(B-tmp)*0.5f; + // Construct the corresponding constant coefficients + vec2 Z=C-y; + tmp=Divide(0.5f*Multiply(B, Z)-D, tmp); + vec2 H=Z*0.5f+tmp; + vec2 h=Z*0.5f-tmp; + + // Compute the roots + vec2 pQuadraticRoot[2]; + SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), G, H); + pOutRoot[0]=pQuadraticRoot[0]; + pOutRoot[1]=pQuadraticRoot[1]; + SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), g, h); + pOutRoot[2]=pQuadraticRoot[0]; + pOutRoot[3]=pQuadraticRoot[1]; +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl new file mode 100644 index 000000000..0c2d23b72 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl @@ -0,0 +1,500 @@ +/*! \file + This header provides utility functions to reconstruct the transmittance + from a given vector of power moments (4, 6 or 8 power moments) at a + specified depth. As prerequisite, utility functions for computing the real + roots of polynomials up to degree four are defined. +*/ + +//#include "flywheel:internal/mboit/trigonometric_moment_math.glsl" + +void sincos(float theta, out float s, out float c) { + s = sin(theta); + c = cos(theta); +} + +float saturate(float a) { + return clamp(a, 0., 1.); +} + + + +/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function + outputs its two real roots.*/ +vec2 solveQuadratic(vec3 coeffs) +{ + coeffs[1] *= 0.5; + + float x1, x2, tmp; + + tmp = (coeffs[1] * coeffs[1] - coeffs[0] * coeffs[2]); + if (coeffs[1] >= 0) { + tmp = sqrt(tmp); + x1 = (-coeffs[2]) / (coeffs[1] + tmp); + x2 = (-coeffs[1] - tmp) / coeffs[0]; + } else { + tmp = sqrt(tmp); + x1 = (-coeffs[1] + tmp) / coeffs[0]; + x2 = coeffs[2] / (-coeffs[1] + tmp); + } + return vec2(x1, x2); +} + +/*! Code taken from the blog "Moments in Graphics" by Christoph Peters. + http://momentsingraphics.de/?p=105 + This function computes the three real roots of a cubic polynomial + Coefficient[0]+Coefficient[1]*x+Coefficient[2]*x^2+Coefficient[3]*x^3.*/ +vec3 SolveCubic(vec4 Coefficient) { + // Normalize the polynomial + Coefficient.xyz /= Coefficient.w; + // Divide middle coefficients by three + Coefficient.yz /= 3.0f; + // Compute the Hessian and the discrimant + vec3 Delta = vec3( + fma(-Coefficient.z, Coefficient.z, Coefficient.y), + fma(-Coefficient.y, Coefficient.z, Coefficient.x), + dot(vec2(Coefficient.z, -Coefficient.y), Coefficient.xy) + ); + float Discriminant = dot(vec2(4.0f*Delta.x, -Delta.y), Delta.zy); + // Compute coefficients of the depressed cubic + // (third is zero, fourth is one) + vec2 Depressed = vec2( + fma(-2.0f*Coefficient.z, Delta.x, Delta.y), + Delta.x + ); + // Take the cubic root of a normalized complex number + float Theta = atan(sqrt(Discriminant), -Depressed.x) / 3.0f; + vec2 CubicRoot; + sincos(Theta, CubicRoot.y, CubicRoot.x); + // Compute the three roots, scale appropriately and + // revert the depression transform + vec3 Root = vec3( + CubicRoot.x, + dot(vec2(-0.5f, -0.5f*sqrt(3.0f)), CubicRoot), + dot(vec2(-0.5f, 0.5f*sqrt(3.0f)), CubicRoot) + ); + Root = fma(vec3(2.0f*sqrt(-Depressed.y)), Root, vec3(-Coefficient.z)); + return Root; +} + +/*! Given coefficients of a cubic polynomial + coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3 with three real roots, + this function returns the root of least magnitude.*/ +float solveCubicBlinnSmallest(vec4 coeffs) +{ + coeffs.xyz /= coeffs.w; + coeffs.yz /= 3.0; + + vec3 delta = vec3(fma(-coeffs.z, coeffs.z, coeffs.y), fma(-coeffs.z, coeffs.y, coeffs.x), coeffs.z * coeffs.x - coeffs.y * coeffs.y); + float discriminant = 4.0 * delta.x * delta.z - delta.y * delta.y; + + vec2 depressed = vec2(delta.z, -coeffs.x * delta.y + 2.0 * coeffs.y * delta.z); + float theta = abs(atan(coeffs.x * sqrt(discriminant), -depressed.y)) / 3.0; + vec2 sin_cos; + sincos(theta, sin_cos.x, sin_cos.y); + float tmp = 2.0 * sqrt(-depressed.x); + vec2 x = vec2(tmp * sin_cos.y, tmp * (-0.5 * sin_cos.y - 0.5 * sqrt(3.0) * sin_cos.x)); + vec2 s = (x.x + x.y < 2.0 * coeffs.y) ? vec2(-coeffs.x, x.x + coeffs.y) : vec2(-coeffs.x, x.y + coeffs.y); + + return s.x / s.y; +} + +/*! Given coefficients of a quartic polynomial + coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3+coeffs[4]*x^4 with four + real roots, this function returns all roots.*/ +vec4 solveQuarticNeumark(float coeffs[5]) +{ + // Normalization + float B = coeffs[3] / coeffs[4]; + float C = coeffs[2] / coeffs[4]; + float D = coeffs[1] / coeffs[4]; + float E = coeffs[0] / coeffs[4]; + + // Compute coefficients of the cubic resolvent + float P = -2.0*C; + float Q = C*C + B*D - 4.0*E; + float R = D*D + B*B*E -B*C*D; + + // Obtain the smallest cubic root + float y = solveCubicBlinnSmallest(vec4(R, Q, P, 1.0)); + + float BB = B*B; + float fy = 4.0 * y; + float BB_fy = BB - fy; + + float Z = C - y; + float ZZ = Z*Z; + float fE = 4.0 * E; + float ZZ_fE = ZZ - fE; + + float G, g, H, h; + // Compute the coefficients of the quadratics adaptively using the two + // proposed factorizations by Neumark. Choose the appropriate + // factorizations using the heuristic proposed by Herbison-Evans. + if (y < 0 || (ZZ + fE) * BB_fy > ZZ_fE * (BB + fy)) { + float tmp = sqrt(BB_fy); + G = (B + tmp) * 0.5; + g = (B - tmp) * 0.5; + + tmp = (B*Z - 2.0*D) / (2.0*tmp); + H = fma(Z, 0.5, tmp); + h = fma(Z, 0.5, -tmp); + } else { + float tmp = sqrt(ZZ_fE); + H = (Z + tmp) * 0.5; + h = (Z - tmp) * 0.5; + + tmp = (B*Z - 2.0*D) / (2.0*tmp); + G = fma(B, 0.5, tmp); + g = fma(B, 0.5, -tmp); + } + // Solve the quadratics + return vec4(solveQuadratic(vec3(1.0, G, H)), solveQuadratic(vec3(1.0, g, h))); +} + +/*! Definition of utility functions for quantization and dequantization of + power moments stored in 16 bits per moment. */ +void offsetMoments(inout vec2 b_even, inout vec2 b_odd, float sign) +{ + b_odd += 0.5 * sign; +} + +void quantizeMoments(out vec2 b_even_q, out vec2 b_odd_q, vec2 b_even, vec2 b_odd) +{ + b_odd_q = b_odd * mat2x2(1.5f, sqrt(3.0f)*0.5f, -2.0f, -sqrt(3.0f)*2.0f / 9.0f); + b_even_q = b_even * mat2x2(4.0f, 0.5f, -4.0f, 0.5f); +} + +void offsetAndDequantizeMoments(out vec2 b_even, out vec2 b_odd, vec2 b_even_q, vec2 b_odd_q) +{ + offsetMoments(b_even_q, b_odd_q, -1.0); + b_odd = b_odd_q * mat2x2(-1.0f / 3.0f, -0.75f, sqrt(3.0f), 0.75f*sqrt(3.0f)); + b_even = b_even_q * mat2x2(0.125f, -0.125f, 1.0f, 1.0f); +} + +void offsetMoments(inout vec3 b_even, inout vec3 b_odd, float sign) +{ + b_odd += 0.5 * sign; + b_even.z += 0.018888946f * sign; +} + +void quantizeMoments(out vec3 b_even_q, out vec3 b_odd_q, vec3 b_even, vec3 b_odd) +{ + const mat3x3 QuantizationMatrixOdd = mat3x3( + 2.5f, -1.87499864450f, 1.26583039016f, + -10.0f, 4.20757543111f, -1.47644882902f, + 8.0f, -1.83257678661f, 0.71061660238f); + const mat3x3 QuantizationMatrixEven = mat3x3( + 4.0f, 9.0f, -0.57759806484f, + -4.0f, -24.0f, 4.61936647543f, + 0.0f, 16.0f, -3.07953906655f); + b_odd_q = b_odd * QuantizationMatrixOdd; + b_even_q = b_even * QuantizationMatrixEven; +} + +void offsetAndDequantizeMoments(out vec3 b_even, out vec3 b_odd, vec3 b_even_q, vec3 b_odd_q) +{ + const mat3x3 QuantizationMatrixOdd = mat3x3( + -0.02877789192f, 0.09995235706f, 0.25893353755f, + 0.47635550422f, 0.84532580931f, 0.90779616657f, + 1.55242808973f, 1.05472570761f, 0.83327335647f); + const mat3x3 QuantizationMatrixEven = mat3x3( + 0.00001253044f, -0.24998746956f, -0.37498825271f, + 0.16668494186f, 0.16668494186f, 0.21876713299f, + 0.86602540579f, 0.86602540579f, 0.81189881793f); + offsetMoments(b_even_q, b_odd_q, -1.0); + b_odd = b_odd_q * QuantizationMatrixOdd; + b_even = b_even_q * QuantizationMatrixEven; +} + +void offsetMoments(inout vec4 b_even, inout vec4 b_odd, float sign) +{ + b_odd += 0.5 * sign; + b_even += vec4(0.972481993925964, 1.0, 0.999179192513328, 0.991778293073131) * sign; +} + +void quantizeMoments(out vec4 b_even_q, out vec4 b_odd_q, vec4 b_even, vec4 b_odd) +{ + const mat4x4 mat_odd = mat4x4(3.48044635732474, -27.5760737514826, 55.1267384344761, -31.5311110403183, + 1.26797185782836, -0.928755808743913, -2.07520453231032, 1.23598848322588, + -2.1671560004294, 6.17950199592966, -0.276515571579297, -4.23583042392097, + 0.974332879165755, -0.443426830933027, -0.360491648368785, 0.310149466050223); + const mat4x4 mat_even = mat4x4(0.280504133158527, -0.757633844606942, 0.392179589334688, -0.887531871812237, + -2.01362265883247, 0.221551373038988, -1.06107954265125, 2.83887201588367, + -7.31010494985321, 13.9855979699139, -0.114305766176437, -7.4361899359832, + -15.8954215629556, 79.6186327084103, -127.457278992502, 63.7349456687829); + b_odd_q = mat_odd * b_odd; + b_even_q = mat_even * b_even; +} + +void offsetAndDequantizeMoments(out vec4 b_even, out vec4 b_odd, vec4 b_even_q, vec4 b_odd_q) +{ + const mat4x4 mat_odd = mat4x4(-0.00482399708502382, -0.423201508674231, 0.0348312382605129, 1.67179208266592, + -0.0233402218644408, -0.832829097046478, 0.0193406040499625, 1.21021509068975, + -0.010888537031885, -0.926393772997063, -0.11723394414779, 0.983723301818275, + -0.0308713357806732, -0.937989172670245, -0.218033377677099, 0.845991731322996); + const mat4x4 mat_even = mat4x4(-0.976220278891035, -0.456139260269401, -0.0504335521016742, 0.000838800390651085, + -1.04828341778299, -0.229726640510149, 0.0259608334616091, -0.00133632693205861, + -1.03115268628604, -0.077844420809897, 0.00443408851014257, -0.0103744938457406, + -0.996038443434636, 0.0175438624416783, -0.0361414253243963, -0.00317839994022725); + offsetMoments(b_even_q, b_odd_q, -1.0); + b_odd = mat_odd * b_odd_q; + b_even = mat_even * b_even_q; +} + +/*! This function reconstructs the transmittance at the given depth from four + normalized power moments and the given zeroth moment.*/ +float computeTransmittanceAtDepthFrom4PowerMoments(float b_0, vec2 b_even, vec2 b_odd, float depth, float bias, float overestimation, vec4 bias_vector) +{ + vec4 b = vec4(b_odd.x, b_even.x, b_odd.y, b_even.y); + // Bias input data to avoid artifacts + b = mix(b, bias_vector, bias); + vec3 z; + z[0] = depth; + + // Compute a Cholesky factorization of the Hankel matrix B storing only non- + // trivial entries or related products + float L21D11=fma(-b[0], b[1], b[2]); + float D11=fma(-b[0], b[0], b[1]); + float InvD11=1.0f/D11; + float L21=L21D11*InvD11; + float SquaredDepthVariance=fma(-b[1], b[1], b[3]); + float D22=fma(-L21D11, L21, SquaredDepthVariance); + + // Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T + vec3 c=vec3(1.0f, z[0], z[0]*z[0]); + // Forward substitution to solve L*c1=bz + c[1]-=b.x; + c[2]-=b.y+L21*c[1]; + // Scaling to solve D*c2=c1 + c[1]*=InvD11; + c[2]/=D22; + // Backward substitution to solve L^T*c3=c2 + c[1]-=L21*c[2]; + c[0]-=dot(c.yz, b.xy); + // Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions + // z[1] and z[2] + float InvC2=1.0f/c[2]; + float p=c[1]*InvC2; + float q=c[0]*InvC2; + float D=(p*p*0.25f)-q; + float r=sqrt(D); + z[1]=-p*0.5f-r; + z[2]=-p*0.5f+r; + // Compute the absorbance by summing the appropriate weights + vec3 polynomial; + vec3 weight_factor = vec3(overestimation, (z[1] < z[0])?1.0f:0.0f, (z[2] < z[0])?1.0f:0.0f); + float f0=weight_factor[0]; + float f1=weight_factor[1]; + float f2=weight_factor[2]; + float f01=(f1-f0)/(z[1]-z[0]); + float f12=(f2-f1)/(z[2]-z[1]); + float f012=(f12-f01)/(z[2]-z[0]); + polynomial[0]=f012; + polynomial[1]=polynomial[0]; + polynomial[0]=f01-polynomial[0]*z[1]; + polynomial[2]=polynomial[1]; + polynomial[1]=polynomial[0]-polynomial[1]*z[0]; + polynomial[0]=f0-polynomial[0]*z[0]; + float absorbance = polynomial[0] + dot(b.xy, polynomial.yz);; + // Turn the normalized absorbance into transmittance + return saturate(exp(-b_0 * absorbance)); +} + +/*! This function reconstructs the transmittance at the given depth from six + normalized power moments and the given zeroth moment.*/ +float computeTransmittanceAtDepthFrom6PowerMoments(float b_0, vec3 b_even, vec3 b_odd, float depth, float bias, float overestimation, float bias_vector[6]) +{ + float b[6] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z }; + // Bias input data to avoid artifacts + for (int i = 0; i != 6; ++i) { + b[i] = mix(b[i], bias_vector[i], bias); + } + + vec4 z; + z[0] = depth; + + // Compute a Cholesky factorization of the Hankel matrix B storing only non- + // trivial entries or related products + float InvD11 = 1.0f / fma(-b[0], b[0], b[1]); + float L21D11 = fma(-b[0], b[1], b[2]); + float L21 = L21D11*InvD11; + float D22 = fma(-L21D11, L21, fma(-b[1], b[1], b[3])); + float L31D11 = fma(-b[0], b[2], b[3]); + float L31 = L31D11*InvD11; + float InvD22 = 1.0f / D22; + float L32D22 = fma(-L21D11, L31, fma(-b[1], b[2], b[4])); + float L32 = L32D22*InvD22; + float D33 = fma(-b[2], b[2], b[5]) - dot(vec2(L31D11, L32D22), vec2(L31, L32)); + float InvD33 = 1.0f / D33; + + // Construct the polynomial whose roots have to be points of support of the + // canonical distribution: bz=(1,z[0],z[0]*z[0],z[0]*z[0]*z[0])^T + vec4 c; + c[0] = 1.0f; + c[1] = z[0]; + c[2] = c[1] * z[0]; + c[3] = c[2] * z[0]; + // Forward substitution to solve L*c1=bz + c[1] -= b[0]; + c[2] -= fma(L21, c[1], b[1]); + c[3] -= b[2] + dot(vec2(L31, L32), c.yz); + // Scaling to solve D*c2=c1 + c.yzw *= vec3(InvD11, InvD22, InvD33); + // Backward substitution to solve L^T*c3=c2 + c[2] -= L32*c[3]; + c[1] -= dot(vec2(L21, L31), c.zw); + c[0] -= dot(vec3(b[0], b[1], b[2]), c.yzw); + + // Solve the cubic equation + z.yzw = SolveCubic(c); + + // Compute the absorbance by summing the appropriate weights + vec4 weigth_factor; + weigth_factor[0] = overestimation; + weigth_factor.yzw = vec3(greaterThan(z.yzw, z.xxx)); + // Construct an interpolation polynomial + float f0 = weigth_factor[0]; + float f1 = weigth_factor[1]; + float f2 = weigth_factor[2]; + float f3 = weigth_factor[3]; + float f01 = (f1 - f0) / (z[1] - z[0]); + float f12 = (f2 - f1) / (z[2] - z[1]); + float f23 = (f3 - f2) / (z[3] - z[2]); + float f012 = (f12 - f01) / (z[2] - z[0]); + float f123 = (f23 - f12) / (z[3] - z[1]); + float f0123 = (f123 - f012) / (z[3] - z[0]); + vec4 polynomial; + // f012+f0123 *(z-z2) + polynomial[0] = fma(-f0123, z[2], f012); + polynomial[1] = f0123; + // *(z-z1) +f01 + polynomial[2] = polynomial[1]; + polynomial[1] = fma(polynomial[1], -z[1], polynomial[0]); + polynomial[0] = fma(polynomial[0], -z[1], f01); + // *(z-z0) +f0 + polynomial[3] = polynomial[2]; + polynomial[2] = fma(polynomial[2], -z[0], polynomial[1]); + polynomial[1] = fma(polynomial[1], -z[0], polynomial[0]); + polynomial[0] = fma(polynomial[0], -z[0], f0); + float absorbance = dot(polynomial, vec4 (1.0, b[0], b[1], b[2])); + // Turn the normalized absorbance into transmittance + return saturate(exp(-b_0 * absorbance)); +} + +/*! This function reconstructs the transmittance at the given depth from eight + normalized power moments and the given zeroth moment.*/ +float computeTransmittanceAtDepthFrom8PowerMoments(float b_0, vec4 b_even, vec4 b_odd, float depth, float bias, float overestimation, float bias_vector[8]) +{ + float b[8] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z, b_odd.w, b_even.w }; + // Bias input data to avoid artifacts + for (int i = 0; i != 8; ++i) { + b[i] = mix(b[i], bias_vector[i], bias); + } + + float z[5]; + z[0] = depth; + + // Compute a Cholesky factorization of the Hankel matrix B storing only non-trivial entries or related products + float D22 = fma(-b[0], b[0], b[1]); + float InvD22 = 1.0 / D22; + float L32D22 = fma(-b[1], b[0], b[2]); + float L32 = L32D22 * InvD22; + float L42D22 = fma(-b[2], b[0], b[3]); + float L42 = L42D22 * InvD22; + float L52D22 = fma(-b[3], b[0], b[4]); + float L52 = L52D22 * InvD22; + + float D33 = fma(-L32, L32D22, fma(-b[1], b[1], b[3])); + float InvD33 = 1.0 / D33; + float L43D33 = fma(-L42, L32D22, fma(-b[2], b[1], b[4])); + float L43 = L43D33 * InvD33; + float L53D33 = fma(-L52, L32D22, fma(-b[3], b[1], b[5])); + float L53 = L53D33 * InvD33; + + float D44 = fma(-b[2], b[2], b[5]) - dot(vec2(L42, L43), vec2(L42D22, L43D33)); + float InvD44 = 1.0 / D44; + float L54D44 = fma(-b[3], b[2], b[6]) - dot(vec2(L52, L53), vec2(L42D22, L43D33)); + float L54 = L54D44 * InvD44; + + float D55 = fma(-b[3], b[3], b[7]) - dot(vec3(L52, L53, L54), vec3(L52D22, L53D33, L54D44)); + float InvD55 = 1.0 / D55; + + // Construct the polynomial whose roots have to be points of support of the + // Canonical distribution: + // bz = (1,z[0],z[0]^2,z[0]^3,z[0]^4)^T + float c[5]; + c[0] = 1.0; + c[1] = z[0]; + c[2] = c[1] * z[0]; + c[3] = c[2] * z[0]; + c[4] = c[3] * z[0]; + + // Forward substitution to solve L*c1 = bz + c[1] -= b[0]; + c[2] -= fma(L32, c[1], b[1]); + c[3] -= b[2] + dot(vec2(L42, L43), vec2(c[1], c[2])); + c[4] -= b[3] + dot(vec3(L52, L53, L54), vec3(c[1], c[2], c[3])); + + // Scaling to solve D*c2 = c1 + //c = c .*[1, InvD22, InvD33, InvD44, InvD55]; + c[1] *= InvD22; + c[2] *= InvD33; + c[3] *= InvD44; + c[4] *= InvD55; + + // Backward substitution to solve L^T*c3 = c2 + c[3] -= L54 * c[4]; + c[2] -= dot(vec2(L53, L43), vec2(c[4], c[3])); + c[1] -= dot(vec3(L52, L42, L32), vec3(c[4], c[3], c[2])); + c[0] -= dot(vec4(b[3], b[2], b[1], b[0]), vec4(c[4], c[3], c[2], c[1])); + + // Solve the quartic equation + vec4 zz = solveQuarticNeumark(c); + z[1] = zz[0]; + z[2] = zz[1]; + z[3] = zz[2]; + z[4] = zz[3]; + + // Compute the absorbance by summing the appropriate weights + vec4 weigth_factor = vec4(lessThanEqual(vec4(z[1], z[2], z[3], z[4]), z[0].xxxx)); + // Construct an interpolation polynomial + float f0 = overestimation; + float f1 = weigth_factor[0]; + float f2 = weigth_factor[1]; + float f3 = weigth_factor[2]; + float f4 = weigth_factor[3]; + float f01 = (f1 - f0) / (z[1] - z[0]); + float f12 = (f2 - f1) / (z[2] - z[1]); + float f23 = (f3 - f2) / (z[3] - z[2]); + float f34 = (f4 - f3) / (z[4] - z[3]); + float f012 = (f12 - f01) / (z[2] - z[0]); + float f123 = (f23 - f12) / (z[3] - z[1]); + float f234 = (f34 - f23) / (z[4] - z[2]); + float f0123 = (f123 - f012) / (z[3] - z[0]); + float f1234 = (f234 - f123) / (z[4] - z[1]); + float f01234 = (f1234 - f0123) / (z[4] - z[0]); + + float Polynomial_0; + vec4 Polynomial; + // f0123 + f01234 * (z - z3) + Polynomial_0 = fma(-f01234, z[3], f0123); + Polynomial[0] = f01234; + // * (z - z2) + f012 + Polynomial[1] = Polynomial[0]; + Polynomial[0] = fma(-Polynomial[0], z[2], Polynomial_0); + Polynomial_0 = fma(-Polynomial_0, z[2], f012); + // * (z - z1) + f01 + Polynomial[2] = Polynomial[1]; + Polynomial[1] = fma(-Polynomial[1], z[1], Polynomial[0]); + Polynomial[0] = fma(-Polynomial[0], z[1], Polynomial_0); + Polynomial_0 = fma(-Polynomial_0, z[1], f01); + // * (z - z0) + f1 + Polynomial[3] = Polynomial[2]; + Polynomial[2] = fma(-Polynomial[2], z[0], Polynomial[1]); + Polynomial[1] = fma(-Polynomial[1], z[0], Polynomial[0]); + Polynomial[0] = fma(-Polynomial[0], z[0], Polynomial_0); + Polynomial_0 = fma(-Polynomial_0, z[0], f0); + float absorbance = Polynomial_0 + dot(Polynomial, vec4(b[0], b[1], b[2], b[3])); + // Turn the normalized absorbance into transmittance + return saturate(exp(-b_0 * absorbance)); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl new file mode 100644 index 000000000..f26d5582e --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl @@ -0,0 +1,253 @@ +/*! \file + This header provides the functionality to create the vectors of moments and + to blend surfaces together with an appropriately reconstructed + transmittance. It is needed for both additive passes of moment-based OIT. +*/ + +//cbuffer MomentOIT +//{ +// struct { +// vec4 wrapping_zone_parameters; +// float overestimation; +// float moment_bias; +// }MomentOIT; +//}; + +#include "flywheel:internal/mboit/moment_math.glsl" + +const float moment_bias = 0.25; +const float overestimation = 0.25; +const vec4 wrapping_zone_parameters = vec4(0.); + + +void clip(float a) { + if (a < 0.) { + discard; + } +} + +// jozu: The trigonometric moments and higher order power moments rely on a second render target +// which the java side is not set up to support. Trying to enable them as is will cause compile errors also. +#define NUM_MOMENTS 4 + +#define SINGLE_PRECISION 1 + +#ifdef _FLW_GENERATE_MOMENTS +/*! Generation of moments in case that rasterizer ordered views are used. + This includes the case if moments are stored in 16 bits. */ + +/*! This functions relies on fixed function additive blending to compute the + vector of moments.moment vector. The shader that calls this function must + provide the required render targets.*/ +#if NUM_MOMENTS == 4 +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b) +#elif NUM_MOMENTS == 6 +#if USE_R_RG_RBBA_FOR_MBOIT6 +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec4 b_3456) +#else +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec2 b_34, out vec2 b_56) +#endif +#elif NUM_MOMENTS == 8 +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b_even, out vec4 b_odd) +#endif +{ + transmittance = max(transmittance, 0.000001); + float absorbance = -log(transmittance); + + b_0 = absorbance; + #if TRIGONOMETRIC + float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y); + vec2 circle_point = vec2(sin(phas), cos(phase)); + + vec2 circle_point_pow2 = Multiply(circle_point, circle_point); + #if NUM_MOMENTS == 4 + b = vec4(circle_point, circle_point_pow2) * absorbance; + #elif NUM_MOMENTS == 6 + b_12 = circle_point * absorbance; + #if USE_R_RG_RBBA_FOR_MBOIT6 + b_3456 = vec4(circle_point_pow2, Multiply(circle_point, circle_point_pow2)) * absorbance; + #else + b_34 = circle_point_pow2 * absorbance; + b_56 = Multiply(circle_point, circle_point_pow2) * absorbance; + #endif + #elif NUM_MOMENTS == 8 + b_even = vec4(circle_point_pow2, Multiply(circle_point_pow2, circle_point_pow2)) * absorbance; + b_odd = vec4(circle_point, Multiply(circle_point, circle_point_pow2)) * absorbance; + #endif + #else + float depth_pow2 = depth * depth; + float depth_pow4 = depth_pow2 * depth_pow2; + #if NUM_MOMENTS == 4 + b = vec4(depth, depth_pow2, depth_pow2 * depth, depth_pow4) * absorbance; + #elif NUM_MOMENTS == 6 + b_12 = vec2(depth, depth_pow2) * absorbance; + #if USE_R_RG_RBBA_FOR_MBOIT6 + b_3456 = vec4(depth_pow2 * depth, depth_pow4, depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance; + #else + b_34 = vec2(depth_pow2 * depth, depth_pow4) * absorbance; + b_56 = vec2(depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance; + #endif + #elif NUM_MOMENTS == 8 + float depth_pow6 = depth_pow4 * depth_pow2; + b_even = vec4(depth_pow2, depth_pow4, depth_pow6, depth_pow6 * depth_pow2) * absorbance; + b_odd = vec4(depth, depth_pow2 * depth, depth_pow4 * depth, depth_pow6 * depth) * absorbance; + #endif + #endif +} + +#else//MOMENT_GENERATION is disabled + +layout (binding = 7) uniform sampler2D _flw_zeroth_moment_sampler; +layout (binding = 8) uniform sampler2D _flw_moments_sampler; +#if USE_R_RG_RBBA_FOR_MBOIT6 +uniform sampler2D extra_moments; +#endif + +/*! This function is to be called from the shader that composites the + transparent fragments. It reads the moments and calls the appropriate + function to reconstruct the transmittance at the specified depth.*/ +void resolveMoments(out float transmittance_at_depth, out float total_transmittance, float depth, vec2 sv_pos) +{ + ivec2 idx0 = ivec2(sv_pos); + ivec2 idx1 = idx0; + + transmittance_at_depth = 1; + total_transmittance = 1; + + float b_0 = texelFetch(_flw_zeroth_moment_sampler, idx0, 0).x; + clip(b_0 - 0.00100050033f); + total_transmittance = exp(-b_0); + + #if NUM_MOMENTS == 4 + #if TRIGONOMETRIC + vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0); + vec2 trig_b[2]; + trig_b[0] = b_tmp.xy; + trig_b[1] = b_tmp.zw; + #if SINGLE_PRECISION + trig_b[0] /= b_0; + trig_b[1] /= b_0; + #else + trig_b[0] = fma(trig_b[0], 2.0, -1.0); + trig_b[1] = fma(trig_b[1], 2.0, -1.0); + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom2TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); + #else + vec4 b_1234 = texelFetch(_flw_moments_sampler, idx0, 0).xyzw; + #if SINGLE_PRECISION + vec2 b_even = b_1234.yw; + vec2 b_odd = b_1234.xz; + + b_even /= b_0; + b_odd /= b_0; + + const vec4 bias_vector = vec4(0, 0.375, 0, 0.375); + #else + vec2 b_even_q = b_1234.yw; + vec2 b_odd_q = b_1234.xz; + + // Dequantize the moments + vec2 b_even; + vec2 b_odd; + offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); + const vec4 bias_vector = vec4(0, 0.628, 0, 0.628); + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom4PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); + #endif + #elif NUM_MOMENTS == 6 + ivec2 idx2 = idx0; + #if TRIGONOMETRIC + vec2 trig_b[3]; + trig_b[0] = texelFetch(_flw_moments_sampler, idx0, 0).xy; + #if USE_R_RG_RBBA_FOR_MBOIT6 + vec4 tmp = texelFetch(extra_moments, idx0, 0); + trig_b[1] = tmp.xy; + trig_b[2] = tmp.zw; + #else + trig_b[1] = texelFetch(_flw_moments_sampler, idx1, 0).xy; + trig_b[2] = texelFetch(_flw_moments_sampler, idx2, 0).xy; + #endif + #if SINGLE_PRECISION + trig_b[0] /= b_0; + trig_b[1] /= b_0; + trig_b[2] /= b_0; + #else + trig_b[0] = fma(trig_b[0], 2.0, -1.0); + trig_b[1] = fma(trig_b[1], 2.0, -1.0); + trig_b[2] = fma(trig_b[2], 2.0, -1.0); + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom3TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); + #else + vec2 b_12 = texelFetch(_flw_moments_sampler, idx0, 0).xy; + #if USE_R_RG_RBBA_FOR_MBOIT6 + vec4 tmp = texelFetch(extra_moments, idx0, 0); + vec2 b_34 = tmp.xy; + vec2 b_56 = tmp.zw; + #else + vec2 b_34 = texelFetch(_flw_moments_sampler, idx1, 0).xy; + vec2 b_56 = texelFetch(_flw_moments_sampler, idx2, 0).xy; + #endif + #if SINGLE_PRECISION + vec3 b_even = vec3(b_12.y, b_34.y, b_56.y); + vec3 b_odd = vec3(b_12.x, b_34.x, b_56.x); + + b_even /= b_0; + b_odd /= b_0; + + const float bias_vector[6] = { 0, 0.48, 0, 0.451, 0, 0.45 }; + #else + vec3 b_even_q = vec3(b_12.y, b_34.y, b_56.y); + vec3 b_odd_q = vec3(b_12.x, b_34.x, b_56.x); + // Dequantize b_0 and the other moments + vec3 b_even; + vec3 b_odd; + offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); + + const float bias_vector[6] = { 0, 0.5566, 0, 0.489, 0, 0.47869382 }; + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom6PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); + #endif + #elif NUM_MOMENTS == 8 + #if TRIGONOMETRIC + vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0); + vec4 b_tmp2 = texelFetch(_flw_moments_sampler, idx1, 0); + #if SINGLE_PRECISION + vec2 trig_b[4] = { + b_tmp2.xy / b_0, + b_tmp.xy / b_0, + b_tmp2.zw / b_0, + b_tmp.zw / b_0 + }; + #else + vec2 trig_b[4] = { + fma(b_tmp2.xy, 2.0, -1.0), + fma(b_tmp.xy, 2.0, -1.0), + fma(b_tmp2.zw, 2.0, -1.0), + fma(b_tmp.zw, 2.0, -1.0) + }; + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom4TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); + #else + #if SINGLE_PRECISION + vec4 b_even = texelFetch(_flw_moments_sampler, idx0, 0); + vec4 b_odd = texelFetch(_flw_moments_sampler, idx1, 0); + + b_even /= b_0; + b_odd /= b_0; + const float bias_vector[8] = { 0, 0.75, 0, 0.67666666666666664, 0, 0.63, 0, 0.60030303030303034 }; + #else + vec4 b_even_q = texelFetch(_flw_moments_sampler, idx0, 0); + vec4 b_odd_q = texelFetch(_flw_moments_sampler, idx1, 0); + + // Dequantize the moments + vec4 b_even; + vec4 b_odd; + offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); + const float bias_vector[8] = { 0, 0.42474916387959866, 0, 0.22407802675585284, 0, 0.15369230769230768, 0, 0.12900440529089119 }; + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom8PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); + #endif + #endif + +} +#endif diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl new file mode 100644 index 000000000..4fc142d8f --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl @@ -0,0 +1,311 @@ +/*! \file + This header provides the utility functions to reconstruct the transmittance + from a given vector of trigonometric moments (2, 3 or 4 trigonometric + moments) at a specified depth.*/ +#include "flywheel:internal/mboit/complex_algebra.glsl" + +/*! This utility function turns a point on the unit circle into a scalar + parameter. It is guaranteed to grow monotonically for (cos(phi),sin(phi)) + with phi in 0 to 2*pi. There are no other guarantees. In particular it is + not an arclength parametrization. If you change this function, you must + also change circleToParameter() in MomentOIT.cpp.*/ +float circleToParameter(vec2 circle_point){ + float result=abs(circle_point.y)-abs(circle_point.x); + result=(circle_point.x<0.0f)?(2.0f-result):result; + return (circle_point.y<0.0f)?(6.0f-result):result; +} + +/*! This utility function returns the appropriate weight factor for a root at + the given location. Both inputs are supposed to be unit vectors. If a + circular arc going counter clockwise from (1.0,0.0) meets root first, it + returns 1.0, otherwise 0.0 or a linear ramp in the wrapping zone.*/ +float getRootWeightFactor(float reference_parameter, float root_parameter, vec4 wrapping_zone_parameters){ + float binary_weight_factor=(root_parameter