From 5d16ebda6033c018ced0209358ef653cba11d932 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sat, 15 Feb 2025 18:13:16 -0800 Subject: [PATCH 01/12] Wbout it - Implement WBOIT --- .../backend/compile/IndirectPrograms.java | 31 ++++- .../backend/compile/InstancingPrograms.java | 2 +- .../backend/compile/PipelineCompiler.java | 15 ++- .../backend/engine/MaterialRenderState.java | 11 ++ .../engine/indirect/IndirectCullingGroup.java | 39 +++++- .../engine/indirect/IndirectDrawManager.java | 14 ++- .../engine/indirect/WboitFrameBuffer.java | 117 ++++++++++++++++++ .../flywheel/flywheel/internal/common.frag | 45 ++++++- .../internal/indirect/fullscreen.vert | 4 + .../internal/indirect/oit_composite.frag | 48 +++++++ 10 files changed, 310 insertions(+), 16 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/fullscreen.vert create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index 39d211ef9..9d3ce83e0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -31,7 +31,9 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl"); private static final ResourceLocation DOWNSAMPLE_FIRST = Flywheel.rl("internal/indirect/downsample_first.glsl"); private static final ResourceLocation DOWNSAMPLE_SECOND = Flywheel.rl("internal/indirect/downsample_second.glsl"); - public static final List UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, 
SCATTER_SHADER_MAIN, DOWNSAMPLE_FIRST, DOWNSAMPLE_SECOND); + + private static final ResourceLocation FULLSCREEN = Flywheel.rl("internal/indirect/fullscreen.vert"); + private static final ResourceLocation OIT_COMPOSITE = Flywheel.rl("internal/indirect/oit_composite.frag"); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -45,11 +47,13 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final PipelineCompiler pipeline; private final CompilationHarness> culling; private final CompilationHarness utils; + private final CompilationHarness fullscreen; - private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness> culling, CompilationHarness utils) { + private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness> culling, CompilationHarness utils, CompilationHarness fullscreen) { this.pipeline = pipeline; this.culling = culling; this.utils = utils; + this.fullscreen = fullscreen; } private static List getExtensions(GlslVersion glslVersion) { @@ -88,8 +92,9 @@ public class IndirectPrograms extends AtomicReferenceCounted { var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS); var cullingCompiler = createCullingCompiler(sources); var utilCompiler = createUtilCompiler(sources); + var fullscreenCompiler = createFullscreenCompiler(sources); - IndirectPrograms newInstance = new IndirectPrograms(pipelineCompiler, cullingCompiler, utilCompiler); + IndirectPrograms newInstance = new IndirectPrograms(pipelineCompiler, cullingCompiler, utilCompiler, fullscreenCompiler); setInstance(newInstance); } @@ -125,6 +130,17 @@ public class IndirectPrograms extends AtomicReferenceCounted { .harness("utilities", sources); } + private static CompilationHarness createFullscreenCompiler(ShaderSources sources) { + return UTIL.program() + .link(UTIL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.VERTEX) + .nameMapper($ -> 
"fullscreen/fullscreen") + .withResource(FULLSCREEN)) + .link(UTIL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.FRAGMENT) + .nameMapper(rl -> "fullscreen/" + ResourceUtil.toDebugFileNameNoExtension(rl)) + .withResource(s -> s)) + .harness("fullscreen", sources); + } + static void setInstance(@Nullable IndirectPrograms newInstance) { if (instance != null) { instance.release(); @@ -148,8 +164,8 @@ public class IndirectPrograms extends AtomicReferenceCounted { setInstance(null); } - public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader, Material material) { - return pipeline.get(instanceType, contextShader, material); + public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader, Material material, boolean oit) { + return pipeline.get(instanceType, contextShader, material, oit); } public GlProgram getCullingProgram(InstanceType instanceType) { @@ -172,10 +188,15 @@ public class IndirectPrograms extends AtomicReferenceCounted { return utils.get(DOWNSAMPLE_SECOND); } + public GlProgram getOitCompositeProgram() { + return fullscreen.get(OIT_COMPOSITE); + } + @Override protected void _delete() { pipeline.delete(); culling.delete(); utils.delete(); + fullscreen.delete(); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java index c1e736b1e..db3014402 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java @@ -70,7 +70,7 @@ public class InstancingPrograms extends AtomicReferenceCounted { } public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material) { - return pipeline.get(instanceType, contextShader, material); + return pipeline.get(instanceType, contextShader, material, false); } @Override 
diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java index e752edfd2..e315dce35 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java @@ -50,7 +50,7 @@ public final class PipelineCompiler { ALL.add(this); } - public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material) { + public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material, boolean oit) { var light = material.light(); var cutout = material.cutout(); var shaders = material.shaders(); @@ -66,7 +66,7 @@ public final class PipelineCompiler { MaterialShaderIndices.cutoutSources() .index(cutout.source()); - return harness.get(new PipelineProgramKey(instanceType, contextShader, light, shaders, cutout != CutoutShaders.OFF, FrameUniforms.debugOn())); + return harness.get(new PipelineProgramKey(instanceType, contextShader, light, shaders, cutout != CutoutShaders.OFF, FrameUniforms.debugOn(), oit)); } public void delete() { @@ -128,7 +128,8 @@ public final class PipelineCompiler { .source()); var debug = key.debugEnabled() ? "_debug" : ""; var cutout = key.useCutout() ? "_cutout" : ""; - return "pipeline/" + pipeline.compilerMarker() + "/frag/" + material + "/" + light + "_" + context + cutout + debug; + var oit = key.oit() ? 
"_oit" : ""; + return "pipeline/" + pipeline.compilerMarker() + "/frag/" + material + "/" + light + "_" + context + cutout + debug + oit; }) .requireExtensions(extensions) .enableExtension("GL_ARB_conservative_depth") @@ -146,6 +147,11 @@ public final class PipelineCompiler { comp.define("_FLW_USE_DISCARD"); } }) + .onCompile((key, comp) -> { + if (key.oit()) { + comp.define("_FLW_OIT"); + } + }) .withResource(API_IMPL_FRAG) .withResource(key -> key.materialShaders() .fragmentSource()) @@ -217,6 +223,7 @@ public final class PipelineCompiler { * @param light The light shader to use. */ public record PipelineProgramKey(InstanceType instanceType, ContextShader contextShader, LightShader light, - MaterialShaders materialShaders, boolean useCutout, boolean debugEnabled) { + MaterialShaders materialShaders, boolean useCutout, boolean debugEnabled, + boolean oit) { } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java index 43206153f..ec98d5c09 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java @@ -45,6 +45,17 @@ public final class MaterialRenderState { setupWriteMask(material.writeMask()); } + public static void setupOit(Material material) { + setupTexture(material); + setupBackfaceCulling(material.backfaceCulling()); + setupPolygonOffset(material.polygonOffset()); + setupDepthTest(material.depthTest()); + + WriteMask mask = material.writeMask(); + boolean writeColor = mask.color(); + RenderSystem.colorMask(writeColor, writeColor, writeColor, writeColor); + } + private static void setupTexture(Material material) { Samplers.DIFFUSE.makeActive(); AbstractTexture texture = Minecraft.getInstance() diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index faec3b63a..aa605f017 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -13,6 +13,7 @@ import java.util.List; import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.InstanceType; import dev.engine_room.flywheel.api.material.Material; +import dev.engine_room.flywheel.api.material.Transparency; import dev.engine_room.flywheel.api.model.Model; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; @@ -36,6 +37,7 @@ public class IndirectCullingGroup { private final List> instancers = new ArrayList<>(); private final List indirectDraws = new ArrayList<>(); private final List multiDraws = new ArrayList<>(); + private final List transparentDraws = new ArrayList<>(); private final IndirectPrograms programs; private final GlProgram cullProgram; @@ -130,6 +132,7 @@ public class IndirectCullingGroup { private void sortDraws() { multiDraws.clear(); + transparentDraws.clear(); // sort by visual type, then material indirectDraws.sort(DRAW_COMPARATOR); @@ -138,7 +141,9 @@ public class IndirectCullingGroup { // if the next draw call has a different VisualType or Material, start a new MultiDraw if (i == indirectDraws.size() - 1 || incompatibleDraws(draw1, indirectDraws.get(i + 1))) { - multiDraws.add(new MultiDraw(draw1.material(), draw1.isEmbedded(), start, i + 1)); + var dst = draw1.material() + .transparency() == Transparency.TRANSLUCENT ? 
transparentDraws : multiDraws; + dst.add(new MultiDraw(draw1.material(), draw1.isEmbedded(), start, i + 1)); start = i + 1; } } @@ -171,7 +176,7 @@ public class IndirectCullingGroup { needsDrawSort = true; } - public void submit() { + public void submitSolid() { if (nothingToDo()) { return; } @@ -183,7 +188,7 @@ public class IndirectCullingGroup { GlProgram lastProgram = null; for (var multiDraw : multiDraws) { - var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material); + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, false); if (drawProgram != lastProgram) { lastProgram = drawProgram; @@ -197,8 +202,34 @@ public class IndirectCullingGroup { } } + public void submitTransparent() { + if (nothingToDo()) { + return; + } + + buffers.bindForDraw(); + + drawBarrier(); + + GlProgram lastProgram = null; + + for (var multiDraw : transparentDraws) { + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, true); + if (drawProgram != lastProgram) { + lastProgram = drawProgram; + + // Don't need to do this unless the program changes. 
+ drawProgram.bind(); + } + + MaterialRenderState.setupOit(multiDraw.material); + + multiDraw.submit(drawProgram); + } + } + public void bindForCrumbling(Material material) { - var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material); + var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material, false); program.bind(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 5cc280750..a61677865 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -48,6 +48,8 @@ public class IndirectDrawManager extends DrawManager> { private final DepthPyramid depthPyramid; + private final WboitFrameBuffer wboitFrameBuffer; + public IndirectDrawManager(IndirectPrograms programs) { this.programs = programs; programs.acquire(); @@ -62,6 +64,8 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer = new MatrixBuffer(); depthPyramid = new DepthPyramid(programs); + + wboitFrameBuffer = new WboitFrameBuffer(programs); } @Override @@ -138,9 +142,17 @@ public class IndirectDrawManager extends DrawManager> { Uniforms.bindAll(); for (var group : cullingGroups.values()) { - group.submit(); + group.submitSolid(); } + wboitFrameBuffer.setup(); + + for (var group : cullingGroups.values()) { + group.submitTransparent(); + } + + wboitFrameBuffer.composite(); + MaterialRenderState.reset(); TextureBinder.resetLightAndOverlay(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java new file mode 100644 index 000000000..c76133709 --- /dev/null +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java @@ -0,0 +1,117 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.ARBDrawBuffersBlend; +import org.lwjgl.opengl.GL32; +import org.lwjgl.opengl.GL46; + +import com.mojang.blaze3d.platform.GlStateManager; +import com.mojang.blaze3d.systems.RenderSystem; + +import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; +import net.minecraft.client.Minecraft; + +public class WboitFrameBuffer { + + public final int fbo; + private final IndirectPrograms programs; + private final int vao; + + public int accum; + public int reveal; + + private int lastWidth = -1; + private int lastHeight = -1; + + public WboitFrameBuffer(IndirectPrograms programs) { + this.programs = programs; + fbo = GL46.glCreateFramebuffers(); + vao = GL46.glCreateVertexArrays(); + } + + public void setup() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + createTextures(mainRenderTarget.width, mainRenderTarget.height); + + // No depth writes, but we'll still use the depth test + GlStateManager._depthMask(false); + GlStateManager._enableBlend(); + ARBDrawBuffersBlend.glBlendFunciARB(0, GL46.GL_ONE, GL46.GL_ONE); // accumulation blend target + ARBDrawBuffersBlend.glBlendFunciARB(1, GL46.GL_ZERO, GL46.GL_ONE_MINUS_SRC_COLOR); // revealage blend target + GlStateManager._blendEquation(GL46.GL_FUNC_ADD); + + GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); + + GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); + + GL46.glClearBufferfv(GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); + GL46.glClearBufferfv(GL46.GL_COLOR, 1, new float[]{1, 1, 1, 1}); + } + + public void composite() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + mainRenderTarget.bindWrite(false); + + var oitCompositeProgram = 
programs.getOitCompositeProgram(); + + GlStateManager._depthMask(false); + GlStateManager._depthFunc(GL46.GL_ALWAYS); + GlStateManager._enableBlend(); + RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.SRC_ALPHA, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA); + + oitCompositeProgram.bind(); + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(accum); + + GlTextureUnit.T1.makeActive(); + GlStateManager._bindTexture(reveal); + + // Empty VAO, the actual full screen triangle is generated in the vertex shader + GlStateManager._glBindVertexArray(vao); + + GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); + } + + public void delete() { + GL46.glDeleteTextures(accum); + GL46.glDeleteTextures(reveal); + GL46.glDeleteFramebuffers(fbo); + GL46.glDeleteVertexArrays(vao); + } + + private void createTextures(int width, int height) { + if (lastWidth == width && lastHeight == height) { + return; + } + + lastWidth = width; + lastHeight = height; + + GL46.glDeleteTextures(accum); + GL46.glDeleteTextures(reveal); + + accum = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + reveal = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1}); + + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, accum, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, reveal, 0); + + GL46.glTextureStorage2D(accum, 1, GL32.GL_RGBA32F, width, height); + GL46.glTextureStorage2D(reveal, 1, GL32.GL_R8, width, height); + + for (int tex : new int[]{accum, reveal}) { + GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); + GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + 
GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + } + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index d2d4fc5d8..7bc4ec54d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -17,8 +17,20 @@ in vec2 _flw_crumblingTexCoord; flat in uvec2 _flw_ids; #endif +#ifdef _FLW_OIT + +// your first render target which is used to accumulate pre-multiplied color values +layout (location = 0) out vec4 accum; + +// your second render target which is used to store pixel revealage +layout (location = 1) out float reveal; + +#else + out vec4 _flw_outputColor; +#endif + float _flw_diffuseFactor() { if (flw_material.cardinalLightingMode == 2u) { return diffuseFromLightDirections(flw_vertexNormal); @@ -33,6 +45,11 @@ float _flw_diffuseFactor() { } } +float linearize_depth(float d, float zNear, float zFar) { + float z_n = 2.0 * d - 1.0; + return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear)); +} + void _flw_main() { flw_sampleColor = texture(flw_diffuseTex, flw_vertexTexCoord); flw_fragColor = flw_vertexColor * flw_sampleColor; @@ -99,5 +116,31 @@ void _flw_main() { } #endif - _flw_outputColor = flw_fogFilter(color); + color = flw_fogFilter(color); + + color.a = 0.9; + + #ifdef _FLW_OIT + + float depth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); + + // insert your favorite weighting function here. the color-based factor + // avoids color pollution from the edges of wispy clouds. 
the z-based + // factor gives precedence to nearer surfaces + //float weight = clamp(pow(min(1.0, color.a * 10.0) + 0.01, 3.0) * 1e8 * pow(1.0 - gl_FragCoord.z * 0.9, 3.0), 1e-2, 3e3); + float weight = max(min(1.0, max(max(color.r, color.g), color.b) * color.a), color.a) * + clamp(0.03 / (1e-5 + pow(depth / 200, 4.0)), 1e-2, 3e3); + + // blend func: GL_ONE, GL_ONE + // switch to pre-multiplied alpha and weight + accum = vec4(color.rgb * color.a, color.a) * weight; + + // blend func: GL_ZERO, GL_ONE_MINUS_SRC_ALPHA + reveal = color.a; + + #else + + _flw_outputColor = color; + + #endif } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/fullscreen.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/fullscreen.vert new file mode 100644 index 000000000..46ae47150 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/fullscreen.vert @@ -0,0 +1,4 @@ +void main() { + vec2 vertices[3] = vec2[3](vec2(-1, -1), vec2(3, -1), vec2(-1, 3)); + gl_Position = vec4(vertices[gl_VertexID], 0, 1); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag new file mode 100644 index 000000000..6c5a67419 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag @@ -0,0 +1,48 @@ +// shader outputs +layout (location = 0) out vec4 frag; + +// color accumulation buffer +layout (binding = 0) uniform sampler2D accum; + +// revealage threshold buffer +layout (binding = 1) uniform sampler2D reveal; + +// epsilon number +const float EPSILON = 0.00001f; + +// calculate floating point numbers equality accurately +bool isApproximatelyEqual(float a, float b) { + return abs(a - b) <= (abs(a) < abs(b) ? 
abs(b) : abs(a)) * EPSILON; +} + +// get the max value between three values +float max3(vec3 v) { + return max(max(v.x, v.y), v.z); +} + +void main() { + // fragment coordination + ivec2 coords = ivec2(gl_FragCoord.xy); + + // fragment revealage + float revealage = texelFetch(reveal, coords, 0).r; + + // save the blending and color texture fetch cost if there is not a transparent fragment + if (isApproximatelyEqual(revealage, 1.0f)) { + discard; + } + + // fragment color + vec4 accumulation = texelFetch(accum, coords, 0); + + // suppress overflow + if (isinf(max3(abs(accumulation.rgb)))) { + accumulation.rgb = vec3(accumulation.a); + } + + // prevent floating point precision bug + vec3 average_color = accumulation.rgb / max(accumulation.a, EPSILON); + + // blend pixels + frag = vec4(average_color, 1.0f - revealage); +} From cb87961b192b7517b21d9e9eb475b95cc0ba324c Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 16 Feb 2025 18:16:10 -0800 Subject: [PATCH 02/12] A moment, please - Implement mboit on indirect - Has issues when many high alpha fragments are stacked on top of each other. The transmittance function explodes to zero, and we end up just writing out black in the second pass. 
- Other than that, I think the approach is very sound, but hopefully a solution can be found - Needs some cleanup work with instancing, and in fact I think mboit can be implemented on GL3.2, whereas wboit relied on extensions or GL4.0 --- .../flywheel/backend/Samplers.java | 3 + .../backend/compile/IndirectPrograms.java | 2 +- .../backend/compile/InstancingPrograms.java | 2 +- .../backend/compile/PipelineCompiler.java | 24 +- .../engine/indirect/IndirectCullingGroup.java | 9 +- .../engine/indirect/IndirectDrawManager.java | 15 +- .../engine/indirect/MboitFramebuffer.java | 142 +++++ .../engine/indirect/WboitFrameBuffer.java | 117 ---- .../flywheel/backend/gl/GlCompat.java | 6 +- .../backend/glsl/parse/ShaderField.java | 28 +- .../flywheel/flywheel/internal/common.frag | 57 +- .../internal/indirect/oit_composite.frag | 42 +- .../internal/mboit/complex_algebra.glsl | 203 +++++++ .../flywheel/internal/mboit/moment_math.glsl | 500 ++++++++++++++++++ .../flywheel/internal/mboit/moment_oit.glsl | 253 +++++++++ .../mboit/trigonometric_moment_math.glsl | 311 +++++++++++ .../vanillin/visuals/ShulkerBoxVisual.java | 7 + 17 files changed, 1517 insertions(+), 204 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java delete mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java index 4441c6246..b6e40e291 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java @@ -10,4 +10,7 @@ public class Samplers { public static final GlTextureUnit INSTANCE_BUFFER = GlTextureUnit.T4; public static final GlTextureUnit LIGHT_LUT = GlTextureUnit.T5; public static final GlTextureUnit LIGHT_SECTIONS = GlTextureUnit.T6; + + public static final GlTextureUnit ZEROTH_MOMENT = GlTextureUnit.T7; + public static final GlTextureUnit MOMENTS = GlTextureUnit.T8; } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index 9d3ce83e0..cc985b278 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -164,7 +164,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { setInstance(null); } - public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader, Material material, boolean oit) { + public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader, Material material, PipelineCompiler.OitMode oit) { return pipeline.get(instanceType, contextShader, material, oit); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java index db3014402..392435849 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java @@ -70,7 +70,7 @@ public class InstancingPrograms extends AtomicReferenceCounted { } 
public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material) { - return pipeline.get(instanceType, contextShader, material, false); + return pipeline.get(instanceType, contextShader, material, PipelineCompiler.OitMode.OFF); } @Override diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java index e315dce35..f33486315 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java @@ -50,7 +50,7 @@ public final class PipelineCompiler { ALL.add(this); } - public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material, boolean oit) { + public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material, OitMode oit) { var light = material.light(); var cutout = material.cutout(); var shaders = material.shaders(); @@ -128,7 +128,7 @@ public final class PipelineCompiler { .source()); var debug = key.debugEnabled() ? "_debug" : ""; var cutout = key.useCutout() ? "_cutout" : ""; - var oit = key.oit() ? 
"_oit" : ""; + var oit = key.oit().name; return "pipeline/" + pipeline.compilerMarker() + "/frag/" + material + "/" + light + "_" + context + cutout + debug + oit; }) .requireExtensions(extensions) @@ -148,8 +148,9 @@ public final class PipelineCompiler { } }) .onCompile((key, comp) -> { - if (key.oit()) { + if (key.oit() != OitMode.OFF) { comp.define("_FLW_OIT"); + comp.define(key.oit().define); } }) .withResource(API_IMPL_FRAG) @@ -224,6 +225,21 @@ public final class PipelineCompiler { */ public record PipelineProgramKey(InstanceType instanceType, ContextShader contextShader, LightShader light, MaterialShaders materialShaders, boolean useCutout, boolean debugEnabled, - boolean oit) { + OitMode oit) { + } + + public enum OitMode { + OFF("", ""), + GENERATE("_FLW_GENERATE_MOMENTS", "_generate"), + RESOLVE("_FLW_RESOLVE_MOMENTS", "_resolve"), + ; + + public final String define; + public final String name; + + OitMode(String define, String name) { + this.define = define; + this.name = name; + } } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index aa605f017..25e38ad11 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -17,6 +17,7 @@ import dev.engine_room.flywheel.api.material.Transparency; import dev.engine_room.flywheel.api.model.Model; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.compile.PipelineCompiler; import dev.engine_room.flywheel.backend.engine.InstancerKey; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; @@ -188,7 +189,7 @@ 
public class IndirectCullingGroup { GlProgram lastProgram = null; for (var multiDraw : multiDraws) { - var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, false); + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, PipelineCompiler.OitMode.OFF); if (drawProgram != lastProgram) { lastProgram = drawProgram; @@ -202,7 +203,7 @@ public class IndirectCullingGroup { } } - public void submitTransparent() { + public void submitTransparent(PipelineCompiler.OitMode oit) { if (nothingToDo()) { return; } @@ -214,7 +215,7 @@ public class IndirectCullingGroup { GlProgram lastProgram = null; for (var multiDraw : transparentDraws) { - var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, true); + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? 
ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, oit); if (drawProgram != lastProgram) { lastProgram = drawProgram; @@ -229,7 +230,7 @@ public class IndirectCullingGroup { } public void bindForCrumbling(Material material) { - var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material, false); + var program = programs.getIndirectProgram(instanceType, ContextShader.CRUMBLING, material, PipelineCompiler.OitMode.OFF); program.bind(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index a61677865..afc62d5af 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -17,6 +17,7 @@ import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.InstanceType; import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.compile.PipelineCompiler; import dev.engine_room.flywheel.backend.engine.AbstractInstancer; import dev.engine_room.flywheel.backend.engine.CommonCrumbling; import dev.engine_room.flywheel.backend.engine.DrawManager; @@ -48,7 +49,7 @@ public class IndirectDrawManager extends DrawManager> { private final DepthPyramid depthPyramid; - private final WboitFrameBuffer wboitFrameBuffer; + private final MboitFramebuffer wboitFrameBuffer; public IndirectDrawManager(IndirectPrograms programs) { this.programs = programs; @@ -65,7 +66,7 @@ public class IndirectDrawManager extends DrawManager> { depthPyramid = new DepthPyramid(programs); - wboitFrameBuffer = new WboitFrameBuffer(programs); + wboitFrameBuffer = new MboitFramebuffer(programs); } @Override @@ -145,10 +146,16 @@ 
public class IndirectDrawManager extends DrawManager> { group.submitSolid(); } - wboitFrameBuffer.setup(); + wboitFrameBuffer.generateMoments(); for (var group : cullingGroups.values()) { - group.submitTransparent(); + group.submitTransparent(PipelineCompiler.OitMode.GENERATE); + } + + wboitFrameBuffer.resolveMoments(); + + for (var group : cullingGroups.values()) { + group.submitTransparent(PipelineCompiler.OitMode.RESOLVE); } wboitFrameBuffer.composite(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java new file mode 100644 index 000000000..06d283f20 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java @@ -0,0 +1,142 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.GL32; +import org.lwjgl.opengl.GL46; + +import com.mojang.blaze3d.platform.GlStateManager; +import com.mojang.blaze3d.systems.RenderSystem; + +import dev.engine_room.flywheel.backend.Samplers; +import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; +import net.minecraft.client.Minecraft; + +public class MboitFramebuffer { + + public final int fbo; + private final IndirectPrograms programs; + private final int vao; + + public int zerothMoment; + public int moments; + public int accumulate; + + private int lastWidth = -1; + private int lastHeight = -1; + + public MboitFramebuffer(IndirectPrograms programs) { + this.programs = programs; + fbo = GL46.glCreateFramebuffers(); + vao = GL46.glCreateVertexArrays(); + } + + public void generateMoments() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + createTextures(mainRenderTarget.width, mainRenderTarget.height); + + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(false); + 
RenderSystem.enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); + RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + + GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1}); + + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0}); + + GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); + } + + public void resolveMoments() { + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(false); + RenderSystem.enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); + RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + + Samplers.ZEROTH_MOMENT.makeActive(); + GlStateManager._bindTexture(zerothMoment); + + Samplers.MOMENTS.makeActive(); + GlStateManager._bindTexture(moments); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT2}); + + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); + + GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); + } + + public void composite() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + mainRenderTarget.bindWrite(false); + + var oitCompositeProgram = programs.getOitCompositeProgram(); + + GlStateManager._depthMask(false); + GlStateManager._depthFunc(GL46.GL_ALWAYS); + GlStateManager._enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA); + + oitCompositeProgram.bind(); + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(zerothMoment); + + GlTextureUnit.T1.makeActive(); + GlStateManager._bindTexture(accumulate); + + // Empty VAO, the actual full screen triangle is generated 
in the vertex shader + GlStateManager._glBindVertexArray(vao); + + GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); + } + + public void delete() { + GL46.glDeleteTextures(zerothMoment); + GL46.glDeleteTextures(moments); + GL46.glDeleteTextures(accumulate); + GL46.glDeleteFramebuffers(fbo); + GL46.glDeleteVertexArrays(vao); + } + + private void createTextures(int width, int height) { + if (lastWidth == width && lastHeight == height) { + return; + } + + lastWidth = width; + lastHeight = height; + + GL46.glDeleteTextures(zerothMoment); + GL46.glDeleteTextures(moments); + GL46.glDeleteTextures(accumulate); + + zerothMoment = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + moments = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + + GL46.glTextureStorage2D(zerothMoment, 1, GL32.GL_R16F, width, height); + GL46.glTextureStorage2D(moments, 1, GL32.GL_RGBA16F, width, height); + GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height); + + // for (int tex : new int[]{zerothMoment, moments, composite}) { + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + // } + + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, zerothMoment, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, moments, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT2, accumulate, 0); + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java deleted file mode 100644 index c76133709..000000000 --- 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/WboitFrameBuffer.java +++ /dev/null @@ -1,117 +0,0 @@ -package dev.engine_room.flywheel.backend.engine.indirect; - -import org.lwjgl.opengl.ARBDrawBuffersBlend; -import org.lwjgl.opengl.GL32; -import org.lwjgl.opengl.GL46; - -import com.mojang.blaze3d.platform.GlStateManager; -import com.mojang.blaze3d.systems.RenderSystem; - -import dev.engine_room.flywheel.backend.compile.IndirectPrograms; -import dev.engine_room.flywheel.backend.gl.GlTextureUnit; -import net.minecraft.client.Minecraft; - -public class WboitFrameBuffer { - - public final int fbo; - private final IndirectPrograms programs; - private final int vao; - - public int accum; - public int reveal; - - private int lastWidth = -1; - private int lastHeight = -1; - - public WboitFrameBuffer(IndirectPrograms programs) { - this.programs = programs; - fbo = GL46.glCreateFramebuffers(); - vao = GL46.glCreateVertexArrays(); - } - - public void setup() { - var mainRenderTarget = Minecraft.getInstance() - .getMainRenderTarget(); - - createTextures(mainRenderTarget.width, mainRenderTarget.height); - - // No depth writes, but we'll still use the depth test - GlStateManager._depthMask(false); - GlStateManager._enableBlend(); - ARBDrawBuffersBlend.glBlendFunciARB(0, GL46.GL_ONE, GL46.GL_ONE); // accumulation blend target - ARBDrawBuffersBlend.glBlendFunciARB(1, GL46.GL_ZERO, GL46.GL_ONE_MINUS_SRC_COLOR); // revealage blend target - GlStateManager._blendEquation(GL46.GL_FUNC_ADD); - - GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); - - GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); - - GL46.glClearBufferfv(GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); - GL46.glClearBufferfv(GL46.GL_COLOR, 1, new float[]{1, 1, 1, 1}); - } - - public void composite() { - var mainRenderTarget = Minecraft.getInstance() - .getMainRenderTarget(); - - mainRenderTarget.bindWrite(false); - - var 
oitCompositeProgram = programs.getOitCompositeProgram(); - - GlStateManager._depthMask(false); - GlStateManager._depthFunc(GL46.GL_ALWAYS); - GlStateManager._enableBlend(); - RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.SRC_ALPHA, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA); - - oitCompositeProgram.bind(); - - GlTextureUnit.T0.makeActive(); - GlStateManager._bindTexture(accum); - - GlTextureUnit.T1.makeActive(); - GlStateManager._bindTexture(reveal); - - // Empty VAO, the actual full screen triangle is generated in the vertex shader - GlStateManager._glBindVertexArray(vao); - - GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); - } - - public void delete() { - GL46.glDeleteTextures(accum); - GL46.glDeleteTextures(reveal); - GL46.glDeleteFramebuffers(fbo); - GL46.glDeleteVertexArrays(vao); - } - - private void createTextures(int width, int height) { - if (lastWidth == width && lastHeight == height) { - return; - } - - lastWidth = width; - lastHeight = height; - - GL46.glDeleteTextures(accum); - GL46.glDeleteTextures(reveal); - - accum = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - reveal = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1}); - - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, accum, 0); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, reveal, 0); - - GL46.glTextureStorage2D(accum, 1, GL32.GL_RGBA32F, width, height); - GL46.glTextureStorage2D(reveal, 1, GL32.GL_R8, width, height); - - for (int tex : new int[]{accum, reveal}) { - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); - GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); 
- GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); - } - } -} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java index 636b9ac4c..b07070bc7 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java @@ -1,7 +1,5 @@ package dev.engine_room.flywheel.backend.gl; -import java.nio.ByteBuffer; - import org.jetbrains.annotations.UnknownNullability; import org.lwjgl.PointerBuffer; import org.lwjgl.opengl.GL; @@ -13,6 +11,7 @@ import org.lwjgl.opengl.GL43; import org.lwjgl.opengl.GLCapabilities; import org.lwjgl.opengl.KHRShaderSubgroup; import org.lwjgl.system.MemoryStack; +import org.lwjgl.system.MemoryUtil; import dev.engine_room.flywheel.backend.FlwBackend; import dev.engine_room.flywheel.backend.compile.core.Compilation; @@ -67,10 +66,11 @@ public final class GlCompat { */ public static void safeShaderSource(int glId, CharSequence source) { try (MemoryStack stack = MemoryStack.stackPush()) { - final ByteBuffer sourceBuffer = stack.UTF8(source, true); + var sourceBuffer = MemoryUtil.memUTF8(source, true); final PointerBuffer pointers = stack.mallocPointer(1); pointers.put(sourceBuffer); GL20C.nglShaderSource(glId, 1, pointers.address0(), 0); + MemoryUtil.memFree(sourceBuffer); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java b/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java index 3f7871d43..c36bf4f7f 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/glsl/parse/ShaderField.java @@ -1,6 +1,5 @@ package dev.engine_room.flywheel.backend.glsl.parse; -import java.util.regex.Matcher; import java.util.regex.Pattern; import 
org.jetbrains.annotations.Nullable; @@ -34,20 +33,21 @@ public class ShaderField { * Scan the source for function definitions and "parse" them into objects that contain properties of the function. */ public static ImmutableMap parseFields(SourceLines source) { - Matcher matcher = PATTERN.matcher(source); + // Matcher matcher = PATTERN.matcher(source); + // + // ImmutableMap.Builder fields = ImmutableMap.builder(); + // while (matcher.find()) { + // Span self = Span.fromMatcher(source, matcher); + // Span location = Span.fromMatcher(source, matcher, 1); + // Span decoration = Span.fromMatcher(source, matcher, 2); + // Span type = Span.fromMatcher(source, matcher, 3); + // Span name = Span.fromMatcher(source, matcher, 4); + // + // fields.put(location.get(), new ShaderField(self, location, decoration, type, name)); + // } - ImmutableMap.Builder fields = ImmutableMap.builder(); - while (matcher.find()) { - Span self = Span.fromMatcher(source, matcher); - Span location = Span.fromMatcher(source, matcher, 1); - Span decoration = Span.fromMatcher(source, matcher, 2); - Span type = Span.fromMatcher(source, matcher, 3); - Span name = Span.fromMatcher(source, matcher, 4); - - fields.put(location.get(), new ShaderField(self, location, decoration, type, name)); - } - - return fields.build(); + return ImmutableMap.builder() + .build(); } public enum Qualifier { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index 7bc4ec54d..afd8ae647 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -1,6 +1,7 @@ #include "flywheel:internal/packed_material.glsl" #include "flywheel:internal/diffuse.glsl" #include "flywheel:internal/colorizer.glsl" +#include "flywheel:internal/mboit/moment_oit.glsl" // optimize discard usage #if 
defined(GL_ARB_conservative_depth) && defined(_FLW_USE_DISCARD) @@ -18,13 +19,13 @@ flat in uvec2 _flw_ids; #endif #ifdef _FLW_OIT - -// your first render target which is used to accumulate pre-multiplied color values -layout (location = 0) out vec4 accum; - -// your second render target which is used to store pixel revealage -layout (location = 1) out float reveal; - +#ifdef _FLW_GENERATE_MOMENTS +layout (location = 0) out float _flw_zerothMoment_out; +layout (location = 1) out vec4 _flw_moments_out; +#endif +#ifdef _FLW_RESOLVE_MOMENTS +layout (location = 0) out vec4 _flw_accumulate_out; +#endif #else out vec4 _flw_outputColor; @@ -118,26 +119,40 @@ void _flw_main() { color = flw_fogFilter(color); - color.a = 0.9; - #ifdef _FLW_OIT + float linearDepth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); - float depth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); + float lnNear = log(_flw_cullData.znear); + float lnFar = log(_flw_cullData.zfar); - // insert your favorite weighting function here. the color-based factor - // avoids color pollution from the edges of wispy clouds. the z-based - // factor gives precedence to nearer surfaces - //float weight = clamp(pow(min(1.0, color.a * 10.0) + 0.01, 3.0) * 1e8 * pow(1.0 - gl_FragCoord.z * 0.9, 3.0), 1e-2, 3e3); - float weight = max(min(1.0, max(max(color.r, color.g), color.b) * color.a), color.a) * - clamp(0.03 / (1e-5 + pow(depth / 200, 4.0)), 1e-2, 3e3); + float depth = (log(linearDepth) - lnNear); - // blend func: GL_ONE, GL_ONE - // switch to pre-multiplied alpha and weight - accum = vec4(color.rgb * color.a, color.a) * weight; + depth /= lnFar - lnNear; - // blend func: GL_ZERO, GL_ONE_MINUS_SRC_ALPHA - reveal = color.a; + depth = clamp(depth * 2. 
- 1., -1., 1.); + #ifdef _FLW_GENERATE_MOMENTS + + generateMoments(depth, 1 - color.a, vec4(0), _flw_zerothMoment_out, _flw_moments_out); + + #endif + #ifdef _FLW_RESOLVE_MOMENTS + + float tt; + float td; + resolveMoments(td, tt, depth, gl_FragCoord.xy); + + if (abs(td) < 1e-5) { + discard; + } + + color.rgb *= color.a; + + color *= td; + + _flw_accumulate_out = color; + + #endif #else _flw_outputColor = color; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag index 6c5a67419..3afe92839 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag @@ -1,48 +1,20 @@ -// shader outputs layout (location = 0) out vec4 frag; -// color accumulation buffer -layout (binding = 0) uniform sampler2D accum; - -// revealage threshold buffer -layout (binding = 1) uniform sampler2D reveal; - -// epsilon number -const float EPSILON = 0.00001f; - -// calculate floating point numbers equality accurately -bool isApproximatelyEqual(float a, float b) { - return abs(a - b) <= (abs(a) < abs(b) ? 
abs(b) : abs(a)) * EPSILON; -} - -// get the max value between three values -float max3(vec3 v) { - return max(max(v.x, v.y), v.z); -} +layout (binding = 0) uniform sampler2D zerothMoment; +layout (binding = 1) uniform sampler2D accumulate; void main() { - // fragment coordination ivec2 coords = ivec2(gl_FragCoord.xy); - // fragment revealage - float revealage = texelFetch(reveal, coords, 0).r; + float b0 = texelFetch(zerothMoment, coords, 0).r; - // save the blending and color texture fetch cost if there is not a transparent fragment - if (isApproximatelyEqual(revealage, 1.0f)) { + if (b0 < 1e-5) { discard; } - // fragment color - vec4 accumulation = texelFetch(accum, coords, 0); + vec4 accumulation = texelFetch(accumulate, coords, 0); - // suppress overflow - if (isinf(max3(abs(accumulation.rgb)))) { - accumulation.rgb = vec3(accumulation.a); - } + vec3 normalizedAccumulation = accumulation.rgb / max(accumulation.a, 1e-5); - // prevent floating point precision bug - vec3 average_color = accumulation.rgb / max(accumulation.a, EPSILON); - - // blend pixels - frag = vec4(average_color, 1.0f - revealage); + frag = vec4(normalizedAccumulation, exp(-b0)); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl new file mode 100644 index 000000000..b2cf49485 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl @@ -0,0 +1,203 @@ +/*! \file + This header defines utility functions to deal with complex numbers and + complex polynomials.*/ + +/*! Returns the complex conjugate of the given complex number (i.e. it changes + the sign of the y-component).*/ +vec2 Conjugate(vec2 Z){ + return vec2(Z.x, -Z.y); +} +/*! This function implements complex multiplication.*/ +vec2 Multiply(vec2 LHS, vec2 RHS){ + return vec2(LHS.x*RHS.x-LHS.y*RHS.y, LHS.x*RHS.y+LHS.y*RHS.x); +} +/*! 
This function computes the magnitude of the given complex number.*/ +float Magnitude(vec2 Z){ + return sqrt(dot(Z, Z)); +} +/*! This function computes the quotient of two complex numbers. The denominator + must not be zero.*/ +vec2 Divide(vec2 Numerator, vec2 Denominator){ + return vec2(Numerator.x*Denominator.x+Numerator.y*Denominator.y, -Numerator.x*Denominator.y+Numerator.y*Denominator.x)/dot(Denominator, Denominator); +} +/*! This function divides a real number by a complex number. The denominator + must not be zero.*/ +vec2 Divide(float Numerator, vec2 Denominator){ + return vec2(Numerator*Denominator.x, -Numerator*Denominator.y)/dot(Denominator, Denominator); +} +/*! This function implements computation of the reciprocal of the given non- + zero complex number.*/ +vec2 Reciprocal(vec2 Z){ + return vec2(Z.x, -Z.y)/dot(Z, Z); +} +/*! This utility function implements complex squaring.*/ +vec2 Square(vec2 Z){ + return vec2(Z.x*Z.x-Z.y*Z.y, 2.0f*Z.x*Z.y); +} +/*! This utility function implements complex computation of the third power.*/ +vec2 Cube(vec2 Z){ + return Multiply(Square(Z), Z); +} +/*! This utility function computes one square root of the given complex value. + The other one can be found using the unary minus operator. + \warning This function is continuous but not defined on the negative real + axis (and cannot be continued continuously there). + \sa SquareRoot() */ +vec2 SquareRootUnsafe(vec2 Z){ + float ZLengthSq=dot(Z, Z); + float ZLengthInv=inversesqrt(ZLengthSq); + vec2 UnnormalizedRoot=Z*ZLengthInv+vec2(1.0f, 0.0f); + float UnnormalizedRootLengthSq=dot(UnnormalizedRoot, UnnormalizedRoot); + float NormalizationFactorInvSq=UnnormalizedRootLengthSq*ZLengthInv; + float NormalizationFactor=inversesqrt(NormalizationFactorInvSq); + return NormalizationFactor*UnnormalizedRoot; +} +/*! This utility function computes one square root of the given complex value. + The other one can be found using the unary minus operator. 
+ \note This function has discontinuities for values with real part zero. + \sa SquareRootUnsafe() */ +vec2 SquareRoot(vec2 Z){ + vec2 ZPositiveRealPart=vec2(abs(Z.x), Z.y); + vec2 ComputedRoot=SquareRootUnsafe(ZPositiveRealPart); + return (Z.x>=0.0)?ComputedRoot:ComputedRoot.yx; +} +/*! This utility function computes one cubic root of the given complex value. The + other roots can be found by multiplication by cubic roots of unity. + \note This function has various discontinuities.*/ +vec2 CubicRoot(vec2 Z){ + float Argument=atan(Z.y, Z.x); + float NewArgument=Argument/3.0f; + vec2 NormalizedRoot; + sincos(NewArgument, NormalizedRoot.y, NormalizedRoot.x); + return NormalizedRoot*pow(dot(Z, Z), 1.0f/6.0f); +} + +/*! @{ + Returns the complex conjugate of the given complex vector (i.e. it negates the + y-component, the imaginary part, of every entry).*/ +mat2x2 Conjugate(mat2x2 Vector){ + return mat2x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y); +} +mat3x2 Conjugate(mat3x2 Vector){ + return mat3x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y); +} +mat4x2 Conjugate(mat4x2 Vector){ + return mat4x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y, Vector[3].x, -Vector[3].y); +} +void Conjugate(out vec2 OutConjugateVector[5], vec2 Vector[5]){ + for (int i=0;i!=5;++i){ + OutConjugateVector[i]=vec2(Vector[i].x, -Vector[i].y); + } +} +//!@} + +/*! Returns the real part of a complex number as real.*/ +float RealPart(vec2 Z){ + return Z.x; +} + +/*! 
Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function + outputs its two complex roots.*/ +void SolveQuadratic(out vec2 pOutRoot[2], vec2 A, vec2 B, vec2 C) +{ + // Normalize the coefficients + vec2 InvA=Reciprocal(A); + B=Multiply(B, InvA); + C=Multiply(C, InvA); + // Divide the middle coefficient by two + B*=0.5f; + // Apply the quadratic formula + vec2 DiscriminantRoot=SquareRoot(Square(B)-C); + pOutRoot[0]=-B-DiscriminantRoot; + pOutRoot[1]=-B+DiscriminantRoot; +} + +/*! Given coefficients of a cubic polynomial A*x^3+B*x^2+C*x+D, this function + outputs its three complex roots.*/ +void SolveCubicBlinn(out vec2 pOutRoot[3], vec2 A, vec2 B, vec2 C, vec2 D) +{ + // Normalize the polynomial + vec2 InvA=Reciprocal(A); + B=Multiply(B, InvA); + C=Multiply(C, InvA); + D=Multiply(D, InvA); + // Divide middle coefficients by three + B/=3.0f; + C/=3.0f; + // Compute the Hessian and the discriminant + vec2 Delta00=-Square(B)+C; + vec2 Delta01=-Multiply(C, B)+D; + vec2 Delta11=Multiply(B, D)-Square(C); + vec2 Discriminant=4.0f*Multiply(Delta00, Delta11)-Square(Delta01); + // Compute coefficients of the depressed cubic + // (third is zero, fourth is one) + vec2 DepressedD=-2.0f*Multiply(B, Delta00)+Delta01; + vec2 DepressedC=Delta00; + // Take the cubic root of a complex number avoiding cancellation + vec2 DiscriminantRoot=SquareRoot(-Discriminant); + DiscriminantRoot=faceforward(DiscriminantRoot, DiscriminantRoot, DepressedD); + vec2 CubedRoot=DiscriminantRoot-DepressedD; + vec2 FirstRoot=CubicRoot(0.5f*CubedRoot); + vec2 pCubicRoot[3]={ + FirstRoot, + Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), FirstRoot), + Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), FirstRoot) + }; + // Also compute the reciprocal cubic roots + vec2 InvFirstRoot=Reciprocal(FirstRoot); + vec2 pInvCubicRoot[3]={ + InvFirstRoot, + Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), InvFirstRoot), + Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), InvFirstRoot) + }; + // Turn them into roots of the depressed cubic and 
revert the depression + // transform + + for (int i=0;i!=3;++i) + { + pOutRoot[i]=pCubicRoot[i]-Multiply(DepressedC, pInvCubicRoot[i])-B; + } +} + + +/*! Given coefficients of a quartic polynomial A*x^4+B*x^3+C*x^2+D*x+E, this + function outputs its four complex roots.*/ +void SolveQuarticNeumark(out vec2 pOutRoot[4], vec2 A, vec2 B, vec2 C, vec2 D, vec2 E) +{ + // Normalize the polynomial + vec2 InvA=Reciprocal(A); + B=Multiply(B, InvA); + C=Multiply(C, InvA); + D=Multiply(D, InvA); + E=Multiply(E, InvA); + // Construct a normalized cubic + vec2 P=-2.0f*C; + vec2 Q=Square(C)+Multiply(B, D)-4.0f*E; + vec2 R=Square(D)+Multiply(Square(B), E)-Multiply(Multiply(B, C), D); + // Compute a root that is not the smallest of the cubic + vec2 pCubicRoot[3]; + SolveCubicBlinn(pCubicRoot, vec2(1.0f, 0.0f), P, Q, R); + vec2 y=(dot(pCubicRoot[1], pCubicRoot[1])>dot(pCubicRoot[0], pCubicRoot[0]))?pCubicRoot[1]:pCubicRoot[0]; + + // Solve a quadratic to obtain linear coefficients for quadratic polynomials + vec2 BB=Square(B); + vec2 fy=4.0f*y; + vec2 BB_fy=BB-fy; + vec2 tmp=SquareRoot(BB_fy); + vec2 G=(B+tmp)*0.5f; + vec2 g=(B-tmp)*0.5f; + // Construct the corresponding constant coefficients + vec2 Z=C-y; + tmp=Divide(0.5f*Multiply(B, Z)-D, tmp); + vec2 H=Z*0.5f+tmp; + vec2 h=Z*0.5f-tmp; + + // Compute the roots + vec2 pQuadraticRoot[2]; + SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), G, H); + pOutRoot[0]=pQuadraticRoot[0]; + pOutRoot[1]=pQuadraticRoot[1]; + SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), g, h); + pOutRoot[2]=pQuadraticRoot[0]; + pOutRoot[3]=pQuadraticRoot[1]; +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl new file mode 100644 index 000000000..0c2d23b72 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl @@ -0,0 +1,500 @@ +/*! 
\file + This header provides utility functions to reconstruct the transmittance + from a given vector of power moments (4, 6 or 8 power moments) at a + specified depth. As prerequisite, utility functions for computing the real + roots of polynomials up to degree four are defined. +*/ + +//#include "flywheel:internal/mboit/trigonometric_moment_math.glsl" + +void sincos(float theta, out float s, out float c) { + s = sin(theta); + c = cos(theta); +} + +float saturate(float a) { + return clamp(a, 0., 1.); +} + + + +/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function + outputs its two real roots.*/ +vec2 solveQuadratic(vec3 coeffs) +{ + coeffs[1] *= 0.5; + + float x1, x2, tmp; + + tmp = (coeffs[1] * coeffs[1] - coeffs[0] * coeffs[2]); + if (coeffs[1] >= 0) { + tmp = sqrt(tmp); + x1 = (-coeffs[2]) / (coeffs[1] + tmp); + x2 = (-coeffs[1] - tmp) / coeffs[0]; + } else { + tmp = sqrt(tmp); + x1 = (-coeffs[1] + tmp) / coeffs[0]; + x2 = coeffs[2] / (-coeffs[1] + tmp); + } + return vec2(x1, x2); +} + +/*! Code taken from the blog "Moments in Graphics" by Christoph Peters. 
+ http://momentsingraphics.de/?p=105 + This function computes the three real roots of a cubic polynomial + Coefficient[0]+Coefficient[1]*x+Coefficient[2]*x^2+Coefficient[3]*x^3.*/ +vec3 SolveCubic(vec4 Coefficient) { + // Normalize the polynomial + Coefficient.xyz /= Coefficient.w; + // Divide middle coefficients by three + Coefficient.yz /= 3.0f; + // Compute the Hessian and the discriminant + vec3 Delta = vec3( + fma(-Coefficient.z, Coefficient.z, Coefficient.y), + fma(-Coefficient.y, Coefficient.z, Coefficient.x), + dot(vec2(Coefficient.z, -Coefficient.y), Coefficient.xy) + ); + float Discriminant = dot(vec2(4.0f*Delta.x, -Delta.y), Delta.zy); + // Compute coefficients of the depressed cubic + // (third is zero, fourth is one) + vec2 Depressed = vec2( + fma(-2.0f*Coefficient.z, Delta.x, Delta.y), + Delta.x + ); + // Take the cubic root of a normalized complex number + float Theta = atan(sqrt(Discriminant), -Depressed.x) / 3.0f; + vec2 CubicRoot; + sincos(Theta, CubicRoot.y, CubicRoot.x); + // Compute the three roots, scale appropriately and + // revert the depression transform + vec3 Root = vec3( + CubicRoot.x, + dot(vec2(-0.5f, -0.5f*sqrt(3.0f)), CubicRoot), + dot(vec2(-0.5f, 0.5f*sqrt(3.0f)), CubicRoot) + ); + Root = fma(vec3(2.0f*sqrt(-Depressed.y)), Root, vec3(-Coefficient.z)); + return Root; +} + +/*! 
Given coefficients of a cubic polynomial + coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3 with three real roots, + this function returns the root of least magnitude.*/ +float solveCubicBlinnSmallest(vec4 coeffs) +{ + coeffs.xyz /= coeffs.w; + coeffs.yz /= 3.0; + + vec3 delta = vec3(fma(-coeffs.z, coeffs.z, coeffs.y), fma(-coeffs.z, coeffs.y, coeffs.x), coeffs.z * coeffs.x - coeffs.y * coeffs.y); + float discriminant = 4.0 * delta.x * delta.z - delta.y * delta.y; + + vec2 depressed = vec2(delta.z, -coeffs.x * delta.y + 2.0 * coeffs.y * delta.z); + float theta = abs(atan(coeffs.x * sqrt(discriminant), -depressed.y)) / 3.0; + vec2 sin_cos; + sincos(theta, sin_cos.x, sin_cos.y); + float tmp = 2.0 * sqrt(-depressed.x); + vec2 x = vec2(tmp * sin_cos.y, tmp * (-0.5 * sin_cos.y - 0.5 * sqrt(3.0) * sin_cos.x)); + vec2 s = (x.x + x.y < 2.0 * coeffs.y) ? vec2(-coeffs.x, x.x + coeffs.y) : vec2(-coeffs.x, x.y + coeffs.y); + + return s.x / s.y; +} + +/*! Given coefficients of a quartic polynomial + coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3+coeffs[4]*x^4 with four + real roots, this function returns all roots.*/ +vec4 solveQuarticNeumark(float coeffs[5]) +{ + // Normalization + float B = coeffs[3] / coeffs[4]; + float C = coeffs[2] / coeffs[4]; + float D = coeffs[1] / coeffs[4]; + float E = coeffs[0] / coeffs[4]; + + // Compute coefficients of the cubic resolvent + float P = -2.0*C; + float Q = C*C + B*D - 4.0*E; + float R = D*D + B*B*E -B*C*D; + + // Obtain the smallest cubic root + float y = solveCubicBlinnSmallest(vec4(R, Q, P, 1.0)); + + float BB = B*B; + float fy = 4.0 * y; + float BB_fy = BB - fy; + + float Z = C - y; + float ZZ = Z*Z; + float fE = 4.0 * E; + float ZZ_fE = ZZ - fE; + + float G, g, H, h; + // Compute the coefficients of the quadratics adaptively using the two + // proposed factorizations by Neumark. Choose the appropriate + // factorizations using the heuristic proposed by Herbison-Evans. 
+ if (y < 0 || (ZZ + fE) * BB_fy > ZZ_fE * (BB + fy)) { + float tmp = sqrt(BB_fy); + G = (B + tmp) * 0.5; + g = (B - tmp) * 0.5; + + tmp = (B*Z - 2.0*D) / (2.0*tmp); + H = fma(Z, 0.5, tmp); + h = fma(Z, 0.5, -tmp); + } else { + float tmp = sqrt(ZZ_fE); + H = (Z + tmp) * 0.5; + h = (Z - tmp) * 0.5; + + tmp = (B*Z - 2.0*D) / (2.0*tmp); + G = fma(B, 0.5, tmp); + g = fma(B, 0.5, -tmp); + } + // Solve the quadratics + return vec4(solveQuadratic(vec3(1.0, G, H)), solveQuadratic(vec3(1.0, g, h))); +} + +/*! Definition of utility functions for quantization and dequantization of + power moments stored in 16 bits per moment. */ +void offsetMoments(inout vec2 b_even, inout vec2 b_odd, float sign) +{ + b_odd += 0.5 * sign; +} + +void quantizeMoments(out vec2 b_even_q, out vec2 b_odd_q, vec2 b_even, vec2 b_odd) +{ + b_odd_q = b_odd * mat2x2(1.5f, sqrt(3.0f)*0.5f, -2.0f, -sqrt(3.0f)*2.0f / 9.0f); + b_even_q = b_even * mat2x2(4.0f, 0.5f, -4.0f, 0.5f); +} + +void offsetAndDequantizeMoments(out vec2 b_even, out vec2 b_odd, vec2 b_even_q, vec2 b_odd_q) +{ + offsetMoments(b_even_q, b_odd_q, -1.0); + b_odd = b_odd_q * mat2x2(-1.0f / 3.0f, -0.75f, sqrt(3.0f), 0.75f*sqrt(3.0f)); + b_even = b_even_q * mat2x2(0.125f, -0.125f, 1.0f, 1.0f); +} + +void offsetMoments(inout vec3 b_even, inout vec3 b_odd, float sign) +{ + b_odd += 0.5 * sign; + b_even.z += 0.018888946f * sign; +} + +void quantizeMoments(out vec3 b_even_q, out vec3 b_odd_q, vec3 b_even, vec3 b_odd) +{ + const mat3x3 QuantizationMatrixOdd = mat3x3( + 2.5f, -1.87499864450f, 1.26583039016f, + -10.0f, 4.20757543111f, -1.47644882902f, + 8.0f, -1.83257678661f, 0.71061660238f); + const mat3x3 QuantizationMatrixEven = mat3x3( + 4.0f, 9.0f, -0.57759806484f, + -4.0f, -24.0f, 4.61936647543f, + 0.0f, 16.0f, -3.07953906655f); + b_odd_q = b_odd * QuantizationMatrixOdd; + b_even_q = b_even * QuantizationMatrixEven; +} + +void offsetAndDequantizeMoments(out vec3 b_even, out vec3 b_odd, vec3 b_even_q, vec3 b_odd_q) +{ + const mat3x3 
QuantizationMatrixOdd = mat3x3( + -0.02877789192f, 0.09995235706f, 0.25893353755f, + 0.47635550422f, 0.84532580931f, 0.90779616657f, + 1.55242808973f, 1.05472570761f, 0.83327335647f); + const mat3x3 QuantizationMatrixEven = mat3x3( + 0.00001253044f, -0.24998746956f, -0.37498825271f, + 0.16668494186f, 0.16668494186f, 0.21876713299f, + 0.86602540579f, 0.86602540579f, 0.81189881793f); + offsetMoments(b_even_q, b_odd_q, -1.0); + b_odd = b_odd_q * QuantizationMatrixOdd; + b_even = b_even_q * QuantizationMatrixEven; +} + +void offsetMoments(inout vec4 b_even, inout vec4 b_odd, float sign) +{ + b_odd += 0.5 * sign; + b_even += vec4(0.972481993925964, 1.0, 0.999179192513328, 0.991778293073131) * sign; +} + +void quantizeMoments(out vec4 b_even_q, out vec4 b_odd_q, vec4 b_even, vec4 b_odd) +{ + const mat4x4 mat_odd = mat4x4(3.48044635732474, -27.5760737514826, 55.1267384344761, -31.5311110403183, + 1.26797185782836, -0.928755808743913, -2.07520453231032, 1.23598848322588, + -2.1671560004294, 6.17950199592966, -0.276515571579297, -4.23583042392097, + 0.974332879165755, -0.443426830933027, -0.360491648368785, 0.310149466050223); + const mat4x4 mat_even = mat4x4(0.280504133158527, -0.757633844606942, 0.392179589334688, -0.887531871812237, + -2.01362265883247, 0.221551373038988, -1.06107954265125, 2.83887201588367, + -7.31010494985321, 13.9855979699139, -0.114305766176437, -7.4361899359832, + -15.8954215629556, 79.6186327084103, -127.457278992502, 63.7349456687829); + b_odd_q = mat_odd * b_odd; + b_even_q = mat_even * b_even; +} + +void offsetAndDequantizeMoments(out vec4 b_even, out vec4 b_odd, vec4 b_even_q, vec4 b_odd_q) +{ + const mat4x4 mat_odd = mat4x4(-0.00482399708502382, -0.423201508674231, 0.0348312382605129, 1.67179208266592, + -0.0233402218644408, -0.832829097046478, 0.0193406040499625, 1.21021509068975, + -0.010888537031885, -0.926393772997063, -0.11723394414779, 0.983723301818275, + -0.0308713357806732, -0.937989172670245, -0.218033377677099, 0.845991731322996); + 
const mat4x4 mat_even = mat4x4(-0.976220278891035, -0.456139260269401, -0.0504335521016742, 0.000838800390651085, + -1.04828341778299, -0.229726640510149, 0.0259608334616091, -0.00133632693205861, + -1.03115268628604, -0.077844420809897, 0.00443408851014257, -0.0103744938457406, + -0.996038443434636, 0.0175438624416783, -0.0361414253243963, -0.00317839994022725); + offsetMoments(b_even_q, b_odd_q, -1.0); + b_odd = mat_odd * b_odd_q; + b_even = mat_even * b_even_q; +} + +/*! This function reconstructs the transmittance at the given depth from four + normalized power moments and the given zeroth moment.*/ +float computeTransmittanceAtDepthFrom4PowerMoments(float b_0, vec2 b_even, vec2 b_odd, float depth, float bias, float overestimation, vec4 bias_vector) +{ + vec4 b = vec4(b_odd.x, b_even.x, b_odd.y, b_even.y); + // Bias input data to avoid artifacts + b = mix(b, bias_vector, bias); + vec3 z; + z[0] = depth; + + // Compute a Cholesky factorization of the Hankel matrix B storing only non- + // trivial entries or related products + float L21D11=fma(-b[0], b[1], b[2]); + float D11=fma(-b[0], b[0], b[1]); + float InvD11=1.0f/D11; + float L21=L21D11*InvD11; + float SquaredDepthVariance=fma(-b[1], b[1], b[3]); + float D22=fma(-L21D11, L21, SquaredDepthVariance); + + // Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T + vec3 c=vec3(1.0f, z[0], z[0]*z[0]); + // Forward substitution to solve L*c1=bz + c[1]-=b.x; + c[2]-=b.y+L21*c[1]; + // Scaling to solve D*c2=c1 + c[1]*=InvD11; + c[2]/=D22; + // Backward substitution to solve L^T*c3=c2 + c[1]-=L21*c[2]; + c[0]-=dot(c.yz, b.xy); + // Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions + // z[1] and z[2] + float InvC2=1.0f/c[2]; + float p=c[1]*InvC2; + float q=c[0]*InvC2; + float D=(p*p*0.25f)-q; + float r=sqrt(D); + z[1]=-p*0.5f-r; + z[2]=-p*0.5f+r; + // Compute the absorbance by summing the appropriate weights + vec3 polynomial; + vec3 weight_factor = vec3(overestimation, (z[1] < z[0])?1.0f:0.0f, 
(z[2] < z[0])?1.0f:0.0f); + float f0=weight_factor[0]; + float f1=weight_factor[1]; + float f2=weight_factor[2]; + float f01=(f1-f0)/(z[1]-z[0]); + float f12=(f2-f1)/(z[2]-z[1]); + float f012=(f12-f01)/(z[2]-z[0]); + polynomial[0]=f012; + polynomial[1]=polynomial[0]; + polynomial[0]=f01-polynomial[0]*z[1]; + polynomial[2]=polynomial[1]; + polynomial[1]=polynomial[0]-polynomial[1]*z[0]; + polynomial[0]=f0-polynomial[0]*z[0]; + float absorbance = polynomial[0] + dot(b.xy, polynomial.yz);; + // Turn the normalized absorbance into transmittance + return saturate(exp(-b_0 * absorbance)); +} + +/*! This function reconstructs the transmittance at the given depth from six + normalized power moments and the given zeroth moment.*/ +float computeTransmittanceAtDepthFrom6PowerMoments(float b_0, vec3 b_even, vec3 b_odd, float depth, float bias, float overestimation, float bias_vector[6]) +{ + float b[6] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z }; + // Bias input data to avoid artifacts + for (int i = 0; i != 6; ++i) { + b[i] = mix(b[i], bias_vector[i], bias); + } + + vec4 z; + z[0] = depth; + + // Compute a Cholesky factorization of the Hankel matrix B storing only non- + // trivial entries or related products + float InvD11 = 1.0f / fma(-b[0], b[0], b[1]); + float L21D11 = fma(-b[0], b[1], b[2]); + float L21 = L21D11*InvD11; + float D22 = fma(-L21D11, L21, fma(-b[1], b[1], b[3])); + float L31D11 = fma(-b[0], b[2], b[3]); + float L31 = L31D11*InvD11; + float InvD22 = 1.0f / D22; + float L32D22 = fma(-L21D11, L31, fma(-b[1], b[2], b[4])); + float L32 = L32D22*InvD22; + float D33 = fma(-b[2], b[2], b[5]) - dot(vec2(L31D11, L32D22), vec2(L31, L32)); + float InvD33 = 1.0f / D33; + + // Construct the polynomial whose roots have to be points of support of the + // canonical distribution: bz=(1,z[0],z[0]*z[0],z[0]*z[0]*z[0])^T + vec4 c; + c[0] = 1.0f; + c[1] = z[0]; + c[2] = c[1] * z[0]; + c[3] = c[2] * z[0]; + // Forward substitution to solve L*c1=bz + c[1] -= 
b[0]; + c[2] -= fma(L21, c[1], b[1]); + c[3] -= b[2] + dot(vec2(L31, L32), c.yz); + // Scaling to solve D*c2=c1 + c.yzw *= vec3(InvD11, InvD22, InvD33); + // Backward substitution to solve L^T*c3=c2 + c[2] -= L32*c[3]; + c[1] -= dot(vec2(L21, L31), c.zw); + c[0] -= dot(vec3(b[0], b[1], b[2]), c.yzw); + + // Solve the cubic equation + z.yzw = SolveCubic(c); + + // Compute the absorbance by summing the appropriate weights + vec4 weigth_factor; + weigth_factor[0] = overestimation; + weigth_factor.yzw = vec3(greaterThan(z.yzw, z.xxx)); + // Construct an interpolation polynomial + float f0 = weigth_factor[0]; + float f1 = weigth_factor[1]; + float f2 = weigth_factor[2]; + float f3 = weigth_factor[3]; + float f01 = (f1 - f0) / (z[1] - z[0]); + float f12 = (f2 - f1) / (z[2] - z[1]); + float f23 = (f3 - f2) / (z[3] - z[2]); + float f012 = (f12 - f01) / (z[2] - z[0]); + float f123 = (f23 - f12) / (z[3] - z[1]); + float f0123 = (f123 - f012) / (z[3] - z[0]); + vec4 polynomial; + // f012+f0123 *(z-z2) + polynomial[0] = fma(-f0123, z[2], f012); + polynomial[1] = f0123; + // *(z-z1) +f01 + polynomial[2] = polynomial[1]; + polynomial[1] = fma(polynomial[1], -z[1], polynomial[0]); + polynomial[0] = fma(polynomial[0], -z[1], f01); + // *(z-z0) +f0 + polynomial[3] = polynomial[2]; + polynomial[2] = fma(polynomial[2], -z[0], polynomial[1]); + polynomial[1] = fma(polynomial[1], -z[0], polynomial[0]); + polynomial[0] = fma(polynomial[0], -z[0], f0); + float absorbance = dot(polynomial, vec4 (1.0, b[0], b[1], b[2])); + // Turn the normalized absorbance into transmittance + return saturate(exp(-b_0 * absorbance)); +} + +/*! 
This function reconstructs the transmittance at the given depth from eight + normalized power moments and the given zeroth moment.*/ +float computeTransmittanceAtDepthFrom8PowerMoments(float b_0, vec4 b_even, vec4 b_odd, float depth, float bias, float overestimation, float bias_vector[8]) +{ + float b[8] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z, b_odd.w, b_even.w }; + // Bias input data to avoid artifacts + for (int i = 0; i != 8; ++i) { + b[i] = mix(b[i], bias_vector[i], bias); + } + + float z[5]; + z[0] = depth; + + // Compute a Cholesky factorization of the Hankel matrix B storing only non-trivial entries or related products + float D22 = fma(-b[0], b[0], b[1]); + float InvD22 = 1.0 / D22; + float L32D22 = fma(-b[1], b[0], b[2]); + float L32 = L32D22 * InvD22; + float L42D22 = fma(-b[2], b[0], b[3]); + float L42 = L42D22 * InvD22; + float L52D22 = fma(-b[3], b[0], b[4]); + float L52 = L52D22 * InvD22; + + float D33 = fma(-L32, L32D22, fma(-b[1], b[1], b[3])); + float InvD33 = 1.0 / D33; + float L43D33 = fma(-L42, L32D22, fma(-b[2], b[1], b[4])); + float L43 = L43D33 * InvD33; + float L53D33 = fma(-L52, L32D22, fma(-b[3], b[1], b[5])); + float L53 = L53D33 * InvD33; + + float D44 = fma(-b[2], b[2], b[5]) - dot(vec2(L42, L43), vec2(L42D22, L43D33)); + float InvD44 = 1.0 / D44; + float L54D44 = fma(-b[3], b[2], b[6]) - dot(vec2(L52, L53), vec2(L42D22, L43D33)); + float L54 = L54D44 * InvD44; + + float D55 = fma(-b[3], b[3], b[7]) - dot(vec3(L52, L53, L54), vec3(L52D22, L53D33, L54D44)); + float InvD55 = 1.0 / D55; + + // Construct the polynomial whose roots have to be points of support of the + // Canonical distribution: + // bz = (1,z[0],z[0]^2,z[0]^3,z[0]^4)^T + float c[5]; + c[0] = 1.0; + c[1] = z[0]; + c[2] = c[1] * z[0]; + c[3] = c[2] * z[0]; + c[4] = c[3] * z[0]; + + // Forward substitution to solve L*c1 = bz + c[1] -= b[0]; + c[2] -= fma(L32, c[1], b[1]); + c[3] -= b[2] + dot(vec2(L42, L43), vec2(c[1], c[2])); + c[4] -= b[3] + 
dot(vec3(L52, L53, L54), vec3(c[1], c[2], c[3])); + + // Scaling to solve D*c2 = c1 + //c = c .*[1, InvD22, InvD33, InvD44, InvD55]; + c[1] *= InvD22; + c[2] *= InvD33; + c[3] *= InvD44; + c[4] *= InvD55; + + // Backward substitution to solve L^T*c3 = c2 + c[3] -= L54 * c[4]; + c[2] -= dot(vec2(L53, L43), vec2(c[4], c[3])); + c[1] -= dot(vec3(L52, L42, L32), vec3(c[4], c[3], c[2])); + c[0] -= dot(vec4(b[3], b[2], b[1], b[0]), vec4(c[4], c[3], c[2], c[1])); + + // Solve the quartic equation + vec4 zz = solveQuarticNeumark(c); + z[1] = zz[0]; + z[2] = zz[1]; + z[3] = zz[2]; + z[4] = zz[3]; + + // Compute the absorbance by summing the appropriate weights + vec4 weigth_factor = vec4(lessThanEqual(vec4(z[1], z[2], z[3], z[4]), z[0].xxxx)); + // Construct an interpolation polynomial + float f0 = overestimation; + float f1 = weigth_factor[0]; + float f2 = weigth_factor[1]; + float f3 = weigth_factor[2]; + float f4 = weigth_factor[3]; + float f01 = (f1 - f0) / (z[1] - z[0]); + float f12 = (f2 - f1) / (z[2] - z[1]); + float f23 = (f3 - f2) / (z[3] - z[2]); + float f34 = (f4 - f3) / (z[4] - z[3]); + float f012 = (f12 - f01) / (z[2] - z[0]); + float f123 = (f23 - f12) / (z[3] - z[1]); + float f234 = (f34 - f23) / (z[4] - z[2]); + float f0123 = (f123 - f012) / (z[3] - z[0]); + float f1234 = (f234 - f123) / (z[4] - z[1]); + float f01234 = (f1234 - f0123) / (z[4] - z[0]); + + float Polynomial_0; + vec4 Polynomial; + // f0123 + f01234 * (z - z3) + Polynomial_0 = fma(-f01234, z[3], f0123); + Polynomial[0] = f01234; + // * (z - z2) + f012 + Polynomial[1] = Polynomial[0]; + Polynomial[0] = fma(-Polynomial[0], z[2], Polynomial_0); + Polynomial_0 = fma(-Polynomial_0, z[2], f012); + // * (z - z1) + f01 + Polynomial[2] = Polynomial[1]; + Polynomial[1] = fma(-Polynomial[1], z[1], Polynomial[0]); + Polynomial[0] = fma(-Polynomial[0], z[1], Polynomial_0); + Polynomial_0 = fma(-Polynomial_0, z[1], f01); + // * (z - z0) + f1 + Polynomial[3] = Polynomial[2]; + Polynomial[2] = 
fma(-Polynomial[2], z[0], Polynomial[1]); + Polynomial[1] = fma(-Polynomial[1], z[0], Polynomial[0]); + Polynomial[0] = fma(-Polynomial[0], z[0], Polynomial_0); + Polynomial_0 = fma(-Polynomial_0, z[0], f0); + float absorbance = Polynomial_0 + dot(Polynomial, vec4(b[0], b[1], b[2], b[3])); + // Turn the normalized absorbance into transmittance + return saturate(exp(-b_0 * absorbance)); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl new file mode 100644 index 000000000..f26d5582e --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl @@ -0,0 +1,253 @@ +/*! \file + This header provides the functionality to create the vectors of moments and + to blend surfaces together with an appropriately reconstructed + transmittance. It is needed for both additive passes of moment-based OIT. +*/ + +//cbuffer MomentOIT +//{ +// struct { +// vec4 wrapping_zone_parameters; +// float overestimation; +// float moment_bias; +// }MomentOIT; +//}; + +#include "flywheel:internal/mboit/moment_math.glsl" + +const float moment_bias = 0.25; +const float overestimation = 0.25; +const vec4 wrapping_zone_parameters = vec4(0.); + + +void clip(float a) { + if (a < 0.) { + discard; + } +} + +// jozu: The trigonometric moments and higher order power moments rely on a second render target +// which the java side is not set up to support. Trying to enable them as is will cause compile errors also. +#define NUM_MOMENTS 4 + +#define SINGLE_PRECISION 1 + +#ifdef _FLW_GENERATE_MOMENTS +/*! Generation of moments in case that rasterizer ordered views are used. + This includes the case if moments are stored in 16 bits. */ + +/*! This functions relies on fixed function additive blending to compute the + vector of moments.moment vector. 
The shader that calls this function must + provide the required render targets.*/ +#if NUM_MOMENTS == 4 +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b) +#elif NUM_MOMENTS == 6 +#if USE_R_RG_RBBA_FOR_MBOIT6 +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec4 b_3456) +#else +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec2 b_34, out vec2 b_56) +#endif +#elif NUM_MOMENTS == 8 +void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b_even, out vec4 b_odd) +#endif +{ + transmittance = max(transmittance, 0.000001); + float absorbance = -log(transmittance); + + b_0 = absorbance; + #if TRIGONOMETRIC + float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y); + vec2 circle_point = vec2(sin(phas), cos(phase)); + + vec2 circle_point_pow2 = Multiply(circle_point, circle_point); + #if NUM_MOMENTS == 4 + b = vec4(circle_point, circle_point_pow2) * absorbance; + #elif NUM_MOMENTS == 6 + b_12 = circle_point * absorbance; + #if USE_R_RG_RBBA_FOR_MBOIT6 + b_3456 = vec4(circle_point_pow2, Multiply(circle_point, circle_point_pow2)) * absorbance; + #else + b_34 = circle_point_pow2 * absorbance; + b_56 = Multiply(circle_point, circle_point_pow2) * absorbance; + #endif + #elif NUM_MOMENTS == 8 + b_even = vec4(circle_point_pow2, Multiply(circle_point_pow2, circle_point_pow2)) * absorbance; + b_odd = vec4(circle_point, Multiply(circle_point, circle_point_pow2)) * absorbance; + #endif + #else + float depth_pow2 = depth * depth; + float depth_pow4 = depth_pow2 * depth_pow2; + #if NUM_MOMENTS == 4 + b = vec4(depth, depth_pow2, depth_pow2 * depth, depth_pow4) * absorbance; + #elif NUM_MOMENTS == 6 + b_12 = vec2(depth, depth_pow2) * absorbance; + #if USE_R_RG_RBBA_FOR_MBOIT6 + b_3456 = vec4(depth_pow2 * 
depth, depth_pow4, depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance; + #else + b_34 = vec2(depth_pow2 * depth, depth_pow4) * absorbance; + b_56 = vec2(depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance; + #endif + #elif NUM_MOMENTS == 8 + float depth_pow6 = depth_pow4 * depth_pow2; + b_even = vec4(depth_pow2, depth_pow4, depth_pow6, depth_pow6 * depth_pow2) * absorbance; + b_odd = vec4(depth, depth_pow2 * depth, depth_pow4 * depth, depth_pow6 * depth) * absorbance; + #endif + #endif +} + +#else//MOMENT_GENERATION is disabled + +layout (binding = 7) uniform sampler2D _flw_zeroth_moment_sampler; +layout (binding = 8) uniform sampler2D _flw_moments_sampler; +#if USE_R_RG_RBBA_FOR_MBOIT6 +uniform sampler2D extra_moments; +#endif + +/*! This function is to be called from the shader that composites the + transparent fragments. It reads the moments and calls the appropriate + function to reconstruct the transmittance at the specified depth.*/ +void resolveMoments(out float transmittance_at_depth, out float total_transmittance, float depth, vec2 sv_pos) +{ + ivec2 idx0 = ivec2(sv_pos); + ivec2 idx1 = idx0; + + transmittance_at_depth = 1; + total_transmittance = 1; + + float b_0 = texelFetch(_flw_zeroth_moment_sampler, idx0, 0).x; + clip(b_0 - 0.00100050033f); + total_transmittance = exp(-b_0); + + #if NUM_MOMENTS == 4 + #if TRIGONOMETRIC + vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0); + vec2 trig_b[2]; + trig_b[0] = b_tmp.xy; + trig_b[1] = b_tmp.zw; + #if SINGLE_PRECISION + trig_b[0] /= b_0; + trig_b[1] /= b_0; + #else + trig_b[0] = fma(trig_b[0], 2.0, -1.0); + trig_b[1] = fma(trig_b[1], 2.0, -1.0); + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom2TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); + #else + vec4 b_1234 = texelFetch(_flw_moments_sampler, idx0, 0).xyzw; + #if SINGLE_PRECISION + vec2 b_even = b_1234.yw; + vec2 b_odd = b_1234.xz; + + b_even /= b_0; + b_odd /= b_0; + + 
const vec4 bias_vector = vec4(0, 0.375, 0, 0.375); + #else + vec2 b_even_q = b_1234.yw; + vec2 b_odd_q = b_1234.xz; + + // Dequantize the moments + vec2 b_even; + vec2 b_odd; + offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); + const vec4 bias_vector = vec4(0, 0.628, 0, 0.628); + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom4PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); + #endif + #elif NUM_MOMENTS == 6 + ivec2 idx2 = idx0; + #if TRIGONOMETRIC + vec2 trig_b[3]; + trig_b[0] = texelFetch(_flw_moments_sampler, idx0, 0).xy; + #if USE_R_RG_RBBA_FOR_MBOIT6 + vec4 tmp = texelFetch(extra_moments, idx0, 0); + trig_b[1] = tmp.xy; + trig_b[2] = tmp.zw; + #else + trig_b[1] = texelFetch(_flw_moments_sampler, idx1, 0).xy; + trig_b[2] = texelFetch(_flw_moments_sampler, idx2, 0).xy; + #endif + #if SINGLE_PRECISION + trig_b[0] /= b_0; + trig_b[1] /= b_0; + trig_b[2] /= b_0; + #else + trig_b[0] = fma(trig_b[0], 2.0, -1.0); + trig_b[1] = fma(trig_b[1], 2.0, -1.0); + trig_b[2] = fma(trig_b[2], 2.0, -1.0); + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom3TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); + #else + vec2 b_12 = texelFetch(_flw_moments_sampler, idx0, 0).xy; + #if USE_R_RG_RBBA_FOR_MBOIT6 + vec4 tmp = texelFetch(extra_moments, idx0, 0); + vec2 b_34 = tmp.xy; + vec2 b_56 = tmp.zw; + #else + vec2 b_34 = texelFetch(_flw_moments_sampler, idx1, 0).xy; + vec2 b_56 = texelFetch(_flw_moments_sampler, idx2, 0).xy; + #endif + #if SINGLE_PRECISION + vec3 b_even = vec3(b_12.y, b_34.y, b_56.y); + vec3 b_odd = vec3(b_12.x, b_34.x, b_56.x); + + b_even /= b_0; + b_odd /= b_0; + + const float bias_vector[6] = { 0, 0.48, 0, 0.451, 0, 0.45 }; + #else + vec3 b_even_q = vec3(b_12.y, b_34.y, b_56.y); + vec3 b_odd_q = vec3(b_12.x, b_34.x, b_56.x); + // Dequantize b_0 and the other moments + vec3 b_even; + vec3 b_odd; + offsetAndDequantizeMoments(b_even, b_odd, 
b_even_q, b_odd_q); + + const float bias_vector[6] = { 0, 0.5566, 0, 0.489, 0, 0.47869382 }; + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom6PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); + #endif + #elif NUM_MOMENTS == 8 + #if TRIGONOMETRIC + vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0); + vec4 b_tmp2 = texelFetch(_flw_moments_sampler, idx1, 0); + #if SINGLE_PRECISION + vec2 trig_b[4] = { + b_tmp2.xy / b_0, + b_tmp.xy / b_0, + b_tmp2.zw / b_0, + b_tmp.zw / b_0 + }; + #else + vec2 trig_b[4] = { + fma(b_tmp2.xy, 2.0, -1.0), + fma(b_tmp.xy, 2.0, -1.0), + fma(b_tmp2.zw, 2.0, -1.0), + fma(b_tmp.zw, 2.0, -1.0) + }; + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom4TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); + #else + #if SINGLE_PRECISION + vec4 b_even = texelFetch(_flw_moments_sampler, idx0, 0); + vec4 b_odd = texelFetch(_flw_moments_sampler, idx1, 0); + + b_even /= b_0; + b_odd /= b_0; + const float bias_vector[8] = { 0, 0.75, 0, 0.67666666666666664, 0, 0.63, 0, 0.60030303030303034 }; + #else + vec4 b_even_q = texelFetch(_flw_moments_sampler, idx0, 0); + vec4 b_odd_q = texelFetch(_flw_moments_sampler, idx1, 0); + + // Dequantize the moments + vec4 b_even; + vec4 b_odd; + offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); + const float bias_vector[8] = { 0, 0.42474916387959866, 0, 0.22407802675585284, 0, 0.15369230769230768, 0, 0.12900440529089119 }; + #endif + transmittance_at_depth = computeTransmittanceAtDepthFrom8PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); + #endif + #endif + +} +#endif diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl new file mode 100644 index 000000000..4fc142d8f --- /dev/null +++ 
b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl @@ -0,0 +1,311 @@ +/*! \file + This header provides the utility functions to reconstruct the transmittance + from a given vector of trigonometric moments (2, 3 or 4 trigonometric + moments) at a specified depth.*/ +#include "flywheel:internal/mboit/complex_algebra.glsl" + +/*! This utility function turns a point on the unit circle into a scalar + parameter. It is guaranteed to grow monotonically for (cos(phi),sin(phi)) + with phi in 0 to 2*pi. There are no other guarantees. In particular it is + not an arclength parametrization. If you change this function, you must + also change circleToParameter() in MomentOIT.cpp.*/ +float circleToParameter(vec2 circle_point){ + float result=abs(circle_point.y)-abs(circle_point.x); + result=(circle_point.x<0.0f)?(2.0f-result):result; + return (circle_point.y<0.0f)?(6.0f-result):result; +} + +/*! This utility function returns the appropriate weight factor for a root at + the given location. Both inputs are supposed to be unit vectors. 
If a + circular arc going counter clockwise from (1.0,0.0) meets root first, it + returns 1.0, otherwise 0.0 or a linear ramp in the wrapping zone.*/ +float getRootWeightFactor(float reference_parameter, float root_parameter, vec4 wrapping_zone_parameters){ + float binary_weight_factor=(root_parameter Date: Mon, 17 Feb 2025 20:12:17 -0800 Subject: [PATCH 03/12] 8 moments - Use 8 power moments - Fix compile errors with trig moments --- .../flywheel/backend/Samplers.java | 3 +- .../engine/indirect/MboitFramebuffer.java | 73 +++++++++++++++---- .../flywheel/flywheel/internal/common.frag | 5 +- .../internal/mboit/complex_algebra.glsl | 16 +++- .../flywheel/internal/mboit/moment_math.glsl | 12 +-- .../flywheel/internal/mboit/moment_oit.glsl | 48 ++++++------ .../mboit/trigonometric_moment_math.glsl | 8 +- .../vanillin/visuals/ShulkerBoxVisual.java | 7 -- 8 files changed, 104 insertions(+), 68 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java index b6e40e291..e89acccbb 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java @@ -12,5 +12,6 @@ public class Samplers { public static final GlTextureUnit LIGHT_SECTIONS = GlTextureUnit.T6; public static final GlTextureUnit ZEROTH_MOMENT = GlTextureUnit.T7; - public static final GlTextureUnit MOMENTS = GlTextureUnit.T8; + public static final GlTextureUnit MOMENTS0 = GlTextureUnit.T8; + public static final GlTextureUnit MOMENTS1 = GlTextureUnit.T9; } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java index 06d283f20..747020e73 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java @@ -10,6 +10,7 @@ import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import net.minecraft.client.Minecraft; +import net.minecraft.util.Mth; public class MboitFramebuffer { @@ -18,7 +19,8 @@ public class MboitFramebuffer { private final int vao; public int zerothMoment; - public int moments; + public int moments0; + public int moments1; public int accumulate; private int lastWidth = -1; @@ -44,10 +46,11 @@ public class MboitFramebuffer { GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1}); + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, new float[]{0, 0, 0, 0}); GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } @@ -62,10 +65,13 @@ public class MboitFramebuffer { Samplers.ZEROTH_MOMENT.makeActive(); GlStateManager._bindTexture(zerothMoment); - Samplers.MOMENTS.makeActive(); - GlStateManager._bindTexture(moments); + Samplers.MOMENTS0.makeActive(); + GlStateManager._bindTexture(moments0); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT2}); + Samplers.MOMENTS1.makeActive(); + GlStateManager._bindTexture(moments1); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT3}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); @@ -100,13 +106,18 @@ public class MboitFramebuffer { } public void delete() { - 
GL46.glDeleteTextures(zerothMoment); - GL46.glDeleteTextures(moments); - GL46.glDeleteTextures(accumulate); + deleteTextures(); GL46.glDeleteFramebuffers(fbo); GL46.glDeleteVertexArrays(vao); } + private void deleteTextures() { + GL46.glDeleteTextures(zerothMoment); + GL46.glDeleteTextures(moments0); + GL46.glDeleteTextures(moments1); + GL46.glDeleteTextures(accumulate); + } + private void createTextures(int width, int height) { if (lastWidth == width && lastHeight == height) { return; @@ -115,16 +126,17 @@ public class MboitFramebuffer { lastWidth = width; lastHeight = height; - GL46.glDeleteTextures(zerothMoment); - GL46.glDeleteTextures(moments); - GL46.glDeleteTextures(accumulate); + deleteTextures(); zerothMoment = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - moments = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + moments0 = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + moments1 = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); GL46.glTextureStorage2D(zerothMoment, 1, GL32.GL_R16F, width, height); - GL46.glTextureStorage2D(moments, 1, GL32.GL_RGBA16F, width, height); + GL46.glTextureStorage2D(moments0, 1, GL32.GL_RGBA16F, width, height); + GL46.glTextureStorage2D(moments1, 1, GL32.GL_RGBA16F, width, height); + GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height); // for (int tex : new int[]{zerothMoment, moments, composite}) { @@ -136,7 +148,38 @@ public class MboitFramebuffer { // } GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, zerothMoment, 0); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, moments, 0); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT2, accumulate, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, moments0, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT2, moments1, 0); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT3, accumulate, 0); + } + + float circleToParameter(float 
angle) { + float x = Mth.cos(angle); + float y = Mth.sin(angle); + float result = Mth.abs(y) - Mth.abs(x); + result = (x < 0.0f) ? (2.0f - result) : result; + result = (y < 0.0f) ? (6.0f - result) : result; + result += (angle >= 2.0f * Mth.PI) ? 8.0f : 0.0f; + return result; + } + + void computeWrappingZoneParameters(float[] out) { + computeWrappingZoneParameters(out, 0.1f * Mth.PI); + } + + /*! Given an angle in radians providing the size of the wrapping zone, this + function computes all constants required by the shader.*/ + void computeWrappingZoneParameters(float[] p_out_wrapping_zone_parameters, float new_wrapping_zone_angle) { + p_out_wrapping_zone_parameters[0] = new_wrapping_zone_angle; + p_out_wrapping_zone_parameters[1] = Mth.PI - 0.5f * new_wrapping_zone_angle; + if (new_wrapping_zone_angle <= 0.0f) { + p_out_wrapping_zone_parameters[2] = 0.0f; + p_out_wrapping_zone_parameters[3] = 0.0f; + } else { + float zone_end_parameter = 7; + float zone_begin_parameter = circleToParameter(2.0f * Mth.PI - new_wrapping_zone_angle); + p_out_wrapping_zone_parameters[2] = 1.0f / (zone_end_parameter - zone_begin_parameter); + p_out_wrapping_zone_parameters[3] = 1.0f - zone_end_parameter * p_out_wrapping_zone_parameters[2]; + } } } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index afd8ae647..3cb61f8a5 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -21,7 +21,8 @@ flat in uvec2 _flw_ids; #ifdef _FLW_OIT #ifdef _FLW_GENERATE_MOMENTS layout (location = 0) out float _flw_zerothMoment_out; -layout (location = 1) out vec4 _flw_moments_out; +layout (location = 1) out vec4 _flw_moments0_out; +layout (location = 2) out vec4 _flw_moments1_out; #endif #ifdef _FLW_RESOLVE_MOMENTS layout (location = 0) out vec4 _flw_accumulate_out; @@ -133,7 
+134,7 @@ void _flw_main() { #ifdef _FLW_GENERATE_MOMENTS - generateMoments(depth, 1 - color.a, vec4(0), _flw_zerothMoment_out, _flw_moments_out); + generateMoments(depth, 1 - color.a, _flw_zerothMoment_out, _flw_moments0_out, _flw_moments1_out); #endif #ifdef _FLW_RESOLVE_MOMENTS diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl index b2cf49485..30848e3f7 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl @@ -2,6 +2,16 @@ This header defines utility functions to deal with complex numbers and complex polynomials.*/ +void sincos(float theta, out float s, out float c) { + s = sin(theta); + c = cos(theta); +} + +float saturate(float a) { + return clamp(a, 0., 1.); +} + + /*! Returns the complex conjugate of the given complex number (i.e. it changes the sign of the y-component).*/ vec2 Conjugate(vec2 Z){ @@ -45,11 +55,11 @@ vec2 Cube(vec2 Z){ \sa SquareRoot() */ vec2 SquareRootUnsafe(vec2 Z){ float ZLengthSq=dot(Z, Z); - float ZLengthInv=rsqrt(ZLengthSq); + float ZLengthInv=inversesqrt(ZLengthSq); vec2 UnnormalizedRoot=Z*ZLengthInv+vec2(1.0f, 0.0f); float UnnormalizedRootLengthSq=dot(UnnormalizedRoot, UnnormalizedRoot); float NormalizationFactorInvSq=UnnormalizedRootLengthSq*ZLengthInv; - float NormalizationFactor=rsqrt(NormalizationFactorInvSq); + float NormalizationFactor=inversesqrt(NormalizationFactorInvSq); return NormalizationFactor*UnnormalizedRoot; } /*! This utility function computes one square root of the given complex value. @@ -65,7 +75,7 @@ vec2 SquareRoot(vec2 Z){ other roots can be found by multiplication by cubic roots of unity. 
\note This function has various discontinuities.*/ vec2 CubicRoot(vec2 Z){ - float Argument=atan2(Z.y, Z.x); + float Argument=atan(Z.y, Z.x); float NewArgument=Argument/3.0f; vec2 NormalizedRoot; sincos(NewArgument, NormalizedRoot.y, NormalizedRoot.x); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl index 0c2d23b72..4f730eada 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl @@ -5,17 +5,7 @@ roots of polynomials up to degree four are defined. */ -//#include "flywheel:internal/mboit/trigonometric_moment_math.glsl" - -void sincos(float theta, out float s, out float c) { - s = sin(theta); - c = cos(theta); -} - -float saturate(float a) { - return clamp(a, 0., 1.); -} - +#include "flywheel:internal/mboit/trigonometric_moment_math.glsl" /*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl index f26d5582e..23ef4ec03 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl @@ -17,7 +17,7 @@ const float moment_bias = 0.25; const float overestimation = 0.25; -const vec4 wrapping_zone_parameters = vec4(0.); +const vec4 wrapping_zone_parameters = vec4(0.31415927, 2.984513, 2.7934167, -18.553917); void clip(float a) { @@ -28,7 +28,7 @@ void clip(float a) { // jozu: The trigonometric moments and higher order power moments rely on a second render target // which the java side is not set up to support. Trying to enable them as is will cause compile errors also. 
-#define NUM_MOMENTS 4 +#define NUM_MOMENTS 8 #define SINGLE_PRECISION 1 @@ -40,15 +40,15 @@ void clip(float a) { vector of moments.moment vector. The shader that calls this function must provide the required render targets.*/ #if NUM_MOMENTS == 4 -void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b) +void generateMoments(float depth, float transmittance, out float b_0, out vec4 b) #elif NUM_MOMENTS == 6 #if USE_R_RG_RBBA_FOR_MBOIT6 -void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec4 b_3456) +void generateMoments(float depth, float transmittance, out float b_0, out vec2 b_12, out vec4 b_3456) #else -void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec2 b_12, out vec2 b_34, out vec2 b_56) +void generateMoments(float depth, float transmittance, out float b_0, out vec2 b_12, out vec2 b_34, out vec2 b_56) #endif #elif NUM_MOMENTS == 8 -void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parameters, out float b_0, out vec4 b_even, out vec4 b_odd) +void generateMoments(float depth, float transmittance, out float b_0, out vec4 b_even, out vec4 b_odd) #endif { transmittance = max(transmittance, 0.000001); @@ -57,7 +57,7 @@ void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parame b_0 = absorbance; #if TRIGONOMETRIC float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y); - vec2 circle_point = vec2(sin(phas), cos(phase)); + vec2 circle_point = vec2(sin(phase), cos(phase)); vec2 circle_point_pow2 = Multiply(circle_point, circle_point); #if NUM_MOMENTS == 4 @@ -98,10 +98,8 @@ void generateMoments(float depth, float transmittance, vec4 wrapping_zone_parame #else//MOMENT_GENERATION is disabled layout (binding = 7) uniform sampler2D _flw_zeroth_moment_sampler; -layout (binding = 8) uniform sampler2D _flw_moments_sampler; 
-#if USE_R_RG_RBBA_FOR_MBOIT6 -uniform sampler2D extra_moments; -#endif +layout (binding = 8) uniform sampler2D _flw_moments0_sampler; +layout (binding = 9) uniform sampler2D _flw_moments1_sampler; /*! This function is to be called from the shader that composites the transparent fragments. It reads the moments and calls the appropriate @@ -120,7 +118,7 @@ void resolveMoments(out float transmittance_at_depth, out float total_transmitta #if NUM_MOMENTS == 4 #if TRIGONOMETRIC - vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0); + vec4 b_tmp = texelFetch(_flw_moments0_sampler, idx0, 0); vec2 trig_b[2]; trig_b[0] = b_tmp.xy; trig_b[1] = b_tmp.zw; @@ -133,7 +131,7 @@ void resolveMoments(out float transmittance_at_depth, out float total_transmitta #endif transmittance_at_depth = computeTransmittanceAtDepthFrom2TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); #else - vec4 b_1234 = texelFetch(_flw_moments_sampler, idx0, 0).xyzw; + vec4 b_1234 = texelFetch(_flw_moments0_sampler, idx0, 0).xyzw; #if SINGLE_PRECISION vec2 b_even = b_1234.yw; vec2 b_odd = b_1234.xz; @@ -158,14 +156,14 @@ void resolveMoments(out float transmittance_at_depth, out float total_transmitta ivec2 idx2 = idx0; #if TRIGONOMETRIC vec2 trig_b[3]; - trig_b[0] = texelFetch(_flw_moments_sampler, idx0, 0).xy; + trig_b[0] = texelFetch(_flw_moments0_sampler, idx0, 0).xy; #if USE_R_RG_RBBA_FOR_MBOIT6 vec4 tmp = texelFetch(extra_moments, idx0, 0); trig_b[1] = tmp.xy; trig_b[2] = tmp.zw; #else - trig_b[1] = texelFetch(_flw_moments_sampler, idx1, 0).xy; - trig_b[2] = texelFetch(_flw_moments_sampler, idx2, 0).xy; + trig_b[1] = texelFetch(_flw_moments1_sampler, idx1, 0).xy; + trig_b[2] = texelFetch(_flw_moments0_sampler, idx2, 0).xy; #endif #if SINGLE_PRECISION trig_b[0] /= b_0; @@ -178,14 +176,14 @@ void resolveMoments(out float transmittance_at_depth, out float total_transmitta #endif transmittance_at_depth = 
computeTransmittanceAtDepthFrom3TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); #else - vec2 b_12 = texelFetch(_flw_moments_sampler, idx0, 0).xy; + vec2 b_12 = texelFetch(_flw_moments0_sampler, idx0, 0).xy; #if USE_R_RG_RBBA_FOR_MBOIT6 vec4 tmp = texelFetch(extra_moments, idx0, 0); vec2 b_34 = tmp.xy; vec2 b_56 = tmp.zw; #else - vec2 b_34 = texelFetch(_flw_moments_sampler, idx1, 0).xy; - vec2 b_56 = texelFetch(_flw_moments_sampler, idx2, 0).xy; + vec2 b_34 = texelFetch(_flw_moments1_sampler, idx1, 0).xy; + vec2 b_56 = texelFetch(_flw_moments0_sampler, idx2, 0).xy; #endif #if SINGLE_PRECISION vec3 b_even = vec3(b_12.y, b_34.y, b_56.y); @@ -209,8 +207,8 @@ void resolveMoments(out float transmittance_at_depth, out float total_transmitta #endif #elif NUM_MOMENTS == 8 #if TRIGONOMETRIC - vec4 b_tmp = texelFetch(_flw_moments_sampler, idx0, 0); - vec4 b_tmp2 = texelFetch(_flw_moments_sampler, idx1, 0); + vec4 b_tmp = texelFetch(_flw_moments0_sampler, idx0, 0); + vec4 b_tmp2 = texelFetch(_flw_moments1_sampler, idx1, 0); #if SINGLE_PRECISION vec2 trig_b[4] = { b_tmp2.xy / b_0, @@ -229,15 +227,15 @@ void resolveMoments(out float transmittance_at_depth, out float total_transmitta transmittance_at_depth = computeTransmittanceAtDepthFrom4TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); #else #if SINGLE_PRECISION - vec4 b_even = texelFetch(_flw_moments_sampler, idx0, 0); - vec4 b_odd = texelFetch(_flw_moments_sampler, idx1, 0); + vec4 b_even = texelFetch(_flw_moments0_sampler, idx0, 0); + vec4 b_odd = texelFetch(_flw_moments1_sampler, idx1, 0); b_even /= b_0; b_odd /= b_0; const float bias_vector[8] = { 0, 0.75, 0, 0.67666666666666664, 0, 0.63, 0, 0.60030303030303034 }; #else - vec4 b_even_q = texelFetch(_flw_moments_sampler, idx0, 0); - vec4 b_odd_q = texelFetch(_flw_moments_sampler, idx1, 0); + vec4 b_even_q = texelFetch(_flw_moments0_sampler, idx0, 0); + vec4 b_odd_q = 
texelFetch(_flw_moments1_sampler, idx1, 0); // Dequantize the moments vec4 b_even; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl index 4fc142d8f..30fe6a65e 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl @@ -21,7 +21,7 @@ float circleToParameter(vec2 circle_point){ returns 1.0, otherwise 0.0 or a linear ramp in the wrapping zone.*/ float getRootWeightFactor(float reference_parameter, float root_parameter, vec4 wrapping_zone_parameters){ float binary_weight_factor=(root_parameter Date: Mon, 17 Feb 2025 23:16:39 -0800 Subject: [PATCH 04/12] Wave goodbye to order - Implement wavelet OIT - Needed to do the same normalization step as in MBOIT but that's not described in the blog post I followed - Use an expensive pseudo blue noise function to slightly correct banding artifacts --- .../flywheel/backend/Samplers.java | 5 +- .../backend/compile/PipelineCompiler.java | 5 +- .../engine/indirect/IndirectDrawManager.java | 22 +- ...itFramebuffer.java => OitFramebuffer.java} | 133 +++-- .../flywheel/flywheel/internal/common.frag | 268 ++++++++-- .../internal/indirect/oit_composite.frag | 79 ++- .../internal/mboit/complex_algebra.glsl | 213 -------- .../flywheel/internal/mboit/moment_math.glsl | 490 ------------------ .../flywheel/internal/mboit/moment_oit.glsl | 251 --------- .../mboit/trigonometric_moment_math.glsl | 311 ----------- 10 files changed, 386 insertions(+), 1391 deletions(-) rename common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/{MboitFramebuffer.java => OitFramebuffer.java} (56%) delete mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl delete mode 100644 
common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl delete mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl delete mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java index e89acccbb..272314894 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java @@ -11,7 +11,6 @@ public class Samplers { public static final GlTextureUnit LIGHT_LUT = GlTextureUnit.T5; public static final GlTextureUnit LIGHT_SECTIONS = GlTextureUnit.T6; - public static final GlTextureUnit ZEROTH_MOMENT = GlTextureUnit.T7; - public static final GlTextureUnit MOMENTS0 = GlTextureUnit.T8; - public static final GlTextureUnit MOMENTS1 = GlTextureUnit.T9; + public static final GlTextureUnit DEPTH_RANGE = GlTextureUnit.T7; + public static final GlTextureUnit COEFFICIENTS = GlTextureUnit.T8; } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java index f33486315..388563f49 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java @@ -230,8 +230,9 @@ public final class PipelineCompiler { public enum OitMode { OFF("", ""), - GENERATE("_FLW_GENERATE_MOMENTS", "_generate"), - RESOLVE("_FLW_RESOLVE_MOMENTS", "_resolve"), + DEPTH_RANGE("_FLW_DEPTH_RANGE", "_depth_range"), + GENERATE_COEFFICIENTS("_FLW_COLLECT_COEFFS", "_generate_coefficients"), + EVALUATE("_FLW_EVALUATE", "_resolve"), ; public final String define; diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index afc62d5af..09b5b3201 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -49,7 +49,7 @@ public class IndirectDrawManager extends DrawManager> { private final DepthPyramid depthPyramid; - private final MboitFramebuffer wboitFrameBuffer; + private final OitFramebuffer wboitFrameBuffer; public IndirectDrawManager(IndirectPrograms programs) { this.programs = programs; @@ -66,7 +66,7 @@ public class IndirectDrawManager extends DrawManager> { depthPyramid = new DepthPyramid(programs); - wboitFrameBuffer = new MboitFramebuffer(programs); + wboitFrameBuffer = new OitFramebuffer(programs); } @Override @@ -146,16 +146,26 @@ public class IndirectDrawManager extends DrawManager> { group.submitSolid(); } - wboitFrameBuffer.generateMoments(); + wboitFrameBuffer.depthRange(); for (var group : cullingGroups.values()) { - group.submitTransparent(PipelineCompiler.OitMode.GENERATE); + group.submitTransparent(PipelineCompiler.OitMode.DEPTH_RANGE); } - wboitFrameBuffer.resolveMoments(); + wboitFrameBuffer.renderTransmittance(); for (var group : cullingGroups.values()) { - group.submitTransparent(PipelineCompiler.OitMode.RESOLVE); + group.submitTransparent(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS); + } + + // wboitFrameBuffer.adjustBackgroundForTotalTransmittance(); + + // vertexArray.bindForDraw(); + + wboitFrameBuffer.shade(); + + for (var group : cullingGroups.values()) { + group.submitTransparent(PipelineCompiler.OitMode.EVALUATE); } wboitFrameBuffer.composite(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java similarity index 56% rename from common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java rename to common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java index 747020e73..3d50fb76e 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MboitFramebuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java @@ -10,29 +10,27 @@ import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import net.minecraft.client.Minecraft; -import net.minecraft.util.Mth; -public class MboitFramebuffer { +public class OitFramebuffer { public final int fbo; private final IndirectPrograms programs; private final int vao; - public int zerothMoment; - public int moments0; - public int moments1; + public int depthBounds; + public int coefficients; public int accumulate; private int lastWidth = -1; private int lastHeight = -1; - public MboitFramebuffer(IndirectPrograms programs) { + public OitFramebuffer(IndirectPrograms programs) { this.programs = programs; fbo = GL46.glCreateFramebuffers(); vao = GL46.glCreateVertexArrays(); } - public void generateMoments() { + public void depthRange() { var mainRenderTarget = Minecraft.getInstance() .getMainRenderTarget(); @@ -42,36 +40,55 @@ public class MboitFramebuffer { RenderSystem.depthMask(false); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); - RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + RenderSystem.blendEquation(GL46.GL_MAX); GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, 
GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2}); + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0}); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0}); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, new float[]{0, 0, 0, 0}); + var far = Minecraft.getInstance().gameRenderer.getDepthFar(); + + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{-far, -far, 0, 0}); GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } - public void resolveMoments() { + public void renderTransmittance() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); RenderSystem.blendEquation(GL46.GL_FUNC_ADD); - Samplers.ZEROTH_MOMENT.makeActive(); - GlStateManager._bindTexture(zerothMoment); + Samplers.DEPTH_RANGE.makeActive(); + GlStateManager._bindTexture(depthBounds); - Samplers.MOMENTS0.makeActive(); - GlStateManager._bindTexture(moments0); + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2, GL46.GL_COLOR_ATTACHMENT3, GL46.GL_COLOR_ATTACHMENT4}); - Samplers.MOMENTS1.makeActive(); - GlStateManager._bindTexture(moments1); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 3, new float[]{0, 0, 0, 0}); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT3}); + GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); + } + + public void shade() { + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(false); + 
RenderSystem.enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); + RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + + Samplers.DEPTH_RANGE.makeActive(); + GlStateManager._bindTexture(depthBounds); + + Samplers.COEFFICIENTS.makeActive(); + GlStateManager._bindTexture(0); + + GL46.glBindTextureUnit(Samplers.COEFFICIENTS.number, coefficients); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT5}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); @@ -79,26 +96,27 @@ public class MboitFramebuffer { } public void composite() { + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(false); + RenderSystem.enableBlend(); + RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA); + RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + var mainRenderTarget = Minecraft.getInstance() .getMainRenderTarget(); mainRenderTarget.bindWrite(false); - var oitCompositeProgram = programs.getOitCompositeProgram(); - - GlStateManager._depthMask(false); - GlStateManager._depthFunc(GL46.GL_ALWAYS); - GlStateManager._enableBlend(); - RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA); - - oitCompositeProgram.bind(); - GlTextureUnit.T0.makeActive(); - GlStateManager._bindTexture(zerothMoment); + GlStateManager._bindTexture(0); + GL46.glBindTextureUnit(0, coefficients); GlTextureUnit.T1.makeActive(); GlStateManager._bindTexture(accumulate); + programs.getOitCompositeProgram() + .bind(); + // Empty VAO, the actual full screen triangle is generated in the vertex shader GlStateManager._glBindVertexArray(vao); @@ -112,9 +130,8 @@ public class MboitFramebuffer { } private void deleteTextures() { - GL46.glDeleteTextures(zerothMoment); - GL46.glDeleteTextures(moments0); - GL46.glDeleteTextures(moments1); + GL46.glDeleteTextures(depthBounds); + 
GL46.glDeleteTextures(coefficients); GL46.glDeleteTextures(accumulate); } @@ -128,14 +145,12 @@ public class MboitFramebuffer { deleteTextures(); - zerothMoment = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - moments0 = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - moments1 = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + depthBounds = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + coefficients = GL46.glCreateTextures(GL46.GL_TEXTURE_2D_ARRAY); accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - GL46.glTextureStorage2D(zerothMoment, 1, GL32.GL_R16F, width, height); - GL46.glTextureStorage2D(moments0, 1, GL32.GL_RGBA16F, width, height); - GL46.glTextureStorage2D(moments1, 1, GL32.GL_RGBA16F, width, height); + GL46.glTextureStorage2D(depthBounds, 1, GL32.GL_RG32F, width, height); + GL46.glTextureStorage3D(coefficients, 1, GL32.GL_RGBA16F, width, height, 4); GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height); @@ -147,39 +162,11 @@ public class MboitFramebuffer { // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); // } - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, zerothMoment, 0); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, moments0, 0); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT2, moments1, 0); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT3, accumulate, 0); - } - - float circleToParameter(float angle) { - float x = Mth.cos(angle); - float y = Mth.sin(angle); - float result = Mth.abs(y) - Mth.abs(x); - result = (x < 0.0f) ? (2.0f - result) : result; - result = (y < 0.0f) ? (6.0f - result) : result; - result += (angle >= 2.0f * Mth.PI) ? 8.0f : 0.0f; - return result; - } - - void computeWrappingZoneParameters(float[] out) { - computeWrappingZoneParameters(out, 0.1f * Mth.PI); - } - - /*! 
Given an angle in radians providing the size of the wrapping zone, this - function computes all constants required by the shader.*/ - void computeWrappingZoneParameters(float[] p_out_wrapping_zone_parameters, float new_wrapping_zone_angle) { - p_out_wrapping_zone_parameters[0] = new_wrapping_zone_angle; - p_out_wrapping_zone_parameters[1] = Mth.PI - 0.5f * new_wrapping_zone_angle; - if (new_wrapping_zone_angle <= 0.0f) { - p_out_wrapping_zone_parameters[2] = 0.0f; - p_out_wrapping_zone_parameters[3] = 0.0f; - } else { - float zone_end_parameter = 7; - float zone_begin_parameter = circleToParameter(2.0f * Mth.PI - new_wrapping_zone_angle); - p_out_wrapping_zone_parameters[2] = 1.0f / (zone_end_parameter - zone_begin_parameter); - p_out_wrapping_zone_parameters[3] = 1.0f - zone_end_parameter * p_out_wrapping_zone_parameters[2]; - } + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, depthBounds, 0); + GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT1, coefficients, 0, 0); + GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT2, coefficients, 0, 1); + GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT3, coefficients, 0, 2); + GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT4, coefficients, 0, 3); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT5, accumulate, 0); } } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index 3cb61f8a5..d4715ff3a 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -1,7 +1,6 @@ #include "flywheel:internal/packed_material.glsl" #include "flywheel:internal/diffuse.glsl" #include "flywheel:internal/colorizer.glsl" -#include "flywheel:internal/mboit/moment_oit.glsl" // optimize discard usage #if 
defined(GL_ARB_conservative_depth) && defined(_FLW_USE_DISCARD) @@ -19,14 +18,222 @@ flat in uvec2 _flw_ids; #endif #ifdef _FLW_OIT -#ifdef _FLW_GENERATE_MOMENTS -layout (location = 0) out float _flw_zerothMoment_out; -layout (location = 1) out vec4 _flw_moments0_out; -layout (location = 2) out vec4 _flw_moments1_out; + +#define TRANSPARENCY_WAVELET_RANK 3 +#define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 +#define floatN float +#define all(e) (e) +#define mad fma +#define lerp mix +#define Coefficients_Out vec4[4] +#define Coefficients_In sampler2DArray + +layout (binding = 7) uniform sampler2D _flw_depthRange; + +layout (binding = 8) uniform sampler2DArray _flw_coefficients; + +#define REMOVE_SIGNAL true + +#ifdef _FLW_DEPTH_RANGE + +layout (location = 0) out vec2 _flw_depthRange_out; + #endif -#ifdef _FLW_RESOLVE_MOMENTS -layout (location = 0) out vec4 _flw_accumulate_out; + + +#ifdef _FLW_COLLECT_COEFFS + + +layout (location = 0) out vec4 _flw_coeffs0; +layout (location = 1) out vec4 _flw_coeffs1; +layout (location = 2) out vec4 _flw_coeffs2; +layout (location = 3) out vec4 _flw_coeffs3; + +void add_to_index(inout Coefficients_Out coefficients, uint index, floatN addend) { + coefficients[index >> 2][index & 3u] = addend; +} + +void add_event_to_wavelets(inout Coefficients_Out coefficients, floatN signal, float depth) +{ + depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + + int index = clamp(int(floor(depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + index += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) + { + int power = TRANSPARENCY_WAVELET_RANK - i; + int new_index = (index - 1) >> 1; + float k = float((new_index + 1) & ((1 << power) - 1)); + + int wavelet_sign = ((index & 1) << 1) - 1; + float wavelet_phase = ((index + 1) & 1) * exp2(-power); + floatN addend = mad(mad(-exp2(-power), k, depth), 
wavelet_sign, wavelet_phase) * exp2(power * 0.5) * signal; + add_to_index(coefficients, new_index, addend); + + index = new_index; + } + + floatN addend = mad(signal, -depth, signal); + add_to_index(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1, addend); +} + +void add_transmittance_event_to_wavelets(inout Coefficients_Out coefficients, floatN transmittance, float depth) +{ + float absorbance = -log(max(transmittance, 0.00001));// transforming the signal from multiplicative transmittance to additive absorbance + add_event_to_wavelets(coefficients, absorbance, depth); +} + #endif + +#ifdef _FLW_EVALUATE + +layout (location = 0) out vec4 _flw_accumulate; + + +floatN get_coefficients(in Coefficients_In coefficients, uint index) { + return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u]; +} + + floatN evaluate_wavelets(in Coefficients_In coefficients, float depth, floatN signal) +{ + floatN scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + if (all(scale_coefficient == 0)) + { + return 0; + } + if (REMOVE_SIGNAL) + { + floatN scale_coefficient_addend = mad(signal, -depth, signal); + scale_coefficient -= scale_coefficient_addend; + } + + depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + + float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + bool sample_a = index_b >= 1; + int index_a = sample_a ? (index_b - 1) : index_b; + + index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + floatN b = scale_coefficient; +floatN a = sample_a ? 
scale_coefficient : 0; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) + { + int power = TRANSPARENCY_WAVELET_RANK - i; + + int new_index_b = (index_b - 1) >> 1; + int wavelet_sign_b = ((index_b & 1) << 1) - 1; + floatN coeff_b = get_coefficients(coefficients, new_index_b); + if (REMOVE_SIGNAL) + { + float wavelet_phase_b = ((index_b + 1) & 1) * exp2(-power); + float k = float((new_index_b + 1) & ((1 << power) - 1)); + floatN addend = mad(mad(-exp2(-power), k, depth), wavelet_sign_b, wavelet_phase_b) * exp2(power * 0.5) * signal; + coeff_b -= addend; + } + b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; + index_b = new_index_b; + + if (sample_a) + { + int new_index_a = (index_a - 1) >> 1; + int wavelet_sign_a = ((index_a & 1) << 1) - 1; + floatN coeff_a = (new_index_a == new_index_b) ? coeff_b : get_coefficients(coefficients, new_index_a);// No addend here on purpose, the original signal didn't contribute to this coefficient + a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a; + index_a = new_index_a; + } + } + + float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth); + + return lerp(a, b, t); +} + + floatN evaluate_transmittance_wavelets(in Coefficients_In coefficients, float depth, floatN signal) +{ + floatN absorbance = evaluate_wavelets(coefficients, depth, signal); + return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance +} + +#endif + +// TODO: blue noise texture +uint HilbertIndex(uvec2 p) { + uint i = 0u; + for (uint l = 0x4000u; l > 0u; l >>= 1u) { + uvec2 r = min(p & l, 1u); + + i = (i << 2u) | ((r.x * 3u) ^ r.y); + p = r.y == 0u ? 
(0x7FFFu * r.x) ^ p.yx : p; + } + return i; +} + +uint ReverseBits(uint x) { + x = ((x & 0xaaaaaaaau) >> 1) | ((x & 0x55555555u) << 1); + x = ((x & 0xccccccccu) >> 2) | ((x & 0x33333333u) << 2); + x = ((x & 0xf0f0f0f0u) >> 4) | ((x & 0x0f0f0f0fu) << 4); + x = ((x & 0xff00ff00u) >> 8) | ((x & 0x00ff00ffu) << 8); + return (x >> 16) | (x << 16); +} + +// from: https://psychopath.io/post/2021_01_30_building_a_better_lk_hash +uint OwenHash(uint x, uint seed) { // seed is any random number + x ^= x * 0x3d20adeau; + x += seed; + x *= (seed >> 16) | 1u; + x ^= x * 0x05526c56u; + x ^= x * 0x53a22864u; + return x; +} + +// https://www.shadertoy.com/view/ssBBW1 +float blue() { + uint m = HilbertIndex(uvec2(gl_FragCoord.xy));// map pixel coords to hilbert curve index + m = OwenHash(ReverseBits(m), 0xe7843fbfu);// owen-scramble hilbert index + m = OwenHash(ReverseBits(m), 0x8d8fb1e0u);// map hilbert index to sobol sequence and owen-scramble + float mask = float(ReverseBits(m)) / 4294967296.0;// convert to float + + return mask; +} + +uniform vec3 _flw_depthAdjust; + +float adjust_depth(float normalizedDepth) { + + float tentIn = abs(normalizedDepth * 2. 
- 1); + float tentIn2 = tentIn * tentIn; + float tentIn4 = tentIn2 * tentIn2; + float tent = 1 - (tentIn2 * tentIn4); + + float b = blue(); + + return normalizedDepth - b * tent * 0.08; +} + +float linearize_depth(float d, float zNear, float zFar) { + float z_n = 2.0 * d - 1.0; + return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear)); +} + +float linear_depth() { + return linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); +} + +float depth() { + float linearDepth = linear_depth(); + + vec2 depthRange = texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).rg; + float depth = (linearDepth + depthRange.x) / (depthRange.x + depthRange.y); + + return adjust_depth(depth); +} + + #else out vec4 _flw_outputColor; @@ -47,11 +254,6 @@ float _flw_diffuseFactor() { } } -float linearize_depth(float d, float zNear, float zFar) { - float z_n = 2.0 * d - 1.0; - return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear)); -} - void _flw_main() { flw_sampleColor = texture(flw_diffuseTex, flw_vertexTexCoord); flw_fragColor = flw_vertexColor * flw_sampleColor; @@ -121,39 +323,41 @@ void _flw_main() { color = flw_fogFilter(color); #ifdef _FLW_OIT - float linearDepth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); - float lnNear = log(_flw_cullData.znear); - float lnFar = log(_flw_cullData.zfar); + #ifdef _FLW_DEPTH_RANGE + float linearDepth = linear_depth(); - float depth = (log(linearDepth) - lnNear); + // Pad the depth by some unbalanced epsilons because minecraft has a lot of single-quad tranparency. + // The unbalance means our fragment will be considered closer to the screen in the normalization, + // which helps prevent unnecessary noise as it'll be closer to the edge of our tent function. + _flw_depthRange_out = vec2(-linearDepth + 1e-5, linearDepth + 1e-2); + #endif - depth /= lnFar - lnNear; + #ifdef _FLW_COLLECT_COEFFS - depth = clamp(depth * 2. 
- 1., -1., 1.); + Coefficients_Out result; + result[0] = vec4(0.); + result[1] = vec4(0.); + result[2] = vec4(0.); + result[3] = vec4(0.); - #ifdef _FLW_GENERATE_MOMENTS + add_transmittance_event_to_wavelets(result, 1. - color.a, depth()); - generateMoments(depth, 1 - color.a, _flw_zerothMoment_out, _flw_moments0_out, _flw_moments1_out); + _flw_coeffs0 = result[0]; + _flw_coeffs1 = result[1]; + _flw_coeffs2 = result[2]; + _flw_coeffs3 = result[3]; #endif - #ifdef _FLW_RESOLVE_MOMENTS - float tt; - float td; - resolveMoments(td, tt, depth, gl_FragCoord.xy); + #ifdef _FLW_EVALUATE - if (abs(td) < 1e-5) { - discard; - } + floatN transmittance = evaluate_transmittance_wavelets(_flw_coefficients, depth(), 1. - color.a); - color.rgb *= color.a; - - color *= td; - - _flw_accumulate_out = color; + _flw_accumulate = vec4(color.rgb * color.a, color.a) * transmittance; #endif + #else _flw_outputColor = color; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag index 3afe92839..1fee5f927 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag @@ -1,20 +1,79 @@ layout (location = 0) out vec4 frag; -layout (binding = 0) uniform sampler2D zerothMoment; -layout (binding = 1) uniform sampler2D accumulate; +layout (binding = 0) uniform sampler2DArray _flw_coefficients; +layout (binding = 1) uniform sampler2D _flw_accumulate; + +#define TRANSPARENCY_WAVELET_RANK 3 +#define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 +#define floatN float +#define all(e) (e) +#define mad fma +#define lerp mix +#define Coefficients_Out vec4[4] +#define Coefficients_In sampler2DArray + + +floatN get_coefficients(in Coefficients_In coefficients, uint index) { + return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index 
>> 2), 0)[index & 3u]; +} + + floatN evaluate_wavelet_index(in Coefficients_In coefficients, int index) +{ + floatN result = 0; + + index += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) + { + int power = TRANSPARENCY_WAVELET_RANK - i; + int new_index = (index - 1) >> 1; + floatN coeff = get_coefficients(coefficients, new_index); + int wavelet_sign = ((index & 1) << 1) - 1; + result -= exp2(float(power) * 0.5) * coeff * wavelet_sign; + index = new_index; + } + return result; +} + + + floatN evaluate_wavelets(in Coefficients_In coefficients, float depth) +{ + floatN scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + if (all(scale_coefficient == 0)) + { + return 0; + } + + depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + + float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + int index = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + + floatN a = 0; +floatN b = scale_coefficient + evaluate_wavelet_index(coefficients, index); + if (index > 0) { a = scale_coefficient + evaluate_wavelet_index(coefficients, index - 1); } + + float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth); + floatN signal = lerp(a, b, t);// You can experiment here with different types of interpolation as well + return signal; +} + + floatN evaluate_transmittance_wavelets(in Coefficients_In coefficients, float depth) +{ + floatN absorbance = evaluate_wavelets(coefficients, depth); + return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance +} + +const float infinity = 1. 
/ 0.; void main() { - ivec2 coords = ivec2(gl_FragCoord.xy); + vec4 texel = texelFetch(_flw_accumulate, ivec2(gl_FragCoord.xy), 0); - float b0 = texelFetch(zerothMoment, coords, 0).r; - - if (b0 < 1e-5) { + if (texel.a < 1e-5) { discard; } - vec4 accumulation = texelFetch(accumulate, coords, 0); + floatN total_transmittance = evaluate_transmittance_wavelets(_flw_coefficients, infinity); - vec3 normalizedAccumulation = accumulation.rgb / max(accumulation.a, 1e-5); - - frag = vec4(normalizedAccumulation, exp(-b0)); + frag = vec4(texel.rgb / texel.a, total_transmittance); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl deleted file mode 100644 index 30848e3f7..000000000 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/complex_algebra.glsl +++ /dev/null @@ -1,213 +0,0 @@ -/*! \file - This header defines utility functions to deal with complex numbers and - complex polynomials.*/ - -void sincos(float theta, out float s, out float c) { - s = sin(theta); - c = cos(theta); -} - -float saturate(float a) { - return clamp(a, 0., 1.); -} - - -/*! Returns the complex conjugate of the given complex number (i.e. it changes - the sign of the y-component).*/ -vec2 Conjugate(vec2 Z){ - return vec2(Z.x, -Z.y); -} -/*! This function implements complex multiplication.*/ -vec2 Multiply(vec2 LHS, vec2 RHS){ - return vec2(LHS.x*RHS.x-LHS.y*RHS.y, LHS.x*RHS.y+LHS.y*RHS.x); -} -/*! This function computes the magnitude of the given complex number.*/ -float Magnitude(vec2 Z){ - return sqrt(dot(Z, Z)); -} -/*! This function computes the quotient of two complex numbers. The denominator - must not be zero.*/ -vec2 Divide(vec2 Numerator, vec2 Denominator){ - return vec2(Numerator.x*Denominator.x+Numerator.y*Denominator.y, -Numerator.x*Denominator.y+Numerator.y*Denominator.x)/dot(Denominator, Denominator); -} -/*! 
This function divides a real number by a complex number. The denominator - must not be zero.*/ -vec2 Divide(float Numerator, vec2 Denominator){ - return vec2(Numerator*Denominator.x, -Numerator*Denominator.y)/dot(Denominator, Denominator); -} -/*! This function implements computation of the reciprocal of the given non- - zero complex number.*/ -vec2 Reciprocal(vec2 Z){ - return vec2(Z.x, -Z.y)/dot(Z, Z); -} -/*! This utility function implements complex squaring.*/ -vec2 Square(vec2 Z){ - return vec2(Z.x*Z.x-Z.y*Z.y, 2.0f*Z.x*Z.y); -} -/*! This utility function implements complex computation of the third power.*/ -vec2 Cube(vec2 Z){ - return Multiply(Square(Z), Z); -} -/*! This utility function computes one square root of the given complex value. - The other one can be found using the unary minus operator. - \warning This function is continuous but not defined on the negative real - axis (and cannot be continued continuously there). - \sa SquareRoot() */ -vec2 SquareRootUnsafe(vec2 Z){ - float ZLengthSq=dot(Z, Z); - float ZLengthInv=inversesqrt(ZLengthSq); - vec2 UnnormalizedRoot=Z*ZLengthInv+vec2(1.0f, 0.0f); - float UnnormalizedRootLengthSq=dot(UnnormalizedRoot, UnnormalizedRoot); - float NormalizationFactorInvSq=UnnormalizedRootLengthSq*ZLengthInv; - float NormalizationFactor=inversesqrt(NormalizationFactorInvSq); - return NormalizationFactor*UnnormalizedRoot; -} -/*! This utility function computes one square root of the given complex value. - The other one can be found using the unary minus operator. - \note This function has discontinuities for values with real part zero. - \sa SquareRootUnsafe() */ -vec2 SquareRoot(vec2 Z){ - vec2 ZPositiveRealPart=vec2(abs(Z.x), Z.y); - vec2 ComputedRoot=SquareRootUnsafe(ZPositiveRealPart); - return (Z.x>=0.0)?ComputedRoot:ComputedRoot.yx; -} -/*! This utility function computes one cubic root of the given complex value. The - other roots can be found by multiplication by cubic roots of unity. 
- \note This function has various discontinuities.*/ -vec2 CubicRoot(vec2 Z){ - float Argument=atan(Z.y, Z.x); - float NewArgument=Argument/3.0f; - vec2 NormalizedRoot; - sincos(NewArgument, NormalizedRoot.y, NormalizedRoot.x); - return NormalizedRoot*pow(dot(Z, Z), 1.0f/6.0f); -} - -/*! @{ - Returns the complex conjugate of the given complex vector (i.e. it changes the - second column resp the y-component).*/ -mat2x2 Conjugate(mat2x2 Vector){ - return mat2x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y); -} -mat3x2 Conjugate(mat3x2 Vector){ - return mat3x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y); -} -mat4x2 Conjugate(mat4x2 Vector){ - return mat4x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y, Vector[3].x, -Vector[3].y); -} -void Conjugate(out vec2 OutConjugateVector[5], vec2 Vector[5]){ - for (int i=0;i!=5;++i){ - OutConjugateVector[i]=vec2(Vector[i].x, -Vector[i].x); - } -} -//!@} - -/*! Returns the real part of a complex number as real.*/ -float RealPart(vec2 Z){ - return Z.x; -} - -/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function - outputs its two complex roots.*/ -void SolveQuadratic(out vec2 pOutRoot[2], vec2 A, vec2 B, vec2 C) -{ - // Normalize the coefficients - vec2 InvA=Reciprocal(A); - B=Multiply(B, InvA); - C=Multiply(C, InvA); - // Divide the middle coefficient by two - B*=0.5f; - // Apply the quadratic formula - vec2 DiscriminantRoot=SquareRoot(Square(B)-C); - pOutRoot[0]=-B-DiscriminantRoot; - pOutRoot[1]=-B+DiscriminantRoot; -} - -/*! 
Given coefficients of a cubic polynomial A*x^3+B*x^2+C*x+D, this function - outputs its three complex roots.*/ -void SolveCubicBlinn(out vec2 pOutRoot[3], vec2 A, vec2 B, vec2 C, vec2 D) -{ - // Normalize the polynomial - vec2 InvA=Reciprocal(A); - B=Multiply(B, InvA); - C=Multiply(C, InvA); - D=Multiply(D, InvA); - // Divide middle coefficients by three - B/=3.0f; - C/=3.0f; - // Compute the Hessian and the discriminant - vec2 Delta00=-Square(B)+C; - vec2 Delta01=-Multiply(C, B)+D; - vec2 Delta11=Multiply(B, D)-Square(C); - vec2 Discriminant=4.0f*Multiply(Delta00, Delta11)-Square(Delta01); - // Compute coefficients of the depressed cubic - // (third is zero, fourth is one) - vec2 DepressedD=-2.0f*Multiply(B, Delta00)+Delta01; - vec2 DepressedC=Delta00; - // Take the cubic root of a complex number avoiding cancellation - vec2 DiscriminantRoot=SquareRoot(-Discriminant); - DiscriminantRoot=faceforward(DiscriminantRoot, DiscriminantRoot, DepressedD); - vec2 CubedRoot=DiscriminantRoot-DepressedD; - vec2 FirstRoot=CubicRoot(0.5f*CubedRoot); - vec2 pCubicRoot[3]={ - FirstRoot, - Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), FirstRoot), - Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), FirstRoot) - }; - // Also compute the reciprocal cubic roots - vec2 InvFirstRoot=Reciprocal(FirstRoot); - vec2 pInvCubicRoot[3]={ - InvFirstRoot, - Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), InvFirstRoot), - Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), InvFirstRoot) - }; - // Turn them into roots of the depressed cubic and revert the depression - // transform - - for (int i=0;i!=3;++i) - { - pOutRoot[i]=pCubicRoot[i]-Multiply(DepressedC, pInvCubicRoot[i])-B; - } -} - - -/*! 
Given coefficients of a quartic polynomial A*x^4+B*x^3+C*x^2+D*x+E, this - function outputs its four complex roots.*/ -void SolveQuarticNeumark(out vec2 pOutRoot[4], vec2 A, vec2 B, vec2 C, vec2 D, vec2 E) -{ - // Normalize the polynomial - vec2 InvA=Reciprocal(A); - B=Multiply(B, InvA); - C=Multiply(C, InvA); - D=Multiply(D, InvA); - E=Multiply(E, InvA); - // Construct a normalized cubic - vec2 P=-2.0f*C; - vec2 Q=Square(C)+Multiply(B, D)-4.0f*E; - vec2 R=Square(D)+Multiply(Square(B), E)-Multiply(Multiply(B, C), D); - // Compute a root that is not the smallest of the cubic - vec2 pCubicRoot[3]; - SolveCubicBlinn(pCubicRoot, vec2(1.0f, 0.0f), P, Q, R); - vec2 y=(dot(pCubicRoot[1], pCubicRoot[1])>dot(pCubicRoot[0], pCubicRoot[0]))?pCubicRoot[1]:pCubicRoot[0]; - - // Solve a quadratic to obtain linear coefficients for quadratic polynomials - vec2 BB=Square(B); - vec2 fy=4.0f*y; - vec2 BB_fy=BB-fy; - vec2 tmp=SquareRoot(BB_fy); - vec2 G=(B+tmp)*0.5f; - vec2 g=(B-tmp)*0.5f; - // Construct the corresponding constant coefficients - vec2 Z=C-y; - tmp=Divide(0.5f*Multiply(B, Z)-D, tmp); - vec2 H=Z*0.5f+tmp; - vec2 h=Z*0.5f-tmp; - - // Compute the roots - vec2 pQuadraticRoot[2]; - SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), G, H); - pOutRoot[0]=pQuadraticRoot[0]; - pOutRoot[1]=pQuadraticRoot[1]; - SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), g, h); - pOutRoot[2]=pQuadraticRoot[0]; - pOutRoot[3]=pQuadraticRoot[1]; -} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl deleted file mode 100644 index 4f730eada..000000000 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_math.glsl +++ /dev/null @@ -1,490 +0,0 @@ -/*! \file - This header provides utility functions to reconstruct the transmittance - from a given vector of power moments (4, 6 or 8 power moments) at a - specified depth. 
As prerequisite, utility functions for computing the real - roots of polynomials up to degree four are defined. -*/ - -#include "flywheel:internal/mboit/trigonometric_moment_math.glsl" - - -/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function - outputs its two real roots.*/ -vec2 solveQuadratic(vec3 coeffs) -{ - coeffs[1] *= 0.5; - - float x1, x2, tmp; - - tmp = (coeffs[1] * coeffs[1] - coeffs[0] * coeffs[2]); - if (coeffs[1] >= 0) { - tmp = sqrt(tmp); - x1 = (-coeffs[2]) / (coeffs[1] + tmp); - x2 = (-coeffs[1] - tmp) / coeffs[0]; - } else { - tmp = sqrt(tmp); - x1 = (-coeffs[1] + tmp) / coeffs[0]; - x2 = coeffs[2] / (-coeffs[1] + tmp); - } - return vec2(x1, x2); -} - -/*! Code taken from the blog "Moments in Graphics" by Christoph Peters. - http://momentsingraphics.de/?p=105 - This function computes the three real roots of a cubic polynomial - Coefficient[0]+Coefficient[1]*x+Coefficient[2]*x^2+Coefficient[3]*x^3.*/ -vec3 SolveCubic(vec4 Coefficient) { - // Normalize the polynomial - Coefficient.xyz /= Coefficient.w; - // Divide middle coefficients by three - Coefficient.yz /= 3.0f; - // Compute the Hessian and the discrimant - vec3 Delta = vec3( - fma(-Coefficient.z, Coefficient.z, Coefficient.y), - fma(-Coefficient.y, Coefficient.z, Coefficient.x), - dot(vec2(Coefficient.z, -Coefficient.y), Coefficient.xy) - ); - float Discriminant = dot(vec2(4.0f*Delta.x, -Delta.y), Delta.zy); - // Compute coefficients of the depressed cubic - // (third is zero, fourth is one) - vec2 Depressed = vec2( - fma(-2.0f*Coefficient.z, Delta.x, Delta.y), - Delta.x - ); - // Take the cubic root of a normalized complex number - float Theta = atan(sqrt(Discriminant), -Depressed.x) / 3.0f; - vec2 CubicRoot; - sincos(Theta, CubicRoot.y, CubicRoot.x); - // Compute the three roots, scale appropriately and - // revert the depression transform - vec3 Root = vec3( - CubicRoot.x, - dot(vec2(-0.5f, -0.5f*sqrt(3.0f)), CubicRoot), - dot(vec2(-0.5f, 0.5f*sqrt(3.0f)), CubicRoot) 
- ); - Root = fma(vec3(2.0f*sqrt(-Depressed.y)), Root, vec3(-Coefficient.z)); - return Root; -} - -/*! Given coefficients of a cubic polynomial - coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3 with three real roots, - this function returns the root of least magnitude.*/ -float solveCubicBlinnSmallest(vec4 coeffs) -{ - coeffs.xyz /= coeffs.w; - coeffs.yz /= 3.0; - - vec3 delta = vec3(fma(-coeffs.z, coeffs.z, coeffs.y), fma(-coeffs.z, coeffs.y, coeffs.x), coeffs.z * coeffs.x - coeffs.y * coeffs.y); - float discriminant = 4.0 * delta.x * delta.z - delta.y * delta.y; - - vec2 depressed = vec2(delta.z, -coeffs.x * delta.y + 2.0 * coeffs.y * delta.z); - float theta = abs(atan(coeffs.x * sqrt(discriminant), -depressed.y)) / 3.0; - vec2 sin_cos; - sincos(theta, sin_cos.x, sin_cos.y); - float tmp = 2.0 * sqrt(-depressed.x); - vec2 x = vec2(tmp * sin_cos.y, tmp * (-0.5 * sin_cos.y - 0.5 * sqrt(3.0) * sin_cos.x)); - vec2 s = (x.x + x.y < 2.0 * coeffs.y) ? vec2(-coeffs.x, x.x + coeffs.y) : vec2(-coeffs.x, x.y + coeffs.y); - - return s.x / s.y; -} - -/*! Given coefficients of a quartic polynomial - coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3+coeffs[4]*x^4 with four - real roots, this function returns all roots.*/ -vec4 solveQuarticNeumark(float coeffs[5]) -{ - // Normalization - float B = coeffs[3] / coeffs[4]; - float C = coeffs[2] / coeffs[4]; - float D = coeffs[1] / coeffs[4]; - float E = coeffs[0] / coeffs[4]; - - // Compute coefficients of the cubic resolvent - float P = -2.0*C; - float Q = C*C + B*D - 4.0*E; - float R = D*D + B*B*E -B*C*D; - - // Obtain the smallest cubic root - float y = solveCubicBlinnSmallest(vec4(R, Q, P, 1.0)); - - float BB = B*B; - float fy = 4.0 * y; - float BB_fy = BB - fy; - - float Z = C - y; - float ZZ = Z*Z; - float fE = 4.0 * E; - float ZZ_fE = ZZ - fE; - - float G, g, H, h; - // Compute the coefficients of the quadratics adaptively using the two - // proposed factorizations by Neumark. 
Choose the appropriate - // factorizations using the heuristic proposed by Herbison-Evans. - if (y < 0 || (ZZ + fE) * BB_fy > ZZ_fE * (BB + fy)) { - float tmp = sqrt(BB_fy); - G = (B + tmp) * 0.5; - g = (B - tmp) * 0.5; - - tmp = (B*Z - 2.0*D) / (2.0*tmp); - H = fma(Z, 0.5, tmp); - h = fma(Z, 0.5, -tmp); - } else { - float tmp = sqrt(ZZ_fE); - H = (Z + tmp) * 0.5; - h = (Z - tmp) * 0.5; - - tmp = (B*Z - 2.0*D) / (2.0*tmp); - G = fma(B, 0.5, tmp); - g = fma(B, 0.5, -tmp); - } - // Solve the quadratics - return vec4(solveQuadratic(vec3(1.0, G, H)), solveQuadratic(vec3(1.0, g, h))); -} - -/*! Definition of utility functions for quantization and dequantization of - power moments stored in 16 bits per moment. */ -void offsetMoments(inout vec2 b_even, inout vec2 b_odd, float sign) -{ - b_odd += 0.5 * sign; -} - -void quantizeMoments(out vec2 b_even_q, out vec2 b_odd_q, vec2 b_even, vec2 b_odd) -{ - b_odd_q = b_odd * mat2x2(1.5f, sqrt(3.0f)*0.5f, -2.0f, -sqrt(3.0f)*2.0f / 9.0f); - b_even_q = b_even * mat2x2(4.0f, 0.5f, -4.0f, 0.5f); -} - -void offsetAndDequantizeMoments(out vec2 b_even, out vec2 b_odd, vec2 b_even_q, vec2 b_odd_q) -{ - offsetMoments(b_even_q, b_odd_q, -1.0); - b_odd = b_odd_q * mat2x2(-1.0f / 3.0f, -0.75f, sqrt(3.0f), 0.75f*sqrt(3.0f)); - b_even = b_even_q * mat2x2(0.125f, -0.125f, 1.0f, 1.0f); -} - -void offsetMoments(inout vec3 b_even, inout vec3 b_odd, float sign) -{ - b_odd += 0.5 * sign; - b_even.z += 0.018888946f * sign; -} - -void quantizeMoments(out vec3 b_even_q, out vec3 b_odd_q, vec3 b_even, vec3 b_odd) -{ - const mat3x3 QuantizationMatrixOdd = mat3x3( - 2.5f, -1.87499864450f, 1.26583039016f, - -10.0f, 4.20757543111f, -1.47644882902f, - 8.0f, -1.83257678661f, 0.71061660238f); - const mat3x3 QuantizationMatrixEven = mat3x3( - 4.0f, 9.0f, -0.57759806484f, - -4.0f, -24.0f, 4.61936647543f, - 0.0f, 16.0f, -3.07953906655f); - b_odd_q = b_odd * QuantizationMatrixOdd; - b_even_q = b_even * QuantizationMatrixEven; -} - -void 
offsetAndDequantizeMoments(out vec3 b_even, out vec3 b_odd, vec3 b_even_q, vec3 b_odd_q) -{ - const mat3x3 QuantizationMatrixOdd = mat3x3( - -0.02877789192f, 0.09995235706f, 0.25893353755f, - 0.47635550422f, 0.84532580931f, 0.90779616657f, - 1.55242808973f, 1.05472570761f, 0.83327335647f); - const mat3x3 QuantizationMatrixEven = mat3x3( - 0.00001253044f, -0.24998746956f, -0.37498825271f, - 0.16668494186f, 0.16668494186f, 0.21876713299f, - 0.86602540579f, 0.86602540579f, 0.81189881793f); - offsetMoments(b_even_q, b_odd_q, -1.0); - b_odd = b_odd_q * QuantizationMatrixOdd; - b_even = b_even_q * QuantizationMatrixEven; -} - -void offsetMoments(inout vec4 b_even, inout vec4 b_odd, float sign) -{ - b_odd += 0.5 * sign; - b_even += vec4(0.972481993925964, 1.0, 0.999179192513328, 0.991778293073131) * sign; -} - -void quantizeMoments(out vec4 b_even_q, out vec4 b_odd_q, vec4 b_even, vec4 b_odd) -{ - const mat4x4 mat_odd = mat4x4(3.48044635732474, -27.5760737514826, 55.1267384344761, -31.5311110403183, - 1.26797185782836, -0.928755808743913, -2.07520453231032, 1.23598848322588, - -2.1671560004294, 6.17950199592966, -0.276515571579297, -4.23583042392097, - 0.974332879165755, -0.443426830933027, -0.360491648368785, 0.310149466050223); - const mat4x4 mat_even = mat4x4(0.280504133158527, -0.757633844606942, 0.392179589334688, -0.887531871812237, - -2.01362265883247, 0.221551373038988, -1.06107954265125, 2.83887201588367, - -7.31010494985321, 13.9855979699139, -0.114305766176437, -7.4361899359832, - -15.8954215629556, 79.6186327084103, -127.457278992502, 63.7349456687829); - b_odd_q = mat_odd * b_odd; - b_even_q = mat_even * b_even; -} - -void offsetAndDequantizeMoments(out vec4 b_even, out vec4 b_odd, vec4 b_even_q, vec4 b_odd_q) -{ - const mat4x4 mat_odd = mat4x4(-0.00482399708502382, -0.423201508674231, 0.0348312382605129, 1.67179208266592, - -0.0233402218644408, -0.832829097046478, 0.0193406040499625, 1.21021509068975, - -0.010888537031885, -0.926393772997063, 
-0.11723394414779, 0.983723301818275, - -0.0308713357806732, -0.937989172670245, -0.218033377677099, 0.845991731322996); - const mat4x4 mat_even = mat4x4(-0.976220278891035, -0.456139260269401, -0.0504335521016742, 0.000838800390651085, - -1.04828341778299, -0.229726640510149, 0.0259608334616091, -0.00133632693205861, - -1.03115268628604, -0.077844420809897, 0.00443408851014257, -0.0103744938457406, - -0.996038443434636, 0.0175438624416783, -0.0361414253243963, -0.00317839994022725); - offsetMoments(b_even_q, b_odd_q, -1.0); - b_odd = mat_odd * b_odd_q; - b_even = mat_even * b_even_q; -} - -/*! This function reconstructs the transmittance at the given depth from four - normalized power moments and the given zeroth moment.*/ -float computeTransmittanceAtDepthFrom4PowerMoments(float b_0, vec2 b_even, vec2 b_odd, float depth, float bias, float overestimation, vec4 bias_vector) -{ - vec4 b = vec4(b_odd.x, b_even.x, b_odd.y, b_even.y); - // Bias input data to avoid artifacts - b = mix(b, bias_vector, bias); - vec3 z; - z[0] = depth; - - // Compute a Cholesky factorization of the Hankel matrix B storing only non- - // trivial entries or related products - float L21D11=fma(-b[0], b[1], b[2]); - float D11=fma(-b[0], b[0], b[1]); - float InvD11=1.0f/D11; - float L21=L21D11*InvD11; - float SquaredDepthVariance=fma(-b[1], b[1], b[3]); - float D22=fma(-L21D11, L21, SquaredDepthVariance); - - // Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T - vec3 c=vec3(1.0f, z[0], z[0]*z[0]); - // Forward substitution to solve L*c1=bz - c[1]-=b.x; - c[2]-=b.y+L21*c[1]; - // Scaling to solve D*c2=c1 - c[1]*=InvD11; - c[2]/=D22; - // Backward substitution to solve L^T*c3=c2 - c[1]-=L21*c[2]; - c[0]-=dot(c.yz, b.xy); - // Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions - // z[1] and z[2] - float InvC2=1.0f/c[2]; - float p=c[1]*InvC2; - float q=c[0]*InvC2; - float D=(p*p*0.25f)-q; - float r=sqrt(D); - z[1]=-p*0.5f-r; - z[2]=-p*0.5f+r; - // Compute the absorbance 
by summing the appropriate weights - vec3 polynomial; - vec3 weight_factor = vec3(overestimation, (z[1] < z[0])?1.0f:0.0f, (z[2] < z[0])?1.0f:0.0f); - float f0=weight_factor[0]; - float f1=weight_factor[1]; - float f2=weight_factor[2]; - float f01=(f1-f0)/(z[1]-z[0]); - float f12=(f2-f1)/(z[2]-z[1]); - float f012=(f12-f01)/(z[2]-z[0]); - polynomial[0]=f012; - polynomial[1]=polynomial[0]; - polynomial[0]=f01-polynomial[0]*z[1]; - polynomial[2]=polynomial[1]; - polynomial[1]=polynomial[0]-polynomial[1]*z[0]; - polynomial[0]=f0-polynomial[0]*z[0]; - float absorbance = polynomial[0] + dot(b.xy, polynomial.yz);; - // Turn the normalized absorbance into transmittance - return saturate(exp(-b_0 * absorbance)); -} - -/*! This function reconstructs the transmittance at the given depth from six - normalized power moments and the given zeroth moment.*/ -float computeTransmittanceAtDepthFrom6PowerMoments(float b_0, vec3 b_even, vec3 b_odd, float depth, float bias, float overestimation, float bias_vector[6]) -{ - float b[6] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z }; - // Bias input data to avoid artifacts - for (int i = 0; i != 6; ++i) { - b[i] = mix(b[i], bias_vector[i], bias); - } - - vec4 z; - z[0] = depth; - - // Compute a Cholesky factorization of the Hankel matrix B storing only non- - // trivial entries or related products - float InvD11 = 1.0f / fma(-b[0], b[0], b[1]); - float L21D11 = fma(-b[0], b[1], b[2]); - float L21 = L21D11*InvD11; - float D22 = fma(-L21D11, L21, fma(-b[1], b[1], b[3])); - float L31D11 = fma(-b[0], b[2], b[3]); - float L31 = L31D11*InvD11; - float InvD22 = 1.0f / D22; - float L32D22 = fma(-L21D11, L31, fma(-b[1], b[2], b[4])); - float L32 = L32D22*InvD22; - float D33 = fma(-b[2], b[2], b[5]) - dot(vec2(L31D11, L32D22), vec2(L31, L32)); - float InvD33 = 1.0f / D33; - - // Construct the polynomial whose roots have to be points of support of the - // canonical distribution: bz=(1,z[0],z[0]*z[0],z[0]*z[0]*z[0])^T - vec4 c; - c[0] = 
1.0f; - c[1] = z[0]; - c[2] = c[1] * z[0]; - c[3] = c[2] * z[0]; - // Forward substitution to solve L*c1=bz - c[1] -= b[0]; - c[2] -= fma(L21, c[1], b[1]); - c[3] -= b[2] + dot(vec2(L31, L32), c.yz); - // Scaling to solve D*c2=c1 - c.yzw *= vec3(InvD11, InvD22, InvD33); - // Backward substitution to solve L^T*c3=c2 - c[2] -= L32*c[3]; - c[1] -= dot(vec2(L21, L31), c.zw); - c[0] -= dot(vec3(b[0], b[1], b[2]), c.yzw); - - // Solve the cubic equation - z.yzw = SolveCubic(c); - - // Compute the absorbance by summing the appropriate weights - vec4 weigth_factor; - weigth_factor[0] = overestimation; - weigth_factor.yzw = vec3(greaterThan(z.yzw, z.xxx)); - // Construct an interpolation polynomial - float f0 = weigth_factor[0]; - float f1 = weigth_factor[1]; - float f2 = weigth_factor[2]; - float f3 = weigth_factor[3]; - float f01 = (f1 - f0) / (z[1] - z[0]); - float f12 = (f2 - f1) / (z[2] - z[1]); - float f23 = (f3 - f2) / (z[3] - z[2]); - float f012 = (f12 - f01) / (z[2] - z[0]); - float f123 = (f23 - f12) / (z[3] - z[1]); - float f0123 = (f123 - f012) / (z[3] - z[0]); - vec4 polynomial; - // f012+f0123 *(z-z2) - polynomial[0] = fma(-f0123, z[2], f012); - polynomial[1] = f0123; - // *(z-z1) +f01 - polynomial[2] = polynomial[1]; - polynomial[1] = fma(polynomial[1], -z[1], polynomial[0]); - polynomial[0] = fma(polynomial[0], -z[1], f01); - // *(z-z0) +f0 - polynomial[3] = polynomial[2]; - polynomial[2] = fma(polynomial[2], -z[0], polynomial[1]); - polynomial[1] = fma(polynomial[1], -z[0], polynomial[0]); - polynomial[0] = fma(polynomial[0], -z[0], f0); - float absorbance = dot(polynomial, vec4 (1.0, b[0], b[1], b[2])); - // Turn the normalized absorbance into transmittance - return saturate(exp(-b_0 * absorbance)); -} - -/*! 
This function reconstructs the transmittance at the given depth from eight - normalized power moments and the given zeroth moment.*/ -float computeTransmittanceAtDepthFrom8PowerMoments(float b_0, vec4 b_even, vec4 b_odd, float depth, float bias, float overestimation, float bias_vector[8]) -{ - float b[8] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z, b_odd.w, b_even.w }; - // Bias input data to avoid artifacts - for (int i = 0; i != 8; ++i) { - b[i] = mix(b[i], bias_vector[i], bias); - } - - float z[5]; - z[0] = depth; - - // Compute a Cholesky factorization of the Hankel matrix B storing only non-trivial entries or related products - float D22 = fma(-b[0], b[0], b[1]); - float InvD22 = 1.0 / D22; - float L32D22 = fma(-b[1], b[0], b[2]); - float L32 = L32D22 * InvD22; - float L42D22 = fma(-b[2], b[0], b[3]); - float L42 = L42D22 * InvD22; - float L52D22 = fma(-b[3], b[0], b[4]); - float L52 = L52D22 * InvD22; - - float D33 = fma(-L32, L32D22, fma(-b[1], b[1], b[3])); - float InvD33 = 1.0 / D33; - float L43D33 = fma(-L42, L32D22, fma(-b[2], b[1], b[4])); - float L43 = L43D33 * InvD33; - float L53D33 = fma(-L52, L32D22, fma(-b[3], b[1], b[5])); - float L53 = L53D33 * InvD33; - - float D44 = fma(-b[2], b[2], b[5]) - dot(vec2(L42, L43), vec2(L42D22, L43D33)); - float InvD44 = 1.0 / D44; - float L54D44 = fma(-b[3], b[2], b[6]) - dot(vec2(L52, L53), vec2(L42D22, L43D33)); - float L54 = L54D44 * InvD44; - - float D55 = fma(-b[3], b[3], b[7]) - dot(vec3(L52, L53, L54), vec3(L52D22, L53D33, L54D44)); - float InvD55 = 1.0 / D55; - - // Construct the polynomial whose roots have to be points of support of the - // Canonical distribution: - // bz = (1,z[0],z[0]^2,z[0]^3,z[0]^4)^T - float c[5]; - c[0] = 1.0; - c[1] = z[0]; - c[2] = c[1] * z[0]; - c[3] = c[2] * z[0]; - c[4] = c[3] * z[0]; - - // Forward substitution to solve L*c1 = bz - c[1] -= b[0]; - c[2] -= fma(L32, c[1], b[1]); - c[3] -= b[2] + dot(vec2(L42, L43), vec2(c[1], c[2])); - c[4] -= b[3] + 
dot(vec3(L52, L53, L54), vec3(c[1], c[2], c[3])); - - // Scaling to solve D*c2 = c1 - //c = c .*[1, InvD22, InvD33, InvD44, InvD55]; - c[1] *= InvD22; - c[2] *= InvD33; - c[3] *= InvD44; - c[4] *= InvD55; - - // Backward substitution to solve L^T*c3 = c2 - c[3] -= L54 * c[4]; - c[2] -= dot(vec2(L53, L43), vec2(c[4], c[3])); - c[1] -= dot(vec3(L52, L42, L32), vec3(c[4], c[3], c[2])); - c[0] -= dot(vec4(b[3], b[2], b[1], b[0]), vec4(c[4], c[3], c[2], c[1])); - - // Solve the quartic equation - vec4 zz = solveQuarticNeumark(c); - z[1] = zz[0]; - z[2] = zz[1]; - z[3] = zz[2]; - z[4] = zz[3]; - - // Compute the absorbance by summing the appropriate weights - vec4 weigth_factor = vec4(lessThanEqual(vec4(z[1], z[2], z[3], z[4]), z[0].xxxx)); - // Construct an interpolation polynomial - float f0 = overestimation; - float f1 = weigth_factor[0]; - float f2 = weigth_factor[1]; - float f3 = weigth_factor[2]; - float f4 = weigth_factor[3]; - float f01 = (f1 - f0) / (z[1] - z[0]); - float f12 = (f2 - f1) / (z[2] - z[1]); - float f23 = (f3 - f2) / (z[3] - z[2]); - float f34 = (f4 - f3) / (z[4] - z[3]); - float f012 = (f12 - f01) / (z[2] - z[0]); - float f123 = (f23 - f12) / (z[3] - z[1]); - float f234 = (f34 - f23) / (z[4] - z[2]); - float f0123 = (f123 - f012) / (z[3] - z[0]); - float f1234 = (f234 - f123) / (z[4] - z[1]); - float f01234 = (f1234 - f0123) / (z[4] - z[0]); - - float Polynomial_0; - vec4 Polynomial; - // f0123 + f01234 * (z - z3) - Polynomial_0 = fma(-f01234, z[3], f0123); - Polynomial[0] = f01234; - // * (z - z2) + f012 - Polynomial[1] = Polynomial[0]; - Polynomial[0] = fma(-Polynomial[0], z[2], Polynomial_0); - Polynomial_0 = fma(-Polynomial_0, z[2], f012); - // * (z - z1) + f01 - Polynomial[2] = Polynomial[1]; - Polynomial[1] = fma(-Polynomial[1], z[1], Polynomial[0]); - Polynomial[0] = fma(-Polynomial[0], z[1], Polynomial_0); - Polynomial_0 = fma(-Polynomial_0, z[1], f01); - // * (z - z0) + f1 - Polynomial[3] = Polynomial[2]; - Polynomial[2] = 
fma(-Polynomial[2], z[0], Polynomial[1]); - Polynomial[1] = fma(-Polynomial[1], z[0], Polynomial[0]); - Polynomial[0] = fma(-Polynomial[0], z[0], Polynomial_0); - Polynomial_0 = fma(-Polynomial_0, z[0], f0); - float absorbance = Polynomial_0 + dot(Polynomial, vec4(b[0], b[1], b[2], b[3])); - // Turn the normalized absorbance into transmittance - return saturate(exp(-b_0 * absorbance)); -} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl deleted file mode 100644 index 23ef4ec03..000000000 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/moment_oit.glsl +++ /dev/null @@ -1,251 +0,0 @@ -/*! \file - This header provides the functionality to create the vectors of moments and - to blend surfaces together with an appropriately reconstructed - transmittance. It is needed for both additive passes of moment-based OIT. -*/ - -//cbuffer MomentOIT -//{ -// struct { -// vec4 wrapping_zone_parameters; -// float overestimation; -// float moment_bias; -// }MomentOIT; -//}; - -#include "flywheel:internal/mboit/moment_math.glsl" - -const float moment_bias = 0.25; -const float overestimation = 0.25; -const vec4 wrapping_zone_parameters = vec4(0.31415927, 2.984513, 2.7934167, -18.553917); - - -void clip(float a) { - if (a < 0.) { - discard; - } -} - -// jozu: The trigonometric moments and higher order power moments rely on a second render target -// which the java side is not set up to support. Trying to enable them as is will cause compile errors also. -#define NUM_MOMENTS 8 - -#define SINGLE_PRECISION 1 - -#ifdef _FLW_GENERATE_MOMENTS -/*! Generation of moments in case that rasterizer ordered views are used. - This includes the case if moments are stored in 16 bits. */ - -/*! This functions relies on fixed function additive blending to compute the - vector of moments.moment vector. 
The shader that calls this function must - provide the required render targets.*/ -#if NUM_MOMENTS == 4 -void generateMoments(float depth, float transmittance, out float b_0, out vec4 b) -#elif NUM_MOMENTS == 6 -#if USE_R_RG_RBBA_FOR_MBOIT6 -void generateMoments(float depth, float transmittance, out float b_0, out vec2 b_12, out vec4 b_3456) -#else -void generateMoments(float depth, float transmittance, out float b_0, out vec2 b_12, out vec2 b_34, out vec2 b_56) -#endif -#elif NUM_MOMENTS == 8 -void generateMoments(float depth, float transmittance, out float b_0, out vec4 b_even, out vec4 b_odd) -#endif -{ - transmittance = max(transmittance, 0.000001); - float absorbance = -log(transmittance); - - b_0 = absorbance; - #if TRIGONOMETRIC - float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y); - vec2 circle_point = vec2(sin(phase), cos(phase)); - - vec2 circle_point_pow2 = Multiply(circle_point, circle_point); - #if NUM_MOMENTS == 4 - b = vec4(circle_point, circle_point_pow2) * absorbance; - #elif NUM_MOMENTS == 6 - b_12 = circle_point * absorbance; - #if USE_R_RG_RBBA_FOR_MBOIT6 - b_3456 = vec4(circle_point_pow2, Multiply(circle_point, circle_point_pow2)) * absorbance; - #else - b_34 = circle_point_pow2 * absorbance; - b_56 = Multiply(circle_point, circle_point_pow2) * absorbance; - #endif - #elif NUM_MOMENTS == 8 - b_even = vec4(circle_point_pow2, Multiply(circle_point_pow2, circle_point_pow2)) * absorbance; - b_odd = vec4(circle_point, Multiply(circle_point, circle_point_pow2)) * absorbance; - #endif - #else - float depth_pow2 = depth * depth; - float depth_pow4 = depth_pow2 * depth_pow2; - #if NUM_MOMENTS == 4 - b = vec4(depth, depth_pow2, depth_pow2 * depth, depth_pow4) * absorbance; - #elif NUM_MOMENTS == 6 - b_12 = vec2(depth, depth_pow2) * absorbance; - #if USE_R_RG_RBBA_FOR_MBOIT6 - b_3456 = vec4(depth_pow2 * depth, depth_pow4, depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance; - #else - b_34 = vec2(depth_pow2 * depth, 
depth_pow4) * absorbance; - b_56 = vec2(depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance; - #endif - #elif NUM_MOMENTS == 8 - float depth_pow6 = depth_pow4 * depth_pow2; - b_even = vec4(depth_pow2, depth_pow4, depth_pow6, depth_pow6 * depth_pow2) * absorbance; - b_odd = vec4(depth, depth_pow2 * depth, depth_pow4 * depth, depth_pow6 * depth) * absorbance; - #endif - #endif -} - -#else//MOMENT_GENERATION is disabled - -layout (binding = 7) uniform sampler2D _flw_zeroth_moment_sampler; -layout (binding = 8) uniform sampler2D _flw_moments0_sampler; -layout (binding = 9) uniform sampler2D _flw_moments1_sampler; - -/*! This function is to be called from the shader that composites the - transparent fragments. It reads the moments and calls the appropriate - function to reconstruct the transmittance at the specified depth.*/ -void resolveMoments(out float transmittance_at_depth, out float total_transmittance, float depth, vec2 sv_pos) -{ - ivec2 idx0 = ivec2(sv_pos); - ivec2 idx1 = idx0; - - transmittance_at_depth = 1; - total_transmittance = 1; - - float b_0 = texelFetch(_flw_zeroth_moment_sampler, idx0, 0).x; - clip(b_0 - 0.00100050033f); - total_transmittance = exp(-b_0); - - #if NUM_MOMENTS == 4 - #if TRIGONOMETRIC - vec4 b_tmp = texelFetch(_flw_moments0_sampler, idx0, 0); - vec2 trig_b[2]; - trig_b[0] = b_tmp.xy; - trig_b[1] = b_tmp.zw; - #if SINGLE_PRECISION - trig_b[0] /= b_0; - trig_b[1] /= b_0; - #else - trig_b[0] = fma(trig_b[0], 2.0, -1.0); - trig_b[1] = fma(trig_b[1], 2.0, -1.0); - #endif - transmittance_at_depth = computeTransmittanceAtDepthFrom2TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); - #else - vec4 b_1234 = texelFetch(_flw_moments0_sampler, idx0, 0).xyzw; - #if SINGLE_PRECISION - vec2 b_even = b_1234.yw; - vec2 b_odd = b_1234.xz; - - b_even /= b_0; - b_odd /= b_0; - - const vec4 bias_vector = vec4(0, 0.375, 0, 0.375); - #else - vec2 b_even_q = b_1234.yw; - vec2 b_odd_q = b_1234.xz; - - // 
Dequantize the moments - vec2 b_even; - vec2 b_odd; - offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); - const vec4 bias_vector = vec4(0, 0.628, 0, 0.628); - #endif - transmittance_at_depth = computeTransmittanceAtDepthFrom4PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); - #endif - #elif NUM_MOMENTS == 6 - ivec2 idx2 = idx0; - #if TRIGONOMETRIC - vec2 trig_b[3]; - trig_b[0] = texelFetch(_flw_moments0_sampler, idx0, 0).xy; - #if USE_R_RG_RBBA_FOR_MBOIT6 - vec4 tmp = texelFetch(extra_moments, idx0, 0); - trig_b[1] = tmp.xy; - trig_b[2] = tmp.zw; - #else - trig_b[1] = texelFetch(_flw_moments1_sampler, idx1, 0).xy; - trig_b[2] = texelFetch(_flw_moments0_sampler, idx2, 0).xy; - #endif - #if SINGLE_PRECISION - trig_b[0] /= b_0; - trig_b[1] /= b_0; - trig_b[2] /= b_0; - #else - trig_b[0] = fma(trig_b[0], 2.0, -1.0); - trig_b[1] = fma(trig_b[1], 2.0, -1.0); - trig_b[2] = fma(trig_b[2], 2.0, -1.0); - #endif - transmittance_at_depth = computeTransmittanceAtDepthFrom3TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); - #else - vec2 b_12 = texelFetch(_flw_moments0_sampler, idx0, 0).xy; - #if USE_R_RG_RBBA_FOR_MBOIT6 - vec4 tmp = texelFetch(extra_moments, idx0, 0); - vec2 b_34 = tmp.xy; - vec2 b_56 = tmp.zw; - #else - vec2 b_34 = texelFetch(_flw_moments1_sampler, idx1, 0).xy; - vec2 b_56 = texelFetch(_flw_moments0_sampler, idx2, 0).xy; - #endif - #if SINGLE_PRECISION - vec3 b_even = vec3(b_12.y, b_34.y, b_56.y); - vec3 b_odd = vec3(b_12.x, b_34.x, b_56.x); - - b_even /= b_0; - b_odd /= b_0; - - const float bias_vector[6] = { 0, 0.48, 0, 0.451, 0, 0.45 }; - #else - vec3 b_even_q = vec3(b_12.y, b_34.y, b_56.y); - vec3 b_odd_q = vec3(b_12.x, b_34.x, b_56.x); - // Dequantize b_0 and the other moments - vec3 b_even; - vec3 b_odd; - offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); - - const float bias_vector[6] = { 0, 0.5566, 0, 0.489, 0, 0.47869382 }; - #endif - 
transmittance_at_depth = computeTransmittanceAtDepthFrom6PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); - #endif - #elif NUM_MOMENTS == 8 - #if TRIGONOMETRIC - vec4 b_tmp = texelFetch(_flw_moments0_sampler, idx0, 0); - vec4 b_tmp2 = texelFetch(_flw_moments1_sampler, idx1, 0); - #if SINGLE_PRECISION - vec2 trig_b[4] = { - b_tmp2.xy / b_0, - b_tmp.xy / b_0, - b_tmp2.zw / b_0, - b_tmp.zw / b_0 - }; - #else - vec2 trig_b[4] = { - fma(b_tmp2.xy, 2.0, -1.0), - fma(b_tmp.xy, 2.0, -1.0), - fma(b_tmp2.zw, 2.0, -1.0), - fma(b_tmp.zw, 2.0, -1.0) - }; - #endif - transmittance_at_depth = computeTransmittanceAtDepthFrom4TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters); - #else - #if SINGLE_PRECISION - vec4 b_even = texelFetch(_flw_moments0_sampler, idx0, 0); - vec4 b_odd = texelFetch(_flw_moments1_sampler, idx1, 0); - - b_even /= b_0; - b_odd /= b_0; - const float bias_vector[8] = { 0, 0.75, 0, 0.67666666666666664, 0, 0.63, 0, 0.60030303030303034 }; - #else - vec4 b_even_q = texelFetch(_flw_moments0_sampler, idx0, 0); - vec4 b_odd_q = texelFetch(_flw_moments1_sampler, idx1, 0); - - // Dequantize the moments - vec4 b_even; - vec4 b_odd; - offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q); - const float bias_vector[8] = { 0, 0.42474916387959866, 0, 0.22407802675585284, 0, 0.15369230769230768, 0, 0.12900440529089119 }; - #endif - transmittance_at_depth = computeTransmittanceAtDepthFrom8PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector); - #endif - #endif - -} -#endif diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl deleted file mode 100644 index 30fe6a65e..000000000 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/mboit/trigonometric_moment_math.glsl +++ /dev/null @@ -1,311 
+0,0 @@ -/*! \file - This header provides the utility functions to reconstruct the transmittance - from a given vector of trigonometric moments (2, 3 or 4 trigonometric - moments) at a specified depth.*/ -#include "flywheel:internal/mboit/complex_algebra.glsl" - -/*! This utility function turns a point on the unit circle into a scalar - parameter. It is guaranteed to grow monotonically for (cos(phi),sin(phi)) - with phi in 0 to 2*pi. There are no other guarantees. In particular it is - not an arclength parametrization. If you change this function, you must - also change circleToParameter() in MomentOIT.cpp.*/ -float circleToParameter(vec2 circle_point){ - float result=abs(circle_point.y)-abs(circle_point.x); - result=(circle_point.x<0.0f)?(2.0f-result):result; - return (circle_point.y<0.0f)?(6.0f-result):result; -} - -/*! This utility function returns the appropriate weight factor for a root at - the given location. Both inputs are supposed to be unit vectors. If a - circular arc going counter clockwise from (1.0,0.0) meets root first, it - returns 1.0, otherwise 0.0 or a linear ramp in the wrapping zone.*/ -float getRootWeightFactor(float reference_parameter, float root_parameter, vec4 wrapping_zone_parameters){ - float binary_weight_factor=(root_parameter Date: Fri, 21 Feb 2025 20:33:46 -0800 Subject: [PATCH 05/12] Inline tuning - Inline most defines, I don't plan on changing them - Fix depth passed to REMOVE_SIGNAL block - Use (blindly copied) optimized function in compositing - Make the noise factor a uniform --- .../engine/indirect/IndirectCullingGroup.java | 2 + .../flywheel/flywheel/internal/common.frag | 69 ++++++++--------- .../internal/indirect/oit_composite.frag | 76 +++++++++---------- 3 files changed, 71 insertions(+), 76 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 
25e38ad11..6f3a52d35 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -221,6 +221,8 @@ public class IndirectCullingGroup { // Don't need to do this unless the program changes. drawProgram.bind(); + + drawProgram.setFloat("_flw_blueNoiseFactor", 0.08f); } MaterialRenderState.setupOit(multiDraw.material); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index d4715ff3a..c823e617b 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -21,19 +21,12 @@ flat in uvec2 _flw_ids; #define TRANSPARENCY_WAVELET_RANK 3 #define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 -#define floatN float -#define all(e) (e) -#define mad fma -#define lerp mix -#define Coefficients_Out vec4[4] -#define Coefficients_In sampler2DArray +#define REMOVE_SIGNAL true layout (binding = 7) uniform sampler2D _flw_depthRange; layout (binding = 8) uniform sampler2DArray _flw_coefficients; -#define REMOVE_SIGNAL true - #ifdef _FLW_DEPTH_RANGE layout (location = 0) out vec2 _flw_depthRange_out; @@ -49,11 +42,11 @@ layout (location = 1) out vec4 _flw_coeffs1; layout (location = 2) out vec4 _flw_coeffs2; layout (location = 3) out vec4 _flw_coeffs3; -void add_to_index(inout Coefficients_Out coefficients, uint index, floatN addend) { +void add_to_index(inout vec4[4] coefficients, uint index, float addend) { coefficients[index >> 2][index & 3u] = addend; } -void add_event_to_wavelets(inout Coefficients_Out coefficients, floatN signal, float depth) +void add_event_to_wavelets(inout vec4[4] coefficients, float signal, float depth) { depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / 
TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; @@ -68,17 +61,17 @@ void add_event_to_wavelets(inout Coefficients_Out coefficients, floatN signal, f int wavelet_sign = ((index & 1) << 1) - 1; float wavelet_phase = ((index + 1) & 1) * exp2(-power); - floatN addend = mad(mad(-exp2(-power), k, depth), wavelet_sign, wavelet_phase) * exp2(power * 0.5) * signal; + float addend = fma(fma(-exp2(-power), k, depth), wavelet_sign, wavelet_phase) * exp2(power * 0.5) * signal; add_to_index(coefficients, new_index, addend); index = new_index; } - floatN addend = mad(signal, -depth, signal); + float addend = fma(signal, -depth, signal); add_to_index(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1, addend); } -void add_transmittance_event_to_wavelets(inout Coefficients_Out coefficients, floatN transmittance, float depth) +void add_transmittance_event_to_wavelets(inout vec4[4] coefficients, float transmittance, float depth) { float absorbance = -log(max(transmittance, 0.00001));// transforming the signal from multiplicative transmittance to additive absorbance add_event_to_wavelets(coefficients, absorbance, depth); @@ -91,25 +84,26 @@ void add_transmittance_event_to_wavelets(inout Coefficients_Out coefficients, fl layout (location = 0) out vec4 _flw_accumulate; -floatN get_coefficients(in Coefficients_In coefficients, uint index) { +float get_coefficients(in sampler2DArray coefficients, uint index) { return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u]; } - floatN evaluate_wavelets(in Coefficients_In coefficients, float depth, floatN signal) +float evaluate_wavelets(in sampler2DArray coefficients, float depth, float signal) { - floatN scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); - if (all(scale_coefficient == 0)) + float scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + if (scale_coefficient == 0) { return 0; } - if (REMOVE_SIGNAL) - { - floatN 
scale_coefficient_addend = mad(signal, -depth, signal); - scale_coefficient -= scale_coefficient_addend; - } depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + if (REMOVE_SIGNAL) + { + float scale_coefficient_addend = fma(signal, -depth, signal); + scale_coefficient -= scale_coefficient_addend; + } + float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); bool sample_a = index_b >= 1; @@ -118,8 +112,8 @@ floatN get_coefficients(in Coefficients_In coefficients, uint index) { index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; - floatN b = scale_coefficient; -floatN a = sample_a ? scale_coefficient : 0; + float b = scale_coefficient; + float a = sample_a ? scale_coefficient : 0; for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) { @@ -127,12 +121,12 @@ floatN a = sample_a ? scale_coefficient : 0; int new_index_b = (index_b - 1) >> 1; int wavelet_sign_b = ((index_b & 1) << 1) - 1; - floatN coeff_b = get_coefficients(coefficients, new_index_b); + float coeff_b = get_coefficients(coefficients, new_index_b); if (REMOVE_SIGNAL) { float wavelet_phase_b = ((index_b + 1) & 1) * exp2(-power); float k = float((new_index_b + 1) & ((1 << power) - 1)); - floatN addend = mad(mad(-exp2(-power), k, depth), wavelet_sign_b, wavelet_phase_b) * exp2(power * 0.5) * signal; + float addend = fma(fma(-exp2(-power), k, depth), wavelet_sign_b, wavelet_phase_b) * exp2(power * 0.5) * signal; coeff_b -= addend; } b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; @@ -142,7 +136,7 @@ floatN a = sample_a ? scale_coefficient : 0; { int new_index_a = (index_a - 1) >> 1; int wavelet_sign_a = ((index_a & 1) << 1) - 1; - floatN coeff_a = (new_index_a == new_index_b) ? 
coeff_b : get_coefficients(coefficients, new_index_a);// No addend here on purpose, the original signal didn't contribute to this coefficient + float coeff_a = (new_index_a == new_index_b) ? coeff_b : get_coefficients(coefficients, new_index_a);// No addend here on purpose, the original signal didn't contribute to this coefficient a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a; index_a = new_index_a; } @@ -150,12 +144,12 @@ floatN a = sample_a ? scale_coefficient : 0; float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth); - return lerp(a, b, t); + return mix(a, b, t); } - floatN evaluate_transmittance_wavelets(in Coefficients_In coefficients, float depth, floatN signal) +float evaluate_transmittance_wavelets(in sampler2DArray coefficients, float depth, float signal) { - floatN absorbance = evaluate_wavelets(coefficients, depth, signal); + float absorbance = evaluate_wavelets(coefficients, depth, signal); return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance } @@ -201,9 +195,9 @@ float blue() { return mask; } -uniform vec3 _flw_depthAdjust; +uniform float _flw_blueNoiseFactor = 0.08; -float adjust_depth(float normalizedDepth) { +float tented_blue_noise(float normalizedDepth) { float tentIn = abs(normalizedDepth * 2. 
- 1); float tentIn2 = tentIn * tentIn; @@ -212,7 +206,7 @@ float adjust_depth(float normalizedDepth) { float b = blue(); - return normalizedDepth - b * tent * 0.08; + return b * tent; } float linearize_depth(float d, float zNear, float zFar) { @@ -228,9 +222,10 @@ float depth() { float linearDepth = linear_depth(); vec2 depthRange = texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).rg; - float depth = (linearDepth + depthRange.x) / (depthRange.x + depthRange.y); + float delta = depthRange.x + depthRange.y; + float depth = (linearDepth + depthRange.x) / delta; - return adjust_depth(depth); + return depth - tented_blue_noise(depth) * _flw_blueNoiseFactor; } @@ -335,7 +330,7 @@ void _flw_main() { #ifdef _FLW_COLLECT_COEFFS - Coefficients_Out result; + vec4[4] result; result[0] = vec4(0.); result[1] = vec4(0.); result[2] = vec4(0.); @@ -352,7 +347,7 @@ void _flw_main() { #ifdef _FLW_EVALUATE - floatN transmittance = evaluate_transmittance_wavelets(_flw_coefficients, depth(), 1. - color.a); + float transmittance = evaluate_transmittance_wavelets(_flw_coefficients, depth(), 1. 
- color.a); _flw_accumulate = vec4(color.rgb * color.a, color.a) * transmittance; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag index 1fee5f927..b09ed5acd 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag @@ -5,41 +5,15 @@ layout (binding = 1) uniform sampler2D _flw_accumulate; #define TRANSPARENCY_WAVELET_RANK 3 #define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 -#define floatN float -#define all(e) (e) -#define mad fma -#define lerp mix -#define Coefficients_Out vec4[4] -#define Coefficients_In sampler2DArray - -floatN get_coefficients(in Coefficients_In coefficients, uint index) { +float get_coefficients(in sampler2DArray coefficients, uint index) { return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u]; } - floatN evaluate_wavelet_index(in Coefficients_In coefficients, int index) +float evaluate_wavelets(in sampler2DArray coefficients, float depth) { - floatN result = 0; - - index += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; - - for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) - { - int power = TRANSPARENCY_WAVELET_RANK - i; - int new_index = (index - 1) >> 1; - floatN coeff = get_coefficients(coefficients, new_index); - int wavelet_sign = ((index & 1) << 1) - 1; - result -= exp2(float(power) * 0.5) * coeff * wavelet_sign; - index = new_index; - } - return result; -} - - - floatN evaluate_wavelets(in Coefficients_In coefficients, float depth) -{ - floatN scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); - if (all(scale_coefficient == 0)) + float scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + if (scale_coefficient == 0) { return 0; } 
@@ -47,20 +21,44 @@ floatN get_coefficients(in Coefficients_In coefficients, uint index) { depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; - int index = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + bool sample_a = index_b >= 1; + int index_a = sample_a ? (index_b - 1) : index_b; - floatN a = 0; -floatN b = scale_coefficient + evaluate_wavelet_index(coefficients, index); - if (index > 0) { a = scale_coefficient + evaluate_wavelet_index(coefficients, index - 1); } + index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + float b = scale_coefficient; + float a = sample_a ? scale_coefficient : 0; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) + { + int power = TRANSPARENCY_WAVELET_RANK - i; + + int new_index_b = (index_b - 1) >> 1; + int wavelet_sign_b = ((index_b & 1) << 1) - 1; + float coeff_b = get_coefficients(coefficients, new_index_b); + b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; + index_b = new_index_b; + + if (sample_a) + { + int new_index_a = (index_a - 1) >> 1; + int wavelet_sign_a = ((index_a & 1) << 1) - 1; + float coeff_a = (new_index_a == new_index_b) ? coeff_b : get_coefficients(coefficients, new_index_a); + a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a; + index_a = new_index_a; + } + } float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 
1.0 : fract(coefficient_depth); - floatN signal = lerp(a, b, t);// You can experiment here with different types of interpolation as well - return signal; + + return mix(a, b, t); } - floatN evaluate_transmittance_wavelets(in Coefficients_In coefficients, float depth) +float evaluate_transmittance_wavelets(in sampler2DArray coefficients, float depth) { - floatN absorbance = evaluate_wavelets(coefficients, depth); + float absorbance = evaluate_wavelets(coefficients, depth); return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance } @@ -73,7 +71,7 @@ void main() { discard; } - floatN total_transmittance = evaluate_transmittance_wavelets(_flw_coefficients, infinity); + float total_transmittance = evaluate_transmittance_wavelets(_flw_coefficients, infinity); frag = vec4(texel.rgb / texel.a, total_transmittance); } From 8407d206baad49960b40e7ce1283109284b033d6 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Fri, 21 Feb 2025 22:27:05 -0800 Subject: [PATCH 06/12] Texture this - Add blue noise texture --- .../flywheel/backend/NoiseTextures.java | 43 +++++++ .../flywheel/backend/Samplers.java | 1 + .../engine/indirect/IndirectCullingGroup.java | 2 +- .../engine/indirect/OitFramebuffer.java | 13 ++- .../flywheel/flywheel/internal/common.frag | 110 ++++++------------ .../textures/flywheel/noise/blue/0.png | Bin 0 -> 7115 bytes .../backend/compile/FlwProgramsReloader.java | 2 + .../backend/compile/FlwProgramsReloader.java | 2 + 8 files changed, 97 insertions(+), 76 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java create mode 100644 common/src/backend/resources/assets/flywheel/textures/flywheel/noise/blue/0.png diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java b/common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java new file mode 100644 index 000000000..157a64ded --- /dev/null +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java @@ -0,0 +1,43 @@ +package dev.engine_room.flywheel.backend; + +import java.io.IOException; + +import org.jetbrains.annotations.UnknownNullability; + +import com.mojang.blaze3d.platform.NativeImage; + +import dev.engine_room.flywheel.api.Flywheel; +import net.minecraft.client.renderer.texture.DynamicTexture; +import net.minecraft.resources.ResourceLocation; +import net.minecraft.server.packs.resources.ResourceManager; + +public class NoiseTextures { + + public static final ResourceLocation NOISE_TEXTURE = Flywheel.rl("textures/flywheel/noise/blue/0.png"); + public static final int NOISE_LAYERS = 16; + + @UnknownNullability + public static DynamicTexture BLUE_NOISE; + + + public static void reload(ResourceManager manager) { + if (BLUE_NOISE != null) { + BLUE_NOISE.close(); + BLUE_NOISE = null; + } + var optional = manager.getResource(NOISE_TEXTURE); + + if (optional.isEmpty()) { + return; + } + + try (var is = optional.get() + .open()) { + var image = NativeImage.read(NativeImage.Format.LUMINANCE, is); + + BLUE_NOISE = new DynamicTexture(image); + } catch (IOException e) { + + } + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java index 272314894..db27bc218 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/Samplers.java @@ -13,4 +13,5 @@ public class Samplers { public static final GlTextureUnit DEPTH_RANGE = GlTextureUnit.T7; public static final GlTextureUnit COEFFICIENTS = GlTextureUnit.T8; + public static final GlTextureUnit NOISE = GlTextureUnit.T9; } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 
6f3a52d35..886f76830 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -222,7 +222,7 @@ public class IndirectCullingGroup { // Don't need to do this unless the program changes. drawProgram.bind(); - drawProgram.setFloat("_flw_blueNoiseFactor", 0.08f); + drawProgram.setFloat("_flw_blueNoiseFactor", 0.07f); } MaterialRenderState.setupOit(multiDraw.material); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java index 3d50fb76e..23260f106 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java @@ -6,6 +6,7 @@ import org.lwjgl.opengl.GL46; import com.mojang.blaze3d.platform.GlStateManager; import com.mojang.blaze3d.systems.RenderSystem; +import dev.engine_room.flywheel.backend.NoiseTextures; import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; import dev.engine_room.flywheel.backend.gl.GlTextureUnit; @@ -63,6 +64,14 @@ public class OitFramebuffer { Samplers.DEPTH_RANGE.makeActive(); GlStateManager._bindTexture(depthBounds); + Samplers.NOISE.makeActive(); + NoiseTextures.BLUE_NOISE.bind(); + + NoiseTextures.BLUE_NOISE.setFilter(true, false); + GL46.glTextureParameteri(NoiseTextures.BLUE_NOISE.getId(), GL32.GL_TEXTURE_WRAP_S, GL32.GL_REPEAT); + GL46.glTextureParameteri(NoiseTextures.BLUE_NOISE.getId(), GL32.GL_TEXTURE_WRAP_T, GL32.GL_REPEAT); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2, GL46.GL_COLOR_ATTACHMENT3, GL46.GL_COLOR_ATTACHMENT4}); GL46.glClearNamedFramebufferfv(fbo, 
GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); @@ -85,9 +94,11 @@ public class OitFramebuffer { Samplers.COEFFICIENTS.makeActive(); GlStateManager._bindTexture(0); - GL46.glBindTextureUnit(Samplers.COEFFICIENTS.number, coefficients); + Samplers.NOISE.makeActive(); + NoiseTextures.BLUE_NOISE.bind(); + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT5}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index c823e617b..fe476a9c5 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -27,6 +27,42 @@ layout (binding = 7) uniform sampler2D _flw_depthRange; layout (binding = 8) uniform sampler2DArray _flw_coefficients; +layout (binding = 9) uniform sampler2D _flw_blueNoise; + + +uniform float _flw_blueNoiseFactor = 0.08; + +float tented_blue_noise(float normalizedDepth) { + + float tentIn = abs(normalizedDepth * 2. 
- 1); + float tentIn2 = tentIn * tentIn; + float tentIn4 = tentIn2 * tentIn2; + float tent = 1 - (tentIn2 * tentIn4); + + float b = texture(_flw_blueNoise, gl_FragCoord.xy / vec2(64)).r; + + return b * tent; +} + +float linearize_depth(float d, float zNear, float zFar) { + float z_n = 2.0 * d - 1.0; + return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear)); +} + +float linear_depth() { + return linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); +} + +float depth() { + float linearDepth = linear_depth(); + + vec2 depthRange = texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).rg; + float delta = depthRange.x + depthRange.y; + float depth = (linearDepth + depthRange.x) / delta; + + return depth - tented_blue_noise(depth) * _flw_blueNoiseFactor; +} + #ifdef _FLW_DEPTH_RANGE layout (location = 0) out vec2 _flw_depthRange_out; @@ -155,80 +191,6 @@ float evaluate_transmittance_wavelets(in sampler2DArray coefficients, float dept #endif -// TODO: blue noise texture -uint HilbertIndex(uvec2 p) { - uint i = 0u; - for (uint l = 0x4000u; l > 0u; l >>= 1u) { - uvec2 r = min(p & l, 1u); - - i = (i << 2u) | ((r.x * 3u) ^ r.y); - p = r.y == 0u ? 
(0x7FFFu * r.x) ^ p.yx : p; - } - return i; -} - -uint ReverseBits(uint x) { - x = ((x & 0xaaaaaaaau) >> 1) | ((x & 0x55555555u) << 1); - x = ((x & 0xccccccccu) >> 2) | ((x & 0x33333333u) << 2); - x = ((x & 0xf0f0f0f0u) >> 4) | ((x & 0x0f0f0f0fu) << 4); - x = ((x & 0xff00ff00u) >> 8) | ((x & 0x00ff00ffu) << 8); - return (x >> 16) | (x << 16); -} - -// from: https://psychopath.io/post/2021_01_30_building_a_better_lk_hash -uint OwenHash(uint x, uint seed) { // seed is any random number - x ^= x * 0x3d20adeau; - x += seed; - x *= (seed >> 16) | 1u; - x ^= x * 0x05526c56u; - x ^= x * 0x53a22864u; - return x; -} - -// https://www.shadertoy.com/view/ssBBW1 -float blue() { - uint m = HilbertIndex(uvec2(gl_FragCoord.xy));// map pixel coords to hilbert curve index - m = OwenHash(ReverseBits(m), 0xe7843fbfu);// owen-scramble hilbert index - m = OwenHash(ReverseBits(m), 0x8d8fb1e0u);// map hilbert index to sobol sequence and owen-scramble - float mask = float(ReverseBits(m)) / 4294967296.0;// convert to float - - return mask; -} - -uniform float _flw_blueNoiseFactor = 0.08; - -float tented_blue_noise(float normalizedDepth) { - - float tentIn = abs(normalizedDepth * 2. 
- 1); - float tentIn2 = tentIn * tentIn; - float tentIn4 = tentIn2 * tentIn2; - float tent = 1 - (tentIn2 * tentIn4); - - float b = blue(); - - return b * tent; -} - -float linearize_depth(float d, float zNear, float zFar) { - float z_n = 2.0 * d - 1.0; - return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear)); -} - -float linear_depth() { - return linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); -} - -float depth() { - float linearDepth = linear_depth(); - - vec2 depthRange = texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).rg; - float delta = depthRange.x + depthRange.y; - float depth = (linearDepth + depthRange.x) / delta; - - return depth - tented_blue_noise(depth) * _flw_blueNoiseFactor; -} - - #else out vec4 _flw_outputColor; diff --git a/common/src/backend/resources/assets/flywheel/textures/flywheel/noise/blue/0.png b/common/src/backend/resources/assets/flywheel/textures/flywheel/noise/blue/0.png new file mode 100644 index 0000000000000000000000000000000000000000..d1920c63aab8bcee44df5a8f9f98cdf361b11a80 GIT binary patch literal 7115 zcmV;+8#LsJP)_5bKbV*nrp^yzx@_t$BvCV?zkhCTW-1N+qZA*w%cxT_St90@y8z@tF5+L9CXk@ zF@F5`n03}!6mxkdE>LsK8ySAyD$Fz_uqK#x#!~2 zOD~P7rkX0Y-FDkJ>#Vcl*I$2)C!TmBV|)Mo_haRiSB{-_+9}RBc92Tf@3-=+L40@Jut! 
z6sMeWN+#-0a4)|2Vr;(o=JCiQkHnpK-Wlhge}3G2^UX2E6jQ{y>#iH8pMHAWefQnD znZTD=Vu^U@p@+hnYpu0bCL*~_HrXUvYq{Zd*Ik#NU2@4KaqF$O#=iURn~4g1_uY5T z^*;an^B6Q}P#kf@5i!p^^TdW5ZWt3zIAMJI?YDXT{`>EbNhX;je){RBc=OFSW6n9} z%y`84^UptL@vJYt-+%u-_xa$14|1=szWOSrpMLs$?x>@ViuU!_UuP9Pbk$W?Wfv_r z%{0@*O*h??4=B!(OD-8bdi026jyWa+dF-*rVuu}e$UxqC=bfzje*5hgUAlD10v>zp zu^GfmFTE6}opxGmx7~JOFPks3%rf!IFTX^mPMtEjYp=aF_n36jN#o>`PmVjL9dTJerQF%qy?Fk`U_JwQF`tCtTOr2_~2z-h1!8 z`1|j_W0h4_$*$-mc&qrTtF9W)KmUB3dFGk1{r20(lTSVw^Uptjtgylgndt1Z&mO~u z4a-0dKm71a?B$nVPQ;BEF(M24`s=S}Q9&LGXPIS|`2PFv6HQAmy>vDQjkefgi}>S@ zKVr7oX3P7>U{IOm*m^14c2bkRi#r8(x9Bl*1k`s-(~g7En+ zyX=zOMGwf*sXg}CBlo~bMHD0iqL^6hB}nw^-Me>E2`%or>#jKW+;d~?wbxEAE21uf zIt|%$(@hgya)8|K-Mh!c6HgpJ{P06O{P4pu-E`CCeTN)!NPP6sN71WSuV^@;%lJlM zbP>#Sz-K$|xMOYzLeD(&Oa?>rBr@%^)25iICSkF;uGt7JKw1LVUw?f*OOWt=uDRxl z8*aEE3o~ZSm{@)F)no3t=l*}UUEgc3y|SZ69(iPT5X#&;-+c4MhaY|z#~pWEb``A_ zSYUy8>ZzwvLlp6Z6HdtWP{aP{Nn_-8%{A9#;fegh3ojfETqX6^TW>|ro;@=u)zR4n z7hEtNc;JB)k#D~FCV98)vdiW*d{UU(Zo4fzW>G;4Ku5=o8<&Zpm`)yj^wHUgd+xa> z`FG)k7iNJ4dF7Q?W>|xTQ*@#ykaxia7o{$s|3l(XE;)_o+SS|Hd==IiHFA?_FUw_36Gt7{Mr#oT>b4owiWRoSWHIrygJ@wQ% zo6rdbrgp^5Ajh+9kL;70VD7NXSS!}Wh?H*FNoqQMC;2hzf7ou zB{qc;SVF`^Fn@oCVV% z)0hbkLWmI!!C%5A4#^Nc9Yi}~N$3>Q0y4JtWK&H?xVl7vYjIB5XUB@>}Uhz;uj9l;V!qT?C%XhDe>omWr(A&Ef| zbm6Yg%@FtqDaLC$166bbx4{=e%?2l)cw#n$g%VvcKpxa64(f=N28oH~)|AR(%@)wh z@mMyLDQp8-Fa%4&R3Zn`Ho|F%s5l-A0N?2Y=uupqZSE^wW$Px)e?U zn@Pcn0MppqmPW zhOCtn*dImgM=wB{=R+e-i%F*mFCjH6KnM$`KSUFvG=zRYi~I;P7BDg;T;_2+&C6F3{hW9X2WW2QmF@FYMJBnKcocnu{KTL8`+xQnZJYT`TZyz`Py z3Z}D$Gl~s$rcU=#dWE4S&Hxg!C+cdH^_?$>D4Hpbd}s@CG{k8HfoyEaCZMW|c!7tM zl)_7hc=2xh(_J(%|Kk;r^1gt_x{uvZde18q-q}>pV2JA+2lD#&@1MhvlbbFQP_9Ez z70S8zfkTc_>e9P(13O#Xj9Z8q%28Co!|^4&|%tRPZCm40_R=@ zf&o_pkYUGoD=^ik4HT9*5=}Y+&Nk8w)0ulf({#p0ogl$fV)%{2tV+>PjC~PJf@(jF z;3i_Sre+aq0!bT8d_+{d8bXH%D8xY*VI`hy)UefzqAKVHouj0>0cI8^kbIs6aRfqS za;-QG*fn zB{~$FU+NoW^qt)k%gF$RFpm98x9f@$w>^{aR?j@NHjrk#${)3@KarZ z>M$xf&PC|HKS+(;gqOGK{9S@p6jkD 
zE^0wKG}9d@w|~AAwSmbA%q0i1#$rm!(sWW{cxRAMAd{L!JZl*7F2TKQ)!aaoSfNU@ zGVrq|fpcAV+^^t@EKq0!S7-xm_i-fwn-SO>rB_gN(^&%-)M%ha;kQDvbJMDrp@?>A zDvCn2Jy5af9Cf!i6~#4b4H-6~%wWxTL5)b-fJggW6?KnQN>Dd(lbY*}E(#E0YP2S( z+<JUV#> z^2{Ob0de%;MCJ^IwGRG55g4PI{lunGATB9xMF6=Jy4hEA>`8Ob5H*|;2rLvecEi3T z0_sE#p9E(V#cy0TkZ=r(sGI~X;vQZM>I$fvY>9QTLjkclZlW9P2iojOR8fz%uwcni zXdR{%V9X&Dhs$`I1U05Y!pjv5a+F>?oB+zq2Vy071}*n;Bm23Z?Q;mRGzS`Rb;M;` zlGR~VrL4SM#VD%@O?7eF*o#Zv*03$Fs~ld44UwZ2*2Z;6(lO{#BsR+KtmD|M@Cr@+ z9g55eY>vHh8AF#nsJ_7spHaj8lCU2HqlrJliMnV3MdGSA%;-$BXa&kN3Dvtj3Ry2h zbX8TsQ}@`Oz*JqpD(f?|IJ6japog_kaO$sk^n}|syB zN~9H@z0oJA!*A506A-B|IttS44_fOxM{95uR4N;V6k_b2R^O@Nc&RkbSlD$7P)i;S z&V)o~Yy`fl?Fj1#YR<-0kY#6{Ay6QRuSAZOfwDiHLp{&enceyt!i^}niIMP@yGV*l z`5ZFXu$Z}ng3>E<1r$RG6f3%E5E?ZI`osuHjlei87(56^y)(l#oibvAkwgW+ZYZj1 z3PjZ{PMyJ+lPD^kFvgnkK@PuhT`}1nuA>H2IAmFiYe1bLhky|~-Beu95Hba&F;1+I z%<67M2jv!fU(& zb)3RaaAh^D2UT>&M(*>xAOvbnz6>#D0fL2s*0&b4pov){{rZ0i&=4`v5by@%9?3|wYUW&(kO7JMaExqgJwCIO33VC$T50z z0%E7=XaXq^!Yz#6#&MdYkxg*KMb&i%;qGVrYUp9;^gZn@&IsMP{FhR!3lb zL@73Cey}z;N(4pN89k|U2YrwnMDcvh(HP?`(S;DG;6P|2p8954d>TVMp@xOZ2{jFD z)}#^kRTu-DA~qeRq9ChUDzB2(B}#$DVs>ln2TNl zGCUC=fzc80#Yaf0Z;}mRx(1Er3dm7#ZljCfPQdV+E-5ldgF22YwiwWtu0fZx4835F z#%idRSEHtQRfCUrZUX1$IIS>L)%4~lD;b>S6ogXs6^uyfC@$MrpaMiC9n(R9pogLx zr|nHk?BRYzC8+2{z0Db$nE(GQ%N&Fy8ap?zy)WJ(}6wL6{ zaL8slr3;{FT9c6A@t1%IUgf|O#00LR3aDTuX5fVu#8Y4(uIe~x;L%aT4ZbT3Q6zLe zhdO&IvQgSP)-3K4EwcfXKqO~UNX;?HqMM&)8PO}YUi{D>;rH4iuM2PL(a7Y4psT)+7CLU`%{ zYN~oV;yS2<891W}>gWU^R6u$H()0s=sj@;V7>EjlPT;cy6^lOTioJ9e#i+f4f;aBc z1@urL)FHqI9!TXDgjD=aeB1%DJWuc-gHy3n4I}zZpautXGf0Av!1*ceyGb_*6)RID zLlLwHmhG@ZG}0j=gC+uo6mZosMZ$ZXR8V>Y>QIG#+yq6?&3^8;CMu$nMnITD6HR~) z&H!=H~1V(5LFpSk?$Q_`u=@4jCl14@FaWetK^ZJ2-89`z?M|_Bh zpwYz|;91|j6RqMk&EOUVO`=6O4OvEUV&^(7K`X-*+HnS=rofQnzB;>j%VaMIaX{kl znx!FxmIV`Y1v4N)tEO7Zn6H6Q+XM;D6cbnRPhucQ^z1F1YUZFfOpUW!jxX3 zJeOGN0%vibV%ekYi<1Z(Tpgw$1KeFF!t}^GJ_l9ZBx3bW54Y(C$m=ri7nde^qogU~ zV#83)nibqJpU~4U6tPwlfJ%ADdpnw71Kv(4b-=)bzbw=IP01;~4Z%=H>I({s^Ijrl 
zG-g2{s<8MA8HOK)VYT(WWDO=JLC@JwX9=BihUt%%P;%5W3PY>{5m3i(&(RV>UcU#n zCrz@3y%Z91++!AKPG1y~cUrFw_V`FkRNB0)gCMQa z{;Wf9criQEB^Do1XSb;D8c-TS|RELQZu1m^nK+)Y-O3#v55y7O?0;VfqC?QZq zC#X^|`r*33>cuN$5nCO$2H|qXsNV=d5EM#MQsx1I3eB8IPGX}I3TL(;x(dbi#Rf$* zD1RzTe87T|+5}uEF7?DS0UA3wgzF9|M2X-~YMoh71FR z(H7)zl_=p9*ce|`7{uJ;k0sU5;B5ew;Lf*IUWf6`e4wa=M>hq)StLtz(Fuz9GK&IL zNRpfaqM?Fv3k9~PKei?|D3MUTZ)AHY!!=Mwfl-&n5ogpO*4At$z&KLuO}8LR9{ElXpD4CL$59l2xj-F7Ra|F5 zhJPC+xdY3>RZs*&R1yGDQWVNgThNJ`*S9@_gEo5#%8;bUc&>XAfkM~uRPl71HCkWr z1e8mGI$ePxvEw_mLM2p~4B51Gtj~HC9VNxzOi=vN282j;IRXgk1U}iUer8T1R8vwQ zMNIGxr0`l-z)*51q(cI-F+};CmOu+`E2Pgk3C&PYD3d5;@_7Rqt0caXbx(b_h&JK| zcb%>OP^m9%6h?84zR-z|Vn98ESAI2CzssYh&gB$cbu9+6;OaQ4vpVP?16p223P6$s zOfe;aKJ_9*7j@Ol&%4>H!wJ+VkRT;T@u5bN8Ul^bhrfm_GY2|?u=54VQGWFCtTiM_ zAe|^Rw7waFCZj&8&@fGdLJep(v7u#MRI9 zt&i`%r*J<#(=eLS>eV+@1vLjConl!mM8LS^h>KV33cOedjv7l%VP<`<;F!u;2vxo3 zLm_ZuO}I}VKoLTGZXX>&KY>D$BI2tx6_3D~8#KeZh9~1Knkb~^L5jW20}$hLMT9U~ zLlo(hNl_yeu;Czk!6zNoMHS?)TmWng4~m03^$RP=ai5J1K`e|55JuKWLwLBLbos~I_rcCgN`J5uny`5p>ZF+3Jz53zdAsaAhJLl zCZ2dotaL=6)ExTE96D=1Gy!o=!ik(&*e+D@dV?2On=mB?S&D3|cFjHXg`jFQTT~p# zZ5HR%APyQ%X)0{ScAc;g3|2)eoS=x1#RRCcKEpLiO~Dn^s0umIAxYMRUhczV;$%AD|58EK#v(ku~aP_7buW@gu^{CTL6yOjdV{qN8(UGuSZo7DRAtcPF!i-H7z-BM6}5huUcWGL zpY9Sk$m2@jPB)-TQWTx96Jg>DfdtqTt6>u5P;>?zi69-Zm-T6cdpQstLFZPm)I~#% z$~w%j5;y8JKC3=|r=%cT|3^Svz?nz$e4krGcNL82(3yJ0W?<3=O( z3h~w^7IBO%zZZx=sXA(aBfY3!a?u&<8j5sLu;_uqbV0EVKK8dBC|gG|Y?_|XB?uvE zG=*U5WPK~FiPk32VpDu0x1vH`{Th`{)Q_tO?|*0z+Da`c$VvbJ002ovPDHLkV1haH B=@9?` literal 0 HcmV?d00001 diff --git a/fabric/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java b/fabric/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java index 1422ac19c..709e72465 100644 --- a/fabric/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java +++ b/fabric/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java @@ -1,6 +1,7 @@ package 
dev.engine_room.flywheel.backend.compile; import dev.engine_room.flywheel.api.Flywheel; +import dev.engine_room.flywheel.backend.NoiseTextures; import net.fabricmc.fabric.api.resource.SimpleSynchronousResourceReloadListener; import net.minecraft.resources.ResourceLocation; import net.minecraft.server.packs.resources.ResourceManager; @@ -16,6 +17,7 @@ public final class FlwProgramsReloader implements SimpleSynchronousResourceReloa @Override public void onResourceManagerReload(ResourceManager manager) { FlwPrograms.reload(manager); + NoiseTextures.reload(manager); } @Override diff --git a/forge/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java b/forge/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java index ada440f24..e3350f108 100644 --- a/forge/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java +++ b/forge/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwProgramsReloader.java @@ -1,5 +1,6 @@ package dev.engine_room.flywheel.backend.compile; +import dev.engine_room.flywheel.backend.NoiseTextures; import net.minecraft.server.packs.resources.ResourceManager; import net.minecraft.server.packs.resources.ResourceManagerReloadListener; @@ -12,5 +13,6 @@ public final class FlwProgramsReloader implements ResourceManagerReloadListener @Override public void onResourceManagerReload(ResourceManager manager) { FlwPrograms.reload(manager); + NoiseTextures.reload(manager); } } From fe55693cbb488e05e88b20018a0a5eb96c8f26b9 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sat, 22 Feb 2025 18:43:30 -0800 Subject: [PATCH 07/12] Cleanups and depth - Centralized wavelet code and roughly document - Add a full screen pass to render the depth at which transmittance falls to zero --- .../backend/compile/IndirectPrograms.java | 6 + .../engine/indirect/IndirectDrawManager.java | 4 +- .../engine/indirect/OitFramebuffer.java | 48 ++++- .../flywheel/flywheel/internal/common.frag | 122 
+----------- .../flywheel/flywheel/internal/depth.glsl | 9 + .../internal/indirect/oit_composite.frag | 65 +----- .../flywheel/internal/indirect/oit_depth.frag | 56 ++++++ .../flywheel/flywheel/internal/wavelet.glsl | 186 ++++++++++++++++++ 8 files changed, 307 insertions(+), 189 deletions(-) create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/depth.glsl create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_depth.frag create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index cc985b278..d69c8e2e0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -34,6 +34,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation FULLSCREEN = Flywheel.rl("internal/indirect/fullscreen.vert"); private static final ResourceLocation OIT_COMPOSITE = Flywheel.rl("internal/indirect/oit_composite.frag"); + private static final ResourceLocation OIT_DEPTH = Flywheel.rl("internal/indirect/oit_depth.frag"); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -138,6 +139,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { .link(UTIL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.FRAGMENT) .nameMapper(rl -> "fullscreen/" + ResourceUtil.toDebugFileNameNoExtension(rl)) .withResource(s -> s)) + .postLink((key, program) -> Uniforms.setUniformBlockBindings(program)) .harness("fullscreen", sources); } @@ -192,6 +194,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { return fullscreen.get(OIT_COMPOSITE); } + public GlProgram 
getOitDepthProgram() { + return fullscreen.get(OIT_DEPTH); + } + @Override protected void _delete() { pipeline.delete(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 09b5b3201..4587cbdad 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -158,9 +158,9 @@ public class IndirectDrawManager extends DrawManager> { group.submitTransparent(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS); } - // wboitFrameBuffer.adjustBackgroundForTotalTransmittance(); + wboitFrameBuffer.renderDepth(); - // vertexArray.bindForDraw(); + vertexArray.bindForDraw(); wboitFrameBuffer.shade(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java index 23260f106..281e3e22b 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java @@ -14,13 +14,13 @@ import net.minecraft.client.Minecraft; public class OitFramebuffer { - public final int fbo; private final IndirectPrograms programs; private final int vao; - public int depthBounds; - public int coefficients; - public int accumulate; + public int fbo = -1; + public int depthBounds = -1; + public int coefficients = -1; + public int accumulate = -1; private int lastWidth = -1; private int lastHeight = -1; @@ -39,6 +39,7 @@ public class OitFramebuffer { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); + RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); 
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); RenderSystem.blendEquation(GL46.GL_MAX); @@ -57,6 +58,7 @@ public class OitFramebuffer { public void renderTransmittance() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); + RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); RenderSystem.blendEquation(GL46.GL_FUNC_ADD); @@ -85,6 +87,7 @@ public class OitFramebuffer { public void shade() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); + RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); RenderSystem.blendEquation(GL46.GL_FUNC_ADD); @@ -106,9 +109,34 @@ public class OitFramebuffer { GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } + public void renderDepth() { + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(true); + RenderSystem.colorMask(false, false, false, false); + RenderSystem.disableBlend(); + + Samplers.COEFFICIENTS.makeActive(); + GlStateManager._bindTexture(0); + GL46.glBindTextureUnit(0, coefficients); + + Samplers.DEPTH_RANGE.makeActive(); + GlStateManager._bindTexture(depthBounds); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{}); + + programs.getOitDepthProgram() + .bind(); + + // Empty VAO, the actual full screen triangle is generated in the vertex shader + GlStateManager._glBindVertexArray(vao); + + GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); + } + public void composite() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); + RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA); 
RenderSystem.blendEquation(GL46.GL_FUNC_ADD); @@ -141,9 +169,15 @@ public class OitFramebuffer { } private void deleteTextures() { - GL46.glDeleteTextures(depthBounds); - GL46.glDeleteTextures(coefficients); - GL46.glDeleteTextures(accumulate); + if (depthBounds != -1) { + GL46.glDeleteTextures(depthBounds); + } + if (coefficients != -1) { + GL46.glDeleteTextures(coefficients); + } + if (accumulate != -1) { + GL46.glDeleteTextures(accumulate); + } } private void createTextures(int width, int height) { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index fe476a9c5..aee865829 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -1,6 +1,8 @@ #include "flywheel:internal/packed_material.glsl" #include "flywheel:internal/diffuse.glsl" #include "flywheel:internal/colorizer.glsl" +#include "flywheel:internal/wavelet.glsl" +#include "flywheel:internal/depth.glsl" // optimize discard usage #if defined(GL_ARB_conservative_depth) && defined(_FLW_USE_DISCARD) @@ -19,10 +21,6 @@ flat in uvec2 _flw_ids; #ifdef _FLW_OIT -#define TRANSPARENCY_WAVELET_RANK 3 -#define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 -#define REMOVE_SIGNAL true - layout (binding = 7) uniform sampler2D _flw_depthRange; layout (binding = 8) uniform sampler2DArray _flw_coefficients; @@ -44,11 +42,6 @@ float tented_blue_noise(float normalizedDepth) { return b * tent; } -float linearize_depth(float d, float zNear, float zFar) { - float z_n = 2.0 * d - 1.0; - return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear)); -} - float linear_depth() { return linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar); } @@ -69,126 +62,19 @@ layout (location = 0) out vec2 _flw_depthRange_out; #endif - #ifdef _FLW_COLLECT_COEFFS - layout (location = 0) out vec4 
_flw_coeffs0; layout (location = 1) out vec4 _flw_coeffs1; layout (location = 2) out vec4 _flw_coeffs2; layout (location = 3) out vec4 _flw_coeffs3; -void add_to_index(inout vec4[4] coefficients, uint index, float addend) { - coefficients[index >> 2][index & 3u] = addend; -} - -void add_event_to_wavelets(inout vec4[4] coefficients, float signal, float depth) -{ - depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; - - int index = clamp(int(floor(depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); - index += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; - - for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) - { - int power = TRANSPARENCY_WAVELET_RANK - i; - int new_index = (index - 1) >> 1; - float k = float((new_index + 1) & ((1 << power) - 1)); - - int wavelet_sign = ((index & 1) << 1) - 1; - float wavelet_phase = ((index + 1) & 1) * exp2(-power); - float addend = fma(fma(-exp2(-power), k, depth), wavelet_sign, wavelet_phase) * exp2(power * 0.5) * signal; - add_to_index(coefficients, new_index, addend); - - index = new_index; - } - - float addend = fma(signal, -depth, signal); - add_to_index(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1, addend); -} - -void add_transmittance_event_to_wavelets(inout vec4[4] coefficients, float transmittance, float depth) -{ - float absorbance = -log(max(transmittance, 0.00001));// transforming the signal from multiplicative transmittance to additive absorbance - add_event_to_wavelets(coefficients, absorbance, depth); -} - #endif #ifdef _FLW_EVALUATE layout (location = 0) out vec4 _flw_accumulate; - -float get_coefficients(in sampler2DArray coefficients, uint index) { - return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u]; -} - -float evaluate_wavelets(in sampler2DArray coefficients, float depth, float signal) -{ - float scale_coefficient = get_coefficients(coefficients, 
TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); - if (scale_coefficient == 0) - { - return 0; - } - - depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; - - if (REMOVE_SIGNAL) - { - float scale_coefficient_addend = fma(signal, -depth, signal); - scale_coefficient -= scale_coefficient_addend; - } - - float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; - int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); - bool sample_a = index_b >= 1; - int index_a = sample_a ? (index_b - 1) : index_b; - - index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; - index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; - - float b = scale_coefficient; - float a = sample_a ? scale_coefficient : 0; - - for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) - { - int power = TRANSPARENCY_WAVELET_RANK - i; - - int new_index_b = (index_b - 1) >> 1; - int wavelet_sign_b = ((index_b & 1) << 1) - 1; - float coeff_b = get_coefficients(coefficients, new_index_b); - if (REMOVE_SIGNAL) - { - float wavelet_phase_b = ((index_b + 1) & 1) * exp2(-power); - float k = float((new_index_b + 1) & ((1 << power) - 1)); - float addend = fma(fma(-exp2(-power), k, depth), wavelet_sign_b, wavelet_phase_b) * exp2(power * 0.5) * signal; - coeff_b -= addend; - } - b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; - index_b = new_index_b; - - if (sample_a) - { - int new_index_a = (index_a - 1) >> 1; - int wavelet_sign_a = ((index_a & 1) << 1) - 1; - float coeff_a = (new_index_a == new_index_b) ? coeff_b : get_coefficients(coefficients, new_index_a);// No addend here on purpose, the original signal didn't contribute to this coefficient - a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a; - index_a = new_index_a; - } - } - - float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 
1.0 : fract(coefficient_depth); - - return mix(a, b, t); -} - -float evaluate_transmittance_wavelets(in sampler2DArray coefficients, float depth, float signal) -{ - float absorbance = evaluate_wavelets(coefficients, depth, signal); - return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance -} - #endif #else @@ -298,7 +184,7 @@ void _flw_main() { result[2] = vec4(0.); result[3] = vec4(0.); - add_transmittance_event_to_wavelets(result, 1. - color.a, depth()); + add_transmittance(result, 1. - color.a, depth()); _flw_coeffs0 = result[0]; _flw_coeffs1 = result[1]; @@ -309,7 +195,7 @@ void _flw_main() { #ifdef _FLW_EVALUATE - float transmittance = evaluate_transmittance_wavelets(_flw_coefficients, depth(), 1. - color.a); + float transmittance = signal_corrected_transmittance(_flw_coefficients, depth(), 1. - color.a); _flw_accumulate = vec4(color.rgb * color.a, color.a) * transmittance; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/depth.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/depth.glsl new file mode 100644 index 000000000..7a63c4fba --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/depth.glsl @@ -0,0 +1,9 @@ +float linearize_depth(float d, float zNear, float zFar) { + float z_n = 2.0 * d - 1.0; + return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear)); +} + +float delinearize_depth(float linearDepth, float zNear, float zFar) { + float z_n = (2.0 * zNear * zFar / linearDepth) - (zFar + zNear); + return 0.5 * (z_n / (zNear - zFar) + 1.0); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag index b09ed5acd..733e2e5ae 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag +++ 
b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag @@ -1,69 +1,10 @@ +#include "flywheel:internal/wavelet.glsl" + layout (location = 0) out vec4 frag; layout (binding = 0) uniform sampler2DArray _flw_coefficients; layout (binding = 1) uniform sampler2D _flw_accumulate; -#define TRANSPARENCY_WAVELET_RANK 3 -#define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 - -float get_coefficients(in sampler2DArray coefficients, uint index) { - return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u]; -} - -float evaluate_wavelets(in sampler2DArray coefficients, float depth) -{ - float scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); - if (scale_coefficient == 0) - { - return 0; - } - - depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; - - float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; - int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); - bool sample_a = index_b >= 1; - int index_a = sample_a ? (index_b - 1) : index_b; - - index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; - index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; - - float b = scale_coefficient; - float a = sample_a ? scale_coefficient : 0; - - for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) - { - int power = TRANSPARENCY_WAVELET_RANK - i; - - int new_index_b = (index_b - 1) >> 1; - int wavelet_sign_b = ((index_b & 1) << 1) - 1; - float coeff_b = get_coefficients(coefficients, new_index_b); - b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; - index_b = new_index_b; - - if (sample_a) - { - int new_index_a = (index_a - 1) >> 1; - int wavelet_sign_a = ((index_a & 1) << 1) - 1; - float coeff_a = (new_index_a == new_index_b) ? 
coeff_b : get_coefficients(coefficients, new_index_a); - a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a; - index_a = new_index_a; - } - } - - float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth); - - return mix(a, b, t); -} - -float evaluate_transmittance_wavelets(in sampler2DArray coefficients, float depth) -{ - float absorbance = evaluate_wavelets(coefficients, depth); - return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance -} - -const float infinity = 1. / 0.; - void main() { vec4 texel = texelFetch(_flw_accumulate, ivec2(gl_FragCoord.xy), 0); @@ -71,7 +12,7 @@ void main() { discard; } - float total_transmittance = evaluate_transmittance_wavelets(_flw_coefficients, infinity); + float total_transmittance = total_transmittance(_flw_coefficients); frag = vec4(texel.rgb / texel.a, total_transmittance); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_depth.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_depth.frag new file mode 100644 index 000000000..1f127b445 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_depth.frag @@ -0,0 +1,56 @@ +#include "flywheel:internal/uniforms/frame.glsl" +#include "flywheel:internal/wavelet.glsl" +#include "flywheel:internal/depth.glsl" + +layout (binding = 7) uniform sampler2D _flw_depthRange; + +layout (binding = 8) uniform sampler2DArray _flw_coefficients; + +float eye_depth_from_normalized_transparency_depth(float tDepth) { + vec2 depthRange = texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).rg; + + float delta = depthRange.x + depthRange.y; + + return tDepth * delta - depthRange.x; +} + +void main() { + float threshold = 0.0001; + + // + // If transmittance an infinite depth is above the threshold, it doesn't ever become + // zero, so we can bail out. 
+ // + float transmittance_at_far_depth = total_transmittance(_flw_coefficients); + if (transmittance_at_far_depth > threshold) { + discard; + } + + float normalized_depth_at_zero_transmittance = 1.0; + float sample_depth = 0.5; + float delta = 0.25; + + // + // Quick & Dirty way to binary search through the transmittance function + // looking for a value that's below the threshold. + // + int steps = 6; + for (int i = 0; i < steps; ++i) { + float transmittance = transmittance(_flw_coefficients, sample_depth); + if (transmittance <= threshold) { + normalized_depth_at_zero_transmittance = sample_depth; + sample_depth -= delta; + } else { + sample_depth += delta; + } + delta *= 0.5; + } + + // + // Searching inside the transparency depth bounds, so have to transform that to + // a world-space linear-depth and that into a device depth we can output into + // the currently bound depth buffer. + // + float eyeDepth = eye_depth_from_normalized_transparency_depth(normalized_depth_at_zero_transmittance); + gl_FragDepth = delinearize_depth(eyeDepth, _flw_cullData.znear, _flw_cullData.zfar); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl new file mode 100644 index 000000000..6691e3d16 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl @@ -0,0 +1,186 @@ +#define TRANSPARENCY_WAVELET_RANK 3 +#define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 + + +// ------------------------------------------------------------------------- +// WRITING +// ------------------------------------------------------------------------- + +void add_to_index(inout vec4[4] coefficients, uint index, float addend) { + coefficients[index >> 2][index & 3u] = addend; +} + +void add_absorbance(inout vec4[4] coefficients, float signal, float depth) { + depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / 
TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + + int index = clamp(int(floor(depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + index += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) { + int power = TRANSPARENCY_WAVELET_RANK - i; + int new_index = (index - 1) >> 1; + float k = float((new_index + 1) & ((1 << power) - 1)); + + int wavelet_sign = ((index & 1) << 1) - 1; + float wavelet_phase = ((index + 1) & 1) * exp2(-power); + float addend = fma(fma(-exp2(-power), k, depth), wavelet_sign, wavelet_phase) * exp2(power * 0.5) * signal; + add_to_index(coefficients, new_index, addend); + + index = new_index; + } + + float addend = fma(signal, -depth, signal); + add_to_index(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1, addend); +} + +void add_transmittance(inout vec4[4] coefficients, float transmittance, float depth) { + float absorbance = -log(max(transmittance, 0.00001));// transforming the signal from multiplicative transmittance to additive absorbance + add_absorbance(coefficients, absorbance, depth); +} + +// ------------------------------------------------------------------------- +// READING +// ------------------------------------------------------------------------- + +// TODO: maybe we could reduce the number of texel fetches below? +float get_coefficients(in sampler2DArray coefficients, uint index) { + return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u]; +} + +/// Compute the total absorbance, as if at infinite depth. 
+float total_absorbance(in sampler2DArray coefficients) { + float scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + if (scale_coefficient == 0) { + return 0; + } + + int index_b = TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + float b = scale_coefficient; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) { + int power = TRANSPARENCY_WAVELET_RANK - i; + + int new_index_b = (index_b - 1) >> 1; + int wavelet_sign_b = ((index_b & 1) << 1) - 1; + float coeff_b = get_coefficients(coefficients, new_index_b); + b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; + index_b = new_index_b; + } + + return b; +} + +/// Compute the absorbance at a given normalized depth. +float absorbance(in sampler2DArray coefficients, float depth) { + float scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + if (scale_coefficient == 0) { + return 0; + } + + depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + + float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + bool sample_a = index_b >= 1; + int index_a = sample_a ? (index_b - 1) : index_b; + + index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + float b = scale_coefficient; + float a = sample_a ? 
scale_coefficient : 0; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) { + int power = TRANSPARENCY_WAVELET_RANK - i; + + int new_index_b = (index_b - 1) >> 1; + int wavelet_sign_b = ((index_b & 1) << 1) - 1; + float coeff_b = get_coefficients(coefficients, new_index_b); + b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; + index_b = new_index_b; + + if (sample_a) { + int new_index_a = (index_a - 1) >> 1; + int wavelet_sign_a = ((index_a & 1) << 1) - 1; + float coeff_a = (new_index_a == new_index_b) ? coeff_b : get_coefficients(coefficients, new_index_a); + a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a; + index_a = new_index_a; + } + } + + float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth); + + return mix(a, b, t); +} + +/// Compute the absorbance at a given normalized depth, +/// correcting for self-occlusion by undoing the previously recorded absorbance event. +float signal_corrected_absorbance(in sampler2DArray coefficients, float depth, float signal) { + float scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + if (scale_coefficient == 0) { + return 0; + } + + depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + + float scale_coefficient_addend = fma(signal, -depth, signal); + scale_coefficient -= scale_coefficient_addend; + + float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT; + int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1); + bool sample_a = index_b >= 1; + int index_a = sample_a ? (index_b - 1) : index_b; + + index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1; + + float b = scale_coefficient; + float a = sample_a ? 
scale_coefficient : 0; + + for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i) { + int power = TRANSPARENCY_WAVELET_RANK - i; + + int new_index_b = (index_b - 1) >> 1; + int wavelet_sign_b = ((index_b & 1) << 1) - 1; + float coeff_b = get_coefficients(coefficients, new_index_b); + + float wavelet_phase_b = ((index_b + 1) & 1) * exp2(-power); + float k = float((new_index_b + 1) & ((1 << power) - 1)); + float addend = fma(fma(-exp2(-power), k, depth), wavelet_sign_b, wavelet_phase_b) * exp2(power * 0.5) * signal; + coeff_b -= addend; + + b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b; + index_b = new_index_b; + + if (sample_a) { + int new_index_a = (index_a - 1) >> 1; + int wavelet_sign_a = ((index_a & 1) << 1) - 1; + float coeff_a = (new_index_a == new_index_b) ? coeff_b : get_coefficients(coefficients, new_index_a);// No addend here on purpose, the original signal didn't contribute to this coefficient + a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a; + index_a = new_index_a; + } + } + + float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth); + + return mix(a, b, t); +} + +// Helpers below to deal directly in transmittance. + +#define ABSORBANCE_TO_TRANSMITTANCE(a) clamp(exp(-(a)), 0., 1.) 
+ +float total_transmittance(in sampler2DArray coefficients) { + return ABSORBANCE_TO_TRANSMITTANCE(total_absorbance(coefficients)); +} + +float transmittance(in sampler2DArray coefficients, float depth) { + return ABSORBANCE_TO_TRANSMITTANCE(absorbance(coefficients, depth)); +} + +float signal_corrected_transmittance(in sampler2DArray coefficients, float depth, float signal) { + return ABSORBANCE_TO_TRANSMITTANCE(signal_corrected_absorbance(coefficients, depth, signal)); +} From 19c97df115a1b8b0dbc490c1d5fc375d11f7b0ef Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sat, 22 Feb 2025 21:15:37 -0800 Subject: [PATCH 08/12] A win for window sizes - Fix transparent objects disappearing when the window is resized - Clean up OitFramebuffer some more, add a quick description of each render pass - Move oit noise factor to frame uniforms --- .../flywheel/backend/NoiseTextures.java | 17 ++- .../engine/indirect/IndirectDrawManager.java | 17 +-- .../engine/indirect/OitFramebuffer.java | 113 +++++++++++------- .../backend/engine/uniform/FrameUniforms.java | 3 + .../flywheel/flywheel/internal/common.frag | 5 +- .../flywheel/internal/uniforms/frame.glsl | 2 + .../flywheel/noise/{blue/0.png => blue.png} | Bin 7 files changed, 98 insertions(+), 59 deletions(-) rename common/src/backend/resources/assets/flywheel/textures/flywheel/noise/{blue/0.png => blue.png} (100%) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java b/common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java index 157a64ded..1fcbf99d3 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/NoiseTextures.java @@ -3,23 +3,23 @@ package dev.engine_room.flywheel.backend; import java.io.IOException; import org.jetbrains.annotations.UnknownNullability; +import org.lwjgl.opengl.GL32; import com.mojang.blaze3d.platform.NativeImage; +import 
com.mojang.blaze3d.systems.RenderSystem; import dev.engine_room.flywheel.api.Flywheel; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import net.minecraft.client.renderer.texture.DynamicTexture; import net.minecraft.resources.ResourceLocation; import net.minecraft.server.packs.resources.ResourceManager; public class NoiseTextures { - - public static final ResourceLocation NOISE_TEXTURE = Flywheel.rl("textures/flywheel/noise/blue/0.png"); - public static final int NOISE_LAYERS = 16; + public static final ResourceLocation NOISE_TEXTURE = Flywheel.rl("textures/flywheel/noise/blue.png"); @UnknownNullability public static DynamicTexture BLUE_NOISE; - public static void reload(ResourceManager manager) { if (BLUE_NOISE != null) { BLUE_NOISE.close(); @@ -36,6 +36,15 @@ public class NoiseTextures { var image = NativeImage.read(NativeImage.Format.LUMINANCE, is); BLUE_NOISE = new DynamicTexture(image); + + GlTextureUnit.T0.makeActive(); + BLUE_NOISE.bind(); + + NoiseTextures.BLUE_NOISE.setFilter(true, false); + RenderSystem.texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_REPEAT); + RenderSystem.texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_REPEAT); + + RenderSystem.bindTexture(0); } catch (IOException e) { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 4587cbdad..ea77372da 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -49,7 +49,7 @@ public class IndirectDrawManager extends DrawManager> { private final DepthPyramid depthPyramid; - private final OitFramebuffer wboitFrameBuffer; + private final OitFramebuffer oitFramebuffer; public IndirectDrawManager(IndirectPrograms programs) { this.programs 
= programs; @@ -66,7 +66,7 @@ public class IndirectDrawManager extends DrawManager> { depthPyramid = new DepthPyramid(programs); - wboitFrameBuffer = new OitFramebuffer(programs); + oitFramebuffer = new OitFramebuffer(programs); } @Override @@ -146,29 +146,32 @@ public class IndirectDrawManager extends DrawManager> { group.submitSolid(); } - wboitFrameBuffer.depthRange(); + oitFramebuffer.prepare(); + + oitFramebuffer.depthRange(); for (var group : cullingGroups.values()) { group.submitTransparent(PipelineCompiler.OitMode.DEPTH_RANGE); } - wboitFrameBuffer.renderTransmittance(); + oitFramebuffer.renderTransmittance(); for (var group : cullingGroups.values()) { group.submitTransparent(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS); } - wboitFrameBuffer.renderDepth(); + oitFramebuffer.renderDepth(); + // Need to bind this again because we just drew a full screen quad for OIT. vertexArray.bindForDraw(); - wboitFrameBuffer.shade(); + oitFramebuffer.shade(); for (var group : cullingGroups.values()) { group.submitTransparent(PipelineCompiler.OitMode.EVALUATE); } - wboitFrameBuffer.composite(); + oitFramebuffer.composite(); MaterialRenderState.reset(); TextureBinder.resetLightAndOverlay(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java index 281e3e22b..6ebf3e30c 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java @@ -13,7 +13,6 @@ import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import net.minecraft.client.Minecraft; public class OitFramebuffer { - private final IndirectPrograms programs; private final int vao; @@ -27,16 +26,25 @@ public class OitFramebuffer { public OitFramebuffer(IndirectPrograms programs) { this.programs = programs; - fbo = 
GL46.glCreateFramebuffers(); vao = GL46.glCreateVertexArrays(); } - public void depthRange() { + /** + * Set up the framebuffer. + */ + public void prepare() { var mainRenderTarget = Minecraft.getInstance() .getMainRenderTarget(); - createTextures(mainRenderTarget.width, mainRenderTarget.height); + maybeResizeFBO(mainRenderTarget.width, mainRenderTarget.height); + GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); + } + + /** + * Render out the min and max depth per fragment. + */ + public void depthRange() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); RenderSystem.colorMask(true, true, true, true); @@ -44,8 +52,6 @@ public class OitFramebuffer { RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); RenderSystem.blendEquation(GL46.GL_MAX); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0}); var far = Minecraft.getInstance().gameRenderer.getDepthFar(); @@ -55,6 +61,9 @@ public class OitFramebuffer { GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } + /** + * Generate the coefficients to the transmittance function. 
+ */ public void renderTransmittance() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); @@ -69,11 +78,6 @@ public class OitFramebuffer { Samplers.NOISE.makeActive(); NoiseTextures.BLUE_NOISE.bind(); - NoiseTextures.BLUE_NOISE.setFilter(true, false); - GL46.glTextureParameteri(NoiseTextures.BLUE_NOISE.getId(), GL32.GL_TEXTURE_WRAP_S, GL32.GL_REPEAT); - GL46.glTextureParameteri(NoiseTextures.BLUE_NOISE.getId(), GL32.GL_TEXTURE_WRAP_T, GL32.GL_REPEAT); - - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2, GL46.GL_COLOR_ATTACHMENT3, GL46.GL_COLOR_ATTACHMENT4}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); @@ -84,6 +88,34 @@ public class OitFramebuffer { GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } + /** + * If any fragment has its transmittance fall off to zero, search the transmittance + * function to determine at what depth that occurs and write out to the depth buffer. + */ + public void renderDepth() { + // No depth writes, but we'll still use the depth test + RenderSystem.depthMask(true); + RenderSystem.colorMask(false, false, false, false); + RenderSystem.disableBlend(); + + Samplers.COEFFICIENTS.makeActive(); + RenderSystem.bindTexture(0); + GL46.glBindTextureUnit(0, coefficients); + + Samplers.DEPTH_RANGE.makeActive(); + RenderSystem.bindTexture(depthBounds); + + GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{}); + + programs.getOitDepthProgram() + .bind(); + + drawFullscreenQuad(); + } + + /** + * Sample the transmittance function and accumulate. 
+ */ public void shade() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); @@ -109,30 +141,9 @@ public class OitFramebuffer { GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } - public void renderDepth() { - // No depth writes, but we'll still use the depth test - RenderSystem.depthMask(true); - RenderSystem.colorMask(false, false, false, false); - RenderSystem.disableBlend(); - - Samplers.COEFFICIENTS.makeActive(); - GlStateManager._bindTexture(0); - GL46.glBindTextureUnit(0, coefficients); - - Samplers.DEPTH_RANGE.makeActive(); - GlStateManager._bindTexture(depthBounds); - - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{}); - - programs.getOitDepthProgram() - .bind(); - - // Empty VAO, the actual full screen triangle is generated in the vertex shader - GlStateManager._glBindVertexArray(vao); - - GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); - } - + /** + * Composite the accumulated luminance onto the main framebuffer. + */ public void composite() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); @@ -147,27 +158,30 @@ public class OitFramebuffer { mainRenderTarget.bindWrite(false); GlTextureUnit.T0.makeActive(); - GlStateManager._bindTexture(0); + RenderSystem.bindTexture(0); GL46.glBindTextureUnit(0, coefficients); GlTextureUnit.T1.makeActive(); - GlStateManager._bindTexture(accumulate); + RenderSystem.bindTexture(accumulate); programs.getOitCompositeProgram() .bind(); - // Empty VAO, the actual full screen triangle is generated in the vertex shader - GlStateManager._glBindVertexArray(vao); - - GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); + drawFullscreenQuad(); } public void delete() { deleteTextures(); - GL46.glDeleteFramebuffers(fbo); GL46.glDeleteVertexArrays(vao); } + private void drawFullscreenQuad() { + // Empty VAO, the actual full screen triangle is generated in the vertex shader + GlStateManager._glBindVertexArray(vao); + + GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 
3); + } + private void deleteTextures() { if (depthBounds != -1) { GL46.glDeleteTextures(depthBounds); @@ -178,9 +192,19 @@ public class OitFramebuffer { if (accumulate != -1) { GL46.glDeleteTextures(accumulate); } + if (fbo != -1) { + GL46.glDeleteFramebuffers(fbo); + } + + // We sometimes get the same texture ID back when creating new textures, + // so bind zero to clear the GlStateManager + Samplers.COEFFICIENTS.makeActive(); + RenderSystem.bindTexture(0); + Samplers.DEPTH_RANGE.makeActive(); + RenderSystem.bindTexture(0); } - private void createTextures(int width, int height) { + private void maybeResizeFBO(int width, int height) { if (lastWidth == width && lastHeight == height) { return; } @@ -190,13 +214,14 @@ public class OitFramebuffer { deleteTextures(); + fbo = GL46.glCreateFramebuffers(); + depthBounds = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); coefficients = GL46.glCreateTextures(GL46.GL_TEXTURE_2D_ARRAY); accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); GL46.glTextureStorage2D(depthBounds, 1, GL32.GL_RG32F, width, height); GL46.glTextureStorage3D(coefficients, 1, GL32.GL_RGBA16F, width, height, 4); - GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height); // for (int tex : new int[]{zerothMoment, moments, composite}) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java index 07e8a9e5c..ce2fd4c6f 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java @@ -116,6 +116,9 @@ public final class FrameUniforms extends UniformWriter { ptr = writeInt(ptr, debugMode); + // OIT noise factor + ptr = writeFloat(ptr, 0.07f); + firstWrite = false; BUFFER.markDirty(); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag 
b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index aee865829..2693798a0 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -27,9 +27,6 @@ layout (binding = 8) uniform sampler2DArray _flw_coefficients; layout (binding = 9) uniform sampler2D _flw_blueNoise; - -uniform float _flw_blueNoiseFactor = 0.08; - float tented_blue_noise(float normalizedDepth) { float tentIn = abs(normalizedDepth * 2. - 1); @@ -53,7 +50,7 @@ float depth() { float delta = depthRange.x + depthRange.y; float depth = (linearDepth + depthRange.x) / delta; - return depth - tented_blue_noise(depth) * _flw_blueNoiseFactor; + return depth - tented_blue_noise(depth) * _flw_oitNoise; } #ifdef _FLW_DEPTH_RANGE diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl index f96f502fc..90c5f3f21 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl @@ -62,6 +62,8 @@ layout(std140) uniform _FlwFrameUniforms { uint flw_cameraInBlock; uint _flw_debugMode; + + float _flw_oitNoise; }; #define flw_renderOrigin (_flw_renderOrigin.xyz) diff --git a/common/src/backend/resources/assets/flywheel/textures/flywheel/noise/blue/0.png b/common/src/backend/resources/assets/flywheel/textures/flywheel/noise/blue.png similarity index 100% rename from common/src/backend/resources/assets/flywheel/textures/flywheel/noise/blue/0.png rename to common/src/backend/resources/assets/flywheel/textures/flywheel/noise/blue.png From 452d912e7bf991c71d279460707d265d408a24e0 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sat, 22 Feb 2025 23:02:23 -0800 Subject: [PATCH 09/12] Fabulously transparent - Write out depth in the composite pass - When 
fabulous is enabled, write to the item entity target - Flip the total transmittance back to alpha when compositing so it can be consumed by the blit shader --- .../engine/indirect/IndirectDrawManager.java | 4 +- .../engine/indirect/OitFramebuffer.java | 105 ++++++++++-------- .../backend/engine/uniform/FrameUniforms.java | 3 +- .../internal/indirect/oit_composite.frag | 13 ++- 4 files changed, 72 insertions(+), 53 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index ea77372da..42b967ba2 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -160,7 +160,7 @@ public class IndirectDrawManager extends DrawManager> { group.submitTransparent(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS); } - oitFramebuffer.renderDepth(); + oitFramebuffer.renderDepthFromTransmittance(); // Need to bind this again because we just drew a full screen quad for OIT. 
vertexArray.bindForDraw(); @@ -198,6 +198,8 @@ public class IndirectDrawManager extends DrawManager> { lightBuffers.delete(); matrixBuffer.delete(); + + oitFramebuffer.delete(); } public void renderCrumbling(List crumblingBlocks) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java index 6ebf3e30c..40916c3b0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java @@ -3,6 +3,7 @@ package dev.engine_room.flywheel.backend.engine.indirect; import org.lwjgl.opengl.GL32; import org.lwjgl.opengl.GL46; +import com.mojang.blaze3d.pipeline.RenderTarget; import com.mojang.blaze3d.platform.GlStateManager; import com.mojang.blaze3d.systems.RenderSystem; @@ -33,19 +34,40 @@ public class OitFramebuffer { * Set up the framebuffer. 
*/ public void prepare() { - var mainRenderTarget = Minecraft.getInstance() - .getMainRenderTarget(); + RenderTarget renderTarget; - maybeResizeFBO(mainRenderTarget.width, mainRenderTarget.height); + if (Minecraft.useShaderTransparency()) { + renderTarget = Minecraft.getInstance().levelRenderer.getItemEntityTarget(); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0); + renderTarget.copyDepthFrom(Minecraft.getInstance() + .getMainRenderTarget()); + } else { + renderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + } + + maybeResizeFBO(renderTarget.width, renderTarget.height); + + GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, renderTarget.getDepthTextureId(), 0); + + Samplers.COEFFICIENTS.makeActive(); + RenderSystem.bindTexture(0); + GL46.glBindTextureUnit(Samplers.COEFFICIENTS.number, coefficients); + + Samplers.DEPTH_RANGE.makeActive(); + RenderSystem.bindTexture(depthBounds); + + Samplers.NOISE.makeActive(); + NoiseTextures.BLUE_NOISE.bind(); + + GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } /** * Render out the min and max depth per fragment. */ public void depthRange() { - // No depth writes, but we'll still use the depth test + // No depth writes, but we'll still use the depth test. 
RenderSystem.depthMask(false); RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); @@ -57,8 +79,6 @@ public class OitFramebuffer { var far = Minecraft.getInstance().gameRenderer.getDepthFar(); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{-far, -far, 0, 0}); - - GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } /** @@ -72,38 +92,24 @@ public class OitFramebuffer { RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); RenderSystem.blendEquation(GL46.GL_FUNC_ADD); - Samplers.DEPTH_RANGE.makeActive(); - GlStateManager._bindTexture(depthBounds); - - Samplers.NOISE.makeActive(); - NoiseTextures.BLUE_NOISE.bind(); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2, GL46.GL_COLOR_ATTACHMENT3, GL46.GL_COLOR_ATTACHMENT4}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, new float[]{0, 0, 0, 0}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 3, new float[]{0, 0, 0, 0}); - - GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } /** * If any fragment has its transmittance fall off to zero, search the transmittance * function to determine at what depth that occurs and write out to the depth buffer. */ - public void renderDepth() { - // No depth writes, but we'll still use the depth test + public void renderDepthFromTransmittance() { + // Only write to depth, not color. 
RenderSystem.depthMask(true); RenderSystem.colorMask(false, false, false, false); RenderSystem.disableBlend(); - - Samplers.COEFFICIENTS.makeActive(); - RenderSystem.bindTexture(0); - GL46.glBindTextureUnit(0, coefficients); - - Samplers.DEPTH_RANGE.makeActive(); - RenderSystem.bindTexture(depthBounds); + RenderSystem.depthFunc(GL32.GL_ALWAYS); GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{}); @@ -124,50 +130,53 @@ public class OitFramebuffer { RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); RenderSystem.blendEquation(GL46.GL_FUNC_ADD); - Samplers.DEPTH_RANGE.makeActive(); - GlStateManager._bindTexture(depthBounds); - - Samplers.COEFFICIENTS.makeActive(); - GlStateManager._bindTexture(0); - GL46.glBindTextureUnit(Samplers.COEFFICIENTS.number, coefficients); - - Samplers.NOISE.makeActive(); - NoiseTextures.BLUE_NOISE.bind(); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT5}); GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); - - GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); } /** * Composite the accumulated luminance onto the main framebuffer. */ public void composite() { - // No depth writes, but we'll still use the depth test - RenderSystem.depthMask(false); + if (Minecraft.useShaderTransparency()) { + Minecraft.getInstance().levelRenderer.getItemEntityTarget() + .bindWrite(false); + } else { + Minecraft.getInstance() + .getMainRenderTarget() + .bindWrite(false); + } + + // The composite shader writes out the closest depth to gl_FragDepth. + // depthMask = true: OIT stuff renders on top of other transparent stuff. + // depthMask = false: other transparent stuff renders on top of OIT stuff. + // If Neo gets wavelet OIT we can use their hooks to be correct with everything. 
+ RenderSystem.depthMask(true); RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); - RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA); + + // We rely on the blend func to achieve: + // final color = (1 - transmittance_total) * sum(color_f * alpha_f * transmittance_f) / sum(alpha_f * transmittance_f) + // + color_dst * transmittance_total + // + // Though note that the alpha value we emit in the fragment shader is actually (1. - transmittance_total). + // The extra inversion step is so we can have a sane alpha value written out for the fabulous blit shader to consume. + RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.SRC_ALPHA, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA); RenderSystem.blendEquation(GL46.GL_FUNC_ADD); - - var mainRenderTarget = Minecraft.getInstance() - .getMainRenderTarget(); - - mainRenderTarget.bindWrite(false); + RenderSystem.depthFunc(GL32.GL_ALWAYS); GlTextureUnit.T0.makeActive(); - RenderSystem.bindTexture(0); - GL46.glBindTextureUnit(0, coefficients); - - GlTextureUnit.T1.makeActive(); RenderSystem.bindTexture(accumulate); programs.getOitCompositeProgram() .bind(); drawFullscreenQuad(); + + Minecraft.getInstance() + .getMainRenderTarget() + .bindWrite(false); } public void delete() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java index ce2fd4c6f..a7395630d 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java @@ -13,6 +13,7 @@ import dev.engine_room.flywheel.backend.mixin.LevelRendererAccessor; import net.minecraft.Util; import net.minecraft.client.Camera; import 
net.minecraft.client.Minecraft; +import net.minecraft.client.renderer.GameRenderer; import net.minecraft.core.BlockPos; import net.minecraft.core.Vec3i; import net.minecraft.world.level.Level; @@ -198,7 +199,7 @@ public final class FrameUniforms extends UniformWriter { int pyramidHeight = DepthPyramid.mip0Size(mainRenderTarget.height); int pyramidDepth = DepthPyramid.getImageMipLevels(pyramidWidth, pyramidHeight); - ptr = writeFloat(ptr, 0.05F); // zNear + ptr = writeFloat(ptr, GameRenderer.PROJECTION_Z_NEAR); // zNear ptr = writeFloat(ptr, mc.gameRenderer.getDepthFar()); // zFar ptr = writeFloat(ptr, PROJECTION.m00()); // P00 ptr = writeFloat(ptr, PROJECTION.m11()); // P11 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag index 733e2e5ae..2aa577e84 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag @@ -1,9 +1,12 @@ #include "flywheel:internal/wavelet.glsl" +#include "flywheel:internal/depth.glsl" +#include "flywheel:internal/uniforms/frame.glsl" layout (location = 0) out vec4 frag; -layout (binding = 0) uniform sampler2DArray _flw_coefficients; -layout (binding = 1) uniform sampler2D _flw_accumulate; +layout (binding = 0) uniform sampler2D _flw_accumulate; +layout (binding = 7) uniform sampler2D _flw_depthRange; +layout (binding = 8) uniform sampler2DArray _flw_coefficients; void main() { vec4 texel = texelFetch(_flw_accumulate, ivec2(gl_FragCoord.xy), 0); @@ -14,5 +17,9 @@ void main() { float total_transmittance = total_transmittance(_flw_coefficients); - frag = vec4(texel.rgb / texel.a, total_transmittance); + frag = vec4(texel.rgb / texel.a, 1. 
- total_transmittance); + + float minDepth = -texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).r; + + gl_FragDepth = delinearize_depth(minDepth, _flw_cullData.znear, _flw_cullData.zfar); } From 026eb905668620930b019249ff35670a92c5457c Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 23 Feb 2025 15:07:46 -0800 Subject: [PATCH 10/12] If you say so - Only do OIT for materials marked ORDER_INDEPENDENT - Clean up some of the indirect frame logic, early out if there's nothing to do --- .../backend/engine/MaterialRenderState.java | 2 +- .../engine/indirect/IndirectCullingGroup.java | 48 +++++-------- .../engine/indirect/IndirectDrawManager.java | 67 ++++++++++++------- 3 files changed, 59 insertions(+), 58 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java index 5f8a2c9b1..ec98d5c09 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java @@ -146,7 +146,7 @@ public final class MaterialRenderState { RenderSystem.enableBlend(); RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.DST_COLOR, GlStateManager.DestFactor.SRC_COLOR, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ZERO); } - case TRANSLUCENT, ORDER_INDEPENDENT -> { + case TRANSLUCENT -> { RenderSystem.enableBlend(); RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.SRC_ALPHA, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 886f76830..3e73bfe1c 100644 --- 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -38,7 +38,7 @@ public class IndirectCullingGroup { private final List> instancers = new ArrayList<>(); private final List indirectDraws = new ArrayList<>(); private final List multiDraws = new ArrayList<>(); - private final List transparentDraws = new ArrayList<>(); + private final List oitDraws = new ArrayList<>(); private final IndirectPrograms programs; private final GlProgram cullProgram; @@ -57,7 +57,7 @@ public class IndirectCullingGroup { cullProgram = programs.getCullingProgram(instanceType); } - public void flushInstancers() { + public boolean flushInstancers() { instanceCountThisFrame = 0; int modelIndex = 0; for (var iterator = instancers.iterator(); iterator.hasNext(); ) { @@ -79,13 +79,17 @@ public class IndirectCullingGroup { if (indirectDraws.removeIf(IndirectDraw::deleted)) { needsDrawSort = true; } + + var out = indirectDraws.isEmpty(); + + if (out) { + delete(); + } + + return out; } public void upload(StagingBuffer stagingBuffer) { - if (nothingToDo()) { - return; - } - buffers.updateCounts(instanceCountThisFrame, instancers.size(), indirectDraws.size()); // Upload only instances that have changed. 
@@ -107,10 +111,6 @@ public class IndirectCullingGroup { } public void dispatchCull() { - if (nothingToDo()) { - return; - } - Uniforms.bindAll(); cullProgram.bind(); @@ -119,21 +119,17 @@ public class IndirectCullingGroup { } public void dispatchApply() { - if (nothingToDo()) { - return; - } - buffers.bindForApply(); glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1); } - private boolean nothingToDo() { - return indirectDraws.isEmpty() || instanceCountThisFrame == 0; + public boolean hasOitDraws() { + return !oitDraws.isEmpty(); } private void sortDraws() { multiDraws.clear(); - transparentDraws.clear(); + oitDraws.clear(); // sort by visual type, then material indirectDraws.sort(DRAW_COMPARATOR); @@ -143,7 +139,7 @@ public class IndirectCullingGroup { // if the next draw call has a different VisualType or Material, start a new MultiDraw if (i == indirectDraws.size() - 1 || incompatibleDraws(draw1, indirectDraws.get(i + 1))) { var dst = draw1.material() - .transparency() == Transparency.TRANSLUCENT ? transparentDraws : multiDraws; + .transparency() == Transparency.ORDER_INDEPENDENT ? oitDraws : multiDraws; dst.add(new MultiDraw(draw1.material(), draw1.isEmbedded(), start, i + 1)); start = i + 1; } @@ -178,7 +174,7 @@ public class IndirectCullingGroup { } public void submitSolid() { - if (nothingToDo()) { + if (multiDraws.isEmpty()) { return; } @@ -204,7 +200,7 @@ public class IndirectCullingGroup { } public void submitTransparent(PipelineCompiler.OitMode oit) { - if (nothingToDo()) { + if (oitDraws.isEmpty()) { return; } @@ -214,7 +210,7 @@ public class IndirectCullingGroup { GlProgram lastProgram = null; - for (var multiDraw : transparentDraws) { + for (var multiDraw : oitDraws) { var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? 
ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material, oit); if (drawProgram != lastProgram) { lastProgram = drawProgram; @@ -290,16 +286,6 @@ public class IndirectCullingGroup { buffers.delete(); } - public boolean checkEmptyAndDelete() { - var out = indirectDraws.isEmpty(); - - if (out) { - delete(); - } - - return out; - } - private record MultiDraw(Material material, boolean embedded, int start, int end) { private void submit(GlProgram drawProgram) { GlCompat.safeMultiDrawElementsIndirect(drawProgram, GL_TRIANGLES, GL_UNSIGNED_INT, this.start, this.end, IndirectBuffers.DRAW_COMMAND_STRIDE); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 42b967ba2..262da1c2e 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -85,13 +85,11 @@ public class IndirectDrawManager extends DrawManager> { public void render(LightStorage lightStorage, EnvironmentStorage environmentStorage) { super.render(lightStorage, environmentStorage); - for (var group : cullingGroups.values()) { - group.flushInstancers(); - } - + // Flush instance counts, page mappings, and prune empty groups. cullingGroups.values() - .removeIf(IndirectCullingGroup::checkEmptyAndDelete); + .removeIf(IndirectCullingGroup::flushInstancers); + // Instancers may have been emptied in the above call, now remove them here. instancers.values() .removeIf(instancer -> instancer.instanceCount() == 0); @@ -99,6 +97,12 @@ public class IndirectDrawManager extends DrawManager> { stagingBuffer.reclaim(); + // Genuinely nothing to do, we can just early out. + // Still process the mesh pool and reclaim fenced staging regions though. 
+ if (cullingGroups.isEmpty()) { + return; + } + lightBuffers.flush(stagingBuffer, lightStorage); matrixBuffer.flush(stagingBuffer, environmentStorage); @@ -146,33 +150,44 @@ public class IndirectDrawManager extends DrawManager> { group.submitSolid(); } - oitFramebuffer.prepare(); - - oitFramebuffer.depthRange(); - + // Let's avoid invoking the oit chain if we don't have anything to do + boolean useOit = false; for (var group : cullingGroups.values()) { - group.submitTransparent(PipelineCompiler.OitMode.DEPTH_RANGE); + if (group.hasOitDraws()) { + useOit = true; + break; + } } - oitFramebuffer.renderTransmittance(); + if (useOit) { + oitFramebuffer.prepare(); - for (var group : cullingGroups.values()) { - group.submitTransparent(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS); + oitFramebuffer.depthRange(); + + for (var group : cullingGroups.values()) { + group.submitTransparent(PipelineCompiler.OitMode.DEPTH_RANGE); + } + + oitFramebuffer.renderTransmittance(); + + for (var group : cullingGroups.values()) { + group.submitTransparent(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS); + } + + oitFramebuffer.renderDepthFromTransmittance(); + + // Need to bind this again because we just drew a full screen quad for OIT. + vertexArray.bindForDraw(); + + oitFramebuffer.shade(); + + for (var group : cullingGroups.values()) { + group.submitTransparent(PipelineCompiler.OitMode.EVALUATE); + } + + oitFramebuffer.composite(); } - oitFramebuffer.renderDepthFromTransmittance(); - - // Need to bind this again because we just drew a full screen quad for OIT. 
- vertexArray.bindForDraw(); - - oitFramebuffer.shade(); - - for (var group : cullingGroups.values()) { - group.submitTransparent(PipelineCompiler.OitMode.EVALUATE); - } - - oitFramebuffer.composite(); - MaterialRenderState.reset(); TextureBinder.resetLightAndOverlay(); } From 5fc07a751323335347ea65cc164855fa91c355b4 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 23 Feb 2025 16:21:04 -0800 Subject: [PATCH 11/12] Direct or indirect, that is the question - Add non-dsa paths for the OitFramebuffer, behind a gl capabilities check --- .../engine/indirect/IndirectDrawManager.java | 2 +- .../engine/indirect/OitFramebuffer.java | 162 +++++++++++++----- .../flywheel/backend/gl/GlCompat.java | 10 ++ 3 files changed, 126 insertions(+), 48 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 262da1c2e..1e38fedbd 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -179,7 +179,7 @@ public class IndirectDrawManager extends DrawManager> { // Need to bind this again because we just drew a full screen quad for OIT. 
vertexArray.bindForDraw(); - oitFramebuffer.shade(); + oitFramebuffer.accumulate(); for (var group : cullingGroups.values()) { group.submitTransparent(PipelineCompiler.OitMode.EVALUATE); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java index 40916c3b0..d184acf02 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java @@ -10,10 +10,17 @@ import com.mojang.blaze3d.systems.RenderSystem; import dev.engine_room.flywheel.backend.NoiseTextures; import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import net.minecraft.client.Minecraft; public class OitFramebuffer { + public static final float[] CLEAR_TO_ZERO = {0, 0, 0, 0}; + public static final int[] DEPTH_RANGE_DRAW_BUFFERS = {GL46.GL_COLOR_ATTACHMENT0}; + public static final int[] RENDER_TRANSMITTANCE_DRAW_BUFFERS = {GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2, GL46.GL_COLOR_ATTACHMENT3, GL46.GL_COLOR_ATTACHMENT4}; + public static final int[] ACCUMULATE_DRAW_BUFFERS = {GL46.GL_COLOR_ATTACHMENT5}; + public static final int[] DEPTH_ONLY_DRAW_BUFFERS = {}; + private final IndirectPrograms programs; private final int vao; @@ -27,7 +34,11 @@ public class OitFramebuffer { public OitFramebuffer(IndirectPrograms programs) { this.programs = programs; - vao = GL46.glCreateVertexArrays(); + if (GlCompat.SUPPORTS_DSA) { + vao = GL46.glCreateVertexArrays(); + } else { + vao = GL32.glGenVertexArrays(); + } } /** @@ -48,11 +59,10 @@ public class OitFramebuffer { maybeResizeFBO(renderTarget.width, renderTarget.height); - GL46.glNamedFramebufferTexture(fbo, 
GL46.GL_DEPTH_ATTACHMENT, renderTarget.getDepthTextureId(), 0); - Samplers.COEFFICIENTS.makeActive(); + // Bind zero to render system to make sure we clear their internal state RenderSystem.bindTexture(0); - GL46.glBindTextureUnit(Samplers.COEFFICIENTS.number, coefficients); + GL32.glBindTexture(GL32.GL_TEXTURE_2D_ARRAY, coefficients); Samplers.DEPTH_RANGE.makeActive(); RenderSystem.bindTexture(depthBounds); @@ -60,7 +70,8 @@ public class OitFramebuffer { Samplers.NOISE.makeActive(); NoiseTextures.BLUE_NOISE.bind(); - GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo); + GlStateManager._glBindFramebuffer(GL32.GL_FRAMEBUFFER, fbo); + GL32.glFramebufferTexture(GL32.GL_FRAMEBUFFER, GL32.GL_DEPTH_ATTACHMENT, renderTarget.getDepthTextureId(), 0); } /** @@ -72,13 +83,18 @@ public class OitFramebuffer { RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); - RenderSystem.blendEquation(GL46.GL_MAX); - - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0}); + RenderSystem.blendEquation(GL32.GL_MAX); var far = Minecraft.getInstance().gameRenderer.getDepthFar(); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{-far, -far, 0, 0}); + if (GlCompat.SUPPORTS_DSA) { + GL46.glNamedFramebufferDrawBuffers(fbo, DEPTH_RANGE_DRAW_BUFFERS); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{-far, -far, 0, 0}); + } else { + GL32.glDrawBuffers(DEPTH_RANGE_DRAW_BUFFERS); + RenderSystem.clearColor(-far, -far, 0, 0); + RenderSystem.clear(GL32.GL_COLOR_BUFFER_BIT, false); + } } /** @@ -90,14 +106,20 @@ public class OitFramebuffer { RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); - RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + RenderSystem.blendEquation(GL32.GL_FUNC_ADD); - 
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2, GL46.GL_COLOR_ATTACHMENT3, GL46.GL_COLOR_ATTACHMENT4}); + if (GlCompat.SUPPORTS_DSA) { + GL46.glNamedFramebufferDrawBuffers(fbo, RENDER_TRANSMITTANCE_DRAW_BUFFERS); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0}); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, new float[]{0, 0, 0, 0}); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 3, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, CLEAR_TO_ZERO); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, CLEAR_TO_ZERO); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, CLEAR_TO_ZERO); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 3, CLEAR_TO_ZERO); + } else { + GL32.glDrawBuffers(RENDER_TRANSMITTANCE_DRAW_BUFFERS); + RenderSystem.clearColor(0, 0, 0, 0); + RenderSystem.clear(GL32.GL_COLOR_BUFFER_BIT, false); + } } /** @@ -111,7 +133,11 @@ public class OitFramebuffer { RenderSystem.disableBlend(); RenderSystem.depthFunc(GL32.GL_ALWAYS); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{}); + if (GlCompat.SUPPORTS_DSA) { + GL46.glNamedFramebufferDrawBuffers(fbo, DEPTH_ONLY_DRAW_BUFFERS); + } else { + GL32.glDrawBuffers(DEPTH_ONLY_DRAW_BUFFERS); + } programs.getOitDepthProgram() .bind(); @@ -122,17 +148,23 @@ public class OitFramebuffer { /** * Sample the transmittance function and accumulate. 
*/ - public void shade() { + public void accumulate() { // No depth writes, but we'll still use the depth test RenderSystem.depthMask(false); RenderSystem.colorMask(true, true, true, true); RenderSystem.enableBlend(); RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE); - RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + RenderSystem.blendEquation(GL32.GL_FUNC_ADD); - GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT5}); + if (GlCompat.SUPPORTS_DSA) { + GL46.glNamedFramebufferDrawBuffers(fbo, ACCUMULATE_DRAW_BUFFERS); - GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0}); + GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, CLEAR_TO_ZERO); + } else { + GL32.glDrawBuffers(ACCUMULATE_DRAW_BUFFERS); + RenderSystem.clearColor(0, 0, 0, 0); + RenderSystem.clear(GL32.GL_COLOR_BUFFER_BIT, false); + } } /** @@ -163,7 +195,7 @@ public class OitFramebuffer { // Though note that the alpha value we emit in the fragment shader is actually (1. - transmittance_total). // The extra inversion step is so we can have a sane alpha value written out for the fabulous blit shader to consume. 
RenderSystem.blendFuncSeparate(GlStateManager.SourceFactor.SRC_ALPHA, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE_MINUS_SRC_ALPHA); - RenderSystem.blendEquation(GL46.GL_FUNC_ADD); + RenderSystem.blendEquation(GL32.GL_FUNC_ADD); RenderSystem.depthFunc(GL32.GL_ALWAYS); GlTextureUnit.T0.makeActive(); @@ -181,28 +213,28 @@ public class OitFramebuffer { public void delete() { deleteTextures(); - GL46.glDeleteVertexArrays(vao); + GL32.glDeleteVertexArrays(vao); } private void drawFullscreenQuad() { // Empty VAO, the actual full screen triangle is generated in the vertex shader GlStateManager._glBindVertexArray(vao); - GL46.glDrawArrays(GL46.GL_TRIANGLES, 0, 3); + GL32.glDrawArrays(GL32.GL_TRIANGLES, 0, 3); } private void deleteTextures() { if (depthBounds != -1) { - GL46.glDeleteTextures(depthBounds); + GL32.glDeleteTextures(depthBounds); } if (coefficients != -1) { - GL46.glDeleteTextures(coefficients); + GL32.glDeleteTextures(coefficients); } if (accumulate != -1) { - GL46.glDeleteTextures(accumulate); + GL32.glDeleteTextures(accumulate); } if (fbo != -1) { - GL46.glDeleteFramebuffers(fbo); + GL32.glDeleteFramebuffers(fbo); } // We sometimes get the same texture ID back when creating new textures, @@ -223,29 +255,65 @@ public class OitFramebuffer { deleteTextures(); - fbo = GL46.glCreateFramebuffers(); + if (GlCompat.SUPPORTS_DSA) { + fbo = GL46.glCreateFramebuffers(); - depthBounds = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - coefficients = GL46.glCreateTextures(GL46.GL_TEXTURE_2D_ARRAY); - accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + depthBounds = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + coefficients = GL46.glCreateTextures(GL46.GL_TEXTURE_2D_ARRAY); + accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); - GL46.glTextureStorage2D(depthBounds, 1, GL32.GL_RG32F, width, height); - GL46.glTextureStorage3D(coefficients, 1, GL32.GL_RGBA16F, width, height, 4); - 
GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height); + GL46.glTextureStorage2D(depthBounds, 1, GL32.GL_RG32F, width, height); + GL46.glTextureStorage3D(coefficients, 1, GL32.GL_RGBA16F, width, height, 4); + GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height); - // for (int tex : new int[]{zerothMoment, moments, composite}) { - // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); - // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); - // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); - // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); - // GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); - // } + GL46.glNamedFramebufferTexture(fbo, GL32.GL_COLOR_ATTACHMENT0, depthBounds, 0); + GL46.glNamedFramebufferTextureLayer(fbo, GL32.GL_COLOR_ATTACHMENT1, coefficients, 0, 0); + GL46.glNamedFramebufferTextureLayer(fbo, GL32.GL_COLOR_ATTACHMENT2, coefficients, 0, 1); + GL46.glNamedFramebufferTextureLayer(fbo, GL32.GL_COLOR_ATTACHMENT3, coefficients, 0, 2); + GL46.glNamedFramebufferTextureLayer(fbo, GL32.GL_COLOR_ATTACHMENT4, coefficients, 0, 3); + GL46.glNamedFramebufferTexture(fbo, GL32.GL_COLOR_ATTACHMENT5, accumulate, 0); + } else { + fbo = GL46.glGenFramebuffers(); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, depthBounds, 0); - GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT1, coefficients, 0, 0); - GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT2, coefficients, 0, 1); - GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT3, coefficients, 0, 2); - GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT4, coefficients, 0, 3); - GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT5, accumulate, 0); + depthBounds = GL32.glGenTextures(); + coefficients = GL32.glGenTextures(); + accumulate = GL32.glGenTextures(); + + 
GlTextureUnit.T0.makeActive(); + RenderSystem.bindTexture(0); + + GL32.glBindTexture(GL32.GL_TEXTURE_2D, depthBounds); + GL32.glTexImage2D(GL32.GL_TEXTURE_2D, 0, GL32.GL_RG32F, width, height, 0, GL46.GL_RGBA, GL46.GL_BYTE, 0); + + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + + GL32.glBindTexture(GL32.GL_TEXTURE_2D_ARRAY, coefficients); + GL32.glTexImage3D(GL32.GL_TEXTURE_2D_ARRAY, 0, GL32.GL_RGBA16F, width, height, 4, 0, GL46.GL_RGBA, GL46.GL_BYTE, 0); + + GL32.glTexParameteri(GL32.GL_TEXTURE_2D_ARRAY, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D_ARRAY, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D_ARRAY, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D_ARRAY, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + + GL32.glBindTexture(GL32.GL_TEXTURE_2D, accumulate); + GL32.glTexImage2D(GL32.GL_TEXTURE_2D, 0, GL32.GL_RGBA16F, width, height, 0, GL46.GL_RGBA, GL46.GL_BYTE, 0); + + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + + GlStateManager._glBindFramebuffer(GL32.GL_FRAMEBUFFER, fbo); + + GL46.glFramebufferTexture(GL32.GL_FRAMEBUFFER, GL32.GL_COLOR_ATTACHMENT0, depthBounds, 0); + GL46.glFramebufferTextureLayer(GL32.GL_FRAMEBUFFER, GL32.GL_COLOR_ATTACHMENT1, coefficients, 0, 0); + 
GL46.glFramebufferTextureLayer(GL32.GL_FRAMEBUFFER, GL32.GL_COLOR_ATTACHMENT2, coefficients, 0, 1); + GL46.glFramebufferTextureLayer(GL32.GL_FRAMEBUFFER, GL32.GL_COLOR_ATTACHMENT3, coefficients, 0, 2); + GL46.glFramebufferTextureLayer(GL32.GL_FRAMEBUFFER, GL32.GL_COLOR_ATTACHMENT4, coefficients, 0, 3); + GL46.glFramebufferTexture(GL32.GL_FRAMEBUFFER, GL32.GL_COLOR_ATTACHMENT5, accumulate, 0); + } } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java index b07070bc7..d93c72426 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java @@ -39,6 +39,8 @@ public final class GlCompat { public static final boolean ALLOW_DSA = true; public static final GlslVersion MAX_GLSL_VERSION = maxGlslVersion(); + public static final boolean SUPPORTS_DSA = ALLOW_DSA && isDsaSupported(); + public static final boolean SUPPORTS_INSTANCING = isInstancingSupported(); public static final boolean SUPPORTS_INDIRECT = isIndirectSupported(); @@ -165,6 +167,14 @@ public final class GlCompat { && CAPABILITIES.GL_ARB_vertex_attrib_binding; } + private static boolean isDsaSupported() { + if (CAPABILITIES == null) { + return false; + } + + return CAPABILITIES.GL_ARB_direct_state_access; + } + /** * Try to compile a shader with progressively lower glsl versions. * The first version to compile successfully is returned. 
From 6af08f542634a215cbca571ab419420ee45de1d2 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 23 Feb 2025 18:31:23 -0800 Subject: [PATCH 12/12] Transparency, for instance - Get wavelet oit working on instancing - Fix shaders not compiling on glsl 150 - Add missing extensions for indirect - Fix implicit int -> uint casts - Explicitly bind new sampler locations on the java side - Ensure FMA is always available, even if only defined through a hack - Inline a lot of the instanced draw manager to make it easier to sort draws and separate oit draws - Move oit full screen pass programs to a separate class, shared by instancing and indirect --- .../backend/compile/IndirectPrograms.java | 36 ++--- .../backend/compile/InstancingPrograms.java | 17 ++- .../flywheel/backend/compile/OitPrograms.java | 68 ++++++++++ .../backend/compile/PipelineCompiler.java | 16 +++ .../engine/indirect/IndirectDrawManager.java | 2 +- .../engine/indirect/OitFramebuffer.java | 6 +- .../instancing/InstancedDrawManager.java | 128 +++++++++++++++++- .../instancing/InstancedRenderStage.java | 106 --------------- .../flywheel/backend/gl/GlCompat.java | 3 +- .../flywheel/flywheel/internal/common.frag | 18 +-- .../internal/{indirect => }/fullscreen.vert | 0 .../flywheel/flywheel/internal/light_lut.glsl | 30 ++-- .../{indirect => }/oit_composite.frag | 8 +- .../internal/{indirect => }/oit_depth.frag | 4 +- .../flywheel/flywheel/internal/wavelet.glsl | 9 +- 15 files changed, 266 insertions(+), 185 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/compile/OitPrograms.java delete mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java rename common/src/backend/resources/assets/flywheel/flywheel/internal/{indirect => }/fullscreen.vert (100%) rename common/src/backend/resources/assets/flywheel/flywheel/internal/{indirect => }/oit_composite.frag (73%) rename 
common/src/backend/resources/assets/flywheel/flywheel/internal/{indirect => }/oit_depth.frag (93%) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index d69c8e2e0..3dd2979f6 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -32,10 +32,6 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation DOWNSAMPLE_FIRST = Flywheel.rl("internal/indirect/downsample_first.glsl"); private static final ResourceLocation DOWNSAMPLE_SECOND = Flywheel.rl("internal/indirect/downsample_second.glsl"); - private static final ResourceLocation FULLSCREEN = Flywheel.rl("internal/indirect/fullscreen.vert"); - private static final ResourceLocation OIT_COMPOSITE = Flywheel.rl("internal/indirect/oit_composite.frag"); - private static final ResourceLocation OIT_DEPTH = Flywheel.rl("internal/indirect/oit_depth.frag"); - private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -48,13 +44,13 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final PipelineCompiler pipeline; private final CompilationHarness> culling; private final CompilationHarness utils; - private final CompilationHarness fullscreen; + private final OitPrograms oitPrograms; - private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness> culling, CompilationHarness utils, CompilationHarness fullscreen) { + private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness> culling, CompilationHarness utils, OitPrograms oitPrograms) { this.pipeline = pipeline; this.culling = culling; this.utils = utils; - this.fullscreen = fullscreen; + this.oitPrograms = oitPrograms; } private static List getExtensions(GlslVersion 
glslVersion) { @@ -64,9 +60,11 @@ public class IndirectPrograms extends AtomicReferenceCounted { } if (glslVersion.compareTo(GlslVersion.V420) < 0) { extensions.add("GL_ARB_shading_language_420pack"); + extensions.add("GL_ARB_shader_image_load_store"); } if (glslVersion.compareTo(GlslVersion.V430) < 0) { extensions.add("GL_ARB_shader_storage_buffer_object"); + extensions.add("GL_ARB_shader_image_size"); } if (glslVersion.compareTo(GlslVersion.V460) < 0) { extensions.add("GL_ARB_shader_draw_parameters"); @@ -93,7 +91,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS); var cullingCompiler = createCullingCompiler(sources); var utilCompiler = createUtilCompiler(sources); - var fullscreenCompiler = createFullscreenCompiler(sources); + var fullscreenCompiler = OitPrograms.createFullscreenCompiler(sources); IndirectPrograms newInstance = new IndirectPrograms(pipelineCompiler, cullingCompiler, utilCompiler, fullscreenCompiler); @@ -131,18 +129,6 @@ public class IndirectPrograms extends AtomicReferenceCounted { .harness("utilities", sources); } - private static CompilationHarness createFullscreenCompiler(ShaderSources sources) { - return UTIL.program() - .link(UTIL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.VERTEX) - .nameMapper($ -> "fullscreen/fullscreen") - .withResource(FULLSCREEN)) - .link(UTIL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.FRAGMENT) - .nameMapper(rl -> "fullscreen/" + ResourceUtil.toDebugFileNameNoExtension(rl)) - .withResource(s -> s)) - .postLink((key, program) -> Uniforms.setUniformBlockBindings(program)) - .harness("fullscreen", sources); - } - static void setInstance(@Nullable IndirectPrograms newInstance) { if (instance != null) { instance.release(); @@ -190,12 +176,8 @@ public class IndirectPrograms extends AtomicReferenceCounted { return utils.get(DOWNSAMPLE_SECOND); } - public GlProgram 
getOitCompositeProgram() { - return fullscreen.get(OIT_COMPOSITE); - } - - public GlProgram getOitDepthProgram() { - return fullscreen.get(OIT_DEPTH); + public OitPrograms oitPrograms() { + return oitPrograms; } @Override @@ -203,6 +185,6 @@ public class IndirectPrograms extends AtomicReferenceCounted { pipeline.delete(); culling.delete(); utils.delete(); - fullscreen.delete(); + oitPrograms.delete(); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java index 392435849..96704fbbc 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java @@ -23,8 +23,11 @@ public class InstancingPrograms extends AtomicReferenceCounted { private final PipelineCompiler pipeline; - private InstancingPrograms(PipelineCompiler pipeline) { + private final OitPrograms oitPrograms; + + private InstancingPrograms(PipelineCompiler pipeline, OitPrograms oitPrograms) { this.pipeline = pipeline; + this.oitPrograms = oitPrograms; } private static List getExtensions(GlslVersion glslVersion) { @@ -41,7 +44,8 @@ public class InstancingPrograms extends AtomicReferenceCounted { } var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INSTANCING, vertexComponents, fragmentComponents, EXTENSIONS); - InstancingPrograms newInstance = new InstancingPrograms(pipelineCompiler); + var fullscreen = OitPrograms.createFullscreenCompiler(sources); + InstancingPrograms newInstance = new InstancingPrograms(pipelineCompiler, fullscreen); setInstance(newInstance); } @@ -69,12 +73,17 @@ public class InstancingPrograms extends AtomicReferenceCounted { setInstance(null); } - public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material) { - return pipeline.get(instanceType, contextShader, material, 
PipelineCompiler.OitMode.OFF); + public GlProgram get(InstanceType instanceType, ContextShader contextShader, Material material, PipelineCompiler.OitMode mode) { + return pipeline.get(instanceType, contextShader, material, mode); + } + + public OitPrograms oitPrograms() { + return oitPrograms; } @Override protected void _delete() { pipeline.delete(); + oitPrograms.delete(); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/OitPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/OitPrograms.java new file mode 100644 index 000000000..75bc1a0c3 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/OitPrograms.java @@ -0,0 +1,68 @@ +package dev.engine_room.flywheel.backend.compile; + +import dev.engine_room.flywheel.api.Flywheel; +import dev.engine_room.flywheel.backend.Samplers; +import dev.engine_room.flywheel.backend.compile.core.CompilationHarness; +import dev.engine_room.flywheel.backend.compile.core.Compile; +import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; +import dev.engine_room.flywheel.backend.gl.GlCompat; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; +import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.backend.gl.shader.ShaderType; +import dev.engine_room.flywheel.backend.glsl.GlslVersion; +import dev.engine_room.flywheel.backend.glsl.ShaderSources; +import dev.engine_room.flywheel.lib.util.ResourceUtil; +import net.minecraft.resources.ResourceLocation; + +public class OitPrograms { + private static final ResourceLocation FULLSCREEN = Flywheel.rl("internal/fullscreen.vert"); + static final ResourceLocation OIT_COMPOSITE = Flywheel.rl("internal/oit_composite.frag"); + static final ResourceLocation OIT_DEPTH = Flywheel.rl("internal/oit_depth.frag"); + + private static final Compile COMPILE = new Compile<>(); + + private final CompilationHarness harness; + + public OitPrograms(CompilationHarness 
harness) { + this.harness = harness; + } + + public static OitPrograms createFullscreenCompiler(ShaderSources sources) { + var harness = COMPILE.program() + .link(COMPILE.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.VERTEX) + .nameMapper($ -> "fullscreen/fullscreen") + .withResource(FULLSCREEN)) + .link(COMPILE.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.FRAGMENT) + .nameMapper(rl -> "fullscreen/" + ResourceUtil.toDebugFileNameNoExtension(rl)) + .onCompile((rl, compilation) -> { + if (GlCompat.MAX_GLSL_VERSION.compareTo(GlslVersion.V400) < 0) { + // Need to define FMA for the wavelet calculations + compilation.define("fma(a, b, c) ((a) * (b) + (c))"); + } + }) + .withResource(s -> s)) + .postLink((key, program) -> { + program.bind(); + Uniforms.setUniformBlockBindings(program); + program.setSamplerBinding("_flw_accumulate", GlTextureUnit.T0); + program.setSamplerBinding("_flw_depthRange", Samplers.DEPTH_RANGE); + program.setSamplerBinding("_flw_coefficients", Samplers.COEFFICIENTS); + + GlProgram.unbind(); + }) + .harness("fullscreen", sources); + return new OitPrograms(harness); + } + + public GlProgram getOitCompositeProgram() { + return harness.get(OitPrograms.OIT_COMPOSITE); + } + + public GlProgram getOitDepthProgram() { + return harness.get(OitPrograms.OIT_DEPTH); + } + + public void delete() { + harness.delete(); + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java index 388563f49..3bd507bde 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java @@ -24,6 +24,7 @@ import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; import 
dev.engine_room.flywheel.backend.gl.shader.ShaderType; +import dev.engine_room.flywheel.backend.glsl.GlslVersion; import dev.engine_room.flywheel.backend.glsl.ShaderSources; import dev.engine_room.flywheel.backend.glsl.SourceComponent; import dev.engine_room.flywheel.backend.glsl.generate.FnSignature; @@ -96,6 +97,12 @@ public final class PipelineCompiler { return "pipeline/" + pipeline.compilerMarker() + "/" + instance + "/" + material + "_" + context + debug; }) .requireExtensions(extensions) + .onCompile((rl, compilation) -> { + if (GlCompat.MAX_GLSL_VERSION.compareTo(GlslVersion.V400) < 0 && !extensions.contains("GL_ARB_gpu_shader5")) { + // Only define fma if it wouldn't be declared by gpu shader 5 + compilation.define("fma(a, b, c) ((a) * (b) + (c))"); + } + }) .onCompile((key, comp) -> key.contextShader() .onCompile(comp)) .onCompile((key, comp) -> BackendConfig.INSTANCE.lightSmoothness() @@ -133,6 +140,12 @@ public final class PipelineCompiler { }) .requireExtensions(extensions) .enableExtension("GL_ARB_conservative_depth") + .onCompile((rl, compilation) -> { + if (GlCompat.MAX_GLSL_VERSION.compareTo(GlslVersion.V400) < 0 && !extensions.contains("GL_ARB_gpu_shader5")) { + // Only define fma if it wouldn't be declared by gpu shader 5 + compilation.define("fma(a, b, c) ((a) * (b) + (c))"); + } + }) .onCompile((key, comp) -> key.contextShader() .onCompile(comp)) .onCompile((key, comp) -> BackendConfig.INSTANCE.lightSmoothness() @@ -178,6 +191,9 @@ public final class PipelineCompiler { program.setSamplerBinding("flw_diffuseTex", Samplers.DIFFUSE); program.setSamplerBinding("flw_overlayTex", Samplers.OVERLAY); program.setSamplerBinding("flw_lightTex", Samplers.LIGHT); + program.setSamplerBinding("_flw_depthRange", Samplers.DEPTH_RANGE); + program.setSamplerBinding("_flw_coefficients", Samplers.COEFFICIENTS); + program.setSamplerBinding("_flw_blueNoise", Samplers.NOISE); pipeline.onLink() .accept(program); key.contextShader() diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 1e38fedbd..68472b971 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -66,7 +66,7 @@ public class IndirectDrawManager extends DrawManager> { depthPyramid = new DepthPyramid(programs); - oitFramebuffer = new OitFramebuffer(programs); + oitFramebuffer = new OitFramebuffer(programs.oitPrograms()); } @Override diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java index d184acf02..693fe9cef 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/OitFramebuffer.java @@ -9,7 +9,7 @@ import com.mojang.blaze3d.systems.RenderSystem; import dev.engine_room.flywheel.backend.NoiseTextures; import dev.engine_room.flywheel.backend.Samplers; -import dev.engine_room.flywheel.backend.compile.IndirectPrograms; +import dev.engine_room.flywheel.backend.compile.OitPrograms; import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import net.minecraft.client.Minecraft; @@ -21,7 +21,7 @@ public class OitFramebuffer { public static final int[] ACCUMULATE_DRAW_BUFFERS = {GL46.GL_COLOR_ATTACHMENT5}; public static final int[] DEPTH_ONLY_DRAW_BUFFERS = {}; - private final IndirectPrograms programs; + private final OitPrograms programs; private final int vao; public int fbo = -1; @@ -32,7 +32,7 @@ public class OitFramebuffer { private int lastWidth = -1; private int lastHeight = -1; - public 
OitFramebuffer(IndirectPrograms programs) { + public OitFramebuffer(OitPrograms programs) { this.programs = programs; if (GlCompat.SUPPORTS_DSA) { vao = GL46.glCreateVertexArrays(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java index e2a97ddf7..0ef90065f 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java @@ -1,13 +1,17 @@ package dev.engine_room.flywheel.backend.engine.instancing; +import java.util.ArrayList; +import java.util.Comparator; import java.util.List; import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.material.Material; +import dev.engine_room.flywheel.api.material.Transparency; import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.compile.InstancingPrograms; +import dev.engine_room.flywheel.backend.compile.PipelineCompiler; import dev.engine_room.flywheel.backend.engine.AbstractInstancer; import dev.engine_room.flywheel.backend.engine.CommonCrumbling; import dev.engine_room.flywheel.backend.engine.DrawManager; @@ -19,6 +23,7 @@ import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; import dev.engine_room.flywheel.backend.engine.TextureBinder; import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; +import dev.engine_room.flywheel.backend.engine.indirect.OitFramebuffer; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.TextureBuffer; import 
dev.engine_room.flywheel.backend.gl.array.GlVertexArray; @@ -28,7 +33,16 @@ import net.minecraft.client.Minecraft; import net.minecraft.client.resources.model.ModelBakery; public class InstancedDrawManager extends DrawManager> { - private final InstancedRenderStage draws = new InstancedRenderStage(); + private static final Comparator DRAW_COMPARATOR = Comparator.comparing(InstancedDraw::bias) + .thenComparing(InstancedDraw::indexOfMeshInModel) + .thenComparing(InstancedDraw::material, MaterialRenderState.COMPARATOR); + + private final List allDraws = new ArrayList<>(); + private boolean needSort = false; + + private final List draws = new ArrayList<>(); + private final List oitDraws = new ArrayList<>(); + private final InstancingPrograms programs; /** * A map of vertex types to their mesh pools. @@ -38,6 +52,8 @@ public class InstancedDrawManager extends DrawManager> { private final TextureBuffer instanceTexture; private final InstancedLight light; + private final OitFramebuffer oitFramebuffer; + public InstancedDrawManager(InstancingPrograms programs) { programs.acquire(); this.programs = programs; @@ -48,6 +64,9 @@ public class InstancedDrawManager extends DrawManager> { light = new InstancedLight(); meshPool.bind(vao); + + oitFramebuffer = new OitFramebuffer(programs.oitPrograms()); + } @Override @@ -66,13 +85,31 @@ public class InstancedDrawManager extends DrawManager> { }); // Remove the draw calls for any instancers we deleted. 
- draws.flush(); + needSort |= allDraws.removeIf(InstancedDraw::deleted); + + if (needSort) { + allDraws.sort(DRAW_COMPARATOR); + + draws.clear(); + oitDraws.clear(); + + for (var draw : allDraws) { + if (draw.material() + .transparency() == Transparency.ORDER_INDEPENDENT) { + oitDraws.add(draw); + } else { + draws.add(draw); + } + } + + needSort = false; + } meshPool.flush(); light.flush(lightStorage); - if (draws.isEmpty()) { + if (allDraws.isEmpty()) { return; } @@ -81,18 +118,92 @@ public class InstancedDrawManager extends DrawManager> { TextureBinder.bindLightAndOverlay(); light.bind(); - draws.draw(instanceTexture, programs); + submitDraws(); + + if (!oitDraws.isEmpty()) { + oitFramebuffer.prepare(); + + oitFramebuffer.depthRange(); + + submitOitDraws(PipelineCompiler.OitMode.DEPTH_RANGE); + + oitFramebuffer.renderTransmittance(); + + submitOitDraws(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS); + + oitFramebuffer.renderDepthFromTransmittance(); + + // Need to bind this again because we just drew a full screen quad for OIT. 
+ vao.bindForDraw(); + + oitFramebuffer.accumulate(); + + submitOitDraws(PipelineCompiler.OitMode.EVALUATE); + + oitFramebuffer.composite(); + } MaterialRenderState.reset(); TextureBinder.resetLightAndOverlay(); } + private void submitDraws() { + for (var drawCall : draws) { + var material = drawCall.material(); + var groupKey = drawCall.groupKey; + var environment = groupKey.environment(); + + var program = programs.get(groupKey.instanceType(), environment.contextShader(), material, PipelineCompiler.OitMode.OFF); + program.bind(); + + environment.setupDraw(program); + + uploadMaterialUniform(program, material); + + program.setUInt("_flw_vertexOffset", drawCall.mesh() + .baseVertex()); + + MaterialRenderState.setup(material); + + Samplers.INSTANCE_BUFFER.makeActive(); + + drawCall.render(instanceTexture); + } + } + + private void submitOitDraws(PipelineCompiler.OitMode mode) { + for (var drawCall : oitDraws) { + var material = drawCall.material(); + var groupKey = drawCall.groupKey; + var environment = groupKey.environment(); + + var program = programs.get(groupKey.instanceType(), environment.contextShader(), material, mode); + program.bind(); + + environment.setupDraw(program); + + uploadMaterialUniform(program, material); + + program.setUInt("_flw_vertexOffset", drawCall.mesh() + .baseVertex()); + + MaterialRenderState.setupOit(material); + + Samplers.INSTANCE_BUFFER.makeActive(); + + drawCall.render(instanceTexture); + } + } + @Override public void delete() { instancers.values() .forEach(InstancedInstancer::delete); - draws.delete(); + allDraws.forEach(InstancedDraw::delete); + allDraws.clear(); + draws.clear(); + oitDraws.clear(); meshPool.delete(); instanceTexture.delete(); @@ -101,6 +212,8 @@ public class InstancedDrawManager extends DrawManager> { light.delete(); + oitFramebuffer.delete(); + super.delete(); } @@ -122,7 +235,8 @@ public class InstancedDrawManager extends DrawManager> { GroupKey groupKey = new GroupKey<>(key.type(), key.environment()); 
InstancedDraw instancedDraw = new InstancedDraw(instancer, mesh, groupKey, entry.material(), key.bias(), i); - draws.put(groupKey, instancedDraw); + allDraws.add(instancedDraw); + needSort = true; instancer.addDrawCall(instancedDraw); } } @@ -165,7 +279,7 @@ public class InstancedDrawManager extends DrawManager> { for (InstancedDraw draw : instancer.draws()) { CommonCrumbling.applyCrumblingProperties(crumblingMaterial, draw.material()); - var program = programs.get(shader.instanceType(), ContextShader.CRUMBLING, crumblingMaterial); + var program = programs.get(shader.instanceType(), ContextShader.CRUMBLING, crumblingMaterial, PipelineCompiler.OitMode.OFF); program.bind(); program.setInt("_flw_baseInstance", index); uploadMaterialUniform(program, crumblingMaterial); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java deleted file mode 100644 index 38614226c..000000000 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java +++ /dev/null @@ -1,106 +0,0 @@ -package dev.engine_room.flywheel.backend.engine.instancing; - -import static dev.engine_room.flywheel.backend.engine.instancing.InstancedDrawManager.uploadMaterialUniform; - -import java.util.ArrayList; -import java.util.Comparator; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import dev.engine_room.flywheel.backend.Samplers; -import dev.engine_room.flywheel.backend.compile.InstancingPrograms; -import dev.engine_room.flywheel.backend.engine.GroupKey; -import dev.engine_room.flywheel.backend.engine.MaterialRenderState; -import dev.engine_room.flywheel.backend.gl.TextureBuffer; - -public class InstancedRenderStage { - private static final Comparator DRAW_COMPARATOR = Comparator.comparing(InstancedDraw::bias) - .thenComparing(InstancedDraw::indexOfMeshInModel) - 
.thenComparing(InstancedDraw::material, MaterialRenderState.COMPARATOR); - - private final Map, DrawGroup> groups = new HashMap<>(); - - public InstancedRenderStage() { - } - - public void delete() { - groups.values() - .forEach(DrawGroup::delete); - groups.clear(); - } - - public void put(GroupKey groupKey, InstancedDraw instancedDraw) { - groups.computeIfAbsent(groupKey, $ -> new DrawGroup()) - .put(instancedDraw); - } - - public boolean isEmpty() { - return groups.isEmpty(); - } - - public void flush() { - groups.values() - .forEach(DrawGroup::flush); - - groups.values() - .removeIf(DrawGroup::isEmpty); - } - - public void draw(TextureBuffer instanceTexture, InstancingPrograms programs) { - for (var entry : groups.entrySet()) { - var shader = entry.getKey(); - var drawCalls = entry.getValue(); - - var environment = shader.environment(); - - for (var drawCall : drawCalls.draws) { - var material = drawCall.material(); - - var program = programs.get(shader.instanceType(), environment.contextShader(), material); - program.bind(); - - environment.setupDraw(program); - - uploadMaterialUniform(program, material); - - program.setUInt("_flw_vertexOffset", drawCall.mesh() - .baseVertex()); - - MaterialRenderState.setup(material); - - Samplers.INSTANCE_BUFFER.makeActive(); - - drawCall.render(instanceTexture); - } - } - } - - public static class DrawGroup { - private final List draws = new ArrayList<>(); - private boolean needSort = false; - - public void put(InstancedDraw instancedDraw) { - draws.add(instancedDraw); - needSort = true; - } - - public void delete() { - draws.forEach(InstancedDraw::delete); - draws.clear(); - } - - public void flush() { - needSort |= draws.removeIf(InstancedDraw::deleted); - - if (needSort) { - draws.sort(DRAW_COMPARATOR); - needSort = false; - } - } - - public boolean isEmpty() { - return draws.isEmpty(); - } - } -} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java index d93c72426..742b8770c 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java @@ -163,8 +163,7 @@ public final class GlCompat { && CAPABILITIES.GL_ARB_multi_draw_indirect && CAPABILITIES.GL_ARB_shader_draw_parameters && CAPABILITIES.GL_ARB_shader_storage_buffer_object - && CAPABILITIES.GL_ARB_shading_language_420pack - && CAPABILITIES.GL_ARB_vertex_attrib_binding; + && CAPABILITIES.GL_ARB_shading_language_420pack && CAPABILITIES.GL_ARB_vertex_attrib_binding && CAPABILITIES.GL_ARB_shader_image_load_store && CAPABILITIES.GL_ARB_shader_image_size; } private static boolean isDsaSupported() { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index 2693798a0..6f2c04ce2 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -21,11 +21,11 @@ flat in uvec2 _flw_ids; #ifdef _FLW_OIT -layout (binding = 7) uniform sampler2D _flw_depthRange; +uniform sampler2D _flw_depthRange; -layout (binding = 8) uniform sampler2DArray _flw_coefficients; +uniform sampler2DArray _flw_coefficients; -layout (binding = 9) uniform sampler2D _flw_blueNoise; +uniform sampler2D _flw_blueNoise; float tented_blue_noise(float normalizedDepth) { @@ -55,22 +55,22 @@ float depth() { #ifdef _FLW_DEPTH_RANGE -layout (location = 0) out vec2 _flw_depthRange_out; +out vec2 _flw_depthRange_out; #endif #ifdef _FLW_COLLECT_COEFFS -layout (location = 0) out vec4 _flw_coeffs0; -layout (location = 1) out vec4 _flw_coeffs1; -layout (location = 2) out vec4 _flw_coeffs2; -layout (location = 3) out vec4 _flw_coeffs3; +out vec4 _flw_coeffs0; +out vec4 _flw_coeffs1; +out vec4 _flw_coeffs2; +out vec4 
_flw_coeffs3; #endif #ifdef _FLW_EVALUATE -layout (location = 0) out vec4 _flw_accumulate; +out vec4 _flw_accumulate; #endif diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/fullscreen.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/fullscreen.vert similarity index 100% rename from common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/fullscreen.vert rename to common/src/backend/resources/assets/flywheel/flywheel/internal/fullscreen.vert diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl index e9ec12039..3c2a7e796 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl @@ -1,12 +1,12 @@ -const uint _FLW_BLOCKS_PER_SECTION = 18 * 18 * 18; +const uint _FLW_BLOCKS_PER_SECTION = 18u * 18u * 18u; const uint _FLW_LIGHT_SIZE_BYTES = _FLW_BLOCKS_PER_SECTION; -const uint _FLW_SOLID_SIZE_BYTES = ((_FLW_BLOCKS_PER_SECTION + 31) / 32) * 4; +const uint _FLW_SOLID_SIZE_BYTES = ((_FLW_BLOCKS_PER_SECTION + 31u) / 32u) * 4u; const uint _FLW_LIGHT_START_BYTES = _FLW_SOLID_SIZE_BYTES; const uint _FLW_LIGHT_SECTION_SIZE_BYTES = _FLW_SOLID_SIZE_BYTES + _FLW_LIGHT_SIZE_BYTES; -const uint _FLW_SOLID_START_INTS = 0; -const uint _FLW_LIGHT_START_INTS = _FLW_SOLID_SIZE_BYTES / 4; -const uint _FLW_LIGHT_SECTION_SIZE_INTS = _FLW_LIGHT_SECTION_SIZE_BYTES / 4; +const uint _FLW_SOLID_START_INTS = 0u; +const uint _FLW_LIGHT_START_INTS = _FLW_SOLID_SIZE_BYTES / 4u; +const uint _FLW_LIGHT_SECTION_SIZE_INTS = _FLW_LIGHT_SECTION_SIZE_BYTES / 4u; const uint _FLW_COMPLETELY_SOLID = 0x7FFFFFFu; const float _FLW_EPSILON = 1e-5; @@ -29,39 +29,39 @@ bool _flw_nextLut(uint base, int coord, out uint next) { // The base coordinate. 
int start = int(_flw_indexLut(base)); // The width of the coordinate span. - uint size = _flw_indexLut(base + 1); + uint size = _flw_indexLut(base + 1u); // Index of the coordinate in the span. int i = coord - start; - if (i < 0 || i >= size) { + if (i < 0 || i >= int(size)) { // We missed. return true; } - next = _flw_indexLut(base + 2 + i); + next = _flw_indexLut(base + 2u + uint(i)); return false; } bool _flw_chunkCoordToSectionIndex(ivec3 sectionPos, out uint index) { uint first; - if (_flw_nextLut(0, sectionPos.y, first) || first == 0) { + if (_flw_nextLut(0u, sectionPos.y, first) || first == 0u) { return true; } uint second; - if (_flw_nextLut(first, sectionPos.x, second) || second == 0) { + if (_flw_nextLut(first, sectionPos.x, second) || second == 0u) { return true; } uint sectionIndex; - if (_flw_nextLut(second, sectionPos.z, sectionIndex) || sectionIndex == 0) { + if (_flw_nextLut(second, sectionPos.z, sectionIndex) || sectionIndex == 0u) { return true; } // The index is written as 1-based so we can properly detect missing sections. - index = sectionIndex - 1; + index = sectionIndex - 1u; return false; } @@ -87,7 +87,7 @@ bool _flw_isSolid(uint sectionOffset, uvec3 blockInSectionPos) { uint word = _flw_indexLight(sectionOffset + _FLW_SOLID_START_INTS + uintOffset); - return (word & (1u << bitInWordOffset)) != 0; + return (word & (1u << bitInWordOffset)) != 0u; } bool flw_lightFetch(ivec3 blockPos, out vec2 lightCoord) { @@ -98,7 +98,7 @@ bool flw_lightFetch(ivec3 blockPos, out vec2 lightCoord) { // The offset of the section in the light buffer. 
uint sectionOffset = lightSectionIndex * _FLW_LIGHT_SECTION_SIZE_INTS; - uvec3 blockInSectionPos = (blockPos & 0xF) + 1; + uvec3 blockInSectionPos = uvec3((blockPos & 0xF) + 1); lightCoord = vec2(_flw_lightAt(sectionOffset, blockInSectionPos)) * _FLW_LIGHT_NORMALIZER; return true; @@ -106,7 +106,7 @@ bool flw_lightFetch(ivec3 blockPos, out vec2 lightCoord) { uint _flw_fetchSolid3x3x3(uint sectionOffset, ivec3 blockInSectionPos) { - uint ret = 0; + uint ret = 0u; // The formatter does NOT like these macros // @formatter:off diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/oit_composite.frag similarity index 73% rename from common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag rename to common/src/backend/resources/assets/flywheel/flywheel/internal/oit_composite.frag index 2aa577e84..f63f2dc97 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_composite.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/oit_composite.frag @@ -2,11 +2,11 @@ #include "flywheel:internal/depth.glsl" #include "flywheel:internal/uniforms/frame.glsl" -layout (location = 0) out vec4 frag; +out vec4 frag; -layout (binding = 0) uniform sampler2D _flw_accumulate; -layout (binding = 7) uniform sampler2D _flw_depthRange; -layout (binding = 8) uniform sampler2DArray _flw_coefficients; +uniform sampler2D _flw_accumulate; +uniform sampler2D _flw_depthRange; +uniform sampler2DArray _flw_coefficients; void main() { vec4 texel = texelFetch(_flw_accumulate, ivec2(gl_FragCoord.xy), 0); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_depth.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/oit_depth.frag similarity index 93% rename from common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_depth.frag rename to 
common/src/backend/resources/assets/flywheel/flywheel/internal/oit_depth.frag index 1f127b445..c5b782d64 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/oit_depth.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/oit_depth.frag @@ -2,9 +2,9 @@ #include "flywheel:internal/wavelet.glsl" #include "flywheel:internal/depth.glsl" -layout (binding = 7) uniform sampler2D _flw_depthRange; +uniform sampler2D _flw_depthRange; -layout (binding = 8) uniform sampler2DArray _flw_coefficients; +uniform sampler2DArray _flw_coefficients; float eye_depth_from_normalized_transparency_depth(float tDepth) { vec2 depthRange = texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).rg; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl index 6691e3d16..8bfaeb620 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/wavelet.glsl @@ -1,13 +1,12 @@ #define TRANSPARENCY_WAVELET_RANK 3 #define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16 - // ------------------------------------------------------------------------- // WRITING // ------------------------------------------------------------------------- -void add_to_index(inout vec4[4] coefficients, uint index, float addend) { - coefficients[index >> 2][index & 3u] = addend; +void add_to_index(inout vec4[4] coefficients, int index, float addend) { + coefficients[index >> 2][index & 3] = addend; } void add_absorbance(inout vec4[4] coefficients, float signal, float depth) { @@ -43,8 +42,8 @@ void add_transmittance(inout vec4[4] coefficients, float transmittance, float de // ------------------------------------------------------------------------- // TODO: maybe we could reduce the number of texel fetches below? 
-float get_coefficients(in sampler2DArray coefficients, uint index) { - return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u]; +float get_coefficients(in sampler2DArray coefficients, int index) { + return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3]; } /// Compute the total absorbance, as if at infinite depth.