From ec45287cfa07869471951ba33d510fabfb2cd6c4 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Wed, 4 Sep 2024 11:05:04 -0500 Subject: [PATCH] Joining the occult - Implement hi-z occlusion culling - Generate depth pyramid just before issuing cull dispatches - Currently use raw texel fetches but this may be causing loss - Add _flw_cullData to frame uniforms --- .../backend/compile/IndirectPrograms.java | 13 ++- .../backend/engine/indirect/DepthPyramid.java | 106 ++++++++++++++++++ .../engine/indirect/IndirectDrawManager.java | 11 ++ .../backend/engine/uniform/FrameUniforms.java | 16 ++- .../flywheel/backend/gl/shader/GlProgram.java | 11 ++ .../flywheel/internal/indirect/cull.glsl | 59 +++++++++- .../internal/indirect/depth_reduce.glsl | 29 +++++ .../flywheel/internal/uniforms/frame.glsl | 12 ++ 8 files changed, 251 insertions(+), 6 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index 3b705754a..df1696657 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -30,6 +30,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl"); private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl"); private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl"); + private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl"); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -44,12 +45,14 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final Map, GlProgram> culling; private final GlProgram apply; private final GlProgram scatter; + private final GlProgram depthReduce; - private IndirectPrograms(Map pipeline, Map, GlProgram> culling, GlProgram apply, GlProgram scatter) { + private IndirectPrograms(Map pipeline, Map, GlProgram> culling, GlProgram apply, GlProgram scatter, GlProgram depthReduce) { this.pipeline = pipeline; this.culling = culling; this.apply = apply; this.scatter = scatter; + this.depthReduce = depthReduce; } private static List getExtensions(GlslVersion glslVersion) { @@ -94,10 +97,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { try { var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys); var cullingResult = cullingCompiler.compileAndReportErrors(createCullingKeys()); - var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN)); + var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN)); if (pipelineResult != null && cullingResult != null && utils != null) { - newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN)); + newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN)); } } catch (Throwable t) { FlwPrograms.LOGGER.error("Failed to compile indirect programs", t); @@ -184,6 +187,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { return scatter; } + public GlProgram getDepthReduceProgram() { + return depthReduce; + } + @Override protected void _delete() { pipeline.values() diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java new file mode 100644 index 000000000..30e9524c7 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -0,0 +1,106 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.GL32; +import org.lwjgl.opengl.GL46; + +import com.mojang.blaze3d.platform.GlStateManager; + +import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.math.MoreMath; +import net.minecraft.client.Minecraft; + +public class DepthPyramid { + private final GlProgram depthReduceProgram; + + public final int pyramidTextureId; + + private int lastWidth = -1; + private int lastHeight = -1; + + public DepthPyramid(GlProgram depthReduceProgram) { + this.depthReduceProgram = depthReduceProgram; + + pyramidTextureId = GL32.glGenTextures(); + + GlStateManager._bindTexture(pyramidTextureId); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + + } + + public void generate() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + int width = mainRenderTarget.width; + int height = mainRenderTarget.height; + + int mipLevels = getImageMipLevels(width, height); + + createPyramidMips(mipLevels, width, height); + + int depthBufferId = mainRenderTarget.getDepthTextureId(); + + GlStateManager._bindTexture(depthBufferId); + + GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); + + GL46.glActiveTexture(GL32.GL_TEXTURE1); + + depthReduceProgram.bind(); + + for (int i = 0; i < mipLevels; i++) { + int mipWidth = Math.max(1, width >> i); + int mipHeight = Math.max(1, height >> i); + + int srcTexture = (i == 0) ? depthBufferId : pyramidTextureId; + GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture); + + GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); + + depthReduceProgram.setUVec2("imageSize", mipWidth, mipHeight); + depthReduceProgram.setInt("lod", Math.max(0, i - 1)); + + GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1); + + GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT); + } + } + + public void delete() { + GL32.glDeleteTextures(pyramidTextureId); + } + + private void createPyramidMips(int mipLevels, int width, int height) { + if (lastWidth == width && lastHeight == height) { + return; + } + + lastWidth = width; + lastHeight = height; + + GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId); + + for (int i = 0; i < mipLevels; i++) { + int mipWidth = Math.max(1, width >> (i + 1)); + int mipHeight = Math.max(1, height >> (i + 1)); + + GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipWidth, mipHeight, 0, GL32.GL_RED, GL32.GL_FLOAT, 0); + } + } + + private static int getImageMipLevels(int width, int height) { + int result = 1; + + while (width > 2 && height > 2) { + result++; + width /= 2; + height /= 2; + } + + return result; + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index adabbf653..494403045 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -12,6 +12,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.lwjgl.opengl.GL46; + import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.InstanceType; @@ -46,6 +48,8 @@ public class IndirectDrawManager extends DrawManager> { private final LightBuffers lightBuffers; private final MatrixBuffer matrixBuffer; + private final DepthPyramid depthPyramid; + private boolean needsBarrier = false; public IndirectDrawManager(IndirectPrograms programs) { @@ -58,6 +62,8 @@ public class IndirectDrawManager extends DrawManager> { meshPool.bind(vertexArray); lightBuffers = new LightBuffers(); matrixBuffer = new MatrixBuffer(); + + depthPyramid = new DepthPyramid(programs.getDepthReduceProgram()); } @Override @@ -136,6 +142,8 @@ public class IndirectDrawManager extends DrawManager> { stagingBuffer.flush(); + depthPyramid.generate(); + // We could probably save some driver calls here when there are // actually zero instances, but that feels like a very rare case @@ -143,6 +151,9 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); + GL46.glActiveTexture(GL46.GL_TEXTURE0); + GL46.glBindTexture(GL46.GL_TEXTURE_2D, depthPyramid.pyramidTextureId); + for (var group : cullingGroups.values()) { group.dispatchCull(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java index 33bb81901..b19370099 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java @@ -17,7 +17,7 @@ import net.minecraft.world.level.Level; import net.minecraft.world.phys.Vec3; public final class FrameUniforms extends UniformWriter { - private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 10; + private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 16; static final UniformBuffer BUFFER = new UniformBuffer(Uniforms.FRAME_INDEX, SIZE); private static final Matrix4f VIEW = new Matrix4f(); @@ -112,6 +112,8 @@ public final class FrameUniforms extends UniformWriter { ptr = writeInt(ptr, debugMode); + ptr = writeCullData(ptr); + firstWrite = false; BUFFER.markDirty(); } @@ -179,6 +181,18 @@ public final class FrameUniforms extends UniformWriter { return writeInFluidAndBlock(ptr, level, blockPos, cameraPos); } + private static long writeCullData(long ptr) { + ptr = writeFloat(ptr, 0.05F); // zNear + ptr = writeFloat(ptr, Minecraft.getInstance().gameRenderer.getDepthFar()); // zFar + ptr = writeFloat(ptr, PROJECTION.m00()); // P00 + ptr = writeFloat(ptr, PROJECTION.m11()); // P11 + ptr = writeFloat(ptr, Minecraft.getInstance().getMainRenderTarget().width >> 1); // pyramidWidth + ptr = writeFloat(ptr, Minecraft.getInstance().getMainRenderTarget().height >> 1); // pyramidHeight + ptr = writeInt(ptr, 0); // useMin + + return ptr; + } + /** * Writes the frustum planes of the given projection matrix to the given buffer.

* Uses a different format that is friendly towards an optimized instruction-parallel diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java index b221fdddf..9438ef355 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java @@ -11,6 +11,7 @@ import static org.lwjgl.opengl.GL20.glUniform4f; import static org.lwjgl.opengl.GL20.glUniformMatrix3fv; import static org.lwjgl.opengl.GL20.glUniformMatrix4fv; import static org.lwjgl.opengl.GL30.glUniform1ui; +import static org.lwjgl.opengl.GL30.glUniform2ui; import static org.lwjgl.opengl.GL31.GL_INVALID_INDEX; import static org.lwjgl.opengl.GL31.glGetUniformBlockIndex; import static org.lwjgl.opengl.GL31.glUniformBlockBinding; @@ -118,6 +119,16 @@ public class GlProgram extends GlObject { glUniform1ui(uniform, value); } + public void setUVec2(String name, int x, int y) { + int uniform = getUniformLocation(name); + + if (uniform < 0) { + return; + } + + glUniform2ui(uniform, x, y); + } + public void setInt(String glslName, int value) { int uniform = getUniformLocation(glslName); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index e128b0daf..6d8be7aaf 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -23,10 +23,12 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer ModelDescriptor _flw_models[]; }; -layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer { +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer { Matrices _flw_matrices[]; }; +layout(binding = 0) uniform sampler2D _flw_depthPyramid; + // Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing. // Only uses 6 fmas and some boolean ops. // See also: @@ -40,6 +42,28 @@ bool _flw_testSphere(vec3 center, float radius) { return all(xyInside) && all(zInside); } +bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) { + if (c.z > r + znear) { + return false; + } + + vec3 cr = c * r; + float czr2 = c.z * c.z - r * r; + + float vx = sqrt(c.x * c.x + czr2); + float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x); + float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x); + + float vy = sqrt(c.y * c.y + czr2); + float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y); + float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y); + + aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11); + aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space + + return true; +} + bool _flw_isVisible(uint instanceIndex, uint modelIndex) { uint matrixIndex = _flw_models[modelIndex].matrixIndex; BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; @@ -56,7 +80,38 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); } - return _flw_testSphere(center, radius); + bool isVisible = _flw_testSphere(center, radius); + + if (isVisible) { + transformBoundingSphere(flw_view, center, radius); + + vec4 aabb; + if (projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb)) + { + float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth; + float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight; + + float level = floor(log2(max(width, height))); + + float depth01 = textureLod(_flw_depthPyramid, aabb.xw, level).r; + float depth11 = textureLod(_flw_depthPyramid, aabb.zw, level).r; + float depth10 = textureLod(_flw_depthPyramid, aabb.zy, level).r; + float depth00 = textureLod(_flw_depthPyramid, aabb.xy, level).r; + + float depth; + if (_flw_cullData.useMin == 0) { + depth = max(max(depth00, depth01), max(depth10, depth11)); + } else { + depth = min(min(depth00, depth01), min(depth10, depth11)); + } + + float depthSphere = 1. + _flw_cullData.znear / (center.z + radius); + + isVisible = isVisible && depthSphere <= depth; + } + } + + return isVisible; } void main() { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl new file mode 100644 index 000000000..42bcd7f4e --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl @@ -0,0 +1,29 @@ +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0, r32f) uniform writeonly image2D outImage; +layout(binding = 1) uniform sampler2D inImage; + +uniform uvec2 imageSize; +uniform int lod; + +uniform int useMin = 0; + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy; + + ivec2 samplePos = ivec2(pos) * 2; + + float depth01 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 1)).r; + float depth11 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 1)).r; + float depth10 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 0)).r; + float depth00 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 0)).r; + + float depth; + if (useMin == 0) { + depth = max(max(depth00, depth01), max(depth10, depth11)); + } else { + depth = min(min(depth00, depth01), min(depth10, depth11)); + } + + imageStore(outImage, ivec2(pos), vec4(depth)); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl index 4b3cfe69f..4ce722400 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl @@ -9,6 +9,16 @@ struct FrustumPlanes { vec2 zW; // }; +struct _FlwCullData { + float znear; + float zfar; + float P00; + float P11; + float pyramidWidth; + float pyramidHeight; + uint useMin; +}; + layout(std140) uniform _FlwFrameUniforms { FrustumPlanes flw_frustumPlanes; @@ -47,6 +57,8 @@ layout(std140) uniform _FlwFrameUniforms { uint flw_cameraInBlock; uint _flw_debugMode; + + _FlwCullData _flw_cullData; }; #define flw_renderOrigin (_flw_renderOrigin.xyz)