From f12aa15daeef2055323001a285608700b5e58c12 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Mon, 9 Sep 2024 14:20:25 -0700 Subject: [PATCH] It's alive - Fix crash by resetting the indirect dispatch buffer each frame - Use DSA + immutable storage for depth pyramid and visibility buffer - In pass two, check against the thread count written out in pass one to early return - Require a draw barrier after each apply dispatch - Use a storage array for the last frame visibility buffer --- .../backend/engine/indirect/DepthPyramid.java | 36 +++++++----------- .../engine/indirect/IndirectBuffers.java | 4 +- .../engine/indirect/IndirectCullingGroup.java | 11 +++++- .../engine/indirect/VisibilityBuffer.java | 38 ++++++++++--------- .../flywheel/backend/gl/GlCompat.java | 3 ++ .../flywheel/internal/indirect/dispatch.glsl | 6 +++ .../internal/indirect/early_cull.glsl | 8 +--- .../flywheel/internal/indirect/late_cull.glsl | 8 +++- .../internal/indirect/read_visibility.glsl | 2 +- 9 files changed, 65 insertions(+), 51 deletions(-) create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java index 49afe0776..1ba12b86f 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -13,23 +13,13 @@ import net.minecraft.client.Minecraft; public class DepthPyramid { private final GlProgram depthReduceProgram; - public final int pyramidTextureId; + public int pyramidTextureId = -1; private int lastWidth = -1; private int lastHeight = -1; public DepthPyramid(GlProgram depthReduceProgram) { this.depthReduceProgram = depthReduceProgram; - - pyramidTextureId = GL32.glGenTextures(); - - GlStateManager._bindTexture(pyramidTextureId); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); - } public void generate() { @@ -45,12 +35,9 @@ public class DepthPyramid { int depthBufferId = mainRenderTarget.getDepthTextureId(); - GlTextureUnit.T1.makeActive(); - GlStateManager._bindTexture(depthBufferId); - GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); - GL46.glActiveTexture(GL32.GL_TEXTURE1); + GlTextureUnit.T1.makeActive(); depthReduceProgram.bind(); @@ -73,7 +60,10 @@ public class DepthPyramid { } public void delete() { - GL32.glDeleteTextures(pyramidTextureId); + if (pyramidTextureId != -1) { + GL32.glDeleteTextures(pyramidTextureId); + pyramidTextureId = -1; + } } private void createPyramidMips(int mipLevels, int width, int height) { @@ -84,14 +74,16 @@ public class DepthPyramid { lastWidth = width; lastHeight = height; - GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId); + delete(); - for (int i = 0; i < mipLevels; i++) { - int mipWidth = mipSize(width, i); - int mipHeight = mipSize(height, i); + pyramidTextureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + GL46.glTextureStorage2D(pyramidTextureId, mipLevels, GL32.GL_R32F, width, height); - GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipWidth, mipHeight, 0, GL32.GL_RED, GL32.GL_FLOAT, 0); - } + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); } public static int mipSize(int mip0Size, int level) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index cfc6cdcf6..fdd97bc85 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -105,7 +105,7 @@ public class IndirectBuffers { MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity()); - MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, passTwoInstanceIndex.capacity()); + MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity()); MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); @@ -118,7 +118,7 @@ public class IndirectBuffers { } public void bindForCullPassTwo() { - multiBind(1, 5); + multiBind(0, 6); GlBufferType.DISPATCH_INDIRECT_BUFFER.bind(passTwoDispatch.handle()); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 278d95b6c..0d3693187 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -14,6 +14,8 @@ import java.util.EnumMap; import java.util.List; import java.util.Map; +import org.lwjgl.system.MemoryUtil; + import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.InstanceType; import dev.engine_room.flywheel.api.material.Material; @@ -127,7 +129,12 @@ public class IndirectCullingGroup { uploadDraws(stagingBuffer); - needsDrawBarrier = true; + stagingBuffer.enqueueCopy(4 * Integer.BYTES, buffers.passTwoDispatch.handle(), 0, ptr -> { + MemoryUtil.memPutInt(ptr, 0); + MemoryUtil.memPutInt(ptr + 4, 1); + MemoryUtil.memPutInt(ptr + 8, 1); + MemoryUtil.memPutInt(ptr + 12, 0); + }); } public void dispatchCull() { @@ -163,6 +170,8 @@ public class IndirectCullingGroup { buffers.bindForApply(); glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1); + + needsDrawBarrier = true; } public void dispatchModelReset() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index b3810a882..8cfeaecc9 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -20,8 +20,8 @@ public class VisibilityBuffer { private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; private final GlProgram readVisibilityProgram; - private final ResizableStorageBuffer lastFrameVisibility; - private final int textureId; + private final ResizableStorageArray lastFrameVisibility; + private int textureId = -1; private int lastWidth = -1; private int lastHeight = -1; @@ -30,14 +30,7 @@ public class VisibilityBuffer { public VisibilityBuffer(GlProgram readVisibilityProgram) { this.readVisibilityProgram = readVisibilityProgram; - lastFrameVisibility = new ResizableStorageBuffer(); - textureId = GL32.glGenTextures(); - - GlStateManager._bindTexture(textureId); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + lastFrameVisibility = new ResizableStorageArray(Integer.BYTES, 1.25f); } public void read(int pageCount) { @@ -45,7 +38,7 @@ public class VisibilityBuffer { return; } - lastFrameVisibility.ensureCapacity((long) pageCount << 2); + lastFrameVisibility.ensureCapacity(pageCount); GL46.nglClearNamedBufferData(lastFrameVisibility.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0); @@ -95,7 +88,15 @@ public class VisibilityBuffer { } public void delete() { - GL32.glDeleteTextures(textureId); + deleteTexture(); + lastFrameVisibility.delete(); + } + + private void deleteTexture() { + if (textureId != -1) { + GL32.glDeleteTextures(textureId); + textureId = -1; + } } public void clear() { @@ -117,11 +118,14 @@ public class VisibilityBuffer { lastWidth = width; lastHeight = height; - GlTextureUnit.T0.makeActive(); - GlStateManager._bindTexture(textureId); + deleteTexture(); - // TODO: DSA texture storage? - GL32.glTexImage2D(GL32.GL_TEXTURE_2D, 0, GL32.GL_R32UI, width, height, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0); - GlStateManager._bindTexture(0); + textureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + GL46.glTextureStorage2D(textureId, 1, GL32.GL_R32UI, width, height); + + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java index b7efa23ed..8d80254c3 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java @@ -10,6 +10,7 @@ import org.lwjgl.opengl.GL20C; import org.lwjgl.opengl.GL31C; import org.lwjgl.opengl.GL40; import org.lwjgl.opengl.GL43; +import org.lwjgl.opengl.GL46; import org.lwjgl.opengl.GLCapabilities; import org.lwjgl.opengl.KHRShaderSubgroup; import org.lwjgl.system.MemoryStack; @@ -42,6 +43,8 @@ public final class GlCompat { public static final boolean SUPPORTS_INSTANCING = isInstancingSupported(); public static final boolean SUPPORTS_INDIRECT = isIndirectSupported(); + public static final int MAX_SHADER_STORAGE_BUFFER_BINDINGS = GL46.glGetInteger(GL46.GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); + private GlCompat() { } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl new file mode 100644 index 000000000..b0989a7a7 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl @@ -0,0 +1,6 @@ +struct _FlwLateCullDispatch { + uint x; + uint y; + uint z; + uint threadCount; +}; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl index e64869552..840d3813d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -3,18 +3,12 @@ #include "flywheel:internal/uniforms/uniforms.glsl" #include "flywheel:util/matrix.glsl" #include "flywheel:internal/indirect/matrices.glsl" +#include "flywheel:internal/indirect/dispatch.glsl" layout(local_size_x = 32) in; uniform uint _flw_visibilityReadOffsetPages; -struct _FlwLateCullDispatch { - uint x; - uint y; - uint z; - uint threadCount; -}; - layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer { _FlwLateCullDispatch _flw_lateCullDispatch; }; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl index 574170cbe..4a32340b9 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl @@ -3,9 +3,15 @@ #include "flywheel:internal/uniforms/uniforms.glsl" #include "flywheel:util/matrix.glsl" #include "flywheel:internal/indirect/matrices.glsl" +#include "flywheel:internal/indirect/dispatch.glsl" layout(local_size_x = 32) in; + +layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer { + _FlwLateCullDispatch _flw_lateCullDispatch; +}; + layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { uint _flw_passTwoIndices[]; }; @@ -111,7 +117,7 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { } void main() { - if (gl_GlobalInvocationID.x >= _flw_passTwoIndices.length()) { + if (gl_GlobalInvocationID.x >= _flw_lateCullDispatch.threadCount) { return; } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl index 3a4d9d9dc..52d4c655f 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl @@ -1,6 +1,6 @@ #include "flywheel:internal/indirect/buffer_bindings.glsl" -layout(local_size_x = 8, local_size_y = 8) in; +layout(local_size_x = 16, local_size_y = 16) in; layout(binding = 0) uniform usampler2D visBuffer;