It's alive

- Fix crash by resetting the indirect dispatch buffer each frame
- Use DSA + immutable storage for depth pyramid and visibility buffer
- In pass two, check against the thread count written out in pass one to
  return early
- Require a draw barrier after each apply dispatch
- Use a storage array for the last frame visibility buffer
This commit is contained in:
Jozufozu 2024-09-09 14:20:25 -07:00
parent 4552716b74
commit f12aa15dae
9 changed files with 65 additions and 51 deletions

View File

@ -13,23 +13,13 @@ import net.minecraft.client.Minecraft;
public class DepthPyramid {
private final GlProgram depthReduceProgram;
public final int pyramidTextureId;
public int pyramidTextureId = -1;
private int lastWidth = -1;
private int lastHeight = -1;
public DepthPyramid(GlProgram depthReduceProgram) {
this.depthReduceProgram = depthReduceProgram;
pyramidTextureId = GL32.glGenTextures();
GlStateManager._bindTexture(pyramidTextureId);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
}
public void generate() {
@ -45,12 +35,9 @@ public class DepthPyramid {
int depthBufferId = mainRenderTarget.getDepthTextureId();
GlTextureUnit.T1.makeActive();
GlStateManager._bindTexture(depthBufferId);
GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT);
GL46.glActiveTexture(GL32.GL_TEXTURE1);
GlTextureUnit.T1.makeActive();
depthReduceProgram.bind();
@ -73,7 +60,10 @@ public class DepthPyramid {
}
/**
 * Deletes the pyramid texture if one currently exists.
 *
 * <p>Does nothing when {@code pyramidTextureId} is already {@code -1}.
 */
public void delete() {
if (pyramidTextureId != -1) {
GL32.glDeleteTextures(pyramidTextureId);
// Reset to the -1 "no texture" value so a later call does not delete the same id again.
pyramidTextureId = -1;
}
}
private void createPyramidMips(int mipLevels, int width, int height) {
@ -84,14 +74,16 @@ public class DepthPyramid {
lastWidth = width;
lastHeight = height;
GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId);
delete();
for (int i = 0; i < mipLevels; i++) {
int mipWidth = mipSize(width, i);
int mipHeight = mipSize(height, i);
pyramidTextureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
GL46.glTextureStorage2D(pyramidTextureId, mipLevels, GL32.GL_R32F, width, height);
GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipWidth, mipHeight, 0, GL32.GL_RED, GL32.GL_FLOAT, 0);
}
GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE);
GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
}
public static int mipSize(int mip0Size, int level) {

View File

@ -105,7 +105,7 @@ public class IndirectBuffers {
MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle());
MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity());
MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, passTwoInstanceIndex.capacity());
MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity());
MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity());
MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
@ -118,7 +118,7 @@ public class IndirectBuffers {
}
public void bindForCullPassTwo() {
multiBind(1, 5);
multiBind(0, 6);
GlBufferType.DISPATCH_INDIRECT_BUFFER.bind(passTwoDispatch.handle());
}

View File

@ -14,6 +14,8 @@ import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import org.lwjgl.system.MemoryUtil;
import dev.engine_room.flywheel.api.instance.Instance;
import dev.engine_room.flywheel.api.instance.InstanceType;
import dev.engine_room.flywheel.api.material.Material;
@ -127,7 +129,12 @@ public class IndirectCullingGroup<I extends Instance> {
uploadDraws(stagingBuffer);
needsDrawBarrier = true;
stagingBuffer.enqueueCopy(4 * Integer.BYTES, buffers.passTwoDispatch.handle(), 0, ptr -> {
MemoryUtil.memPutInt(ptr, 0);
MemoryUtil.memPutInt(ptr + 4, 1);
MemoryUtil.memPutInt(ptr + 8, 1);
MemoryUtil.memPutInt(ptr + 12, 0);
});
}
public void dispatchCull() {
@ -163,6 +170,8 @@ public class IndirectCullingGroup<I extends Instance> {
buffers.bindForApply();
glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1);
needsDrawBarrier = true;
}
public void dispatchModelReset() {

View File

@ -20,8 +20,8 @@ public class VisibilityBuffer {
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
private final GlProgram readVisibilityProgram;
private final ResizableStorageBuffer lastFrameVisibility;
private final int textureId;
private final ResizableStorageArray lastFrameVisibility;
private int textureId = -1;
private int lastWidth = -1;
private int lastHeight = -1;
@ -30,14 +30,7 @@ public class VisibilityBuffer {
public VisibilityBuffer(GlProgram readVisibilityProgram) {
this.readVisibilityProgram = readVisibilityProgram;
lastFrameVisibility = new ResizableStorageBuffer();
textureId = GL32.glGenTextures();
GlStateManager._bindTexture(textureId);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
lastFrameVisibility = new ResizableStorageArray(Integer.BYTES, 1.25f);
}
public void read(int pageCount) {
@ -45,7 +38,7 @@ public class VisibilityBuffer {
return;
}
lastFrameVisibility.ensureCapacity((long) pageCount << 2);
lastFrameVisibility.ensureCapacity(pageCount);
GL46.nglClearNamedBufferData(lastFrameVisibility.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0);
@ -95,7 +88,15 @@ public class VisibilityBuffer {
}
/**
 * Releases the resources held by this buffer: the texture (if one exists)
 * and the last-frame visibility storage.
 */
public void delete() {
deleteTexture();
lastFrameVisibility.delete();
}
/**
 * Deletes the texture if one currently exists.
 *
 * <p>Does nothing when {@code textureId} is already {@code -1}.
 */
private void deleteTexture() {
if (textureId != -1) {
GL32.glDeleteTextures(textureId);
// Reset to the -1 "no texture" value so a later call does not delete the same id again.
textureId = -1;
}
}
public void clear() {
@ -117,11 +118,14 @@ public class VisibilityBuffer {
lastWidth = width;
lastHeight = height;
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(textureId);
deleteTexture();
// TODO: DSA texture storage?
GL32.glTexImage2D(GL32.GL_TEXTURE_2D, 0, GL32.GL_R32UI, width, height, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0);
GlStateManager._bindTexture(0);
textureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
GL46.glTextureStorage2D(textureId, 1, GL32.GL_R32UI, width, height);
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
}
}

View File

@ -10,6 +10,7 @@ import org.lwjgl.opengl.GL20C;
import org.lwjgl.opengl.GL31C;
import org.lwjgl.opengl.GL40;
import org.lwjgl.opengl.GL43;
import org.lwjgl.opengl.GL46;
import org.lwjgl.opengl.GLCapabilities;
import org.lwjgl.opengl.KHRShaderSubgroup;
import org.lwjgl.system.MemoryStack;
@ -42,6 +43,8 @@ public final class GlCompat {
public static final boolean SUPPORTS_INSTANCING = isInstancingSupported();
public static final boolean SUPPORTS_INDIRECT = isIndirectSupported();
public static final int MAX_SHADER_STORAGE_BUFFER_BINDINGS = GL46.glGetInteger(GL46.GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
private GlCompat() {
}

View File

@ -0,0 +1,6 @@
// Contents of the pass-two dispatch buffer, declared as PassTwoDispatchBuffer by the culling shaders.
// The CPU side rewrites it as four consecutive 32-bit ints (0, 1, 1, 0) before culling.
struct _FlwLateCullDispatch {
// x, y, z: presumably the work group counts for the indirect pass-two dispatch, since the
// same buffer is bound as the dispatch-indirect buffer for pass two -- confirm against the host code.
uint x;
uint y;
uint z;
// Pass two compares gl_GlobalInvocationID.x against this value and returns early
// for invocations at or beyond it.
uint threadCount;
};

View File

@ -3,18 +3,12 @@
#include "flywheel:internal/uniforms/uniforms.glsl"
#include "flywheel:util/matrix.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
#include "flywheel:internal/indirect/dispatch.glsl"
layout(local_size_x = 32) in;
uniform uint _flw_visibilityReadOffsetPages;
struct _FlwLateCullDispatch {
uint x;
uint y;
uint z;
uint threadCount;
};
layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer {
_FlwLateCullDispatch _flw_lateCullDispatch;
};

View File

@ -3,9 +3,15 @@
#include "flywheel:internal/uniforms/uniforms.glsl"
#include "flywheel:util/matrix.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
#include "flywheel:internal/indirect/dispatch.glsl"
layout(local_size_x = 32) in;
layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer {
_FlwLateCullDispatch _flw_lateCullDispatch;
};
layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndices[];
};
@ -111,7 +117,7 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
}
void main() {
if (gl_GlobalInvocationID.x >= _flw_passTwoIndices.length()) {
if (gl_GlobalInvocationID.x >= _flw_lateCullDispatch.threadCount) {
return;
}

View File

@ -1,6 +1,6 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
layout(local_size_x = 8, local_size_y = 8) in;
layout(local_size_x = 16, local_size_y = 16) in;
layout(binding = 0) uniform usampler2D visBuffer;