Buff to the buffers

- Smarter multibind logic
- Make offsets in IndirectBuffers dependent on BufferBindings
- Organize buffer bindings based on where they're used to allow each
  pass to bind exactly which buffers it needs
- Add stub dispatchCullPassTwo to IndirectCullingGroup
- Add pass two buffers to IndirectBuffers
This commit is contained in:
Jozufozu 2024-09-08 11:00:04 -05:00
parent 9009bfe730
commit 1edb72ac19
8 changed files with 133 additions and 79 deletions

View File

@ -1,14 +1,18 @@
package dev.engine_room.flywheel.backend.engine.indirect;
public final class BufferBindings {
public static final int INSTANCE = 0;
public static final int TARGET = 1;
public static final int PASS_TWO_DISPATCH = 0;
public static final int PASS_TWO_INSTANCE_INDEX = 1;
public static final int PAGE_FRAME_DESCRIPTOR = 2;
public static final int MODEL = 3;
public static final int DRAW = 4;
public static final int LIGHT_LUT = 5;
public static final int LIGHT_SECTION = 6;
public static final int MATRICES = 7;
public static final int INSTANCE = 3;
public static final int DRAW_INSTANCE_INDEX = 4;
public static final int MODEL = 5;
public static final int DRAW = 6;
public static final int LIGHT_LUT = 7;
public static final int LIGHT_SECTION = 8;
public static final int MATRICES = 9;
public static final int LAST_FRAME_VISIBILITY = 10;
private BufferBindings() {
}

View File

@ -11,7 +11,7 @@ import dev.engine_room.flywheel.lib.memory.MemoryBlock;
public class IndirectBuffers {
// Number of vbos created.
public static final int BUFFER_COUNT = 5;
public static final int BUFFER_COUNT = 7;
public static final long INT_SIZE = Integer.BYTES;
public static final long PTR_SIZE = Pointer.POINTER_SIZE;
@ -30,18 +30,23 @@ public class IndirectBuffers {
private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE;
// Offsets to the vbos
private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET;
private static final long TARGET_HANDLE_OFFSET = INT_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = INT_SIZE * 2;
private static final long MODEL_HANDLE_OFFSET = INT_SIZE * 3;
private static final long DRAW_HANDLE_OFFSET = INT_SIZE * 4;
private static final long PASS_TWO_DISPATCH_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_DISPATCH * INT_SIZE;
private static final long PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * INT_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * INT_SIZE;
private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.INSTANCE * INT_SIZE;
private static final long DRAW_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * INT_SIZE;
private static final long MODEL_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.MODEL * INT_SIZE;
private static final long DRAW_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW * INT_SIZE;
// Offsets to the sizes
private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET;
private static final long TARGET_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 2;
private static final long MODEL_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 3;
private static final long DRAW_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 4;
private static final long PASS_TWO_DISPATCH_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PASS_TWO_DISPATCH * PTR_SIZE;
private static final long PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * PTR_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * PTR_SIZE;
private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.INSTANCE * PTR_SIZE;
private static final long DRAW_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * PTR_SIZE;
private static final long MODEL_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.MODEL * PTR_SIZE;
private static final long DRAW_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.DRAW * PTR_SIZE;
private static final float INSTANCE_GROWTH_FACTOR = 1.25f;
private static final float MODEL_GROWTH_FACTOR = 2f;
@ -62,8 +67,10 @@ public class IndirectBuffers {
private final MemoryBlock multiBindBlock;
private final long instanceStride;
public final ResizableStorageBuffer passTwoDispatch;
public final ResizableStorageArray passTwoInstanceIndex;
public final ObjectStorage objectStorage;
public final ResizableStorageArray target;
public final ResizableStorageArray drawInstanceIndex;
public final ResizableStorageArray model;
public final ResizableStorageArray draw;
@ -71,59 +78,79 @@ public class IndirectBuffers {
this.instanceStride = instanceStride;
this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1);
passTwoDispatch = new ResizableStorageBuffer();
passTwoInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
objectStorage = new ObjectStorage(instanceStride);
target = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
drawInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
model = new ResizableStorageArray(MODEL_STRIDE, MODEL_GROWTH_FACTOR);
draw = new ResizableStorageArray(DRAW_COMMAND_STRIDE, DRAW_GROWTH_FACTOR);
passTwoDispatch.ensureCapacity(INT_SIZE * 4);
}
void updateCounts(int instanceCount, int modelCount, int drawCount) {
target.ensureCapacity(instanceCount);
drawInstanceIndex.ensureCapacity(instanceCount);
passTwoDispatch.ensureCapacity(instanceCount);
model.ensureCapacity(modelCount);
draw.ensureCapacity(drawCount);
final long ptr = multiBindBlock.ptr();
MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle());
MemoryUtil.memPutInt(ptr + TARGET_HANDLE_OFFSET, target.handle());
MemoryUtil.memPutInt(ptr + PASS_TWO_DISPATCH_HANDLE_OFFSET, passTwoDispatch.handle());
MemoryUtil.memPutInt(ptr + PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle());
MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle());
MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle());
MemoryUtil.memPutInt(ptr + DRAW_INSTANCE_INDEX_HANDLE_OFFSET, drawInstanceIndex.handle());
MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle());
MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle());
MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity());
MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * instanceCount);
MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity());
MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity());
MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity());
MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity());
MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount);
MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount);
}
public void bindForCompute() {
multiBind();
public void bindForCullPassOne() {
multiBind(0, 6);
}
public void bindForCullPassTwo() {
multiBind(1, 5);
GlBufferType.DISPATCH_INDIRECT_BUFFER.bind(passTwoDispatch.handle());
}
public void bindForApply() {
multiBind(5, 2);
}
public void bindForDraw() {
multiBind();
multiBind(3, 4);
GlBufferType.DRAW_INDIRECT_BUFFER.bind(draw.handle());
}
private void multiBind() {
final long ptr = multiBindBlock.ptr();
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, BufferBindings.INSTANCE, IndirectBuffers.BUFFER_COUNT, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET);
}
/**
* Bind all buffers except the draw command buffer.
*/
public void bindForCrumbling() {
multiBind(3, 3);
}
private void multiBind(int base, int count) {
final long ptr = multiBindBlock.ptr();
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, BufferBindings.INSTANCE, 4, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET);
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, base, count, ptr + base * INT_SIZE, ptr + OFFSET_OFFSET + base * PTR_SIZE, ptr + SIZE_OFFSET + base * PTR_SIZE);
}
public void delete() {
multiBindBlock.free();
objectStorage.delete();
target.delete();
drawInstanceIndex.delete();
model.delete();
draw.delete();
passTwoDispatch.delete();
passTwoInstanceIndex.delete();
}
}

View File

@ -6,6 +6,7 @@ import static org.lwjgl.opengl.GL30.glUniform1ui;
import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT;
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
import static org.lwjgl.opengl.GL43.glDispatchCompute;
import static org.lwjgl.opengl.GL43.glDispatchComputeIndirect;
import java.util.ArrayList;
import java.util.Comparator;
@ -44,7 +45,8 @@ public class IndirectCullingGroup<I extends Instance> {
private final Map<VisualType, List<MultiDraw>> multiDraws = new EnumMap<>(VisualType.class);
private final IndirectPrograms programs;
private final GlProgram cullProgram;
private final GlProgram earlyCull;
private final GlProgram lateCull;
private boolean needsDrawBarrier;
private boolean needsDrawSort;
@ -63,7 +65,8 @@ public class IndirectCullingGroup<I extends Instance> {
buffers = new IndirectBuffers(instanceStride);
this.programs = programs;
cullProgram = programs.getCullingProgram(instanceType);
earlyCull = programs.getCullingProgram(instanceType);
lateCull = programs.getCullPassTwoProgram(instanceType);
}
public void flushInstancers() {
@ -133,20 +136,32 @@ public class IndirectCullingGroup<I extends Instance> {
}
Uniforms.bindAll();
cullProgram.bind();
earlyCull.bind();
cullProgram.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages);
earlyCull.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages);
buffers.bindForCompute();
buffers.bindForCullPassOne();
glDispatchCompute(buffers.objectStorage.capacity(), 1, 1);
}
public void dispatchCullPassTwo() {
if (nothingToDo()) {
return;
}
Uniforms.bindAll();
lateCull.bind();
buffers.bindForCullPassTwo();
glDispatchComputeIndirect(0);
}
public void dispatchApply() {
if (nothingToDo()) {
return;
}
buffers.bindForCompute();
buffers.bindForApply();
glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1);
}

View File

@ -20,7 +20,7 @@ public class VisibilityBuffer {
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
private final GlProgram readVisibilityProgram;
private final ResizableStorageBuffer visibilityBitset;
private final ResizableStorageBuffer lastFrameVisibility;
private final int textureId;
private int lastWidth = -1;
@ -30,7 +30,7 @@ public class VisibilityBuffer {
public VisibilityBuffer(GlProgram readVisibilityProgram) {
this.readVisibilityProgram = readVisibilityProgram;
visibilityBitset = new ResizableStorageBuffer();
lastFrameVisibility = new ResizableStorageBuffer();
textureId = GL32.glGenTextures();
GlStateManager._bindTexture(textureId);
@ -45,16 +45,16 @@ public class VisibilityBuffer {
return;
}
visibilityBitset.ensureCapacity((long) pageCount << 2);
lastFrameVisibility.ensureCapacity((long) pageCount << 2);
GL46.nglClearNamedBufferData(visibilityBitset.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0);
GL46.nglClearNamedBufferData(lastFrameVisibility.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0);
if (lastWidth == -1 || lastHeight == -1) {
return;
}
readVisibilityProgram.bind();
GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, 0, visibilityBitset.handle());
GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.LAST_FRAME_VISIBILITY, lastFrameVisibility.handle());
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(textureId);

View File

@ -1,11 +1,17 @@
#define _FLW_INSTANCE_BUFFER_BINDING 0
#define _FLW_TARGET_BUFFER_BINDING 1
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2
#define _FLW_MODEL_BUFFER_BINDING 3
#define _FLW_DRAW_BUFFER_BINDING 4
#define _FLW_LIGHT_LUT_BUFFER_BINDING 5
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6
#define _FLW_MATRIX_BUFFER_BINDING 7
#define _FLW_PASS_TWO_BUFFER_BINDING 8
#define _FLW_LATE_CULL_BUFFER_BINDING 9
// Per culling group
#define _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING 0 // cull1
#define _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING 1 // cull1, cull2
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2 // cull1, cull2
#define _FLW_INSTANCE_BUFFER_BINDING 3 // cull1, cull2, draw
#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 4 // cull1, cull2, draw
#define _FLW_MODEL_BUFFER_BINDING 5 // cull1, cull2, apply
#define _FLW_DRAW_BUFFER_BINDING 6 // apply, draw
// Global to the engine
#define _FLW_LIGHT_LUT_BUFFER_BINDING 7
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 8
#define _FLW_MATRIX_BUFFER_BINDING 9
#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 10

View File

@ -8,14 +8,25 @@ layout(local_size_x = 32) in;
uniform uint _flw_visibilityReadOffsetPages;
layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
struct _FlwLateCullDispatch {
uint x;
uint y;
uint z;
uint threadCount;
};
layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer {
layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer {
_FlwLateCullDispatch _flw_lateCullDispatch;
};
layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndicies[];
};
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};
// High 6 bits for the number of instances in the page.
const uint _FLW_PAGE_COUNT_OFFSET = 26u;
// Bottom 26 bits for the model index.
@ -26,7 +37,7 @@ layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict rea
};
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict readonly buffer LastFrameVisibilityBuffer {
uint _flw_visibleFlag[];
uint _flw_lastFrameVisibility[];
};
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
@ -37,17 +48,6 @@ layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer Ma
Matrices _flw_matrices[];
};
struct _FlwLateCullDispatch {
uint x;
uint y;
uint z;
uint threadCount;
};
layout(std430, binding = _FLW_LATE_CULL_BUFFER_BINDING) restrict buffer LateCullBuffer {
_FlwLateCullDispatch _flw_lateCullDispatch;
};
// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
// Only uses 6 fmas and some boolean ops.
// See also:
@ -103,7 +103,7 @@ void main() {
return;
}
uint pageVisibility = _flw_visibleFlag[_flw_visibilityReadOffsetPages + pageIndex];
uint pageVisibility = _flw_lastFrameVisibility[_flw_visibilityReadOffsetPages + pageIndex];
if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) {
// This instance was visibile last frame, it should be rendered early.

View File

@ -6,12 +6,12 @@
layout(local_size_x = 32) in;
layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndicies[];
};
layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndicies[];
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};
layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer {

View File

@ -1,9 +1,11 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
layout(local_size_x = 8, local_size_y = 8) in;
layout(binding = 0) uniform usampler2D visBuffer;
layout(std430) restrict buffer VisibleFlagBuffer {
uint _flw_visibleFlag[];
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buffer LastFrameVisibilityBuffer {
uint _flw_lastFrameVisibility[];
};
void main() {
@ -21,5 +23,5 @@ void main() {
uint mask = 1u << (instanceID & 31u);
atomicOr(_flw_visibleFlag[index], mask);
atomicOr(_flw_lastFrameVisibility[index], mask);
}