From cba04adc91bd8eb3a92df526c9c492f067bbd041 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 7 Dec 2023 13:43:48 -0800 Subject: [PATCH] More smarter less upload - Only write draw calls when they change. - Update baseInstance in the apply shader. - Simplify object upload logic. --- .../backend/engine/AbstractInstancer.java | 2 +- .../engine/indirect/IndirectCullingGroup.java | 38 +++++++++++-------- .../backend/engine/indirect/IndirectDraw.java | 4 +- .../engine/indirect/IndirectInstancer.java | 31 +++++++++++---- .../engine/indirect/IndirectModel.java | 12 +----- .../engine/indirect/StagingBuffer.java | 3 +- .../flywheel/internal/indirect/apply.glsl | 4 +- 7 files changed, 55 insertions(+), 39 deletions(-) diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java index c9d534576..444dabb05 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java @@ -15,7 +15,6 @@ public abstract class AbstractInstancer implements Instancer protected final ArrayList instances = new ArrayList<>(); protected final ArrayList handles = new ArrayList<>(); - // TODO: atomic bitset? protected final BitSet changed = new BitSet(); protected final BitSet deleted = new BitSet(); @@ -45,6 +44,7 @@ public abstract class AbstractInstancer implements Instancer if (index < 0 || index >= getInstanceCount()) { return; } + // TODO: Atomic bitset. Synchronizing here blocks the task executor and causes massive overhead. synchronized (lock) { changed.set(index); } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 0b5a022f2..5512b96a1 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -45,7 +45,7 @@ public class IndirectCullingGroup { private final List indirectDraws = new ArrayList<>(); private final Map> multiDraws = new EnumMap<>(RenderStage.class); private boolean needsDrawBarrier; - private boolean needsSortDraws; + private boolean hasNewDraws; private int instanceCountThisFrame; IndirectCullingGroup(InstanceType instanceType) { @@ -72,15 +72,23 @@ public class IndirectCullingGroup { buffers.updateCounts(instanceCountThisFrame, indirectModels.size(), indirectDraws.size()); - if (needsSortDraws) { - sortDraws(); - needsSortDraws = false; - } - + // Must flush the mesh pool first so everything else has the right baseVertex and baseIndex. meshPool.flush(stagingBuffer); + + // Upload only objects that have changed. uploadObjects(stagingBuffer); + + // We need to upload the models every frame to reset the instance count. uploadModels(stagingBuffer); - uploadDraws(stagingBuffer); + + if (hasNewDraws) { + sortDraws(); + // Draws, however, only need to be updated when we get new ones. + // The instanceCount and baseInstance will be updated by the applyProgram, + // and all other fields are constant to the lifetime of the draw. + uploadDraws(stagingBuffer); + hasNewDraws = false; + } } public void dispatchCull() { @@ -161,7 +169,7 @@ public class IndirectCullingGroup { indirectDraws.add(new IndirectDraw(indirectModel, entry.getKey(), bufferedMesh, stage)); } - needsSortDraws = true; + hasNewDraws = true; } public void submit(RenderStage stage) { @@ -193,9 +201,9 @@ public class IndirectCullingGroup { private void uploadObjects(StagingBuffer stagingBuffer) { long pos = 0; - for (IndirectModel batch : indirectModels) { - var instanceCount = batch.instancer.getInstanceCount(); - batch.writeObjects(stagingBuffer, pos, buffers.object.handle()); + for (IndirectModel model : indirectModels) { + var instanceCount = model.instancer.getInstanceCount(); + model.writeObjects(stagingBuffer, pos, buffers.object.handle()); pos += instanceCount * objectStride; } @@ -216,15 +224,15 @@ public class IndirectCullingGroup { } private void writeModels(long writePtr) { - for (var batch : indirectModels) { - batch.write(writePtr); + for (var model : indirectModels) { + model.write(writePtr); writePtr += IndirectBuffers.MODEL_STRIDE; } } private void writeCommands(long writePtr) { - for (var batch : indirectDraws) { - batch.write(writePtr); + for (var draw : indirectDraws) { + draw.write(writePtr); writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE; } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java index ee0cfb206..718d198e0 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java @@ -47,9 +47,9 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount - to be set by the apply shader MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex()); // firstIndex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex()); // baseVertex - MemoryUtil.memPutInt(ptr + 16, model.baseInstance()); // baseInstance + MemoryUtil.memPutInt(ptr + 16, 0); // baseInstance - to be set by the apply shader - MemoryUtil.memPutInt(ptr + 20, model.index); // modelIndex + MemoryUtil.memPutInt(ptr + 20, model.index); // modelIndex - never changes MemoryUtil.memPutInt(ptr + 24, materialVertexIndex); // materialVertexIndex MemoryUtil.memPutInt(ptr + 28, materialFragmentIndex); // materialFragmentIndex diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java index 757b867ac..806bccaea 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -12,6 +12,8 @@ public class IndirectInstancer extends AbstractInstancer private final InstanceWriter writer; private int modelIndex; + private long lastStartPos = -1; + public IndirectInstancer(InstanceType type) { super(type); long instanceStride = type.getLayout() @@ -24,21 +26,38 @@ public class IndirectInstancer extends AbstractInstancer removeDeletedInstances(); } - public void writeChanged(StagingBuffer stagingBuffer, long start, int dstVbo) { + public void write(StagingBuffer stagingBuffer, long startPos, int dstVbo) { + if (shouldWriteAll(startPos)) { + writeAll(stagingBuffer, startPos, dstVbo); + } else { + writeChanged(stagingBuffer, startPos, dstVbo); + } + + changed.clear(); + lastStartPos = startPos; + } + + private boolean shouldWriteAll(long startPos) { + // If enough of the buffer has changed, write the whole thing to avoid the overhead of a bunch of small writes. + return startPos != lastStartPos || moreThanTwoThirdsChanged(); + } + + private boolean moreThanTwoThirdsChanged() { + return (changed.cardinality() * 3) > (instances.size() * 2); + } + + private void writeChanged(StagingBuffer stagingBuffer, long start, int dstVbo) { int count = instances.size(); for (int i = changed.nextSetBit(0); i >= 0 && i < count; i = changed.nextSetBit(i + 1)) { var instance = instances.get(i); stagingBuffer.enqueueCopy(objectStride, dstVbo, start + i * objectStride, ptr -> writeOne(ptr, instance)); } - changed.clear(); } - public void writeAll(StagingBuffer stagingBuffer, long start, int dstVbo) { + private void writeAll(StagingBuffer stagingBuffer, long start, int dstVbo) { long totalSize = objectStride * instances.size(); stagingBuffer.enqueueCopy(totalSize, dstVbo, start, this::writeAll); - - changed.clear(); } private void writeAll(long ptr) { @@ -49,9 +68,7 @@ public class IndirectInstancer extends AbstractInstancer } private void writeOne(long ptr, I instance) { - // write modelID MemoryUtil.memPutInt(ptr, modelIndex); - // write object writer.write(ptr + IndirectBuffers.INT_SIZE, instance); } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java index 3942eb102..41d61079b 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java @@ -9,7 +9,6 @@ public class IndirectModel { private final Vector4fc boundingSphere; private int baseInstance = -1; - private boolean needsFullWrite = true; public IndirectModel(IndirectInstancer instancer, int index, Vector4fc boundingSphere) { this.instancer = instancer; @@ -23,20 +22,11 @@ public class IndirectModel { public void prepare(int baseInstance) { instancer.update(); - if (baseInstance == this.baseInstance) { - needsFullWrite = false; - return; - } this.baseInstance = baseInstance; - needsFullWrite = true; } public void writeObjects(StagingBuffer stagingBuffer, long start, int dstVbo) { - if (needsFullWrite) { - instancer.writeAll(stagingBuffer, start, dstVbo); - } else { - instancer.writeChanged(stagingBuffer, start, dstVbo); - } + instancer.write(stagingBuffer, start, dstVbo); } public void write(long ptr) { diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java index ec1e4ea9b..5d79e1e57 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java @@ -14,7 +14,8 @@ import com.jozufozu.flywheel.lib.memory.MemoryBlock; import it.unimi.dsi.fastutil.PriorityQueue; import it.unimi.dsi.fastutil.objects.ObjectArrayFIFOQueue; -// https://github.com/CaffeineMC/sodium-fabric/blob/dev/src/main/java/me/jellysquid/mods/sodium/client/gl/arena/staging/MappedStagingBuffer.java +// Used https://github.com/CaffeineMC/sodium-fabric/blob/dev/src/main/java/me/jellysquid/mods/sodium/client/gl/arena/staging/MappedStagingBuffer.java +// as a reference for implementation. Modified to be less safe and to allow for writing directly into the staging buffer. public class StagingBuffer { private static final long DEFAULT_CAPACITY = 1024 * 1024 * 16; private static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT; diff --git a/src/main/resources/assets/flywheel/flywheel/internal/indirect/apply.glsl b/src/main/resources/assets/flywheel/flywheel/internal/indirect/apply.glsl index 6c697615c..4c23bf5f3 100644 --- a/src/main/resources/assets/flywheel/flywheel/internal/indirect/apply.glsl +++ b/src/main/resources/assets/flywheel/flywheel/internal/indirect/apply.glsl @@ -21,6 +21,6 @@ void main() { } uint modelIndex = drawCommands[drawIndex].modelIndex; - uint instanceCount = models[modelIndex].instanceCount; - drawCommands[drawIndex].instanceCount = instanceCount; + drawCommands[drawIndex].instanceCount = models[modelIndex].instanceCount; + drawCommands[drawIndex].baseInstance = models[modelIndex].baseInstance; }