From c0d1e736e4cdc5cd978cb52c079de5c6f3d8bd77 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Tue, 5 Dec 2023 00:28:04 -0800 Subject: [PATCH] OSHA Theatre - Add staging buffer for indirect using unholy amounts of unsafe. - Make GlFence RAII. - Make all IndirectBuffers GPU only and use a shared staging buffer for transfers. --- .../backend/engine/InstancerStorage.java | 2 +- .../engine/batching/BatchedDrawManager.java | 4 +- .../engine/batching/BatchingEngine.java | 2 +- .../engine/indirect/IndirectBuffers.java | 52 +--- .../engine/indirect/IndirectCullingGroup.java | 59 +++-- .../engine/indirect/IndirectDrawManager.java | 29 ++- .../engine/indirect/IndirectEngine.java | 4 +- .../engine/indirect/IndirectInstancer.java | 69 ++++-- .../engine/indirect/IndirectMeshPool.java | 7 +- .../engine/indirect/IndirectModel.java | 6 +- .../engine/indirect/StagingBuffer.java | 230 ++++++++++++++++++ .../backend/engine/instancing/EBOCache.java | 6 +- .../instancing/InstancedDrawManager.java | 4 +- .../engine/instancing/InstancingEngine.java | 2 +- .../com/jozufozu/flywheel/gl/GlFence.java | 48 +--- .../jozufozu/flywheel/gl/buffer/Buffer.java | 2 + .../jozufozu/flywheel/gl/buffer/GlBuffer.java | 13 +- .../flywheel/gl/buffer/MappedBuffer.java | 4 +- 18 files changed, 389 insertions(+), 154 deletions(-) create mode 100644 src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/InstancerStorage.java b/src/main/java/com/jozufozu/flywheel/backend/engine/InstancerStorage.java index 6956b97f2..085bab90a 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/InstancerStorage.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/InstancerStorage.java @@ -66,7 +66,7 @@ public abstract class InstancerStorage> { } } - public void invalidate() { + public void delete() { instancers.clear(); uninitializedInstancers.clear(); diff --git 
a/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchedDrawManager.java b/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchedDrawManager.java index 9c645e3a2..a14eb5d3e 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchedDrawManager.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchedDrawManager.java @@ -69,8 +69,8 @@ class BatchedDrawManager extends InstancerStorage> { } @Override - public void invalidate() { - super.invalidate(); + public void delete() { + super.delete(); meshPools.values() .forEach(BatchedMeshPool::delete); diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchingEngine.java b/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchingEngine.java index 64d2bf5b3..f14b7f0ef 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchingEngine.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/batching/BatchingEngine.java @@ -58,6 +58,6 @@ public class BatchingEngine extends AbstractEngine { @Override public void delete() { - drawManager.invalidate(); + drawManager.delete(); } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java index 90ef672ad..ab77e0503 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -2,19 +2,12 @@ package com.jozufozu.flywheel.backend.engine.indirect; import static org.lwjgl.opengl.GL15.glDeleteBuffers; import static org.lwjgl.opengl.GL15.nglDeleteBuffers; -import static org.lwjgl.opengl.GL30.GL_MAP_FLUSH_EXPLICIT_BIT; -import static org.lwjgl.opengl.GL30.GL_MAP_WRITE_BIT; import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER; -import static org.lwjgl.opengl.GL44.GL_DYNAMIC_STORAGE_BIT; -import static 
org.lwjgl.opengl.GL44.GL_MAP_PERSISTENT_BIT; import static org.lwjgl.opengl.GL44.nglBindBuffersRange; import static org.lwjgl.opengl.GL45.glCopyNamedBufferSubData; import static org.lwjgl.opengl.GL45.glCreateBuffers; -import static org.lwjgl.opengl.GL45.glFlushMappedNamedBufferRange; import static org.lwjgl.opengl.GL45.glNamedBufferStorage; import static org.lwjgl.opengl.GL45.nglCreateBuffers; -import static org.lwjgl.opengl.GL45.nglMapNamedBufferRange; -import static org.lwjgl.opengl.GL45.nglNamedBufferSubData; import org.lwjgl.system.MemoryUtil; import org.lwjgl.system.Pointer; @@ -23,6 +16,7 @@ import com.jozufozu.flywheel.gl.buffer.GlBufferType; import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker; import com.jozufozu.flywheel.lib.memory.MemoryBlock; +// TODO: better abstractions public class IndirectBuffers { // Number of vbos created. public static final int BUFFER_COUNT = 4; @@ -36,12 +30,6 @@ public class IndirectBuffers { public static final long MODEL_STRIDE = 24; - // BITS - private static final int SUB_DATA_BITS = GL_DYNAMIC_STORAGE_BIT; - private static final int PERSISTENT_BITS = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT; - private static final int MAP_BITS = PERSISTENT_BITS | GL_MAP_FLUSH_EXPLICIT_BIT; - private static final int GPU_ONLY_BITS = 0; - // Offsets to the vbos private static final long VBO_OFFSET = 0; private static final long OBJECT_OFFSET = VBO_OFFSET; @@ -74,12 +62,11 @@ public class IndirectBuffers { */ private final MemoryBlock buffers; private final long objectStride; - private int object; - private int target; - private int model; - private int draw; + public int object; + public int target; + public int model; + public int draw; - long objectPtr; MemoryBlock modelPtr; MemoryBlock drawPtr; @@ -135,8 +122,8 @@ public class IndirectBuffers { int objectNew = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET); int targetNew = MemoryUtil.memGetInt(ptr + TARGET_OFFSET); - glNamedBufferStorage(objectNew, objectSize, PERSISTENT_BITS); - 
glNamedBufferStorage(targetNew, targetSize, GPU_ONLY_BITS); + glNamedBufferStorage(objectNew, objectSize, 0); + glNamedBufferStorage(targetNew, targetSize, 0); glCopyNamedBufferSubData(object, objectNew, 0, 0, objectStride * maxObjectCount); glCopyNamedBufferSubData(target, targetNew, 0, 0, INT_SIZE * maxObjectCount); @@ -147,11 +134,10 @@ public class IndirectBuffers { object = objectNew; target = targetNew; } else { - glNamedBufferStorage(object, objectSize, PERSISTENT_BITS); - glNamedBufferStorage(target, targetSize, GPU_ONLY_BITS); + glNamedBufferStorage(object, objectSize, 0); + glNamedBufferStorage(target, targetSize, 0); } - objectPtr = nglMapNamedBufferRange(object, 0, objectSize, MAP_BITS); maxObjectCount = objectCount; FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride); @@ -164,7 +150,7 @@ public class IndirectBuffers { if (maxModelCount > 0) { int modelNew = glCreateBuffers(); - glNamedBufferStorage(modelNew, modelSize, SUB_DATA_BITS); + glNamedBufferStorage(modelNew, modelSize, 0); glDeleteBuffers(model); @@ -172,7 +158,7 @@ public class IndirectBuffers { model = modelNew; modelPtr = modelPtr.realloc(modelSize); } else { - glNamedBufferStorage(model, modelSize, SUB_DATA_BITS); + glNamedBufferStorage(model, modelSize, 0); modelPtr = MemoryBlock.malloc(modelSize); } maxModelCount = modelCount; @@ -186,7 +172,7 @@ public class IndirectBuffers { if (maxDrawCount > 0) { int drawNew = glCreateBuffers(); - glNamedBufferStorage(drawNew, drawSize, SUB_DATA_BITS); + glNamedBufferStorage(drawNew, drawSize, 0); glDeleteBuffers(draw); @@ -194,7 +180,7 @@ public class IndirectBuffers { draw = drawNew; drawPtr = drawPtr.realloc(drawSize); } else { - glNamedBufferStorage(draw, drawSize, SUB_DATA_BITS); + glNamedBufferStorage(draw, drawSize, 0); drawPtr = MemoryBlock.malloc(drawSize); } maxDrawCount = drawCount; @@ -227,18 +213,6 @@ public class IndirectBuffers { nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, 0, IndirectBuffers.BUFFER_COUNT, ptr, ptr + 
OFFSET_OFFSET, ptr + SIZE_OFFSET); } - void flushObjects(long length) { - glFlushMappedNamedBufferRange(object, 0, length); - } - - void flushModels(long length) { - nglNamedBufferSubData(model, 0, length, modelPtr.ptr()); - } - - void flushDrawCommands(long length) { - nglNamedBufferSubData(draw, 0, length, drawPtr.ptr()); - } - public void delete() { nglDeleteBuffers(BUFFER_COUNT, buffers.ptr()); buffers.free(); diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java index ed0afdddd..79f5cd535 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -15,6 +15,8 @@ import java.util.EnumMap; import java.util.List; import java.util.Map; +import org.lwjgl.system.MemoryUtil; + import com.jozufozu.flywheel.api.event.RenderStage; import com.jozufozu.flywheel.api.instance.Instance; import com.jozufozu.flywheel.api.instance.InstanceType; @@ -27,6 +29,7 @@ import com.jozufozu.flywheel.backend.engine.UniformBuffer; import com.jozufozu.flywheel.gl.GlCompat; import com.jozufozu.flywheel.gl.shader.GlProgram; import com.jozufozu.flywheel.lib.context.Contexts; +import com.jozufozu.flywheel.lib.memory.MemoryBlock; import com.jozufozu.flywheel.lib.model.ModelUtil; public class IndirectCullingGroup { @@ -99,7 +102,7 @@ public class IndirectCullingGroup { } } - public void flush() { + public void flush(StagingBuffer stagingBuffer) { needsDrawBarrier = true; instanceCountThisFrame = calculateTotalInstanceCountAndPrepareBatches(); @@ -114,10 +117,10 @@ public class IndirectCullingGroup { needsSortDraws = false; } - meshPool.flush(); - uploadInstances(); - uploadModels(); - uploadIndirectCommands(); + meshPool.flush(stagingBuffer); + uploadInstances(stagingBuffer); + uploadModels(stagingBuffer); + 
uploadIndirectCommands(stagingBuffer); } public void dispatchCull() { @@ -173,35 +176,55 @@ public class IndirectCullingGroup { } } - private void uploadInstances() { - long objectPtr = buffers.objectPtr; - + private void uploadInstances(StagingBuffer stagingBuffer) { + long pos = 0; for (IndirectModel batch : indirectModels) { var instanceCount = batch.instancer.getInstanceCount(); - batch.writeObjects(objectPtr); + batch.writeObjects(stagingBuffer, pos, buffers.object); - objectPtr += instanceCount * objectStride; + pos += instanceCount * objectStride; } - - buffers.flushObjects(objectPtr - buffers.objectPtr); } - private void uploadModels() { - long writePtr = buffers.modelPtr.ptr(); + private void uploadModels(StagingBuffer stagingBuffer) { + var totalSize = indirectModels.size() * IndirectBuffers.MODEL_STRIDE; + long writePtr = stagingBuffer.reserveForTransferTo(totalSize, buffers.model, 0); + + if (writePtr == MemoryUtil.NULL) { + var block = MemoryBlock.malloc(totalSize); + writeModels(block.ptr()); + stagingBuffer.enqueueCopy(block.ptr(), totalSize, buffers.model, 0); + block.free(); + } else { + writeModels(writePtr); + } + } + + private void writeModels(long writePtr) { for (var batch : indirectModels) { batch.writeModel(writePtr); writePtr += IndirectBuffers.MODEL_STRIDE; } - buffers.flushModels(writePtr - buffers.modelPtr.ptr()); } - private void uploadIndirectCommands() { - long writePtr = buffers.drawPtr.ptr(); + private void uploadIndirectCommands(StagingBuffer stagingBuffer) { + var totalSize = indirectDraws.size() * IndirectBuffers.DRAW_COMMAND_STRIDE; + long writePtr = stagingBuffer.reserveForTransferTo(totalSize, buffers.draw, 0); + if (writePtr == MemoryUtil.NULL) { + var block = MemoryBlock.malloc(totalSize); + writeCommands(block.ptr()); + stagingBuffer.enqueueCopy(block.ptr(), totalSize, buffers.draw, 0); + block.free(); + } else { + writeCommands(writePtr); + } + } + + private void writeCommands(long writePtr) { for (var batch : 
indirectDraws) { batch.writeIndirectCommand(writePtr); writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE; } - buffers.flushDrawCommands(writePtr - buffers.drawPtr.ptr()); } private int calculateTotalInstanceCountAndPrepareBatches() { diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDrawManager.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDrawManager.java index fe8943076..3a8b91434 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -11,7 +11,8 @@ import com.jozufozu.flywheel.backend.engine.InstancerKey; import com.jozufozu.flywheel.backend.engine.InstancerStorage; public class IndirectDrawManager extends InstancerStorage> { - public final Map, IndirectCullingGroup> renderLists = new HashMap<>(); + private final StagingBuffer stagingBuffer = new StagingBuffer(); + public final Map, IndirectCullingGroup> cullingGroups = new HashMap<>(); @Override protected IndirectInstancer create(InstanceType type) { @@ -20,13 +21,13 @@ public class IndirectDrawManager extends InstancerStorage> @Override protected void add(InstancerKey key, IndirectInstancer instancer, Model model, RenderStage stage) { - var indirectList = (IndirectCullingGroup) renderLists.computeIfAbsent(key.type(), IndirectCullingGroup::new); + var indirectList = (IndirectCullingGroup) cullingGroups.computeIfAbsent(key.type(), IndirectCullingGroup::new); indirectList.add((IndirectInstancer) instancer, stage, model); } public boolean hasStage(RenderStage stage) { - for (var list : renderLists.values()) { + for (var list : cullingGroups.values()) { if (list.hasStage(stage)) { return true; } @@ -38,25 +39,31 @@ public class IndirectDrawManager extends InstancerStorage> public void flush() { super.flush(); - for (var group : renderLists.values()) { - group.flush(); + stagingBuffer.reclaim(); + + for (var group : 
cullingGroups.values()) { + group.flush(stagingBuffer); } - for (var group : renderLists.values()) { + stagingBuffer.flush(); + + for (var group : cullingGroups.values()) { group.dispatchCull(); } - for (var group : renderLists.values()) { + for (var group : cullingGroups.values()) { group.dispatchApply(); } } @Override - public void invalidate() { - super.invalidate(); + public void delete() { + super.delete(); - renderLists.values() + cullingGroups.values() .forEach(IndirectCullingGroup::delete); - renderLists.clear(); + cullingGroups.clear(); + + stagingBuffer.delete(); } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectEngine.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectEngine.java index c39f52399..c9377b145 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectEngine.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectEngine.java @@ -61,7 +61,7 @@ public class IndirectEngine extends AbstractEngine { GlTextureUnit.T2.makeActive(); RenderSystem.bindTexture(RenderSystem.getShaderTexture(2)); - for (var list : drawManager.renderLists.values()) { + for (var list : drawManager.cullingGroups.values()) { list.submit(stage); } @@ -85,6 +85,6 @@ public class IndirectEngine extends AbstractEngine { @Override public void delete() { - drawManager.invalidate(); + drawManager.delete(); } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java index eebbe8bf2..5c9ceb2a1 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -6,46 +6,79 @@ import com.jozufozu.flywheel.api.instance.Instance; import com.jozufozu.flywheel.api.instance.InstanceType; import com.jozufozu.flywheel.api.instance.InstanceWriter; 
import com.jozufozu.flywheel.backend.engine.AbstractInstancer; +import com.jozufozu.flywheel.lib.memory.MemoryBlock; public class IndirectInstancer extends AbstractInstancer { private final long instanceStride; private final long objectStride; + private final InstanceWriter writer; public IndirectInstancer(InstanceType type) { super(type); this.instanceStride = type.getLayout() .getStride(); this.objectStride = instanceStride + IndirectBuffers.INT_SIZE; + writer = this.type.getWriter(); } public void update() { removeDeletedInstances(); } - public void writeSparse(long objectPtr, int batchID) { + public void writeSparse(StagingBuffer stagingBuffer, long start, int modelID, int dstVbo) { int count = instances.size(); - InstanceWriter writer = type.getWriter(); + // Backup buffer for when we can't write to the staging buffer. + MemoryBlock backup = null; for (int i = changed.nextSetBit(0); i >= 0 && i < count; i = changed.nextSetBit(i + 1)) { - long ptr = objectPtr + objectStride * i; - // write batchID - MemoryUtil.memPutInt(ptr, batchID); - // write object - writer.write(ptr + IndirectBuffers.INT_SIZE, instances.get(i)); + long ptr = stagingBuffer.reserveForTransferTo(objectStride, dstVbo, start + i * objectStride); + if (ptr == MemoryUtil.NULL) { + // Staging buffer can't fit this object, so we'll have to write it to a backup buffer. + if (backup == null) { + backup = MemoryBlock.malloc(objectStride); + } + writeOne(backup.ptr(), instances.get(i), modelID); + + stagingBuffer.enqueueCopy(backup.ptr(), objectStride, dstVbo, start + i * objectStride); + } else { + writeOne(ptr, instances.get(i), modelID); + } } changed.clear(); + + // Free the backup buffer if we allocated one. 
+ if (backup != null) { + backup.free(); + } + } + + public void writeFull(StagingBuffer stagingBuffer, long start, int modelID, int dstVbo) { + long totalSize = objectStride * instances.size(); + + long ptr = stagingBuffer.reserveForTransferTo(totalSize, dstVbo, start); + + if (ptr != MemoryUtil.NULL) { + writeAll(ptr, modelID); + } else { + var block = MemoryBlock.malloc(totalSize); + writeAll(block.ptr(), modelID); + stagingBuffer.enqueueCopy(block.ptr(), totalSize, dstVbo, start); + block.free(); + } + + changed.clear(); } - public void writeFull(long objectPtr, int modelID) { - InstanceWriter writer = type.getWriter(); - for (I object : instances) { - // write modelID - MemoryUtil.memPutInt(objectPtr, modelID); - objectPtr += IndirectBuffers.INT_SIZE; - - // write object - writer.write(objectPtr, object); - objectPtr += instanceStride; + private void writeAll(long ptr, int modelID) { + for (I instance : instances) { + writeOne(ptr, instance, modelID); + ptr += objectStride; } - changed.clear(); + } + + private void writeOne(long ptr, I instance, int modelID) { + // write modelID + MemoryUtil.memPutInt(ptr, modelID); + // write object + writer.write(ptr + IndirectBuffers.INT_SIZE, instance); } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectMeshPool.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectMeshPool.java index d2b2cc915..c3cafdc0d 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectMeshPool.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectMeshPool.java @@ -60,14 +60,15 @@ public class IndirectMeshPool { return meshes.get(mesh); } - public void flush() { + public void flush(StagingBuffer stagingBuffer) { if (dirty) { - uploadAll(); + // TODO: use the staging buffer and be smarter about allocation in general. 
+ uploadAll(stagingBuffer); dirty = false; } } - private void uploadAll() { + private void uploadAll(StagingBuffer stagingBuffer) { long neededSize = 0; int maxQuadIndexCount = 0; int nonQuadIndexCount = 0; diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java index 3959982e5..b19561531 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectModel.java @@ -35,11 +35,11 @@ public class IndirectModel { needsFullWrite = true; } - public void writeObjects(long objectPtr) { + public void writeObjects(StagingBuffer stagingBuffer, long start, int dstVbo) { if (needsFullWrite) { - instancer.writeFull(objectPtr, id); + instancer.writeFull(stagingBuffer, start, id, dstVbo); } else { - instancer.writeSparse(objectPtr, id); + instancer.writeSparse(stagingBuffer, start, id, dstVbo); } } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java new file mode 100644 index 000000000..ca7ba8e8d --- /dev/null +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/StagingBuffer.java @@ -0,0 +1,230 @@ +package com.jozufozu.flywheel.backend.engine.indirect; + +import java.util.ArrayList; +import java.util.List; + +import org.lwjgl.opengl.GL45C; +import org.lwjgl.system.MemoryUtil; + +import com.jozufozu.flywheel.gl.GlFence; +import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker; + +import it.unimi.dsi.fastutil.PriorityQueue; +import it.unimi.dsi.fastutil.objects.ObjectArrayFIFOQueue; + +// https://github.com/CaffeineMC/sodium-fabric/blob/dev/src/main/java/me/jellysquid/mods/sodium/client/gl/arena/staging/MappedStagingBuffer.java +public class StagingBuffer { + private static final long DEFAULT_CAPACITY = 1024 * 1024 * 8; + private 
static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT; + private static final int MAP_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_MAP_FLUSH_EXPLICIT_BIT | GL45C.GL_MAP_INVALIDATE_BUFFER_BIT; + + private final int vbo; + private final long map; + private final long capacity; + + private long start = 0; + private long pos = 0; + + private long totalAvailable; + + private final OverflowStagingBuffer overflow = new OverflowStagingBuffer(); + private final PriorityQueue transfers = new ObjectArrayFIFOQueue<>(); + private final PriorityQueue fencedRegions = new ObjectArrayFIFOQueue<>(); + + public StagingBuffer() { + this(DEFAULT_CAPACITY); + } + + public StagingBuffer(long capacity) { + this.capacity = capacity; + vbo = GL45C.glCreateBuffers(); + + GL45C.glNamedBufferStorage(vbo, capacity, STORAGE_FLAGS); + map = GL45C.nglMapNamedBufferRange(vbo, 0, capacity, MAP_FLAGS); + + totalAvailable = capacity; + + FlwMemoryTracker._allocCPUMemory(capacity); + } + + /** + * Enqueue a copy from the given pointer to the given VBO. + * + * @param ptr The pointer to copy from. + * @param size The size of the copy. + * @param dstVbo The VBO to copy to. + * @param dstOffset The offset in the destination VBO. + */ + public void enqueueCopy(long ptr, long size, int dstVbo, long dstOffset) { + if (size > totalAvailable) { + overflow.enqueueCopy(ptr, size, dstVbo, dstOffset); + return; + } + + long remaining = capacity - pos; + + if (size > remaining) { + long split = size - remaining; + + // Put the first span at the tail of the buffer... + MemoryUtil.memCopy(ptr, map + pos, remaining); + transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, remaining)); + + // ... and the rest at the head. 
+ MemoryUtil.memCopy(ptr + remaining, map, split); + transfers.enqueue(new Transfer(0, dstVbo, dstOffset + remaining, split)); + + pos = split; + } else { + MemoryUtil.memCopy(ptr, map + pos, size); + transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, size)); + + pos += size; + } + + totalAvailable -= size; + } + + /** + * Reserve space in this buffer for a transfer to another VBO. + * <p>
+ * You must ensure that your writes are complete before the next call to {@link #flush}. + * <p>
+ * This will generally be a more efficient way to transfer data as it avoids a copy, however, + * this method does not allow for non-contiguous writes, so you should fall back to + * {@link #enqueueCopy} if this returns {@link MemoryUtil#NULL}. + * + * @param size The size of the transfer you wish to make. + * @param dstVbo The VBO you wish to transfer to. + * @param dstOffset The offset in the destination VBO. + * @return A pointer to the reserved space, or {@link MemoryUtil#NULL} if there is not enough contiguous space. + */ + public long reserveForTransferTo(long size, int dstVbo, long dstOffset) { + // Don't need to check totalAvailable here because that's a looser constraint than the bytes remaining. + long remaining = capacity - pos; + if (size > remaining) { + return MemoryUtil.NULL; + } + + long out = map + pos; + + transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, size)); + + pos += size; + + totalAvailable -= size; + + return out; + } + + public void flush() { + if (transfers.isEmpty()) { + return; + } + + if (pos < start) { + // we rolled around, need to flush 2 ranges. 
+ GL45C.glFlushMappedNamedBufferRange(vbo, start, capacity - start); + GL45C.glFlushMappedNamedBufferRange(vbo, 0, pos); + } else { + GL45C.glFlushMappedNamedBufferRange(vbo, start, pos - start); + } + + long usedCapacity = 0; + + for (Transfer transfer : consolidateCopies(transfers)) { + usedCapacity += transfer.size; + + GL45C.glCopyNamedBufferSubData(vbo, transfer.dstVbo, transfer.srcOffset, transfer.dstOffset, transfer.size); + } + + fencedRegions.enqueue(new FencedRegion(new GlFence(), usedCapacity)); + + start = pos; + } + + private static List consolidateCopies(PriorityQueue queue) { + List merged = new ArrayList<>(); + Transfer last = null; + + while (!queue.isEmpty()) { + Transfer transfer = queue.dequeue(); + + if (last != null) { + if (areContiguous(last, transfer)) { + last.size += transfer.size; + continue; + } + } + + merged.add(last = new Transfer(transfer)); + } + + return merged; + } + + private static boolean areContiguous(Transfer last, Transfer transfer) { + return last.dstVbo == transfer.dstVbo && last.dstOffset + last.size == transfer.dstOffset && last.srcOffset + last.size == transfer.srcOffset; + } + + public void reclaim() { + while (!fencedRegions.isEmpty()) { + var region = fencedRegions.first(); + if (!region.fence.isSignaled()) { + // We can't reclaim this region yet, and we know that all the regions after it are also not ready. 
+ break; + } + fencedRegions.dequeue(); + + region.fence.delete(); + + totalAvailable += region.capacity; + } + } + + public void delete() { + GL45C.glUnmapNamedBuffer(vbo); + GL45C.glDeleteBuffers(vbo); + overflow.delete(); + + FlwMemoryTracker._freeCPUMemory(capacity); + } + + private static final class Transfer { + private final long srcOffset; + private final int dstVbo; + private final long dstOffset; + private long size; + + private Transfer(long srcOffset, int dstVbo, long dstOffset, long size) { + this.srcOffset = srcOffset; + this.dstVbo = dstVbo; + this.dstOffset = dstOffset; + this.size = size; + } + + public Transfer(Transfer other) { + this(other.srcOffset, other.dstVbo, other.dstOffset, other.size); + } + } + + private record FencedRegion(GlFence fence, long capacity) { + } + + private static class OverflowStagingBuffer { + private final int vbo; + + public OverflowStagingBuffer() { + vbo = GL45C.glCreateBuffers(); + } + + public void enqueueCopy(long ptr, long size, int dstVbo, long dstOffset) { + GL45C.nglNamedBufferData(vbo, size, ptr, GL45C.GL_STREAM_COPY); + GL45C.glCopyNamedBufferSubData(vbo, dstVbo, 0, dstOffset, size); + } + + public void delete() { + GL45C.glDeleteBuffers(vbo); + } + } +} diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/EBOCache.java b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/EBOCache.java index 1fefedb4c..2c9e64a51 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/EBOCache.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/EBOCache.java @@ -8,7 +8,7 @@ import org.lwjgl.system.MemoryUtil; import com.jozufozu.flywheel.api.model.IndexSequence; import com.jozufozu.flywheel.gl.GlNumericType; -import com.jozufozu.flywheel.gl.buffer.GlBuffer; +import com.jozufozu.flywheel.gl.buffer.Buffer; import com.jozufozu.flywheel.gl.buffer.GlBufferUsage; import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker; import 
com.jozufozu.flywheel.lib.model.QuadIndexSequence; @@ -59,12 +59,12 @@ public class EBOCache { @NotNull private static Entry create(IndexSequence provider, int indexCount) { int byteSize = indexCount * GlNumericType.UINT.byteWidth(); - var ebo = GlBuffer.IMPL.create(); + var ebo = Buffer.IMPL.create(); final long ptr = MemoryUtil.nmemAlloc(byteSize); provider.fill(ptr, indexCount); - GlBuffer.IMPL.data(ebo, byteSize, ptr, GlBufferUsage.STATIC_DRAW.glEnum); + Buffer.IMPL.data(ebo, byteSize, ptr, GlBufferUsage.STATIC_DRAW.glEnum); FlwMemoryTracker._allocGPUMemory(byteSize); MemoryUtil.nmemFree(ptr); diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedDrawManager.java b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedDrawManager.java index c50946dfd..ca45b09eb 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedDrawManager.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedDrawManager.java @@ -39,8 +39,8 @@ public class InstancedDrawManager extends InstancerStorage meshPool.flush(); } - public void invalidate() { - super.invalidate(); + public void delete() { + super.delete(); meshPool.delete(); diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancingEngine.java b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancingEngine.java index 24e9c6801..663e2313f 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancingEngine.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancingEngine.java @@ -96,7 +96,7 @@ public class InstancingEngine extends AbstractEngine { @Override public void delete() { - drawManager.invalidate(); + drawManager.delete(); } private void render(InstancedDrawManager.DrawSet drawSet) { diff --git a/src/main/java/com/jozufozu/flywheel/gl/GlFence.java b/src/main/java/com/jozufozu/flywheel/gl/GlFence.java index 3d57aa55a..e1ab94fd8 100644 
--- a/src/main/java/com/jozufozu/flywheel/gl/GlFence.java +++ b/src/main/java/com/jozufozu/flywheel/gl/GlFence.java @@ -1,68 +1,34 @@ package com.jozufozu.flywheel.gl; import static org.lwjgl.opengl.GL32.GL_SIGNALED; -import static org.lwjgl.opengl.GL32.GL_SYNC_FLUSH_COMMANDS_BIT; import static org.lwjgl.opengl.GL32.GL_SYNC_GPU_COMMANDS_COMPLETE; import static org.lwjgl.opengl.GL32.GL_SYNC_STATUS; -import static org.lwjgl.opengl.GL32.GL_TIMEOUT_IGNORED; -import static org.lwjgl.opengl.GL32.glClientWaitSync; import static org.lwjgl.opengl.GL32.glDeleteSync; import static org.lwjgl.opengl.GL32.glFenceSync; +import static org.lwjgl.opengl.GL32.nglGetSynciv; -import org.lwjgl.opengl.GL32; import org.lwjgl.system.MemoryStack; import org.lwjgl.system.MemoryUtil; -// https://github.com/CaffeineMC/sodium-fabric/blob/da17fc8d0cb1a4e82fe6956ac4f07a63d32eca5a/components/gfx-opengl/src/main/java/net/caffeinemc/gfx/opengl/sync/GlFence.java public class GlFence { + private final long fence; - private long fence; - - public void post() { - clear(); - + public GlFence() { fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); } - public void clear() { - if (fence != 0) { - glDeleteSync(fence); - fence = 0; - } - } - - public boolean poll() { - if (fence != 0) { - poll0(); - } - - return fence == 0; - } - - private void poll0() { + public boolean isSignaled() { int result; try (var memoryStack = MemoryStack.stackPush()) { long checkPtr = memoryStack.ncalloc(Integer.BYTES, 0, Integer.BYTES); - GL32.nglGetSynciv(fence, GL_SYNC_STATUS, 1, MemoryUtil.NULL, checkPtr); + nglGetSynciv(fence, GL_SYNC_STATUS, 1, MemoryUtil.NULL, checkPtr); result = MemoryUtil.memGetInt(checkPtr); } - - if (result == GL_SIGNALED) { - glDeleteSync(fence); - fence = 0; - } + return result == GL_SIGNALED; } - public void waitSync() { - if (poll()) { - return; - } - - glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED); - + public void delete() { glDeleteSync(fence); - - fence = 0; } } diff 
--git a/src/main/java/com/jozufozu/flywheel/gl/buffer/Buffer.java b/src/main/java/com/jozufozu/flywheel/gl/buffer/Buffer.java index 0983f5608..f2e4bb65f 100644 --- a/src/main/java/com/jozufozu/flywheel/gl/buffer/Buffer.java +++ b/src/main/java/com/jozufozu/flywheel/gl/buffer/Buffer.java @@ -9,6 +9,8 @@ import org.lwjgl.system.Checks; import com.jozufozu.flywheel.gl.GlCompat; public interface Buffer { + Buffer IMPL = new DSA().fallback(); + int create(); void data(int vbo, long size, long ptr, int glEnum); diff --git a/src/main/java/com/jozufozu/flywheel/gl/buffer/GlBuffer.java b/src/main/java/com/jozufozu/flywheel/gl/buffer/GlBuffer.java index ed8c4118f..001eacf81 100644 --- a/src/main/java/com/jozufozu/flywheel/gl/buffer/GlBuffer.java +++ b/src/main/java/com/jozufozu/flywheel/gl/buffer/GlBuffer.java @@ -10,7 +10,6 @@ import com.mojang.blaze3d.platform.GlStateManager; import it.unimi.dsi.fastutil.longs.LongUnaryOperator; public class GlBuffer extends GlObject { - public static final Buffer IMPL = new Buffer.DSA().fallback(); protected final GlBufferUsage usage; /** * The size (in bytes) of the buffer on the GPU. 
@@ -26,7 +25,7 @@ public class GlBuffer extends GlObject { } public GlBuffer(GlBufferUsage usage) { - handle(IMPL.create()); + handle(Buffer.IMPL.create()); this.usage = usage; } @@ -57,7 +56,7 @@ public class GlBuffer extends GlObject { private void alloc(long capacity) { increaseSize(capacity); - IMPL.data(handle(), size, MemoryUtil.NULL, usage.glEnum); + Buffer.IMPL.data(handle(), size, MemoryUtil.NULL, usage.glEnum); FlwMemoryTracker._allocGPUMemory(size); } @@ -67,9 +66,9 @@ public class GlBuffer extends GlObject { increaseSize(capacity); int oldHandle = handle(); - int newHandle = IMPL.create(); - IMPL.data(newHandle, size, MemoryUtil.NULL, usage.glEnum); - IMPL.copyData(oldHandle, newHandle, 0, 0, oldSize); + int newHandle = Buffer.IMPL.create(); + Buffer.IMPL.data(newHandle, size, MemoryUtil.NULL, usage.glEnum); + Buffer.IMPL.copyData(oldHandle, newHandle, 0, 0, oldSize); GlStateManager._glDeleteBuffers(oldHandle); handle(newHandle); @@ -85,7 +84,7 @@ public class GlBuffer extends GlObject { public void upload(MemoryBlock directBuffer) { FlwMemoryTracker._freeGPUMemory(size); - IMPL.data(handle(), directBuffer.size(), directBuffer.ptr(), usage.glEnum); + Buffer.IMPL.data(handle(), directBuffer.size(), directBuffer.ptr(), usage.glEnum); size = directBuffer.size(); FlwMemoryTracker._allocGPUMemory(size); } diff --git a/src/main/java/com/jozufozu/flywheel/gl/buffer/MappedBuffer.java b/src/main/java/com/jozufozu/flywheel/gl/buffer/MappedBuffer.java index 7d54eda3b..51bdc1a10 100644 --- a/src/main/java/com/jozufozu/flywheel/gl/buffer/MappedBuffer.java +++ b/src/main/java/com/jozufozu/flywheel/gl/buffer/MappedBuffer.java @@ -15,7 +15,7 @@ public class MappedBuffer implements AutoCloseable { public MappedBuffer(int glBuffer, long size) { this.glBuffer = glBuffer; - ptr = GlBuffer.IMPL.mapRange(glBuffer, 0, size, GL_MAP_WRITE_BIT); + ptr = Buffer.IMPL.mapRange(glBuffer, 0, size, GL_MAP_WRITE_BIT); if (ptr == MemoryUtil.NULL) { throw new GlException(GlError.poll(), 
"Could not map buffer"); @@ -32,7 +32,7 @@ public class MappedBuffer implements AutoCloseable { return; } - GlBuffer.IMPL.unmap(glBuffer); + Buffer.IMPL.unmap(glBuffer); ptr = NULL; } }