OSHA Theatre

- Add staging buffer for indirect using unholy amounts of unsafe.
- Make GlFence RAII.
- Make all IndirectBuffers GPU only and use a shared staging buffer for
  transfers.
This commit is contained in:
Jozufozu 2023-12-05 00:28:04 -08:00
parent de0876c2ee
commit c0d1e736e4
18 changed files with 389 additions and 154 deletions

View file

@ -66,7 +66,7 @@ public abstract class InstancerStorage<N extends AbstractInstancer<?>> {
} }
} }
public void invalidate() { public void delete() {
instancers.clear(); instancers.clear();
uninitializedInstancers.clear(); uninitializedInstancers.clear();

View file

@ -69,8 +69,8 @@ class BatchedDrawManager extends InstancerStorage<BatchedInstancer<?>> {
} }
@Override @Override
public void invalidate() { public void delete() {
super.invalidate(); super.delete();
meshPools.values() meshPools.values()
.forEach(BatchedMeshPool::delete); .forEach(BatchedMeshPool::delete);

View file

@ -58,6 +58,6 @@ public class BatchingEngine extends AbstractEngine {
@Override @Override
public void delete() { public void delete() {
drawManager.invalidate(); drawManager.delete();
} }
} }

View file

@ -2,19 +2,12 @@ package com.jozufozu.flywheel.backend.engine.indirect;
import static org.lwjgl.opengl.GL15.glDeleteBuffers; import static org.lwjgl.opengl.GL15.glDeleteBuffers;
import static org.lwjgl.opengl.GL15.nglDeleteBuffers; import static org.lwjgl.opengl.GL15.nglDeleteBuffers;
import static org.lwjgl.opengl.GL30.GL_MAP_FLUSH_EXPLICIT_BIT;
import static org.lwjgl.opengl.GL30.GL_MAP_WRITE_BIT;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER; import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER;
import static org.lwjgl.opengl.GL44.GL_DYNAMIC_STORAGE_BIT;
import static org.lwjgl.opengl.GL44.GL_MAP_PERSISTENT_BIT;
import static org.lwjgl.opengl.GL44.nglBindBuffersRange; import static org.lwjgl.opengl.GL44.nglBindBuffersRange;
import static org.lwjgl.opengl.GL45.glCopyNamedBufferSubData; import static org.lwjgl.opengl.GL45.glCopyNamedBufferSubData;
import static org.lwjgl.opengl.GL45.glCreateBuffers; import static org.lwjgl.opengl.GL45.glCreateBuffers;
import static org.lwjgl.opengl.GL45.glFlushMappedNamedBufferRange;
import static org.lwjgl.opengl.GL45.glNamedBufferStorage; import static org.lwjgl.opengl.GL45.glNamedBufferStorage;
import static org.lwjgl.opengl.GL45.nglCreateBuffers; import static org.lwjgl.opengl.GL45.nglCreateBuffers;
import static org.lwjgl.opengl.GL45.nglMapNamedBufferRange;
import static org.lwjgl.opengl.GL45.nglNamedBufferSubData;
import org.lwjgl.system.MemoryUtil; import org.lwjgl.system.MemoryUtil;
import org.lwjgl.system.Pointer; import org.lwjgl.system.Pointer;
@ -23,6 +16,7 @@ import com.jozufozu.flywheel.gl.buffer.GlBufferType;
import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker; import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
import com.jozufozu.flywheel.lib.memory.MemoryBlock; import com.jozufozu.flywheel.lib.memory.MemoryBlock;
// TODO: better abstractions
public class IndirectBuffers { public class IndirectBuffers {
// Number of vbos created. // Number of vbos created.
public static final int BUFFER_COUNT = 4; public static final int BUFFER_COUNT = 4;
@ -36,12 +30,6 @@ public class IndirectBuffers {
public static final long MODEL_STRIDE = 24; public static final long MODEL_STRIDE = 24;
// BITS
private static final int SUB_DATA_BITS = GL_DYNAMIC_STORAGE_BIT;
private static final int PERSISTENT_BITS = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT;
private static final int MAP_BITS = PERSISTENT_BITS | GL_MAP_FLUSH_EXPLICIT_BIT;
private static final int GPU_ONLY_BITS = 0;
// Offsets to the vbos // Offsets to the vbos
private static final long VBO_OFFSET = 0; private static final long VBO_OFFSET = 0;
private static final long OBJECT_OFFSET = VBO_OFFSET; private static final long OBJECT_OFFSET = VBO_OFFSET;
@ -74,12 +62,11 @@ public class IndirectBuffers {
*/ */
private final MemoryBlock buffers; private final MemoryBlock buffers;
private final long objectStride; private final long objectStride;
private int object; public int object;
private int target; public int target;
private int model; public int model;
private int draw; public int draw;
long objectPtr;
MemoryBlock modelPtr; MemoryBlock modelPtr;
MemoryBlock drawPtr; MemoryBlock drawPtr;
@ -135,8 +122,8 @@ public class IndirectBuffers {
int objectNew = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET); int objectNew = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET);
int targetNew = MemoryUtil.memGetInt(ptr + TARGET_OFFSET); int targetNew = MemoryUtil.memGetInt(ptr + TARGET_OFFSET);
glNamedBufferStorage(objectNew, objectSize, PERSISTENT_BITS); glNamedBufferStorage(objectNew, objectSize, 0);
glNamedBufferStorage(targetNew, targetSize, GPU_ONLY_BITS); glNamedBufferStorage(targetNew, targetSize, 0);
glCopyNamedBufferSubData(object, objectNew, 0, 0, objectStride * maxObjectCount); glCopyNamedBufferSubData(object, objectNew, 0, 0, objectStride * maxObjectCount);
glCopyNamedBufferSubData(target, targetNew, 0, 0, INT_SIZE * maxObjectCount); glCopyNamedBufferSubData(target, targetNew, 0, 0, INT_SIZE * maxObjectCount);
@ -147,11 +134,10 @@ public class IndirectBuffers {
object = objectNew; object = objectNew;
target = targetNew; target = targetNew;
} else { } else {
glNamedBufferStorage(object, objectSize, PERSISTENT_BITS); glNamedBufferStorage(object, objectSize, 0);
glNamedBufferStorage(target, targetSize, GPU_ONLY_BITS); glNamedBufferStorage(target, targetSize, 0);
} }
objectPtr = nglMapNamedBufferRange(object, 0, objectSize, MAP_BITS);
maxObjectCount = objectCount; maxObjectCount = objectCount;
FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride); FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride);
@ -164,7 +150,7 @@ public class IndirectBuffers {
if (maxModelCount > 0) { if (maxModelCount > 0) {
int modelNew = glCreateBuffers(); int modelNew = glCreateBuffers();
glNamedBufferStorage(modelNew, modelSize, SUB_DATA_BITS); glNamedBufferStorage(modelNew, modelSize, 0);
glDeleteBuffers(model); glDeleteBuffers(model);
@ -172,7 +158,7 @@ public class IndirectBuffers {
model = modelNew; model = modelNew;
modelPtr = modelPtr.realloc(modelSize); modelPtr = modelPtr.realloc(modelSize);
} else { } else {
glNamedBufferStorage(model, modelSize, SUB_DATA_BITS); glNamedBufferStorage(model, modelSize, 0);
modelPtr = MemoryBlock.malloc(modelSize); modelPtr = MemoryBlock.malloc(modelSize);
} }
maxModelCount = modelCount; maxModelCount = modelCount;
@ -186,7 +172,7 @@ public class IndirectBuffers {
if (maxDrawCount > 0) { if (maxDrawCount > 0) {
int drawNew = glCreateBuffers(); int drawNew = glCreateBuffers();
glNamedBufferStorage(drawNew, drawSize, SUB_DATA_BITS); glNamedBufferStorage(drawNew, drawSize, 0);
glDeleteBuffers(draw); glDeleteBuffers(draw);
@ -194,7 +180,7 @@ public class IndirectBuffers {
draw = drawNew; draw = drawNew;
drawPtr = drawPtr.realloc(drawSize); drawPtr = drawPtr.realloc(drawSize);
} else { } else {
glNamedBufferStorage(draw, drawSize, SUB_DATA_BITS); glNamedBufferStorage(draw, drawSize, 0);
drawPtr = MemoryBlock.malloc(drawSize); drawPtr = MemoryBlock.malloc(drawSize);
} }
maxDrawCount = drawCount; maxDrawCount = drawCount;
@ -227,18 +213,6 @@ public class IndirectBuffers {
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, 0, IndirectBuffers.BUFFER_COUNT, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET); nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, 0, IndirectBuffers.BUFFER_COUNT, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET);
} }
void flushObjects(long length) {
glFlushMappedNamedBufferRange(object, 0, length);
}
void flushModels(long length) {
nglNamedBufferSubData(model, 0, length, modelPtr.ptr());
}
void flushDrawCommands(long length) {
nglNamedBufferSubData(draw, 0, length, drawPtr.ptr());
}
public void delete() { public void delete() {
nglDeleteBuffers(BUFFER_COUNT, buffers.ptr()); nglDeleteBuffers(BUFFER_COUNT, buffers.ptr());
buffers.free(); buffers.free();

View file

@ -15,6 +15,8 @@ import java.util.EnumMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.event.RenderStage; import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance; import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.instance.InstanceType; import com.jozufozu.flywheel.api.instance.InstanceType;
@ -27,6 +29,7 @@ import com.jozufozu.flywheel.backend.engine.UniformBuffer;
import com.jozufozu.flywheel.gl.GlCompat; import com.jozufozu.flywheel.gl.GlCompat;
import com.jozufozu.flywheel.gl.shader.GlProgram; import com.jozufozu.flywheel.gl.shader.GlProgram;
import com.jozufozu.flywheel.lib.context.Contexts; import com.jozufozu.flywheel.lib.context.Contexts;
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
import com.jozufozu.flywheel.lib.model.ModelUtil; import com.jozufozu.flywheel.lib.model.ModelUtil;
public class IndirectCullingGroup<I extends Instance> { public class IndirectCullingGroup<I extends Instance> {
@ -99,7 +102,7 @@ public class IndirectCullingGroup<I extends Instance> {
} }
} }
public void flush() { public void flush(StagingBuffer stagingBuffer) {
needsDrawBarrier = true; needsDrawBarrier = true;
instanceCountThisFrame = calculateTotalInstanceCountAndPrepareBatches(); instanceCountThisFrame = calculateTotalInstanceCountAndPrepareBatches();
@ -114,10 +117,10 @@ public class IndirectCullingGroup<I extends Instance> {
needsSortDraws = false; needsSortDraws = false;
} }
meshPool.flush(); meshPool.flush(stagingBuffer);
uploadInstances(); uploadInstances(stagingBuffer);
uploadModels(); uploadModels(stagingBuffer);
uploadIndirectCommands(); uploadIndirectCommands(stagingBuffer);
} }
public void dispatchCull() { public void dispatchCull() {
@ -173,35 +176,55 @@ public class IndirectCullingGroup<I extends Instance> {
} }
} }
private void uploadInstances() { private void uploadInstances(StagingBuffer stagingBuffer) {
long objectPtr = buffers.objectPtr; long pos = 0;
for (IndirectModel batch : indirectModels) { for (IndirectModel batch : indirectModels) {
var instanceCount = batch.instancer.getInstanceCount(); var instanceCount = batch.instancer.getInstanceCount();
batch.writeObjects(objectPtr); batch.writeObjects(stagingBuffer, pos, buffers.object);
objectPtr += instanceCount * objectStride; pos += instanceCount * objectStride;
} }
buffers.flushObjects(objectPtr - buffers.objectPtr);
} }
private void uploadModels() { private void uploadModels(StagingBuffer stagingBuffer) {
long writePtr = buffers.modelPtr.ptr(); var totalSize = indirectModels.size() * IndirectBuffers.MODEL_STRIDE;
long writePtr = stagingBuffer.reserveForTransferTo(totalSize, buffers.model, 0);
if (writePtr == MemoryUtil.NULL) {
var block = MemoryBlock.malloc(totalSize);
writeModels(block.ptr());
stagingBuffer.enqueueCopy(block.ptr(), totalSize, buffers.model, 0);
block.free();
} else {
writeModels(writePtr);
}
}
private void writeModels(long writePtr) {
for (var batch : indirectModels) { for (var batch : indirectModels) {
batch.writeModel(writePtr); batch.writeModel(writePtr);
writePtr += IndirectBuffers.MODEL_STRIDE; writePtr += IndirectBuffers.MODEL_STRIDE;
} }
buffers.flushModels(writePtr - buffers.modelPtr.ptr());
} }
private void uploadIndirectCommands() { private void uploadIndirectCommands(StagingBuffer stagingBuffer) {
long writePtr = buffers.drawPtr.ptr(); var totalSize = indirectDraws.size() * IndirectBuffers.DRAW_COMMAND_STRIDE;
long writePtr = stagingBuffer.reserveForTransferTo(totalSize, buffers.draw, 0);
if (writePtr == MemoryUtil.NULL) {
var block = MemoryBlock.malloc(totalSize);
writeCommands(block.ptr());
stagingBuffer.enqueueCopy(block.ptr(), totalSize, buffers.draw, 0);
block.free();
} else {
writeCommands(writePtr);
}
}
private void writeCommands(long writePtr) {
for (var batch : indirectDraws) { for (var batch : indirectDraws) {
batch.writeIndirectCommand(writePtr); batch.writeIndirectCommand(writePtr);
writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE; writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE;
} }
buffers.flushDrawCommands(writePtr - buffers.drawPtr.ptr());
} }
private int calculateTotalInstanceCountAndPrepareBatches() { private int calculateTotalInstanceCountAndPrepareBatches() {

View file

@ -11,7 +11,8 @@ import com.jozufozu.flywheel.backend.engine.InstancerKey;
import com.jozufozu.flywheel.backend.engine.InstancerStorage; import com.jozufozu.flywheel.backend.engine.InstancerStorage;
public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>> { public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>> {
public final Map<InstanceType<?>, IndirectCullingGroup<?>> renderLists = new HashMap<>(); private final StagingBuffer stagingBuffer = new StagingBuffer();
public final Map<InstanceType<?>, IndirectCullingGroup<?>> cullingGroups = new HashMap<>();
@Override @Override
protected <I extends Instance> IndirectInstancer<?> create(InstanceType<I> type) { protected <I extends Instance> IndirectInstancer<?> create(InstanceType<I> type) {
@ -20,13 +21,13 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
@Override @Override
protected <I extends Instance> void add(InstancerKey<I> key, IndirectInstancer<?> instancer, Model model, RenderStage stage) { protected <I extends Instance> void add(InstancerKey<I> key, IndirectInstancer<?> instancer, Model model, RenderStage stage) {
var indirectList = (IndirectCullingGroup<I>) renderLists.computeIfAbsent(key.type(), IndirectCullingGroup::new); var indirectList = (IndirectCullingGroup<I>) cullingGroups.computeIfAbsent(key.type(), IndirectCullingGroup::new);
indirectList.add((IndirectInstancer<I>) instancer, stage, model); indirectList.add((IndirectInstancer<I>) instancer, stage, model);
} }
public boolean hasStage(RenderStage stage) { public boolean hasStage(RenderStage stage) {
for (var list : renderLists.values()) { for (var list : cullingGroups.values()) {
if (list.hasStage(stage)) { if (list.hasStage(stage)) {
return true; return true;
} }
@ -38,25 +39,31 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
public void flush() { public void flush() {
super.flush(); super.flush();
for (var group : renderLists.values()) { stagingBuffer.reclaim();
group.flush();
for (var group : cullingGroups.values()) {
group.flush(stagingBuffer);
} }
for (var group : renderLists.values()) { stagingBuffer.flush();
for (var group : cullingGroups.values()) {
group.dispatchCull(); group.dispatchCull();
} }
for (var group : renderLists.values()) { for (var group : cullingGroups.values()) {
group.dispatchApply(); group.dispatchApply();
} }
} }
@Override @Override
public void invalidate() { public void delete() {
super.invalidate(); super.delete();
renderLists.values() cullingGroups.values()
.forEach(IndirectCullingGroup::delete); .forEach(IndirectCullingGroup::delete);
renderLists.clear(); cullingGroups.clear();
stagingBuffer.delete();
} }
} }

View file

@ -61,7 +61,7 @@ public class IndirectEngine extends AbstractEngine {
GlTextureUnit.T2.makeActive(); GlTextureUnit.T2.makeActive();
RenderSystem.bindTexture(RenderSystem.getShaderTexture(2)); RenderSystem.bindTexture(RenderSystem.getShaderTexture(2));
for (var list : drawManager.renderLists.values()) { for (var list : drawManager.cullingGroups.values()) {
list.submit(stage); list.submit(stage);
} }
@ -85,6 +85,6 @@ public class IndirectEngine extends AbstractEngine {
@Override @Override
public void delete() { public void delete() {
drawManager.invalidate(); drawManager.delete();
} }
} }

View file

@ -6,46 +6,79 @@ import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.instance.InstanceType; import com.jozufozu.flywheel.api.instance.InstanceType;
import com.jozufozu.flywheel.api.instance.InstanceWriter; import com.jozufozu.flywheel.api.instance.InstanceWriter;
import com.jozufozu.flywheel.backend.engine.AbstractInstancer; import com.jozufozu.flywheel.backend.engine.AbstractInstancer;
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I> { public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I> {
private final long instanceStride; private final long instanceStride;
private final long objectStride; private final long objectStride;
private final InstanceWriter<I> writer;
public IndirectInstancer(InstanceType<I> type) { public IndirectInstancer(InstanceType<I> type) {
super(type); super(type);
this.instanceStride = type.getLayout() this.instanceStride = type.getLayout()
.getStride(); .getStride();
this.objectStride = instanceStride + IndirectBuffers.INT_SIZE; this.objectStride = instanceStride + IndirectBuffers.INT_SIZE;
writer = this.type.getWriter();
} }
public void update() { public void update() {
removeDeletedInstances(); removeDeletedInstances();
} }
public void writeSparse(long objectPtr, int batchID) { public void writeSparse(StagingBuffer stagingBuffer, long start, int modelID, int dstVbo) {
int count = instances.size(); int count = instances.size();
InstanceWriter<I> writer = type.getWriter(); // Backup buffer for when we can't write to the staging buffer.
MemoryBlock backup = null;
for (int i = changed.nextSetBit(0); i >= 0 && i < count; i = changed.nextSetBit(i + 1)) { for (int i = changed.nextSetBit(0); i >= 0 && i < count; i = changed.nextSetBit(i + 1)) {
long ptr = objectPtr + objectStride * i; long ptr = stagingBuffer.reserveForTransferTo(objectStride, dstVbo, start + i * objectStride);
// write batchID if (ptr == MemoryUtil.NULL) {
MemoryUtil.memPutInt(ptr, batchID); // Staging buffer can't fit this object, so we'll have to write it to a backup buffer.
// write object if (backup == null) {
writer.write(ptr + IndirectBuffers.INT_SIZE, instances.get(i)); backup = MemoryBlock.malloc(objectStride);
}
writeOne(backup.ptr(), instances.get(i), modelID);
stagingBuffer.enqueueCopy(backup.ptr(), objectStride, dstVbo, start + i * objectStride);
} else {
writeOne(ptr, instances.get(i), modelID);
}
} }
changed.clear(); changed.clear();
// Free the backup buffer if we allocated one.
if (backup != null) {
backup.free();
}
}
public void writeFull(StagingBuffer stagingBuffer, long start, int modelID, int dstVbo) {
long totalSize = objectStride * instances.size();
long ptr = stagingBuffer.reserveForTransferTo(totalSize, dstVbo, start);
if (ptr != MemoryUtil.NULL) {
writeAll(ptr, modelID);
} else {
var block = MemoryBlock.malloc(totalSize);
writeAll(block.ptr(), modelID);
stagingBuffer.enqueueCopy(block.ptr(), totalSize, dstVbo, start);
block.free();
}
changed.clear();
} }
public void writeFull(long objectPtr, int modelID) { private void writeAll(long ptr, int modelID) {
InstanceWriter<I> writer = type.getWriter(); for (I instance : instances) {
for (I object : instances) { writeOne(ptr, instance, modelID);
// write modelID ptr += objectStride;
MemoryUtil.memPutInt(objectPtr, modelID);
objectPtr += IndirectBuffers.INT_SIZE;
// write object
writer.write(objectPtr, object);
objectPtr += instanceStride;
} }
changed.clear(); }
private void writeOne(long ptr, I instance, int modelID) {
// write modelID
MemoryUtil.memPutInt(ptr, modelID);
// write object
writer.write(ptr + IndirectBuffers.INT_SIZE, instance);
} }
} }

View file

@ -60,14 +60,15 @@ public class IndirectMeshPool {
return meshes.get(mesh); return meshes.get(mesh);
} }
public void flush() { public void flush(StagingBuffer stagingBuffer) {
if (dirty) { if (dirty) {
uploadAll(); // TODO: use the staging buffer and be smarter about allocation in general.
uploadAll(stagingBuffer);
dirty = false; dirty = false;
} }
} }
private void uploadAll() { private void uploadAll(StagingBuffer stagingBuffer) {
long neededSize = 0; long neededSize = 0;
int maxQuadIndexCount = 0; int maxQuadIndexCount = 0;
int nonQuadIndexCount = 0; int nonQuadIndexCount = 0;

View file

@ -35,11 +35,11 @@ public class IndirectModel {
needsFullWrite = true; needsFullWrite = true;
} }
public void writeObjects(long objectPtr) { public void writeObjects(StagingBuffer stagingBuffer, long start, int dstVbo) {
if (needsFullWrite) { if (needsFullWrite) {
instancer.writeFull(objectPtr, id); instancer.writeFull(stagingBuffer, start, id, dstVbo);
} else { } else {
instancer.writeSparse(objectPtr, id); instancer.writeSparse(stagingBuffer, start, id, dstVbo);
} }
} }
} }

View file

@ -0,0 +1,230 @@
package com.jozufozu.flywheel.backend.engine.indirect;
import java.util.ArrayList;
import java.util.List;
import org.lwjgl.opengl.GL45C;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.gl.GlFence;
import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
import it.unimi.dsi.fastutil.PriorityQueue;
import it.unimi.dsi.fastutil.objects.ObjectArrayFIFOQueue;
// https://github.com/CaffeineMC/sodium-fabric/blob/dev/src/main/java/me/jellysquid/mods/sodium/client/gl/arena/staging/MappedStagingBuffer.java
/**
 * A persistently mapped ring buffer used to stage CPU data for transfer into other GL buffers.
 * <br>
 * Data is written into the mapping either directly (via {@link #reserveForTransferTo}) or by copy
 * (via {@link #enqueueCopy}). {@link #flush} then flushes the written ranges, issues the buffer-to-buffer
 * copies, and places a fence behind them. {@link #reclaim} returns the space of every flushed region
 * whose fence has signaled.
 * <br>
 * Copies that do not fit in the currently available space are routed through a separate overflow buffer.
 */
public class StagingBuffer {
    private static final long DEFAULT_CAPACITY = 1024 * 1024 * 8;
    private static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT;
    private static final int MAP_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_MAP_FLUSH_EXPLICIT_BIT | GL45C.GL_MAP_INVALIDATE_BUFFER_BIT;

    private final int vbo;
    private final long map;
    private final long capacity;

    // Offset of the first byte written since the last flush.
    private long start = 0;
    // Offset at which the next write will land.
    private long pos = 0;
    // Bytes that are neither pending a flush nor covered by an unsignaled fence.
    private long totalAvailable;

    private final OverflowStagingBuffer overflow = new OverflowStagingBuffer();
    private final PriorityQueue<Transfer> transfers = new ObjectArrayFIFOQueue<>();
    private final PriorityQueue<FencedRegion> fencedRegions = new ObjectArrayFIFOQueue<>();

    /**
     * Create a staging buffer with the default capacity.
     */
    public StagingBuffer() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Create a staging buffer backed by a persistently mapped GL buffer.
     *
     * @param capacity The size of the ring buffer in bytes.
     * @throws IllegalStateException If the buffer could not be mapped.
     */
    public StagingBuffer(long capacity) {
        this.capacity = capacity;
        vbo = GL45C.glCreateBuffers();

        GL45C.glNamedBufferStorage(vbo, capacity, STORAGE_FLAGS);
        map = GL45C.nglMapNamedBufferRange(vbo, 0, capacity, MAP_FLAGS);

        if (map == MemoryUtil.NULL) {
            // Writing through a null mapping would take down the JVM, so fail loudly and release what we created.
            GL45C.glDeleteBuffers(vbo);
            overflow.delete();
            throw new IllegalStateException("Could not map staging buffer of " + capacity + " bytes");
        }

        totalAvailable = capacity;

        FlwMemoryTracker._allocCPUMemory(capacity);
    }

    /**
     * Enqueue a copy from the given pointer to the given VBO.
     * <br>
     * The data is copied out of {@code ptr} before this method returns. If the copy does not fit in the
     * space currently available, it is sent through the overflow buffer instead.
     *
     * @param ptr The pointer to copy from.
     * @param size The size of the copy.
     * @param dstVbo The VBO to copy to.
     * @param dstOffset The offset in the destination VBO.
     */
    public void enqueueCopy(long ptr, long size, int dstVbo, long dstOffset) {
        if (size > totalAvailable) {
            overflow.enqueueCopy(ptr, size, dstVbo, dstOffset);
            return;
        }

        long remaining = capacity - pos;

        if (size > remaining) {
            long split = size - remaining;

            // Put the first span at the tail of the buffer...
            // (skipped when the tail is already full so we never enqueue an empty transfer)
            if (remaining > 0) {
                MemoryUtil.memCopy(ptr, map + pos, remaining);
                transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, remaining));
            }

            // ... and the rest at the head.
            MemoryUtil.memCopy(ptr + remaining, map, split);
            transfers.enqueue(new Transfer(0, dstVbo, dstOffset + remaining, split));

            pos = split;
        } else {
            MemoryUtil.memCopy(ptr, map + pos, size);
            transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, size));

            pos += size;
        }

        totalAvailable -= size;
    }

    /**
     * Reserve space in this buffer for a transfer to another VBO.
     * <br>
     * You must ensure that your writes are complete before the next call to {@link #flush}.
     * <br>
     * This will generally be a more efficient way to transfer data as it avoids a copy, however,
     * this method does not allow for non-contiguous writes, so you should fall back to
     * {@link #enqueueCopy} if this returns {@link MemoryUtil#NULL}.
     *
     * @param size The size of the transfer you wish to make.
     * @param dstVbo The VBO you wish to transfer to.
     * @param dstOffset The offset in the destination VBO.
     * @return A pointer to the reserved space, or {@link MemoryUtil#NULL} if there is not enough
     * contiguous space that is free to be written.
     */
    public long reserveForTransferTo(long size, int dstVbo, long dstOffset) {
        // Both constraints matter: the bytes up to the end of the buffer bound the contiguous span,
        // but once the ring has wrapped some of those bytes may still belong to a region whose fence
        // has not signaled. totalAvailable is what accounts for those.
        long remaining = capacity - pos;
        if (size > remaining || size > totalAvailable) {
            return MemoryUtil.NULL;
        }

        long out = map + pos;

        transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, size));

        pos += size;
        totalAvailable -= size;

        return out;
    }

    /**
     * Flush everything written since the last flush, issue the queued copies,
     * and fence the region so it can later be returned by {@link #reclaim}.
     */
    public void flush() {
        if (transfers.isEmpty()) {
            return;
        }

        List<Transfer> copies = consolidateCopies(transfers);

        long usedCapacity = 0;
        for (Transfer transfer : copies) {
            usedCapacity += transfer.size;
        }

        if (usedCapacity == 0) {
            // Only empty reservations were made, so there is nothing to flush, copy, or fence.
            return;
        }

        // At this point at least one byte was written. If pos == start the write consumed the
        // entire ring and wrapped back onto itself, which must be treated like any other wrap.
        if (pos <= start) {
            // we rolled around, need to flush 2 ranges.
            flushRange(start, capacity - start);
            flushRange(0, pos);
        } else {
            flushRange(start, pos - start);
        }

        for (Transfer transfer : copies) {
            if (transfer.size > 0) {
                GL45C.glCopyNamedBufferSubData(vbo, transfer.dstVbo, transfer.srcOffset, transfer.dstOffset, transfer.size);
            }
        }

        fencedRegions.enqueue(new FencedRegion(new GlFence(), usedCapacity));

        start = pos;
    }

    /**
     * Flush a range of the mapping, skipping empty ranges.
     */
    private void flushRange(long offset, long length) {
        if (length > 0) {
            GL45C.glFlushMappedNamedBufferRange(vbo, offset, length);
        }
    }

    /**
     * Drain the queue into a list, merging neighbouring transfers that are contiguous
     * in both the staging buffer and the destination buffer.
     */
    private static List<Transfer> consolidateCopies(PriorityQueue<Transfer> queue) {
        List<Transfer> merged = new ArrayList<>();
        Transfer last = null;

        while (!queue.isEmpty()) {
            Transfer transfer = queue.dequeue();

            if (last != null && areContiguous(last, transfer)) {
                last.size += transfer.size;
                continue;
            }

            // Copy so that growing the merged entry never mutates the queued one.
            last = new Transfer(transfer);
            merged.add(last);
        }

        return merged;
    }

    private static boolean areContiguous(Transfer last, Transfer transfer) {
        return last.dstVbo == transfer.dstVbo && last.dstOffset + last.size == transfer.dstOffset && last.srcOffset + last.size == transfer.srcOffset;
    }

    /**
     * Return the space of every flushed region whose fence has signaled, oldest first.
     */
    public void reclaim() {
        while (!fencedRegions.isEmpty()) {
            var region = fencedRegions.first();
            if (!region.fence.isSignaled()) {
                // We can't reclaim this region yet, and we know that all the regions after it are also not ready.
                break;
            }
            fencedRegions.dequeue();

            region.fence.delete();

            totalAvailable += region.capacity;
        }
    }

    /**
     * Release the GL buffer, the overflow buffer, and any fences that are still outstanding.
     */
    public void delete() {
        GL45C.glUnmapNamedBuffer(vbo);
        GL45C.glDeleteBuffers(vbo);
        overflow.delete();

        // Fences that never got reclaimed are still live sync objects; delete them so they don't leak.
        while (!fencedRegions.isEmpty()) {
            fencedRegions.dequeue().fence.delete();
        }

        FlwMemoryTracker._freeCPUMemory(capacity);
    }

    /**
     * A pending copy of {@code size} bytes from {@code srcOffset} in the staging buffer
     * to {@code dstOffset} in {@code dstVbo}.
     */
    private static final class Transfer {
        private final long srcOffset;
        private final int dstVbo;
        private final long dstOffset;
        // Mutable so that contiguous transfers can be merged.
        private long size;

        private Transfer(long srcOffset, int dstVbo, long dstOffset, long size) {
            this.srcOffset = srcOffset;
            this.dstVbo = dstVbo;
            this.dstOffset = dstOffset;
            this.size = size;
        }

        public Transfer(Transfer other) {
            this(other.srcOffset, other.dstVbo, other.dstOffset, other.size);
        }
    }

    /**
     * The number of staging bytes consumed by one flush, and the fence placed after its copies.
     */
    private record FencedRegion(GlFence fence, long capacity) {
    }

    /**
     * Fallback path for copies that do not fit in the ring buffer: the data is uploaded
     * into a scratch buffer and copied to the destination immediately.
     */
    private static class OverflowStagingBuffer {
        private final int vbo;

        public OverflowStagingBuffer() {
            vbo = GL45C.glCreateBuffers();
        }

        public void enqueueCopy(long ptr, long size, int dstVbo, long dstOffset) {
            GL45C.nglNamedBufferData(vbo, size, ptr, GL45C.GL_STREAM_COPY);
            GL45C.glCopyNamedBufferSubData(vbo, dstVbo, 0, dstOffset, size);
        }

        public void delete() {
            GL45C.glDeleteBuffers(vbo);
        }
    }
}

View file

@ -8,7 +8,7 @@ import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.model.IndexSequence; import com.jozufozu.flywheel.api.model.IndexSequence;
import com.jozufozu.flywheel.gl.GlNumericType; import com.jozufozu.flywheel.gl.GlNumericType;
import com.jozufozu.flywheel.gl.buffer.GlBuffer; import com.jozufozu.flywheel.gl.buffer.Buffer;
import com.jozufozu.flywheel.gl.buffer.GlBufferUsage; import com.jozufozu.flywheel.gl.buffer.GlBufferUsage;
import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker; import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
import com.jozufozu.flywheel.lib.model.QuadIndexSequence; import com.jozufozu.flywheel.lib.model.QuadIndexSequence;
@ -59,12 +59,12 @@ public class EBOCache {
@NotNull @NotNull
private static Entry create(IndexSequence provider, int indexCount) { private static Entry create(IndexSequence provider, int indexCount) {
int byteSize = indexCount * GlNumericType.UINT.byteWidth(); int byteSize = indexCount * GlNumericType.UINT.byteWidth();
var ebo = GlBuffer.IMPL.create(); var ebo = Buffer.IMPL.create();
final long ptr = MemoryUtil.nmemAlloc(byteSize); final long ptr = MemoryUtil.nmemAlloc(byteSize);
provider.fill(ptr, indexCount); provider.fill(ptr, indexCount);
GlBuffer.IMPL.data(ebo, byteSize, ptr, GlBufferUsage.STATIC_DRAW.glEnum); Buffer.IMPL.data(ebo, byteSize, ptr, GlBufferUsage.STATIC_DRAW.glEnum);
FlwMemoryTracker._allocGPUMemory(byteSize); FlwMemoryTracker._allocGPUMemory(byteSize);
MemoryUtil.nmemFree(ptr); MemoryUtil.nmemFree(ptr);

View file

@ -39,8 +39,8 @@ public class InstancedDrawManager extends InstancerStorage<InstancedInstancer<?>
meshPool.flush(); meshPool.flush();
} }
public void invalidate() { public void delete() {
super.invalidate(); super.delete();
meshPool.delete(); meshPool.delete();

View file

@ -96,7 +96,7 @@ public class InstancingEngine extends AbstractEngine {
@Override @Override
public void delete() { public void delete() {
drawManager.invalidate(); drawManager.delete();
} }
private void render(InstancedDrawManager.DrawSet drawSet) { private void render(InstancedDrawManager.DrawSet drawSet) {

View file

@ -1,68 +1,34 @@
package com.jozufozu.flywheel.gl; package com.jozufozu.flywheel.gl;
import static org.lwjgl.opengl.GL32.GL_SIGNALED; import static org.lwjgl.opengl.GL32.GL_SIGNALED;
import static org.lwjgl.opengl.GL32.GL_SYNC_FLUSH_COMMANDS_BIT;
import static org.lwjgl.opengl.GL32.GL_SYNC_GPU_COMMANDS_COMPLETE; import static org.lwjgl.opengl.GL32.GL_SYNC_GPU_COMMANDS_COMPLETE;
import static org.lwjgl.opengl.GL32.GL_SYNC_STATUS; import static org.lwjgl.opengl.GL32.GL_SYNC_STATUS;
import static org.lwjgl.opengl.GL32.GL_TIMEOUT_IGNORED;
import static org.lwjgl.opengl.GL32.glClientWaitSync;
import static org.lwjgl.opengl.GL32.glDeleteSync; import static org.lwjgl.opengl.GL32.glDeleteSync;
import static org.lwjgl.opengl.GL32.glFenceSync; import static org.lwjgl.opengl.GL32.glFenceSync;
import static org.lwjgl.opengl.GL32.nglGetSynciv;
import org.lwjgl.opengl.GL32;
import org.lwjgl.system.MemoryStack; import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.MemoryUtil; import org.lwjgl.system.MemoryUtil;
// https://github.com/CaffeineMC/sodium-fabric/blob/da17fc8d0cb1a4e82fe6956ac4f07a63d32eca5a/components/gfx-opengl/src/main/java/net/caffeinemc/gfx/opengl/sync/GlFence.java
public class GlFence { public class GlFence {
private final long fence;
private long fence; public GlFence() {
public void post() {
clear();
fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
} }
public void clear() { public boolean isSignaled() {
if (fence != 0) {
glDeleteSync(fence);
fence = 0;
}
}
public boolean poll() {
if (fence != 0) {
poll0();
}
return fence == 0;
}
private void poll0() {
int result; int result;
try (var memoryStack = MemoryStack.stackPush()) { try (var memoryStack = MemoryStack.stackPush()) {
long checkPtr = memoryStack.ncalloc(Integer.BYTES, 0, Integer.BYTES); long checkPtr = memoryStack.ncalloc(Integer.BYTES, 0, Integer.BYTES);
GL32.nglGetSynciv(fence, GL_SYNC_STATUS, 1, MemoryUtil.NULL, checkPtr); nglGetSynciv(fence, GL_SYNC_STATUS, 1, MemoryUtil.NULL, checkPtr);
result = MemoryUtil.memGetInt(checkPtr); result = MemoryUtil.memGetInt(checkPtr);
} }
return result == GL_SIGNALED;
if (result == GL_SIGNALED) {
glDeleteSync(fence);
fence = 0;
}
} }
public void waitSync() { public void delete() {
if (poll()) {
return;
}
glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
glDeleteSync(fence); glDeleteSync(fence);
fence = 0;
} }
} }

View file

@ -9,6 +9,8 @@ import org.lwjgl.system.Checks;
import com.jozufozu.flywheel.gl.GlCompat; import com.jozufozu.flywheel.gl.GlCompat;
public interface Buffer { public interface Buffer {
Buffer IMPL = new DSA().fallback();
int create(); int create();
void data(int vbo, long size, long ptr, int glEnum); void data(int vbo, long size, long ptr, int glEnum);

View file

@ -10,7 +10,6 @@ import com.mojang.blaze3d.platform.GlStateManager;
import it.unimi.dsi.fastutil.longs.LongUnaryOperator; import it.unimi.dsi.fastutil.longs.LongUnaryOperator;
public class GlBuffer extends GlObject { public class GlBuffer extends GlObject {
public static final Buffer IMPL = new Buffer.DSA().fallback();
protected final GlBufferUsage usage; protected final GlBufferUsage usage;
/** /**
* The size (in bytes) of the buffer on the GPU. * The size (in bytes) of the buffer on the GPU.
@ -26,7 +25,7 @@ public class GlBuffer extends GlObject {
} }
public GlBuffer(GlBufferUsage usage) { public GlBuffer(GlBufferUsage usage) {
handle(IMPL.create()); handle(Buffer.IMPL.create());
this.usage = usage; this.usage = usage;
} }
@ -57,7 +56,7 @@ public class GlBuffer extends GlObject {
private void alloc(long capacity) { private void alloc(long capacity) {
increaseSize(capacity); increaseSize(capacity);
IMPL.data(handle(), size, MemoryUtil.NULL, usage.glEnum); Buffer.IMPL.data(handle(), size, MemoryUtil.NULL, usage.glEnum);
FlwMemoryTracker._allocGPUMemory(size); FlwMemoryTracker._allocGPUMemory(size);
} }
@ -67,9 +66,9 @@ public class GlBuffer extends GlObject {
increaseSize(capacity); increaseSize(capacity);
int oldHandle = handle(); int oldHandle = handle();
int newHandle = IMPL.create(); int newHandle = Buffer.IMPL.create();
IMPL.data(newHandle, size, MemoryUtil.NULL, usage.glEnum); Buffer.IMPL.data(newHandle, size, MemoryUtil.NULL, usage.glEnum);
IMPL.copyData(oldHandle, newHandle, 0, 0, oldSize); Buffer.IMPL.copyData(oldHandle, newHandle, 0, 0, oldSize);
GlStateManager._glDeleteBuffers(oldHandle); GlStateManager._glDeleteBuffers(oldHandle);
handle(newHandle); handle(newHandle);
@ -85,7 +84,7 @@ public class GlBuffer extends GlObject {
public void upload(MemoryBlock directBuffer) { public void upload(MemoryBlock directBuffer) {
FlwMemoryTracker._freeGPUMemory(size); FlwMemoryTracker._freeGPUMemory(size);
IMPL.data(handle(), directBuffer.size(), directBuffer.ptr(), usage.glEnum); Buffer.IMPL.data(handle(), directBuffer.size(), directBuffer.ptr(), usage.glEnum);
size = directBuffer.size(); size = directBuffer.size();
FlwMemoryTracker._allocGPUMemory(size); FlwMemoryTracker._allocGPUMemory(size);
} }

View file

@ -15,7 +15,7 @@ public class MappedBuffer implements AutoCloseable {
public MappedBuffer(int glBuffer, long size) { public MappedBuffer(int glBuffer, long size) {
this.glBuffer = glBuffer; this.glBuffer = glBuffer;
ptr = GlBuffer.IMPL.mapRange(glBuffer, 0, size, GL_MAP_WRITE_BIT); ptr = Buffer.IMPL.mapRange(glBuffer, 0, size, GL_MAP_WRITE_BIT);
if (ptr == MemoryUtil.NULL) { if (ptr == MemoryUtil.NULL) {
throw new GlException(GlError.poll(), "Could not map buffer"); throw new GlException(GlError.poll(), "Could not map buffer");
@ -32,7 +32,7 @@ public class MappedBuffer implements AutoCloseable {
return; return;
} }
GlBuffer.IMPL.unmap(glBuffer); Buffer.IMPL.unmap(glBuffer);
ptr = NULL; ptr = NULL;
} }
} }