mirror of
https://github.com/Jozufozu/Flywheel.git
synced 2025-01-07 12:56:31 +01:00
OSHA Theatre
- Add staging buffer for indirect using unholy amounts of unsafe. - Make GlFence RAII. - Make all IndirectBuffers GPU only and use a shared staging buffer for transfers.
This commit is contained in:
parent
de0876c2ee
commit
c0d1e736e4
18 changed files with 389 additions and 154 deletions
|
@ -66,7 +66,7 @@ public abstract class InstancerStorage<N extends AbstractInstancer<?>> {
|
|||
}
|
||||
}
|
||||
|
||||
public void invalidate() {
|
||||
public void delete() {
|
||||
instancers.clear();
|
||||
uninitializedInstancers.clear();
|
||||
|
||||
|
|
|
@ -69,8 +69,8 @@ class BatchedDrawManager extends InstancerStorage<BatchedInstancer<?>> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void invalidate() {
|
||||
super.invalidate();
|
||||
public void delete() {
|
||||
super.delete();
|
||||
|
||||
meshPools.values()
|
||||
.forEach(BatchedMeshPool::delete);
|
||||
|
|
|
@ -58,6 +58,6 @@ public class BatchingEngine extends AbstractEngine {
|
|||
|
||||
@Override
|
||||
public void delete() {
|
||||
drawManager.invalidate();
|
||||
drawManager.delete();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,19 +2,12 @@ package com.jozufozu.flywheel.backend.engine.indirect;
|
|||
|
||||
import static org.lwjgl.opengl.GL15.glDeleteBuffers;
|
||||
import static org.lwjgl.opengl.GL15.nglDeleteBuffers;
|
||||
import static org.lwjgl.opengl.GL30.GL_MAP_FLUSH_EXPLICIT_BIT;
|
||||
import static org.lwjgl.opengl.GL30.GL_MAP_WRITE_BIT;
|
||||
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER;
|
||||
import static org.lwjgl.opengl.GL44.GL_DYNAMIC_STORAGE_BIT;
|
||||
import static org.lwjgl.opengl.GL44.GL_MAP_PERSISTENT_BIT;
|
||||
import static org.lwjgl.opengl.GL44.nglBindBuffersRange;
|
||||
import static org.lwjgl.opengl.GL45.glCopyNamedBufferSubData;
|
||||
import static org.lwjgl.opengl.GL45.glCreateBuffers;
|
||||
import static org.lwjgl.opengl.GL45.glFlushMappedNamedBufferRange;
|
||||
import static org.lwjgl.opengl.GL45.glNamedBufferStorage;
|
||||
import static org.lwjgl.opengl.GL45.nglCreateBuffers;
|
||||
import static org.lwjgl.opengl.GL45.nglMapNamedBufferRange;
|
||||
import static org.lwjgl.opengl.GL45.nglNamedBufferSubData;
|
||||
|
||||
import org.lwjgl.system.MemoryUtil;
|
||||
import org.lwjgl.system.Pointer;
|
||||
|
@ -23,6 +16,7 @@ import com.jozufozu.flywheel.gl.buffer.GlBufferType;
|
|||
import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
|
||||
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
|
||||
|
||||
// TODO: better abstractions
|
||||
public class IndirectBuffers {
|
||||
// Number of vbos created.
|
||||
public static final int BUFFER_COUNT = 4;
|
||||
|
@ -36,12 +30,6 @@ public class IndirectBuffers {
|
|||
|
||||
public static final long MODEL_STRIDE = 24;
|
||||
|
||||
// BITS
|
||||
private static final int SUB_DATA_BITS = GL_DYNAMIC_STORAGE_BIT;
|
||||
private static final int PERSISTENT_BITS = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT;
|
||||
private static final int MAP_BITS = PERSISTENT_BITS | GL_MAP_FLUSH_EXPLICIT_BIT;
|
||||
private static final int GPU_ONLY_BITS = 0;
|
||||
|
||||
// Offsets to the vbos
|
||||
private static final long VBO_OFFSET = 0;
|
||||
private static final long OBJECT_OFFSET = VBO_OFFSET;
|
||||
|
@ -74,12 +62,11 @@ public class IndirectBuffers {
|
|||
*/
|
||||
private final MemoryBlock buffers;
|
||||
private final long objectStride;
|
||||
private int object;
|
||||
private int target;
|
||||
private int model;
|
||||
private int draw;
|
||||
public int object;
|
||||
public int target;
|
||||
public int model;
|
||||
public int draw;
|
||||
|
||||
long objectPtr;
|
||||
MemoryBlock modelPtr;
|
||||
MemoryBlock drawPtr;
|
||||
|
||||
|
@ -135,8 +122,8 @@ public class IndirectBuffers {
|
|||
int objectNew = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET);
|
||||
int targetNew = MemoryUtil.memGetInt(ptr + TARGET_OFFSET);
|
||||
|
||||
glNamedBufferStorage(objectNew, objectSize, PERSISTENT_BITS);
|
||||
glNamedBufferStorage(targetNew, targetSize, GPU_ONLY_BITS);
|
||||
glNamedBufferStorage(objectNew, objectSize, 0);
|
||||
glNamedBufferStorage(targetNew, targetSize, 0);
|
||||
|
||||
glCopyNamedBufferSubData(object, objectNew, 0, 0, objectStride * maxObjectCount);
|
||||
glCopyNamedBufferSubData(target, targetNew, 0, 0, INT_SIZE * maxObjectCount);
|
||||
|
@ -147,11 +134,10 @@ public class IndirectBuffers {
|
|||
object = objectNew;
|
||||
target = targetNew;
|
||||
} else {
|
||||
glNamedBufferStorage(object, objectSize, PERSISTENT_BITS);
|
||||
glNamedBufferStorage(target, targetSize, GPU_ONLY_BITS);
|
||||
glNamedBufferStorage(object, objectSize, 0);
|
||||
glNamedBufferStorage(target, targetSize, 0);
|
||||
}
|
||||
|
||||
objectPtr = nglMapNamedBufferRange(object, 0, objectSize, MAP_BITS);
|
||||
maxObjectCount = objectCount;
|
||||
|
||||
FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride);
|
||||
|
@ -164,7 +150,7 @@ public class IndirectBuffers {
|
|||
if (maxModelCount > 0) {
|
||||
int modelNew = glCreateBuffers();
|
||||
|
||||
glNamedBufferStorage(modelNew, modelSize, SUB_DATA_BITS);
|
||||
glNamedBufferStorage(modelNew, modelSize, 0);
|
||||
|
||||
glDeleteBuffers(model);
|
||||
|
||||
|
@ -172,7 +158,7 @@ public class IndirectBuffers {
|
|||
model = modelNew;
|
||||
modelPtr = modelPtr.realloc(modelSize);
|
||||
} else {
|
||||
glNamedBufferStorage(model, modelSize, SUB_DATA_BITS);
|
||||
glNamedBufferStorage(model, modelSize, 0);
|
||||
modelPtr = MemoryBlock.malloc(modelSize);
|
||||
}
|
||||
maxModelCount = modelCount;
|
||||
|
@ -186,7 +172,7 @@ public class IndirectBuffers {
|
|||
if (maxDrawCount > 0) {
|
||||
int drawNew = glCreateBuffers();
|
||||
|
||||
glNamedBufferStorage(drawNew, drawSize, SUB_DATA_BITS);
|
||||
glNamedBufferStorage(drawNew, drawSize, 0);
|
||||
|
||||
glDeleteBuffers(draw);
|
||||
|
||||
|
@ -194,7 +180,7 @@ public class IndirectBuffers {
|
|||
draw = drawNew;
|
||||
drawPtr = drawPtr.realloc(drawSize);
|
||||
} else {
|
||||
glNamedBufferStorage(draw, drawSize, SUB_DATA_BITS);
|
||||
glNamedBufferStorage(draw, drawSize, 0);
|
||||
drawPtr = MemoryBlock.malloc(drawSize);
|
||||
}
|
||||
maxDrawCount = drawCount;
|
||||
|
@ -227,18 +213,6 @@ public class IndirectBuffers {
|
|||
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, 0, IndirectBuffers.BUFFER_COUNT, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET);
|
||||
}
|
||||
|
||||
void flushObjects(long length) {
|
||||
glFlushMappedNamedBufferRange(object, 0, length);
|
||||
}
|
||||
|
||||
void flushModels(long length) {
|
||||
nglNamedBufferSubData(model, 0, length, modelPtr.ptr());
|
||||
}
|
||||
|
||||
void flushDrawCommands(long length) {
|
||||
nglNamedBufferSubData(draw, 0, length, drawPtr.ptr());
|
||||
}
|
||||
|
||||
public void delete() {
|
||||
nglDeleteBuffers(BUFFER_COUNT, buffers.ptr());
|
||||
buffers.free();
|
||||
|
|
|
@ -15,6 +15,8 @@ import java.util.EnumMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.lwjgl.system.MemoryUtil;
|
||||
|
||||
import com.jozufozu.flywheel.api.event.RenderStage;
|
||||
import com.jozufozu.flywheel.api.instance.Instance;
|
||||
import com.jozufozu.flywheel.api.instance.InstanceType;
|
||||
|
@ -27,6 +29,7 @@ import com.jozufozu.flywheel.backend.engine.UniformBuffer;
|
|||
import com.jozufozu.flywheel.gl.GlCompat;
|
||||
import com.jozufozu.flywheel.gl.shader.GlProgram;
|
||||
import com.jozufozu.flywheel.lib.context.Contexts;
|
||||
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
|
||||
import com.jozufozu.flywheel.lib.model.ModelUtil;
|
||||
|
||||
public class IndirectCullingGroup<I extends Instance> {
|
||||
|
@ -99,7 +102,7 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
}
|
||||
}
|
||||
|
||||
public void flush() {
|
||||
public void flush(StagingBuffer stagingBuffer) {
|
||||
needsDrawBarrier = true;
|
||||
instanceCountThisFrame = calculateTotalInstanceCountAndPrepareBatches();
|
||||
|
||||
|
@ -114,10 +117,10 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
needsSortDraws = false;
|
||||
}
|
||||
|
||||
meshPool.flush();
|
||||
uploadInstances();
|
||||
uploadModels();
|
||||
uploadIndirectCommands();
|
||||
meshPool.flush(stagingBuffer);
|
||||
uploadInstances(stagingBuffer);
|
||||
uploadModels(stagingBuffer);
|
||||
uploadIndirectCommands(stagingBuffer);
|
||||
}
|
||||
|
||||
public void dispatchCull() {
|
||||
|
@ -173,35 +176,55 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
}
|
||||
}
|
||||
|
||||
private void uploadInstances() {
|
||||
long objectPtr = buffers.objectPtr;
|
||||
|
||||
private void uploadInstances(StagingBuffer stagingBuffer) {
|
||||
long pos = 0;
|
||||
for (IndirectModel batch : indirectModels) {
|
||||
var instanceCount = batch.instancer.getInstanceCount();
|
||||
batch.writeObjects(objectPtr);
|
||||
batch.writeObjects(stagingBuffer, pos, buffers.object);
|
||||
|
||||
objectPtr += instanceCount * objectStride;
|
||||
pos += instanceCount * objectStride;
|
||||
}
|
||||
|
||||
buffers.flushObjects(objectPtr - buffers.objectPtr);
|
||||
}
|
||||
|
||||
private void uploadModels() {
|
||||
long writePtr = buffers.modelPtr.ptr();
|
||||
private void uploadModels(StagingBuffer stagingBuffer) {
|
||||
var totalSize = indirectModels.size() * IndirectBuffers.MODEL_STRIDE;
|
||||
long writePtr = stagingBuffer.reserveForTransferTo(totalSize, buffers.model, 0);
|
||||
|
||||
if (writePtr == MemoryUtil.NULL) {
|
||||
var block = MemoryBlock.malloc(totalSize);
|
||||
writeModels(block.ptr());
|
||||
stagingBuffer.enqueueCopy(block.ptr(), totalSize, buffers.model, 0);
|
||||
block.free();
|
||||
} else {
|
||||
writeModels(writePtr);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeModels(long writePtr) {
|
||||
for (var batch : indirectModels) {
|
||||
batch.writeModel(writePtr);
|
||||
writePtr += IndirectBuffers.MODEL_STRIDE;
|
||||
}
|
||||
buffers.flushModels(writePtr - buffers.modelPtr.ptr());
|
||||
}
|
||||
|
||||
private void uploadIndirectCommands() {
|
||||
long writePtr = buffers.drawPtr.ptr();
|
||||
private void uploadIndirectCommands(StagingBuffer stagingBuffer) {
|
||||
var totalSize = indirectDraws.size() * IndirectBuffers.DRAW_COMMAND_STRIDE;
|
||||
long writePtr = stagingBuffer.reserveForTransferTo(totalSize, buffers.draw, 0);
|
||||
if (writePtr == MemoryUtil.NULL) {
|
||||
var block = MemoryBlock.malloc(totalSize);
|
||||
writeCommands(block.ptr());
|
||||
stagingBuffer.enqueueCopy(block.ptr(), totalSize, buffers.draw, 0);
|
||||
block.free();
|
||||
} else {
|
||||
writeCommands(writePtr);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeCommands(long writePtr) {
|
||||
for (var batch : indirectDraws) {
|
||||
batch.writeIndirectCommand(writePtr);
|
||||
writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE;
|
||||
}
|
||||
buffers.flushDrawCommands(writePtr - buffers.drawPtr.ptr());
|
||||
}
|
||||
|
||||
private int calculateTotalInstanceCountAndPrepareBatches() {
|
||||
|
|
|
@ -11,7 +11,8 @@ import com.jozufozu.flywheel.backend.engine.InstancerKey;
|
|||
import com.jozufozu.flywheel.backend.engine.InstancerStorage;
|
||||
|
||||
public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>> {
|
||||
public final Map<InstanceType<?>, IndirectCullingGroup<?>> renderLists = new HashMap<>();
|
||||
private final StagingBuffer stagingBuffer = new StagingBuffer();
|
||||
public final Map<InstanceType<?>, IndirectCullingGroup<?>> cullingGroups = new HashMap<>();
|
||||
|
||||
@Override
|
||||
protected <I extends Instance> IndirectInstancer<?> create(InstanceType<I> type) {
|
||||
|
@ -20,13 +21,13 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
|
|||
|
||||
@Override
|
||||
protected <I extends Instance> void add(InstancerKey<I> key, IndirectInstancer<?> instancer, Model model, RenderStage stage) {
|
||||
var indirectList = (IndirectCullingGroup<I>) renderLists.computeIfAbsent(key.type(), IndirectCullingGroup::new);
|
||||
var indirectList = (IndirectCullingGroup<I>) cullingGroups.computeIfAbsent(key.type(), IndirectCullingGroup::new);
|
||||
|
||||
indirectList.add((IndirectInstancer<I>) instancer, stage, model);
|
||||
}
|
||||
|
||||
public boolean hasStage(RenderStage stage) {
|
||||
for (var list : renderLists.values()) {
|
||||
for (var list : cullingGroups.values()) {
|
||||
if (list.hasStage(stage)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -38,25 +39,31 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
|
|||
public void flush() {
|
||||
super.flush();
|
||||
|
||||
for (var group : renderLists.values()) {
|
||||
group.flush();
|
||||
stagingBuffer.reclaim();
|
||||
|
||||
for (var group : cullingGroups.values()) {
|
||||
group.flush(stagingBuffer);
|
||||
}
|
||||
|
||||
for (var group : renderLists.values()) {
|
||||
stagingBuffer.flush();
|
||||
|
||||
for (var group : cullingGroups.values()) {
|
||||
group.dispatchCull();
|
||||
}
|
||||
|
||||
for (var group : renderLists.values()) {
|
||||
for (var group : cullingGroups.values()) {
|
||||
group.dispatchApply();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void invalidate() {
|
||||
super.invalidate();
|
||||
public void delete() {
|
||||
super.delete();
|
||||
|
||||
renderLists.values()
|
||||
cullingGroups.values()
|
||||
.forEach(IndirectCullingGroup::delete);
|
||||
renderLists.clear();
|
||||
cullingGroups.clear();
|
||||
|
||||
stagingBuffer.delete();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,7 +61,7 @@ public class IndirectEngine extends AbstractEngine {
|
|||
GlTextureUnit.T2.makeActive();
|
||||
RenderSystem.bindTexture(RenderSystem.getShaderTexture(2));
|
||||
|
||||
for (var list : drawManager.renderLists.values()) {
|
||||
for (var list : drawManager.cullingGroups.values()) {
|
||||
list.submit(stage);
|
||||
}
|
||||
|
||||
|
@ -85,6 +85,6 @@ public class IndirectEngine extends AbstractEngine {
|
|||
|
||||
@Override
|
||||
public void delete() {
|
||||
drawManager.invalidate();
|
||||
drawManager.delete();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,46 +6,79 @@ import com.jozufozu.flywheel.api.instance.Instance;
|
|||
import com.jozufozu.flywheel.api.instance.InstanceType;
|
||||
import com.jozufozu.flywheel.api.instance.InstanceWriter;
|
||||
import com.jozufozu.flywheel.backend.engine.AbstractInstancer;
|
||||
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
|
||||
|
||||
public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I> {
|
||||
private final long instanceStride;
|
||||
private final long objectStride;
|
||||
private final InstanceWriter<I> writer;
|
||||
|
||||
public IndirectInstancer(InstanceType<I> type) {
|
||||
super(type);
|
||||
this.instanceStride = type.getLayout()
|
||||
.getStride();
|
||||
this.objectStride = instanceStride + IndirectBuffers.INT_SIZE;
|
||||
writer = this.type.getWriter();
|
||||
}
|
||||
|
||||
public void update() {
|
||||
removeDeletedInstances();
|
||||
}
|
||||
|
||||
public void writeSparse(long objectPtr, int batchID) {
|
||||
public void writeSparse(StagingBuffer stagingBuffer, long start, int modelID, int dstVbo) {
|
||||
int count = instances.size();
|
||||
InstanceWriter<I> writer = type.getWriter();
|
||||
// Backup buffer for when we can't write to the staging buffer.
|
||||
MemoryBlock backup = null;
|
||||
for (int i = changed.nextSetBit(0); i >= 0 && i < count; i = changed.nextSetBit(i + 1)) {
|
||||
long ptr = objectPtr + objectStride * i;
|
||||
// write batchID
|
||||
MemoryUtil.memPutInt(ptr, batchID);
|
||||
// write object
|
||||
writer.write(ptr + IndirectBuffers.INT_SIZE, instances.get(i));
|
||||
long ptr = stagingBuffer.reserveForTransferTo(objectStride, dstVbo, start + i * objectStride);
|
||||
if (ptr == MemoryUtil.NULL) {
|
||||
// Staging buffer can't fit this object, so we'll have to write it to a backup buffer.
|
||||
if (backup == null) {
|
||||
backup = MemoryBlock.malloc(objectStride);
|
||||
}
|
||||
writeOne(backup.ptr(), instances.get(i), modelID);
|
||||
|
||||
stagingBuffer.enqueueCopy(backup.ptr(), objectStride, dstVbo, start + i * objectStride);
|
||||
} else {
|
||||
writeOne(ptr, instances.get(i), modelID);
|
||||
}
|
||||
}
|
||||
changed.clear();
|
||||
|
||||
// Free the backup buffer if we allocated one.
|
||||
if (backup != null) {
|
||||
backup.free();
|
||||
}
|
||||
}
|
||||
|
||||
public void writeFull(StagingBuffer stagingBuffer, long start, int modelID, int dstVbo) {
|
||||
long totalSize = objectStride * instances.size();
|
||||
|
||||
long ptr = stagingBuffer.reserveForTransferTo(totalSize, dstVbo, start);
|
||||
|
||||
if (ptr != MemoryUtil.NULL) {
|
||||
writeAll(ptr, modelID);
|
||||
} else {
|
||||
var block = MemoryBlock.malloc(totalSize);
|
||||
writeAll(block.ptr(), modelID);
|
||||
stagingBuffer.enqueueCopy(block.ptr(), totalSize, dstVbo, start);
|
||||
block.free();
|
||||
}
|
||||
|
||||
changed.clear();
|
||||
}
|
||||
|
||||
public void writeFull(long objectPtr, int modelID) {
|
||||
InstanceWriter<I> writer = type.getWriter();
|
||||
for (I object : instances) {
|
||||
// write modelID
|
||||
MemoryUtil.memPutInt(objectPtr, modelID);
|
||||
objectPtr += IndirectBuffers.INT_SIZE;
|
||||
|
||||
// write object
|
||||
writer.write(objectPtr, object);
|
||||
objectPtr += instanceStride;
|
||||
private void writeAll(long ptr, int modelID) {
|
||||
for (I instance : instances) {
|
||||
writeOne(ptr, instance, modelID);
|
||||
ptr += objectStride;
|
||||
}
|
||||
changed.clear();
|
||||
}
|
||||
|
||||
private void writeOne(long ptr, I instance, int modelID) {
|
||||
// write modelID
|
||||
MemoryUtil.memPutInt(ptr, modelID);
|
||||
// write object
|
||||
writer.write(ptr + IndirectBuffers.INT_SIZE, instance);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,14 +60,15 @@ public class IndirectMeshPool {
|
|||
return meshes.get(mesh);
|
||||
}
|
||||
|
||||
public void flush() {
|
||||
public void flush(StagingBuffer stagingBuffer) {
|
||||
if (dirty) {
|
||||
uploadAll();
|
||||
// TODO: use the staging buffer and be smarter about allocation in general.
|
||||
uploadAll(stagingBuffer);
|
||||
dirty = false;
|
||||
}
|
||||
}
|
||||
|
||||
private void uploadAll() {
|
||||
private void uploadAll(StagingBuffer stagingBuffer) {
|
||||
long neededSize = 0;
|
||||
int maxQuadIndexCount = 0;
|
||||
int nonQuadIndexCount = 0;
|
||||
|
|
|
@ -35,11 +35,11 @@ public class IndirectModel {
|
|||
needsFullWrite = true;
|
||||
}
|
||||
|
||||
public void writeObjects(long objectPtr) {
|
||||
public void writeObjects(StagingBuffer stagingBuffer, long start, int dstVbo) {
|
||||
if (needsFullWrite) {
|
||||
instancer.writeFull(objectPtr, id);
|
||||
instancer.writeFull(stagingBuffer, start, id, dstVbo);
|
||||
} else {
|
||||
instancer.writeSparse(objectPtr, id);
|
||||
instancer.writeSparse(stagingBuffer, start, id, dstVbo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,230 @@
|
|||
package com.jozufozu.flywheel.backend.engine.indirect;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.lwjgl.opengl.GL45C;
|
||||
import org.lwjgl.system.MemoryUtil;
|
||||
|
||||
import com.jozufozu.flywheel.gl.GlFence;
|
||||
import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
|
||||
|
||||
import it.unimi.dsi.fastutil.PriorityQueue;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayFIFOQueue;
|
||||
|
||||
// https://github.com/CaffeineMC/sodium-fabric/blob/dev/src/main/java/me/jellysquid/mods/sodium/client/gl/arena/staging/MappedStagingBuffer.java
|
||||
public class StagingBuffer {
|
||||
private static final long DEFAULT_CAPACITY = 1024 * 1024 * 8;
|
||||
private static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT;
|
||||
private static final int MAP_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_MAP_FLUSH_EXPLICIT_BIT | GL45C.GL_MAP_INVALIDATE_BUFFER_BIT;
|
||||
|
||||
private final int vbo;
|
||||
private final long map;
|
||||
private final long capacity;
|
||||
|
||||
private long start = 0;
|
||||
private long pos = 0;
|
||||
|
||||
private long totalAvailable;
|
||||
|
||||
private final OverflowStagingBuffer overflow = new OverflowStagingBuffer();
|
||||
private final PriorityQueue<Transfer> transfers = new ObjectArrayFIFOQueue<>();
|
||||
private final PriorityQueue<FencedRegion> fencedRegions = new ObjectArrayFIFOQueue<>();
|
||||
|
||||
public StagingBuffer() {
|
||||
this(DEFAULT_CAPACITY);
|
||||
}
|
||||
|
||||
public StagingBuffer(long capacity) {
|
||||
this.capacity = capacity;
|
||||
vbo = GL45C.glCreateBuffers();
|
||||
|
||||
GL45C.glNamedBufferStorage(vbo, capacity, STORAGE_FLAGS);
|
||||
map = GL45C.nglMapNamedBufferRange(vbo, 0, capacity, MAP_FLAGS);
|
||||
|
||||
totalAvailable = capacity;
|
||||
|
||||
FlwMemoryTracker._allocCPUMemory(capacity);
|
||||
}
|
||||
|
||||
/**
|
||||
* Enqueue a copy from the given pointer to the given VBO.
|
||||
*
|
||||
* @param ptr The pointer to copy from.
|
||||
* @param size The size of the copy.
|
||||
* @param dstVbo The VBO to copy to.
|
||||
* @param dstOffset The offset in the destination VBO.
|
||||
*/
|
||||
public void enqueueCopy(long ptr, long size, int dstVbo, long dstOffset) {
|
||||
if (size > totalAvailable) {
|
||||
overflow.enqueueCopy(ptr, size, dstVbo, dstOffset);
|
||||
return;
|
||||
}
|
||||
|
||||
long remaining = capacity - pos;
|
||||
|
||||
if (size > remaining) {
|
||||
long split = size - remaining;
|
||||
|
||||
// Put the first span at the tail of the buffer...
|
||||
MemoryUtil.memCopy(ptr, map + pos, remaining);
|
||||
transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, remaining));
|
||||
|
||||
// ... and the rest at the head.
|
||||
MemoryUtil.memCopy(ptr + remaining, map, split);
|
||||
transfers.enqueue(new Transfer(0, dstVbo, dstOffset + remaining, split));
|
||||
|
||||
pos = split;
|
||||
} else {
|
||||
MemoryUtil.memCopy(ptr, map + pos, size);
|
||||
transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, size));
|
||||
|
||||
pos += size;
|
||||
}
|
||||
|
||||
totalAvailable -= size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reserve space in this buffer for a transfer to another VBO.
|
||||
* <br>
|
||||
* You must ensure that your writes are complete before the next call to {@link #flush}.
|
||||
* <br>
|
||||
* This will generally be a more efficient way to transfer data as it avoids a copy, however,
|
||||
* this method does not allow for non-contiguous writes, so you should fall back to
|
||||
* {@link #enqueueCopy} if this returns {@link MemoryUtil#NULL}.
|
||||
*
|
||||
* @param size The size of the transfer you wish to make.
|
||||
* @param dstVbo The VBO you wish to transfer to.
|
||||
* @param dstOffset The offset in the destination VBO.
|
||||
* @return A pointer to the reserved space, or {@link MemoryUtil#NULL} if there is not enough contiguous space.
|
||||
*/
|
||||
public long reserveForTransferTo(long size, int dstVbo, long dstOffset) {
|
||||
// Don't need to check totalAvailable here because that's a looser constraint than the bytes remaining.
|
||||
long remaining = capacity - pos;
|
||||
if (size > remaining) {
|
||||
return MemoryUtil.NULL;
|
||||
}
|
||||
|
||||
long out = map + pos;
|
||||
|
||||
transfers.enqueue(new Transfer(pos, dstVbo, dstOffset, size));
|
||||
|
||||
pos += size;
|
||||
|
||||
totalAvailable -= size;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
public void flush() {
|
||||
if (transfers.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (pos < start) {
|
||||
// we rolled around, need to flush 2 ranges.
|
||||
GL45C.glFlushMappedNamedBufferRange(vbo, start, capacity - start);
|
||||
GL45C.glFlushMappedNamedBufferRange(vbo, 0, pos);
|
||||
} else {
|
||||
GL45C.glFlushMappedNamedBufferRange(vbo, start, pos - start);
|
||||
}
|
||||
|
||||
long usedCapacity = 0;
|
||||
|
||||
for (Transfer transfer : consolidateCopies(transfers)) {
|
||||
usedCapacity += transfer.size;
|
||||
|
||||
GL45C.glCopyNamedBufferSubData(vbo, transfer.dstVbo, transfer.srcOffset, transfer.dstOffset, transfer.size);
|
||||
}
|
||||
|
||||
fencedRegions.enqueue(new FencedRegion(new GlFence(), usedCapacity));
|
||||
|
||||
start = pos;
|
||||
}
|
||||
|
||||
private static List<Transfer> consolidateCopies(PriorityQueue<Transfer> queue) {
|
||||
List<Transfer> merged = new ArrayList<>();
|
||||
Transfer last = null;
|
||||
|
||||
while (!queue.isEmpty()) {
|
||||
Transfer transfer = queue.dequeue();
|
||||
|
||||
if (last != null) {
|
||||
if (areContiguous(last, transfer)) {
|
||||
last.size += transfer.size;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
merged.add(last = new Transfer(transfer));
|
||||
}
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
private static boolean areContiguous(Transfer last, Transfer transfer) {
|
||||
return last.dstVbo == transfer.dstVbo && last.dstOffset + last.size == transfer.dstOffset && last.srcOffset + last.size == transfer.srcOffset;
|
||||
}
|
||||
|
||||
public void reclaim() {
|
||||
while (!fencedRegions.isEmpty()) {
|
||||
var region = fencedRegions.first();
|
||||
if (!region.fence.isSignaled()) {
|
||||
// We can't reclaim this region yet, and we know that all the regions after it are also not ready.
|
||||
break;
|
||||
}
|
||||
fencedRegions.dequeue();
|
||||
|
||||
region.fence.delete();
|
||||
|
||||
totalAvailable += region.capacity;
|
||||
}
|
||||
}
|
||||
|
||||
public void delete() {
|
||||
GL45C.glUnmapNamedBuffer(vbo);
|
||||
GL45C.glDeleteBuffers(vbo);
|
||||
overflow.delete();
|
||||
|
||||
FlwMemoryTracker._freeCPUMemory(capacity);
|
||||
}
|
||||
|
||||
private static final class Transfer {
|
||||
private final long srcOffset;
|
||||
private final int dstVbo;
|
||||
private final long dstOffset;
|
||||
private long size;
|
||||
|
||||
private Transfer(long srcOffset, int dstVbo, long dstOffset, long size) {
|
||||
this.srcOffset = srcOffset;
|
||||
this.dstVbo = dstVbo;
|
||||
this.dstOffset = dstOffset;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
public Transfer(Transfer other) {
|
||||
this(other.srcOffset, other.dstVbo, other.dstOffset, other.size);
|
||||
}
|
||||
}
|
||||
|
||||
private record FencedRegion(GlFence fence, long capacity) {
|
||||
}
|
||||
|
||||
private static class OverflowStagingBuffer {
|
||||
private final int vbo;
|
||||
|
||||
public OverflowStagingBuffer() {
|
||||
vbo = GL45C.glCreateBuffers();
|
||||
}
|
||||
|
||||
public void enqueueCopy(long ptr, long size, int dstVbo, long dstOffset) {
|
||||
GL45C.nglNamedBufferData(vbo, size, ptr, GL45C.GL_STREAM_COPY);
|
||||
GL45C.glCopyNamedBufferSubData(vbo, dstVbo, 0, dstOffset, size);
|
||||
}
|
||||
|
||||
public void delete() {
|
||||
GL45C.glDeleteBuffers(vbo);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -8,7 +8,7 @@ import org.lwjgl.system.MemoryUtil;
|
|||
|
||||
import com.jozufozu.flywheel.api.model.IndexSequence;
|
||||
import com.jozufozu.flywheel.gl.GlNumericType;
|
||||
import com.jozufozu.flywheel.gl.buffer.GlBuffer;
|
||||
import com.jozufozu.flywheel.gl.buffer.Buffer;
|
||||
import com.jozufozu.flywheel.gl.buffer.GlBufferUsage;
|
||||
import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
|
||||
import com.jozufozu.flywheel.lib.model.QuadIndexSequence;
|
||||
|
@ -59,12 +59,12 @@ public class EBOCache {
|
|||
@NotNull
|
||||
private static Entry create(IndexSequence provider, int indexCount) {
|
||||
int byteSize = indexCount * GlNumericType.UINT.byteWidth();
|
||||
var ebo = GlBuffer.IMPL.create();
|
||||
var ebo = Buffer.IMPL.create();
|
||||
|
||||
final long ptr = MemoryUtil.nmemAlloc(byteSize);
|
||||
provider.fill(ptr, indexCount);
|
||||
|
||||
GlBuffer.IMPL.data(ebo, byteSize, ptr, GlBufferUsage.STATIC_DRAW.glEnum);
|
||||
Buffer.IMPL.data(ebo, byteSize, ptr, GlBufferUsage.STATIC_DRAW.glEnum);
|
||||
FlwMemoryTracker._allocGPUMemory(byteSize);
|
||||
|
||||
MemoryUtil.nmemFree(ptr);
|
||||
|
|
|
@ -39,8 +39,8 @@ public class InstancedDrawManager extends InstancerStorage<InstancedInstancer<?>
|
|||
meshPool.flush();
|
||||
}
|
||||
|
||||
public void invalidate() {
|
||||
super.invalidate();
|
||||
public void delete() {
|
||||
super.delete();
|
||||
|
||||
meshPool.delete();
|
||||
|
||||
|
|
|
@ -96,7 +96,7 @@ public class InstancingEngine extends AbstractEngine {
|
|||
|
||||
@Override
|
||||
public void delete() {
|
||||
drawManager.invalidate();
|
||||
drawManager.delete();
|
||||
}
|
||||
|
||||
private void render(InstancedDrawManager.DrawSet drawSet) {
|
||||
|
|
|
@ -1,68 +1,34 @@
|
|||
package com.jozufozu.flywheel.gl;
|
||||
|
||||
import static org.lwjgl.opengl.GL32.GL_SIGNALED;
|
||||
import static org.lwjgl.opengl.GL32.GL_SYNC_FLUSH_COMMANDS_BIT;
|
||||
import static org.lwjgl.opengl.GL32.GL_SYNC_GPU_COMMANDS_COMPLETE;
|
||||
import static org.lwjgl.opengl.GL32.GL_SYNC_STATUS;
|
||||
import static org.lwjgl.opengl.GL32.GL_TIMEOUT_IGNORED;
|
||||
import static org.lwjgl.opengl.GL32.glClientWaitSync;
|
||||
import static org.lwjgl.opengl.GL32.glDeleteSync;
|
||||
import static org.lwjgl.opengl.GL32.glFenceSync;
|
||||
import static org.lwjgl.opengl.GL32.nglGetSynciv;
|
||||
|
||||
import org.lwjgl.opengl.GL32;
|
||||
import org.lwjgl.system.MemoryStack;
|
||||
import org.lwjgl.system.MemoryUtil;
|
||||
|
||||
// https://github.com/CaffeineMC/sodium-fabric/blob/da17fc8d0cb1a4e82fe6956ac4f07a63d32eca5a/components/gfx-opengl/src/main/java/net/caffeinemc/gfx/opengl/sync/GlFence.java
|
||||
public class GlFence {
|
||||
private final long fence;
|
||||
|
||||
private long fence;
|
||||
|
||||
public void post() {
|
||||
clear();
|
||||
|
||||
public GlFence() {
|
||||
fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
if (fence != 0) {
|
||||
glDeleteSync(fence);
|
||||
fence = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean poll() {
|
||||
if (fence != 0) {
|
||||
poll0();
|
||||
}
|
||||
|
||||
return fence == 0;
|
||||
}
|
||||
|
||||
private void poll0() {
|
||||
public boolean isSignaled() {
|
||||
int result;
|
||||
try (var memoryStack = MemoryStack.stackPush()) {
|
||||
long checkPtr = memoryStack.ncalloc(Integer.BYTES, 0, Integer.BYTES);
|
||||
GL32.nglGetSynciv(fence, GL_SYNC_STATUS, 1, MemoryUtil.NULL, checkPtr);
|
||||
nglGetSynciv(fence, GL_SYNC_STATUS, 1, MemoryUtil.NULL, checkPtr);
|
||||
|
||||
result = MemoryUtil.memGetInt(checkPtr);
|
||||
}
|
||||
|
||||
if (result == GL_SIGNALED) {
|
||||
glDeleteSync(fence);
|
||||
fence = 0;
|
||||
}
|
||||
return result == GL_SIGNALED;
|
||||
}
|
||||
|
||||
public void waitSync() {
|
||||
if (poll()) {
|
||||
return;
|
||||
}
|
||||
|
||||
glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);
|
||||
|
||||
public void delete() {
|
||||
glDeleteSync(fence);
|
||||
|
||||
fence = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,8 @@ import org.lwjgl.system.Checks;
|
|||
import com.jozufozu.flywheel.gl.GlCompat;
|
||||
|
||||
public interface Buffer {
|
||||
Buffer IMPL = new DSA().fallback();
|
||||
|
||||
int create();
|
||||
|
||||
void data(int vbo, long size, long ptr, int glEnum);
|
||||
|
|
|
@ -10,7 +10,6 @@ import com.mojang.blaze3d.platform.GlStateManager;
|
|||
import it.unimi.dsi.fastutil.longs.LongUnaryOperator;
|
||||
|
||||
public class GlBuffer extends GlObject {
|
||||
public static final Buffer IMPL = new Buffer.DSA().fallback();
|
||||
protected final GlBufferUsage usage;
|
||||
/**
|
||||
* The size (in bytes) of the buffer on the GPU.
|
||||
|
@ -26,7 +25,7 @@ public class GlBuffer extends GlObject {
|
|||
}
|
||||
|
||||
public GlBuffer(GlBufferUsage usage) {
|
||||
handle(IMPL.create());
|
||||
handle(Buffer.IMPL.create());
|
||||
this.usage = usage;
|
||||
}
|
||||
|
||||
|
@ -57,7 +56,7 @@ public class GlBuffer extends GlObject {
|
|||
|
||||
private void alloc(long capacity) {
|
||||
increaseSize(capacity);
|
||||
IMPL.data(handle(), size, MemoryUtil.NULL, usage.glEnum);
|
||||
Buffer.IMPL.data(handle(), size, MemoryUtil.NULL, usage.glEnum);
|
||||
FlwMemoryTracker._allocGPUMemory(size);
|
||||
}
|
||||
|
||||
|
@ -67,9 +66,9 @@ public class GlBuffer extends GlObject {
|
|||
increaseSize(capacity);
|
||||
|
||||
int oldHandle = handle();
|
||||
int newHandle = IMPL.create();
|
||||
IMPL.data(newHandle, size, MemoryUtil.NULL, usage.glEnum);
|
||||
IMPL.copyData(oldHandle, newHandle, 0, 0, oldSize);
|
||||
int newHandle = Buffer.IMPL.create();
|
||||
Buffer.IMPL.data(newHandle, size, MemoryUtil.NULL, usage.glEnum);
|
||||
Buffer.IMPL.copyData(oldHandle, newHandle, 0, 0, oldSize);
|
||||
GlStateManager._glDeleteBuffers(oldHandle);
|
||||
handle(newHandle);
|
||||
|
||||
|
@ -85,7 +84,7 @@ public class GlBuffer extends GlObject {
|
|||
|
||||
public void upload(MemoryBlock directBuffer) {
|
||||
FlwMemoryTracker._freeGPUMemory(size);
|
||||
IMPL.data(handle(), directBuffer.size(), directBuffer.ptr(), usage.glEnum);
|
||||
Buffer.IMPL.data(handle(), directBuffer.size(), directBuffer.ptr(), usage.glEnum);
|
||||
size = directBuffer.size();
|
||||
FlwMemoryTracker._allocGPUMemory(size);
|
||||
}
|
||||
|
|
|
@ -15,7 +15,7 @@ public class MappedBuffer implements AutoCloseable {
|
|||
public MappedBuffer(int glBuffer, long size) {
|
||||
this.glBuffer = glBuffer;
|
||||
|
||||
ptr = GlBuffer.IMPL.mapRange(glBuffer, 0, size, GL_MAP_WRITE_BIT);
|
||||
ptr = Buffer.IMPL.mapRange(glBuffer, 0, size, GL_MAP_WRITE_BIT);
|
||||
|
||||
if (ptr == MemoryUtil.NULL) {
|
||||
throw new GlException(GlError.poll(), "Could not map buffer");
|
||||
|
@ -32,7 +32,7 @@ public class MappedBuffer implements AutoCloseable {
|
|||
return;
|
||||
}
|
||||
|
||||
GlBuffer.IMPL.unmap(glBuffer);
|
||||
Buffer.IMPL.unmap(glBuffer);
|
||||
ptr = NULL;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue