From 12c7cdfda5e2af283b5928117a5609e60d25eff6 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sat, 31 Aug 2024 17:57:49 -0500 Subject: [PATCH] Paging Dr. Instancer - Goal: avoid needing to re-upload everything when instance count for one instancer changes - Solution: store instances in pages of 32 - Allocate pages in a GPU arena - Store one uint per page to indicate which model the instances in the page belong to, and how many instances are actually stored in the page - Instancers eagerly allocate and free pages as their instance count changes - Instancers will not necessarily store instances contiguously anymore, but that's okay because any given cull workgroup will only reference a single page - Culling threads *will* write instances contiguously however, and so we still need to keep track of a base instance per instancer, and the target buffer logic does not change --- .../engine/{Arena.java => AbstractArena.java} | 34 ++-- .../backend/engine/AbstractInstancer.java | 12 +- .../flywheel/backend/engine/CpuArena.java | 30 ++++ .../flywheel/backend/engine/LightStorage.java | 4 +- .../engine/embed/EnvironmentStorage.java | 4 +- .../engine/indirect/IndirectBuffers.java | 20 +-- .../engine/indirect/IndirectCullingGroup.java | 19 +-- .../backend/engine/indirect/IndirectDraw.java | 8 +- .../engine/indirect/IndirectInstancer.java | 131 ++++++++------- .../engine/indirect/InstancePager.java | 158 ++++++++++++++++++ .../flywheel/internal/indirect/cull.glsl | 25 ++- 11 files changed, 326 insertions(+), 119 deletions(-) rename common/src/backend/java/dev/engine_room/flywheel/backend/engine/{Arena.java => AbstractArena.java} (55%) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java similarity index 55% rename from common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java rename to common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java index e7aa67071..23d023006 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java @@ -1,23 +1,17 @@ package dev.engine_room.flywheel.backend.engine; -import dev.engine_room.flywheel.lib.memory.MemoryBlock; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; -public class Arena { - private final long elementSizeBytes; - - private MemoryBlock memoryBlock; - - // Monotonic index, generally represents the size of the arena. - private int top = 0; +public abstract class AbstractArena { + protected final long elementSizeBytes; // List of free indices. private final IntList freeStack = new IntArrayList(); + // Monotonic index, generally represents the size of the arena. + private int top = 0; - public Arena(long elementSizeBytes, int initialCapacity) { + public AbstractArena(long elementSizeBytes) { this.elementSizeBytes = elementSizeBytes; - - memoryBlock = MemoryBlock.malloc(elementSizeBytes * initialCapacity); } public int alloc() { @@ -27,8 +21,8 @@ public class Arena { } // Make sure there's room to increment top. - if (top * elementSizeBytes >= memoryBlock.size()) { - memoryBlock = memoryBlock.realloc(memoryBlock.size() * 2); + if (top * elementSizeBytes >= byteCapacity()) { + resize(); } // Return the top index and increment. 
@@ -40,19 +34,15 @@ public class Arena { freeStack.add(i); } - public long indexToPointer(int i) { - return memoryBlock.ptr() + i * elementSizeBytes; - } - - public void delete() { - memoryBlock.free(); + public long byteOffsetOf(int i) { + return i * elementSizeBytes; } public int capacity() { return top; } - public long byteCapacity() { - return memoryBlock.size(); - } + public abstract long byteCapacity(); + + protected abstract void resize(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java index 73f8c1714..744bb12a0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java @@ -122,12 +122,12 @@ public abstract class AbstractInstancer implements Instancer if (writePos < newSize) { // Since we'll be shifting everything into this space we can consider it all changed. - changed.set(writePos, newSize); + setRangeChanged(writePos, newSize); } // We definitely shouldn't consider the deleted instances as changed though, // else we might try some out of bounds accesses later. - changed.clear(newSize, oldSize); + clearChangedRange(newSize, oldSize); // Punch out the deleted instances, shifting over surviving instances to fill their place. for (int scanPos = writePos; (scanPos < oldSize) && (writePos < newSize); scanPos++, writePos++) { @@ -155,6 +155,14 @@ public abstract class AbstractInstancer implements Instancer .clear(); } + protected void clearChangedRange(int start, int end) { + changed.clear(start, end); + } + + protected void setRangeChanged(int start, int end) { + changed.set(start, end); + } + /** * Clear all instances without freeing resources. 
*/ diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java new file mode 100644 index 000000000..c1c7843d7 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java @@ -0,0 +1,30 @@ +package dev.engine_room.flywheel.backend.engine; + +import dev.engine_room.flywheel.lib.memory.MemoryBlock; + +public class CpuArena extends AbstractArena { + + private MemoryBlock memoryBlock; + + public CpuArena(long elementSizeBytes, int initialCapacity) { + super(elementSizeBytes); + + memoryBlock = MemoryBlock.malloc(elementSizeBytes * initialCapacity); + } + + public long indexToPointer(int i) { + return memoryBlock.ptr() + i * elementSizeBytes; + } + + public void delete() { + memoryBlock.free(); + } + + public long byteCapacity() { + return memoryBlock.size(); + } + + protected void resize() { + memoryBlock = memoryBlock.realloc(memoryBlock.size() * 2); + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java index 214a56795..47e884a52 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java @@ -46,7 +46,7 @@ public class LightStorage { private final LevelAccessor level; - private final Arena arena; + private final CpuArena arena; private final Long2IntMap section2ArenaIndex = new Long2IntOpenHashMap(); { section2ArenaIndex.defaultReturnValue(INVALID_SECTION); @@ -62,7 +62,7 @@ public class LightStorage { public LightStorage(LevelAccessor level) { this.level = level; - arena = new Arena(SECTION_SIZE_BYTES, DEFAULT_ARENA_CAPACITY_SECTIONS); + arena = new CpuArena(SECTION_SIZE_BYTES, DEFAULT_ARENA_CAPACITY_SECTIONS); } /** diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java index 2b707a3b8..85ad55387 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java @@ -1,6 +1,6 @@ package dev.engine_room.flywheel.backend.engine.embed; -import dev.engine_room.flywheel.backend.engine.Arena; +import dev.engine_room.flywheel.backend.engine.CpuArena; import it.unimi.dsi.fastutil.objects.ReferenceLinkedOpenHashSet; import it.unimi.dsi.fastutil.objects.ReferenceSet; @@ -13,7 +13,7 @@ public class EnvironmentStorage { // Note than the arena starts indexing at zero, but we reserve zero for the identity matrix. // Any time an ID from the arena is written we want to add one to it. - public final Arena arena = new Arena(MATRIX_SIZE_BYTES, 32); + public final CpuArena arena = new CpuArena(MATRIX_SIZE_BYTES, 32); { // Reserve the identity matrix. Burns a few bytes but oh well. 
diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index b0766e171..f82d32c10 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -61,9 +61,9 @@ public class IndirectBuffers { */ private final MemoryBlock multiBindBlock; private final long instanceStride; - public final ResizableStorageArray instance; + + public final InstancePager pageFile; public final ResizableStorageArray target; - public final ResizableStorageArray modelIndex; public final ResizableStorageArray model; public final ResizableStorageArray draw; @@ -71,30 +71,27 @@ public class IndirectBuffers { this.instanceStride = instanceStride; this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1); - instance = new ResizableStorageArray(instanceStride, INSTANCE_GROWTH_FACTOR); + pageFile = new InstancePager(instanceStride); target = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); - modelIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); model = new ResizableStorageArray(MODEL_STRIDE, MODEL_GROWTH_FACTOR); draw = new ResizableStorageArray(DRAW_COMMAND_STRIDE, DRAW_GROWTH_FACTOR); } void updateCounts(int instanceCount, int modelCount, int drawCount) { - instance.ensureCapacity(instanceCount); target.ensureCapacity(instanceCount); - modelIndex.ensureCapacity(instanceCount); model.ensureCapacity(modelCount); draw.ensureCapacity(drawCount); final long ptr = multiBindBlock.ptr(); - MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, instance.handle()); + MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, pageFile.storage.handle()); MemoryUtil.memPutInt(ptr + TARGET_HANDLE_OFFSET, target.handle()); - MemoryUtil.memPutInt(ptr + MODEL_INDEX_HANDLE_OFFSET, 
modelIndex.handle()); + MemoryUtil.memPutInt(ptr + MODEL_INDEX_HANDLE_OFFSET, pageFile.pageTable.handle()); MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle()); MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); - MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, instanceStride * instanceCount); + MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, pageFile.storage.byteCapacity()); MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * instanceCount); - MemoryUtil.memPutAddress(ptr + MODEL_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); + MemoryUtil.memPutAddress(ptr + MODEL_INDEX_SIZE_OFFSET, pageFile.pageTable.byteCapacity()); MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount); MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount); } @@ -124,9 +121,8 @@ public class IndirectBuffers { public void delete() { multiBindBlock.free(); - instance.delete(); + pageFile.delete(); target.delete(); - modelIndex.delete(); model.delete(); draw.delete(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 9a376ab14..8a25d5df5 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -74,8 +74,8 @@ public class IndirectCullingGroup { continue; } - instancer.modelIndex = modelIndex; - instancer.baseInstance = instanceCountThisFrame; + instancer.modelIndex(modelIndex); + instancer.baseInstance(instanceCountThisFrame); instanceCountThisFrame += instanceCount; modelIndex++; @@ -96,6 +96,8 @@ public class IndirectCullingGroup { // Upload only instances that have changed. 
uploadInstances(stagingBuffer); + buffers.pageFile.uploadTable(stagingBuffer); + // We need to upload the models every frame to reset the instance count. uploadModels(stagingBuffer); @@ -118,7 +120,7 @@ public class IndirectCullingGroup { cullProgram.bind(); buffers.bindForCompute(); - glDispatchCompute(GlCompat.getComputeGroupCount(instanceCountThisFrame), 1, 1); + glDispatchCompute(buffers.pageFile.capacity(), 1, 1); } public void dispatchApply() { @@ -171,7 +173,9 @@ public class IndirectCullingGroup { } public void add(IndirectInstancer instancer, InstancerKey key, MeshPool meshPool) { - instancer.modelIndex = instancers.size(); + instancer.pageFile = buffers.pageFile.createPage(); + instancer.modelIndex(instancers.size()); + instancers.add(instancer); List meshes = key.model() @@ -242,12 +246,7 @@ public class IndirectCullingGroup { private void uploadInstances(StagingBuffer stagingBuffer) { for (var instancer : instancers) { - instancer.uploadInstances(stagingBuffer, buffers.instance.handle()); - } - - for (var instancer : instancers) { - instancer.uploadModelIndices(stagingBuffer, buffers.modelIndex.handle()); - instancer.resetChanged(); + instancer.uploadInstances(stagingBuffer, buffers.pageFile.storage.handle()); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java index fb763d006..48517d1a2 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java @@ -72,9 +72,9 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount - to be set by the apply shader MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex()); // firstIndex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex()); // baseVertex - MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance); // 
baseInstance + MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance()); // baseInstance - MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex + MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex()); // modelIndex MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex @@ -89,9 +89,9 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 4, 1); // instanceCount - only drawing one instance MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex()); // firstIndex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex()); // baseVertex - MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance + instanceIndex); // baseInstance + MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance() + instanceIndex); // baseInstance - MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex + MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex()); // modelIndex MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index 75dc2b8e3..541765870 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -12,6 +12,7 @@ import dev.engine_room.flywheel.api.instance.InstanceWriter; import dev.engine_room.flywheel.api.model.Model; import dev.engine_room.flywheel.backend.engine.AbstractInstancer; import dev.engine_room.flywheel.backend.engine.embed.Environment; +import dev.engine_room.flywheel.backend.util.AtomicBitSet; import dev.engine_room.flywheel.lib.math.MoreMath; public class IndirectInstancer extends AbstractInstancer { @@ -20,11 +21,12 @@ public class IndirectInstancer extends AbstractInstancer private final List associatedDraws = new ArrayList<>(); private final 
Vector4fc boundingSphere; - public int modelIndex = -1; - public int baseInstance = -1; - private int lastModelIndex = -1; - private int lastBaseInstance = -1; - private int lastInstanceCount = -1; + private final AtomicBitSet changedPages = new AtomicBitSet(); + + public InstancePager.Allocation pageFile; + + private int modelIndex = -1; + private int baseInstance = -1; public IndirectInstancer(InstanceType type, Environment environment, Model model) { super(type, environment); @@ -34,6 +36,29 @@ public class IndirectInstancer extends AbstractInstancer boundingSphere = model.boundingSphere(); } + @Override + public void notifyDirty(int index) { + if (index < 0 || index >= instanceCount()) { + return; + } + changed.set(index); + changedPages.set(pageFile.object2Page(index)); + } + + @Override + protected void setRangeChanged(int start, int end) { + super.setRangeChanged(start, end); + + changedPages.set(pageFile.object2Page(start), pageFile.object2Page(end - 1) + 1); + } + + @Override + protected void clearChangedRange(int start, int end) { + super.clearChangedRange(start, end); + + // changedPages.clear(pageFile.object2Page(start), pageFile); + } + public void addDraw(IndirectDraw draw) { associatedDraws.add(draw); } @@ -44,6 +69,8 @@ public class IndirectInstancer extends AbstractInstancer public void update() { removeDeletedInstances(); + + pageFile.activeCount(instanceCount()); } public void writeModel(long ptr) { @@ -57,71 +84,38 @@ public class IndirectInstancer extends AbstractInstancer } public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) { - long baseByte = baseInstance * instanceStride; + int numPages = pageFile.pageCount(); - if (baseInstance != lastBaseInstance) { - uploadAllInstances(stagingBuffer, baseByte, instanceVbo); - } else { - uploadChangedInstances(stagingBuffer, baseByte, instanceVbo); - } - } + var instanceCount = instances.size(); - public void uploadModelIndices(StagingBuffer stagingBuffer, int modelIndexVbo) { - long
modelIndexBaseByte = baseInstance * IndirectBuffers.INT_SIZE; + for (int page = 0; page < numPages; page++) { + page = changedPages.nextSetBit(page); - if (baseInstance != lastBaseInstance || modelIndex != lastModelIndex || instances.size() > lastInstanceCount) { - uploadAllModelIndices(stagingBuffer, modelIndexBaseByte, modelIndexVbo); - } - } - - public void resetChanged() { - lastModelIndex = modelIndex; - lastBaseInstance = baseInstance; - lastInstanceCount = instances.size(); - changed.clear(); - } - - private void uploadChangedInstances(StagingBuffer stagingBuffer, long baseByte, int instanceVbo) { - changed.forEachSetSpan((startInclusive, endInclusive) -> { - // Generally we're good about ensuring we don't have changed bits set out of bounds, but check just in case - if (startInclusive >= instances.size()) { - return; + if (page == -1) { + break; } - int actualEnd = Math.min(endInclusive, instances.size() - 1); - int instanceCount = actualEnd - startInclusive + 1; - long totalSize = instanceCount * instanceStride; + int startObject = pageFile.page2Object(page); - stagingBuffer.enqueueCopy(totalSize, instanceVbo, baseByte + startInclusive * instanceStride, ptr -> { - for (int i = startInclusive; i <= actualEnd; i++) { - var instance = instances.get(i); - writer.write(ptr, instance); + if (startObject >= instanceCount) { + break; + } + + int endObject = Math.min(instanceCount, pageFile.page2Object(page + 1)); + + long baseByte = pageFile.page2ByteOffset(page); + long size = (endObject - startObject) * instanceStride; + + stagingBuffer.enqueueCopy(size, instanceVbo, baseByte, ptr -> { + for (int i = startObject; i < endObject; i++) { + writer.write(ptr, instances.get(i)); ptr += instanceStride; } }); - }); - } + } - private void uploadAllInstances(StagingBuffer stagingBuffer, long baseByte, int instanceVbo) { - long totalSize = instances.size() * instanceStride; - - stagingBuffer.enqueueCopy(totalSize, instanceVbo, baseByte, ptr -> { - for (I instance :
instances) { - writer.write(ptr, instance); - ptr += instanceStride; - } - }); - } - - private void uploadAllModelIndices(StagingBuffer stagingBuffer, long modelIndexBaseByte, int modelIndexVbo) { - long modelIndexTotalSize = instances.size() * IndirectBuffers.INT_SIZE; - - stagingBuffer.enqueueCopy(modelIndexTotalSize, modelIndexVbo, modelIndexBaseByte, ptr -> { - for (int i = 0; i < instances.size(); i++) { - MemoryUtil.memPutInt(ptr, modelIndex); - ptr += IndirectBuffers.INT_SIZE; - } - }); + changed.clear(); + changedPages.clear(); } @Override @@ -130,4 +124,21 @@ public class IndirectInstancer extends AbstractInstancer draw.delete(); } } + + public void modelIndex(int modelIndex) { + this.modelIndex = modelIndex; + pageFile.modelIndex(modelIndex); + } + + public int modelIndex() { + return modelIndex; + } + + public void baseInstance(int baseInstance) { + this.baseInstance = baseInstance; + } + + public int baseInstance() { + return baseInstance; + } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java new file mode 100644 index 000000000..018a640a7 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java @@ -0,0 +1,158 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.jetbrains.annotations.UnknownNullability; +import org.lwjgl.system.MemoryUtil; + +import dev.engine_room.flywheel.backend.engine.AbstractArena; +import dev.engine_room.flywheel.lib.memory.MemoryBlock; + +public class InstancePager extends AbstractArena { + // 32 objects per page. Allows for convenient bitsets on the gpu. 
+ public static final int DEFAULT_PAGE_SIZE_OBJECTS = 5; + public static final int INITIAL_PAGES_ALLOCATED = 4; + + private final int log2PageSize; + /** + * The number of objects in a page. + */ + private final int pageSize; + + private final long objectSizeBytes; + + @UnknownNullability + private MemoryBlock pageData; + + private final int pageMask; + public final ResizableStorageArray storage; + public final ResizableStorageArray pageTable; + + private final List allocations = new ArrayList<>(); + + public InstancePager(long objectSizeBytes) { + this(DEFAULT_PAGE_SIZE_OBJECTS, objectSizeBytes); + } + + public InstancePager(int log2PageSize, long objectSizeBytes) { + super((1L << log2PageSize) * objectSizeBytes); + this.log2PageSize = log2PageSize; + this.pageSize = 1 << log2PageSize; + this.pageMask = pageSize - 1; + this.objectSizeBytes = objectSizeBytes; + + this.storage = new ResizableStorageArray(this.elementSizeBytes); + this.pageTable = new ResizableStorageArray(Integer.BYTES); + } + + public Allocation createPage() { + var out = new Allocation(); + allocations.add(out); + return out; + } + + @Override + public long byteCapacity() { + return storage.byteCapacity(); + } + + @Override + protected void resize() { + if (pageData == null) { + pageData = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); + storage.ensureCapacity(INITIAL_PAGES_ALLOCATED); + pageTable.ensureCapacity(INITIAL_PAGES_ALLOCATED); + } else { + pageData = pageData.realloc(pageData.size() * 2); + storage.ensureCapacity(storage.capacity() * 2); + pageTable.ensureCapacity(pageTable.capacity() * 2); + } + } + + public void uploadTable(StagingBuffer stagingBuffer) { + for (Allocation allocation : allocations) { + allocation.updatePageTable(); + } + stagingBuffer.enqueueCopy(pageData.ptr(), pageData.size(), pageTable.handle(), 0); + } + + public void delete() { + storage.delete(); + pageTable.delete(); + pageData.free(); + } + + public class Allocation { + public int[] pages = new 
int[0]; + + private int modelIndex = -1; + + public void modelIndex(int modelIndex) { + if (this.modelIndex != modelIndex) { + this.modelIndex = modelIndex; + } + } + + private void updatePageTable() { + var ptr = pageData.ptr(); + + int fullPage = (modelIndex & 0x3FFFFFF) | 0x80000000; + + for (int page : pages) { + MemoryUtil.memPutInt(ptr + page * Integer.BYTES, fullPage); + } + } + + public void activeCount(int objectCount) { + var neededPages = object2Page((objectCount + pageMask)); + + var oldLength = pages.length; + + if (oldLength > neededPages) { + shrink(oldLength, neededPages); + } else if (oldLength < neededPages) { + grow(neededPages, oldLength); + } + } + + private void grow(int neededPages, int oldLength) { + pages = Arrays.copyOf(pages, neededPages); + + for (int i = oldLength; i < neededPages; i++) { + pages[i] = InstancePager.this.alloc(); + } + } + + private void shrink(int oldLength, int neededPages) { + for (int i = oldLength - 1; i >= neededPages; i--) { + var page = pages[i]; + InstancePager.this.free(page); + MemoryUtil.memPutInt(pageData.ptr() + page * Integer.BYTES, 0); + } + + pages = Arrays.copyOf(pages, neededPages); + } + + public int capacity() { + return pages.length << log2PageSize; + } + + public int pageCount() { + return pages.length; + } + + public int object2Page(int objectIndex) { + return objectIndex >> log2PageSize; + } + + public int page2Object(int pageIndex) { + return pageIndex << log2PageSize; + } + + public long page2ByteOffset(int page) { + return InstancePager.this.byteOffsetOf(pages[page]); + } + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 65d5baae0..e45f4ec3d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -4,14 +4,19 @@ #include
"flywheel:util/matrix.glsl" #include "flywheel:internal/indirect/matrices.glsl" -layout(local_size_x = _FLW_SUBGROUP_SIZE) in; +layout(local_size_x = 32) in; layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer TargetBuffer { uint _flw_instanceIndices[]; }; +// High 6 bits for the number of instances in the page. +const uint _FLW_PAGE_COUNT_OFFSET = 26u; +// Bottom 26 bits for the model index. +const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; + layout(std430, binding = _FLW_MODEL_INDEX_BUFFER_BINDING) restrict readonly buffer ModelIndexBuffer { - uint _flw_modelIndices[]; + uint _flw_pageTable[]; }; layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { @@ -55,13 +60,23 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { } void main() { - uint instanceIndex = gl_GlobalInvocationID.x; + uint pageIndex = gl_WorkGroupID.x; - if (instanceIndex >= _flw_modelIndices.length()) { + if (pageIndex >= _flw_pageTable.length()) { return; } - uint modelIndex = _flw_modelIndices[instanceIndex]; + uint packedModelIndexAndCount = _flw_pageTable[pageIndex]; + + uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET; + + if (gl_LocalInvocationID.x >= pageInstanceCount) { + return; + } + + uint instanceIndex = gl_GlobalInvocationID.x; + + uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK; if (_flw_isVisible(instanceIndex, modelIndex)) { uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1);