From 12c7cdfda5e2af283b5928117a5609e60d25eff6 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sat, 31 Aug 2024 17:57:49 -0500 Subject: [PATCH 01/17] Paging Dr. Instancer - Goal: avoid needing to re-upload everything when instance count for one instancer changes - Solution: store instances in pages of 32 - Allocate pages in a GPU arena - Store one uint per page to indicate which model the instances in the page belong to, and how many instances are actually stored in the page - Instancers eagerly allocate and free pages as their instance count changes - Instancers will not necessarily store instances contiguously anymore, but that's okay because any given cull workgroup will only reference a single page - Culling threads *will* write instances contiguously however, and so we still need to keep track of a base instance per instancer, and the target buffer logic does not change --- .../engine/{Arena.java => AbstractArena.java} | 34 ++-- .../backend/engine/AbstractInstancer.java | 12 +- .../flywheel/backend/engine/CpuArena.java | 30 ++++ .../flywheel/backend/engine/LightStorage.java | 4 +- .../engine/embed/EnvironmentStorage.java | 4 +- .../engine/indirect/IndirectBuffers.java | 20 +-- .../engine/indirect/IndirectCullingGroup.java | 19 +-- .../backend/engine/indirect/IndirectDraw.java | 8 +- .../engine/indirect/IndirectInstancer.java | 131 ++++++++------- .../engine/indirect/InstancePager.java | 158 ++++++++++++++++++ .../flywheel/internal/indirect/cull.glsl | 25 ++- 11 files changed, 326 insertions(+), 119 deletions(-) rename common/src/backend/java/dev/engine_room/flywheel/backend/engine/{Arena.java => AbstractArena.java} (55%) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java similarity index 55% rename from common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java rename to common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java index e7aa67071..23d023006 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java @@ -1,23 +1,17 @@ package dev.engine_room.flywheel.backend.engine; -import dev.engine_room.flywheel.lib.memory.MemoryBlock; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; -public class Arena { - private final long elementSizeBytes; - - private MemoryBlock memoryBlock; - - // Monotonic index, generally represents the size of the arena. - private int top = 0; +public abstract class AbstractArena { + protected final long elementSizeBytes; // List of free indices. private final IntList freeStack = new IntArrayList(); + // Monotonic index, generally represents the size of the arena. + private int top = 0; - public Arena(long elementSizeBytes, int initialCapacity) { + public AbstractArena(long elementSizeBytes) { this.elementSizeBytes = elementSizeBytes; - - memoryBlock = MemoryBlock.malloc(elementSizeBytes * initialCapacity); } public int alloc() { @@ -27,8 +21,8 @@ public class Arena { } // Make sure there's room to increment top. - if (top * elementSizeBytes >= memoryBlock.size()) { - memoryBlock = memoryBlock.realloc(memoryBlock.size() * 2); + if (top * elementSizeBytes >= byteCapacity()) { + resize(); } // Return the top index and increment. 
@@ -40,19 +34,15 @@ public class Arena { freeStack.add(i); } - public long indexToPointer(int i) { - return memoryBlock.ptr() + i * elementSizeBytes; - } - - public void delete() { - memoryBlock.free(); + public long byteOffsetOf(int i) { + return i * elementSizeBytes; } public int capacity() { return top; } - public long byteCapacity() { - return memoryBlock.size(); - } + public abstract long byteCapacity(); + + protected abstract void resize(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java index 73f8c1714..744bb12a0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java @@ -122,12 +122,12 @@ public abstract class AbstractInstancer implements Instancer if (writePos < newSize) { // Since we'll be shifting everything into this space we can consider it all changed. - changed.set(writePos, newSize); + setRangeChanged(writePos, newSize); } // We definitely shouldn't consider the deleted instances as changed though, // else we might try some out of bounds accesses later. - changed.clear(newSize, oldSize); + clearChangedRange(newSize, oldSize); // Punch out the deleted instances, shifting over surviving instances to fill their place. for (int scanPos = writePos; (scanPos < oldSize) && (writePos < newSize); scanPos++, writePos++) { @@ -155,6 +155,14 @@ public abstract class AbstractInstancer implements Instancer .clear(); } + protected void clearChangedRange(int start, int end) { + changed.clear(start, end); + } + + protected void setRangeChanged(int start, int end) { + changed.set(start, end); + } + /** * Clear all instances without freeing resources. 
*/ diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java new file mode 100644 index 000000000..c1c7843d7 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java @@ -0,0 +1,30 @@ +package dev.engine_room.flywheel.backend.engine; + +import dev.engine_room.flywheel.lib.memory.MemoryBlock; + +public class CpuArena extends AbstractArena { + + private MemoryBlock memoryBlock; + + public CpuArena(long elementSizeBytes, int initialCapacity) { + super(elementSizeBytes); + + memoryBlock = MemoryBlock.malloc(elementSizeBytes * initialCapacity); + } + + public long indexToPointer(int i) { + return memoryBlock.ptr() + i * elementSizeBytes; + } + + public void delete() { + memoryBlock.free(); + } + + public long byteCapacity() { + return memoryBlock.size(); + } + + protected void resize() { + memoryBlock = memoryBlock.realloc(memoryBlock.size() * 2); + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java index 214a56795..47e884a52 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/LightStorage.java @@ -46,7 +46,7 @@ public class LightStorage { private final LevelAccessor level; - private final Arena arena; + private final CpuArena arena; private final Long2IntMap section2ArenaIndex = new Long2IntOpenHashMap(); { section2ArenaIndex.defaultReturnValue(INVALID_SECTION); @@ -62,7 +62,7 @@ public class LightStorage { public LightStorage(LevelAccessor level) { this.level = level; - arena = new Arena(SECTION_SIZE_BYTES, DEFAULT_ARENA_CAPACITY_SECTIONS); + arena = new CpuArena(SECTION_SIZE_BYTES, DEFAULT_ARENA_CAPACITY_SECTIONS); } /** diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java index 2b707a3b8..85ad55387 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java @@ -1,6 +1,6 @@ package dev.engine_room.flywheel.backend.engine.embed; -import dev.engine_room.flywheel.backend.engine.Arena; +import dev.engine_room.flywheel.backend.engine.CpuArena; import it.unimi.dsi.fastutil.objects.ReferenceLinkedOpenHashSet; import it.unimi.dsi.fastutil.objects.ReferenceSet; @@ -13,7 +13,7 @@ public class EnvironmentStorage { // Note than the arena starts indexing at zero, but we reserve zero for the identity matrix. // Any time an ID from the arena is written we want to add one to it. - public final Arena arena = new Arena(MATRIX_SIZE_BYTES, 32); + public final CpuArena arena = new CpuArena(MATRIX_SIZE_BYTES, 32); { // Reserve the identity matrix. Burns a few bytes but oh well. 
diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index b0766e171..f82d32c10 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -61,9 +61,9 @@ public class IndirectBuffers { */ private final MemoryBlock multiBindBlock; private final long instanceStride; - public final ResizableStorageArray instance; + + public final InstancePager pageFile; public final ResizableStorageArray target; - public final ResizableStorageArray modelIndex; public final ResizableStorageArray model; public final ResizableStorageArray draw; @@ -71,30 +71,27 @@ public class IndirectBuffers { this.instanceStride = instanceStride; this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1); - instance = new ResizableStorageArray(instanceStride, INSTANCE_GROWTH_FACTOR); + pageFile = new InstancePager(instanceStride); target = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); - modelIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); model = new ResizableStorageArray(MODEL_STRIDE, MODEL_GROWTH_FACTOR); draw = new ResizableStorageArray(DRAW_COMMAND_STRIDE, DRAW_GROWTH_FACTOR); } void updateCounts(int instanceCount, int modelCount, int drawCount) { - instance.ensureCapacity(instanceCount); target.ensureCapacity(instanceCount); - modelIndex.ensureCapacity(instanceCount); model.ensureCapacity(modelCount); draw.ensureCapacity(drawCount); final long ptr = multiBindBlock.ptr(); - MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, instance.handle()); + MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, pageFile.storage.handle()); MemoryUtil.memPutInt(ptr + TARGET_HANDLE_OFFSET, target.handle()); - MemoryUtil.memPutInt(ptr + MODEL_INDEX_HANDLE_OFFSET, 
modelIndex.handle()); + MemoryUtil.memPutInt(ptr + MODEL_INDEX_HANDLE_OFFSET, pageFile.pageTable.handle()); MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle()); MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); - MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, instanceStride * instanceCount); + MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, pageFile.storage.byteCapacity()); MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * instanceCount); - MemoryUtil.memPutAddress(ptr + MODEL_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); + MemoryUtil.memPutAddress(ptr + MODEL_INDEX_SIZE_OFFSET, pageFile.pageTable.byteCapacity()); MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount); MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount); } @@ -124,9 +121,8 @@ public class IndirectBuffers { public void delete() { multiBindBlock.free(); - instance.delete(); + pageFile.delete(); target.delete(); - modelIndex.delete(); model.delete(); draw.delete(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 9a376ab14..8a25d5df5 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -74,8 +74,8 @@ public class IndirectCullingGroup { continue; } - instancer.modelIndex = modelIndex; - instancer.baseInstance = instanceCountThisFrame; + instancer.modelIndex(modelIndex); + instancer.baseInstance(instanceCountThisFrame); instanceCountThisFrame += instanceCount; modelIndex++; @@ -96,6 +96,8 @@ public class IndirectCullingGroup { // Upload only instances that have changed. 
uploadInstances(stagingBuffer); + buffers.pageFile.uploadTable(stagingBuffer); + // We need to upload the models every frame to reset the instance count. uploadModels(stagingBuffer); @@ -118,7 +120,7 @@ public class IndirectCullingGroup { cullProgram.bind(); buffers.bindForCompute(); - glDispatchCompute(GlCompat.getComputeGroupCount(instanceCountThisFrame), 1, 1); + glDispatchCompute(buffers.pageFile.capacity(), 1, 1); } public void dispatchApply() { @@ -171,7 +173,9 @@ public class IndirectCullingGroup { } public void add(IndirectInstancer instancer, InstancerKey key, MeshPool meshPool) { - instancer.modelIndex = instancers.size(); + instancer.pageFile = buffers.pageFile.createPage(); + instancer.modelIndex(instancers.size()); + instancers.add(instancer); List meshes = key.model() @@ -242,12 +246,7 @@ public class IndirectCullingGroup { private void uploadInstances(StagingBuffer stagingBuffer) { for (var instancer : instancers) { - instancer.uploadInstances(stagingBuffer, buffers.instance.handle()); - } - - for (var instancer : instancers) { - instancer.uploadModelIndices(stagingBuffer, buffers.modelIndex.handle()); - instancer.resetChanged(); + instancer.uploadInstances(stagingBuffer, buffers.pageFile.storage.handle()); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java index fb763d006..48517d1a2 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java @@ -72,9 +72,9 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount - to be set by the apply shader MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex()); // firstIndex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex()); // baseVertex - MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance); // 
baseInstance + MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance()); // baseInstance - MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex + MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex()); // modelIndex MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex @@ -89,9 +89,9 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 4, 1); // instanceCount - only drawing one instance MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex()); // firstIndex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex()); // baseVertex - MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance + instanceIndex); // baseInstance + MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance() + instanceIndex); // baseInstance - MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex + MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex()); // modelIndex MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index 75dc2b8e3..541765870 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -12,6 +12,7 @@ import dev.engine_room.flywheel.api.instance.InstanceWriter; import dev.engine_room.flywheel.api.model.Model; import dev.engine_room.flywheel.backend.engine.AbstractInstancer; import dev.engine_room.flywheel.backend.engine.embed.Environment; +import dev.engine_room.flywheel.backend.util.AtomicBitSet; import dev.engine_room.flywheel.lib.math.MoreMath; public class IndirectInstancer extends AbstractInstancer { @@ -20,11 +21,12 @@ public class IndirectInstancer extends AbstractInstancer private final List associatedDraws = new ArrayList<>(); private final 
Vector4fc boundingSphere; - public int modelIndex = -1; - public int baseInstance = -1; - private int lastModelIndex = -1; - private int lastBaseInstance = -1; - private int lastInstanceCount = -1; + private final AtomicBitSet changedPages = new AtomicBitSet(); + + public InstancePager.Allocation pageFile; + + private int modelIndex = -1; + private int baseInstance = -1; public IndirectInstancer(InstanceType type, Environment environment, Model model) { super(type, environment); @@ -34,6 +36,29 @@ public class IndirectInstancer extends AbstractInstancer boundingSphere = model.boundingSphere(); } + @Override + public void notifyDirty(int index) { + if (index < 0 || index >= instanceCount()) { + return; + } + changed.set(index); + changedPages.set(pageFile.object2Page(index)); + } + + @Override + protected void setRangeChanged(int start, int end) { + super.setRangeChanged(start, end); + + changedPages.set(pageFile.object2Page(start), pageFile.object2Page(end)); + } + + @Override + protected void clearChangedRange(int start, int end) { + super.clearChangedRange(start, end); + + // changedPages.clear(pageFile.object2Page(start), pageFile); + } + public void addDraw(IndirectDraw draw) { associatedDraws.add(draw); } @@ -44,6 +69,8 @@ public class IndirectInstancer extends AbstractInstancer public void update() { removeDeletedInstances(); + + pageFile.activeCount(instanceCount()); } public void writeModel(long ptr) { @@ -57,71 +84,38 @@ public class IndirectInstancer extends AbstractInstancer } public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) { - long baseByte = baseInstance * instanceStride; + int numPages = pageFile.pageCount(); - if (baseInstance != lastBaseInstance) { - uploadAllInstances(stagingBuffer, baseByte, instanceVbo); - } else { - uploadChangedInstances(stagingBuffer, baseByte, instanceVbo); - } - } + var instanceCount = instances.size(); - public void uploadModelIndices(StagingBuffer stagingBuffer, int modelIndexVbo) { - long 
modelIndexBaseByte = baseInstance * IndirectBuffers.INT_SIZE; + for (int page = 0; page < numPages; page++) { + page = changedPages.nextSetBit(0); - if (baseInstance != lastBaseInstance || modelIndex != lastModelIndex || instances.size() > lastInstanceCount) { - uploadAllModelIndices(stagingBuffer, modelIndexBaseByte, modelIndexVbo); - } - } - - public void resetChanged() { - lastModelIndex = modelIndex; - lastBaseInstance = baseInstance; - lastInstanceCount = instances.size(); - changed.clear(); - } - - private void uploadChangedInstances(StagingBuffer stagingBuffer, long baseByte, int instanceVbo) { - changed.forEachSetSpan((startInclusive, endInclusive) -> { - // Generally we're good about ensuring we don't have changed bits set out of bounds, but check just in case - if (startInclusive >= instances.size()) { - return; + if (page == -1) { + break; } - int actualEnd = Math.min(endInclusive, instances.size() - 1); - int instanceCount = actualEnd - startInclusive + 1; - long totalSize = instanceCount * instanceStride; + int startObject = pageFile.page2Object(page); - stagingBuffer.enqueueCopy(totalSize, instanceVbo, baseByte + startInclusive * instanceStride, ptr -> { - for (int i = startInclusive; i <= actualEnd; i++) { - var instance = instances.get(i); - writer.write(ptr, instance); + if (startObject >= instanceCount) { + break; + } + + int endObject = Math.min(instanceCount, pageFile.page2Object(page + 1) - 1); + + long baseByte = pageFile.page2ByteOffset(page); + long size = (endObject - startObject) * instanceStride; + + stagingBuffer.enqueueCopy(size, instanceVbo, baseByte, ptr -> { + for (int i = startObject; i < endObject; i++) { + writer.write(ptr, instances.get(i)); ptr += instanceStride; } }); - }); - } + } - private void uploadAllInstances(StagingBuffer stagingBuffer, long baseByte, int instanceVbo) { - long totalSize = instances.size() * instanceStride; - - stagingBuffer.enqueueCopy(totalSize, instanceVbo, baseByte, ptr -> { - for (I instance : 
instances) { - writer.write(ptr, instance); - ptr += instanceStride; - } - }); - } - - private void uploadAllModelIndices(StagingBuffer stagingBuffer, long modelIndexBaseByte, int modelIndexVbo) { - long modelIndexTotalSize = instances.size() * IndirectBuffers.INT_SIZE; - - stagingBuffer.enqueueCopy(modelIndexTotalSize, modelIndexVbo, modelIndexBaseByte, ptr -> { - for (int i = 0; i < instances.size(); i++) { - MemoryUtil.memPutInt(ptr, modelIndex); - ptr += IndirectBuffers.INT_SIZE; - } - }); + changed.clear(); + changedPages.clear(); } @Override @@ -130,4 +124,21 @@ public class IndirectInstancer extends AbstractInstancer draw.delete(); } } + + public void modelIndex(int modelIndex) { + this.modelIndex = modelIndex; + pageFile.modelIndex(modelIndex); + } + + public int modelIndex() { + return modelIndex; + } + + public void baseInstance(int baseInstance) { + this.baseInstance = baseInstance; + } + + public int baseInstance() { + return baseInstance; + } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java new file mode 100644 index 000000000..018a640a7 --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java @@ -0,0 +1,158 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.jetbrains.annotations.UnknownNullability; +import org.lwjgl.system.MemoryUtil; + +import dev.engine_room.flywheel.backend.engine.AbstractArena; +import dev.engine_room.flywheel.lib.memory.MemoryBlock; + +public class InstancePager extends AbstractArena { + // 32 objects per page. Allows for convenient bitsets on the gpu. 
+ public static final int DEFAULT_PAGE_SIZE_OBJECTS = 5; + public static final int INITIAL_PAGES_ALLOCATED = 4; + + private final int log2PageSize; + /** + * The number of objects in a page. + */ + private final int pageSize; + + private final long objectSizeBytes; + + @UnknownNullability + private MemoryBlock pageData; + + private final int pageMask; + public final ResizableStorageArray storage; + public final ResizableStorageArray pageTable; + + private final List allocations = new ArrayList<>(); + + public InstancePager(long objectSizeBytes) { + this(DEFAULT_PAGE_SIZE_OBJECTS, objectSizeBytes); + } + + public InstancePager(int log2PageSize, long objectSizeBytes) { + super((1L << log2PageSize) * objectSizeBytes); + this.log2PageSize = log2PageSize; + this.pageSize = 1 << log2PageSize; + this.pageMask = pageSize - 1; + this.objectSizeBytes = objectSizeBytes; + + this.storage = new ResizableStorageArray(this.elementSizeBytes); + this.pageTable = new ResizableStorageArray(Integer.BYTES); + } + + public Allocation createPage() { + var out = new Allocation(); + allocations.add(out); + return out; + } + + @Override + public long byteCapacity() { + return storage.byteCapacity(); + } + + @Override + protected void resize() { + if (pageData == null) { + pageData = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); + storage.ensureCapacity(INITIAL_PAGES_ALLOCATED); + pageTable.ensureCapacity(INITIAL_PAGES_ALLOCATED); + } else { + pageData = pageData.realloc(pageData.size() * 2); + storage.ensureCapacity(storage.capacity() * 2); + pageTable.ensureCapacity(pageTable.capacity() * 2); + } + } + + public void uploadTable(StagingBuffer stagingBuffer) { + for (Allocation allocation : allocations) { + allocation.updatePageTable(); + } + stagingBuffer.enqueueCopy(pageData.ptr(), pageData.size(), pageTable.handle(), 0); + } + + public void delete() { + storage.delete(); + pageTable.delete(); + pageData.free(); + } + + public class Allocation { + public int[] pages = new 
int[0]; + + private int modelIndex = -1; + + public void modelIndex(int modelIndex) { + if (this.modelIndex != modelIndex) { + this.modelIndex = modelIndex; + } + } + + private void updatePageTable() { + var ptr = pageData.ptr(); + + int fullPage = (modelIndex & 0x3FFFFF) | 0x8000000; + + for (int page : pages) { + MemoryUtil.memPutInt(ptr + page * Integer.BYTES, fullPage); + } + } + + public void activeCount(int objectCount) { + var neededPages = object2Page((objectCount + pageMask)); + + var oldLength = pages.length; + + if (oldLength > neededPages) { + shrink(oldLength, neededPages); + } else if (oldLength < neededPages) { + grow(neededPages, oldLength); + } + } + + private void grow(int neededPages, int oldLength) { + pages = Arrays.copyOf(pages, neededPages); + + for (int i = oldLength; i < neededPages; i++) { + pages[i] = InstancePager.this.alloc(); + } + } + + private void shrink(int oldLength, int neededPages) { + for (int i = oldLength - 1; i > neededPages; i--) { + var page = pages[i]; + InstancePager.this.free(page); + MemoryUtil.memPutInt(pageData.ptr() + page * Integer.BYTES, 0); + } + + pages = Arrays.copyOf(pages, neededPages); + } + + public int capacity() { + return pages.length << log2PageSize; + } + + public int pageCount() { + return pages.length; + } + + public int object2Page(int objectIndex) { + return objectIndex >> log2PageSize; + } + + public int page2Object(int pageIndex) { + return pageIndex << log2PageSize; + } + + public long page2ByteOffset(int page) { + return InstancePager.this.byteOffsetOf(pages[page]); + } + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 65d5baae0..e45f4ec3d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -4,14 +4,19 @@ #include 
"flywheel:util/matrix.glsl" #include "flywheel:internal/indirect/matrices.glsl" -layout(local_size_x = _FLW_SUBGROUP_SIZE) in; +layout(local_size_x = 32) in; layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer TargetBuffer { uint _flw_instanceIndices[]; }; +// High 6 bits for the number of instances in the page. +const uint _FLW_PAGE_COUNT_OFFSET = 25u; +// Bottom 24 bits for the model index. +const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; + layout(std430, binding = _FLW_MODEL_INDEX_BUFFER_BINDING) restrict readonly buffer ModelIndexBuffer { - uint _flw_modelIndices[]; + uint _flw_pageTable[]; }; layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { @@ -55,13 +60,23 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { } void main() { - uint instanceIndex = gl_GlobalInvocationID.x; + uint pageIndex = gl_WorkGroupID.x; - if (instanceIndex >= _flw_modelIndices.length()) { + if (pageIndex >= _flw_pageTable.length()) { return; } - uint modelIndex = _flw_modelIndices[instanceIndex]; + uint packedModelIndexAndCount = _flw_pageTable[pageIndex]; + + uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET; + + if (gl_LocalInvocationID.x >= pageInstanceCount) { + return; + } + + uint instanceIndex = gl_GlobalInvocationID.x; + + uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK; if (_flw_isVisible(instanceIndex, modelIndex)) { uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); From 1138208e3159746096013789e29afe4647ee31c9 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 1 Sep 2024 12:34:38 -0500 Subject: [PATCH 02/17] Growing pains - Fix bit logic on the GPU - Manually manage the size of the storage and pageTable buffers - Make object2Page and page2Object static - Fix instance writing loop - Fix page table always having full pages - Fix allocations not shrinking --- .../backend/engine/AbstractArena.java | 4 +- .../flywheel/backend/engine/CpuArena.java | 2 
+- .../engine/indirect/IndirectBuffers.java | 4 +- .../engine/indirect/IndirectInstancer.java | 16 ++-- .../engine/indirect/InstancePager.java | 89 +++++++++---------- .../flywheel/internal/indirect/cull.glsl | 4 +- 6 files changed, 55 insertions(+), 64 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java index 23d023006..2493a339b 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractArena.java @@ -22,7 +22,7 @@ public abstract class AbstractArena { // Make sure there's room to increment top. if (top * elementSizeBytes >= byteCapacity()) { - resize(); + grow(); } // Return the top index and increment. @@ -44,5 +44,5 @@ public abstract class AbstractArena { public abstract long byteCapacity(); - protected abstract void resize(); + protected abstract void grow(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java index c1c7843d7..33dfa3812 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/CpuArena.java @@ -24,7 +24,7 @@ public class CpuArena extends AbstractArena { return memoryBlock.size(); } - protected void resize() { + protected void grow() { memoryBlock = memoryBlock.realloc(memoryBlock.size() * 2); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index f82d32c10..1ce962eec 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -89,9 +89,9 @@ public class IndirectBuffers { MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle()); MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); - MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, pageFile.storage.byteCapacity()); + MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, pageFile.storage.capacity()); MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * instanceCount); - MemoryUtil.memPutAddress(ptr + MODEL_INDEX_SIZE_OFFSET, pageFile.pageTable.byteCapacity()); + MemoryUtil.memPutAddress(ptr + MODEL_INDEX_SIZE_OFFSET, pageFile.pageTable.capacity()); MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount); MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index 541765870..bbd78d88e 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -42,14 +42,14 @@ public class IndirectInstancer extends AbstractInstancer return; } changed.set(index); - changedPages.set(pageFile.object2Page(index)); + changedPages.set(InstancePager.object2Page(index)); } @Override protected void setRangeChanged(int start, int end) { super.setRangeChanged(start, end); - changedPages.set(pageFile.object2Page(start), pageFile.object2Page(end)); + changedPages.set(InstancePager.object2Page(start), InstancePager.object2Page(end) + 1); } @Override @@ -88,20 +88,14 @@ public class IndirectInstancer extends AbstractInstancer var instanceCount = instances.size(); - for (int page = 0; page < numPages; page++) { - page = 
changedPages.nextSetBit(0); - - if (page == -1) { - break; - } - - int startObject = pageFile.page2Object(page); + for (int page = changedPages.nextSetBit(0); page >= 0 && page < numPages; page = changedPages.nextSetBit(page + 1)) { + int startObject = InstancePager.page2Object(page); if (startObject >= instanceCount) { break; } - int endObject = Math.min(instanceCount, pageFile.page2Object(page + 1) - 1); + int endObject = Math.min(instanceCount, InstancePager.page2Object(page + 1)); long baseByte = pageFile.page2ByteOffset(page); long size = (endObject - startObject) * instanceStride; diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java index 018a640a7..9c746be3d 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java @@ -4,7 +4,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.jetbrains.annotations.UnknownNullability; import org.lwjgl.system.MemoryUtil; import dev.engine_room.flywheel.backend.engine.AbstractArena; @@ -12,39 +11,39 @@ import dev.engine_room.flywheel.lib.memory.MemoryBlock; public class InstancePager extends AbstractArena { // 32 objects per page. Allows for convenient bitsets on the gpu. - public static final int DEFAULT_PAGE_SIZE_OBJECTS = 5; - public static final int INITIAL_PAGES_ALLOCATED = 4; + public static final int LOG_2_PAGE_SIZE = 5; + public static final int PAGE_SIZE = 1 << LOG_2_PAGE_SIZE; + public static final int PAGE_MASK = PAGE_SIZE - 1; - private final int log2PageSize; - /** - * The number of objects in a page. 
- */ - private final int pageSize; + public static final int INITIAL_PAGES_ALLOCATED = 4; private final long objectSizeBytes; - @UnknownNullability private MemoryBlock pageData; - private final int pageMask; - public final ResizableStorageArray storage; - public final ResizableStorageArray pageTable; + public final ResizableStorageBuffer storage; + public final ResizableStorageBuffer pageTable; private final List allocations = new ArrayList<>(); public InstancePager(long objectSizeBytes) { - this(DEFAULT_PAGE_SIZE_OBJECTS, objectSizeBytes); - } - - public InstancePager(int log2PageSize, long objectSizeBytes) { - super((1L << log2PageSize) * objectSizeBytes); - this.log2PageSize = log2PageSize; - this.pageSize = 1 << log2PageSize; - this.pageMask = pageSize - 1; + super(PAGE_SIZE * objectSizeBytes); this.objectSizeBytes = objectSizeBytes; - this.storage = new ResizableStorageArray(this.elementSizeBytes); - this.pageTable = new ResizableStorageArray(Integer.BYTES); + this.storage = new ResizableStorageBuffer(); + this.pageTable = new ResizableStorageBuffer(); + + pageData = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); + storage.ensureCapacity(INITIAL_PAGES_ALLOCATED * elementSizeBytes); + pageTable.ensureCapacity(INITIAL_PAGES_ALLOCATED * Integer.BYTES); + } + + public static int object2Page(int objectIndex) { + return objectIndex >> LOG_2_PAGE_SIZE; + } + + public static int page2Object(int pageIndex) { + return pageIndex << LOG_2_PAGE_SIZE; } public Allocation createPage() { @@ -55,20 +54,14 @@ public class InstancePager extends AbstractArena { @Override public long byteCapacity() { - return storage.byteCapacity(); + return storage.capacity(); } @Override - protected void resize() { - if (pageData == null) { - pageData = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); - storage.ensureCapacity(INITIAL_PAGES_ALLOCATED); - pageTable.ensureCapacity(INITIAL_PAGES_ALLOCATED); - } else { - pageData = pageData.realloc(pageData.size() * 2); - 
storage.ensureCapacity(storage.capacity() * 2); - pageTable.ensureCapacity(pageTable.capacity() * 2); - } + protected void grow() { + pageData = pageData.realloc(pageData.size() * 2); + storage.ensureCapacity(storage.capacity() * 2); + pageTable.ensureCapacity(pageTable.capacity() * 2); } public void uploadTable(StagingBuffer stagingBuffer) { @@ -88,6 +81,7 @@ public class InstancePager extends AbstractArena { public int[] pages = new int[0]; private int modelIndex = -1; + private int activeCount = 0; public void modelIndex(int modelIndex) { if (this.modelIndex != modelIndex) { @@ -96,17 +90,28 @@ public class InstancePager extends AbstractArena { } private void updatePageTable() { + if (pages.length == 0) { + return; + } + var ptr = pageData.ptr(); - int fullPage = (modelIndex & 0x3FFFFF) | 0x8000000; + int fullPage = (modelIndex & 0x3FFFFF) | (32 << 26); - for (int page : pages) { + int remainder = activeCount; + + for (int i = 0; i < pages.length - 1; i++) { + int page = pages[i]; MemoryUtil.memPutInt(ptr + page * Integer.BYTES, fullPage); + remainder -= PAGE_SIZE; } + + MemoryUtil.memPutInt(ptr + pages[pages.length - 1] * Integer.BYTES, (modelIndex & 0x3FFFFF) | (remainder << 26)); } public void activeCount(int objectCount) { - var neededPages = object2Page((objectCount + pageMask)); + var neededPages = object2Page((objectCount + PAGE_MASK)); + activeCount = objectCount; var oldLength = pages.length; @@ -126,7 +131,7 @@ public class InstancePager extends AbstractArena { } private void shrink(int oldLength, int neededPages) { - for (int i = oldLength - 1; i > neededPages; i--) { + for (int i = oldLength - 1; i >= neededPages; i--) { var page = pages[i]; InstancePager.this.free(page); MemoryUtil.memPutInt(pageData.ptr() + page * Integer.BYTES, 0); @@ -136,21 +141,13 @@ public class InstancePager extends AbstractArena { } public int capacity() { - return pages.length << log2PageSize; + return pages.length << LOG_2_PAGE_SIZE; } public int pageCount() { return 
pages.length; } - public int object2Page(int objectIndex) { - return objectIndex >> log2PageSize; - } - - public int page2Object(int pageIndex) { - return pageIndex << log2PageSize; - } - public long page2ByteOffset(int page) { return InstancePager.this.byteOffsetOf(pages[page]); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index e45f4ec3d..4186f470d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -11,8 +11,8 @@ layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer T }; // High 6 bits for the number of instances in the page. -const uint _FLW_PAGE_COUNT_OFFSET = 25u; -// Bottom 24 bits for the model index. +const uint _FLW_PAGE_COUNT_OFFSET = 26u; +// Bottom 26 bits for the model index. const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; layout(std430, binding = _FLW_MODEL_INDEX_BUFFER_BINDING) restrict readonly buffer ModelIndexBuffer { From e83a308a46c1ccf2c6eab38b37c1eeba6597035c Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Tue, 3 Sep 2024 10:56:46 -0500 Subject: [PATCH 03/17] On-call paging - Only update the page table when an allocation is resized - Only upload the page table after it's updated - Combine various setters for InstancePager.Allocation and IndirectInstancer - Free pages when an allocation is deleted --- .../backend/engine/AbstractInstancer.java | 6 +- .../engine/indirect/IndirectBuffers.java | 4 +- .../engine/indirect/IndirectCullingGroup.java | 9 +- .../engine/indirect/IndirectInstancer.java | 22 +-- .../engine/indirect/InstancePager.java | 154 ++++++++++++------ 5 files changed, 113 insertions(+), 82 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java index 744bb12a0..16aa88b64 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/AbstractInstancer.java @@ -127,7 +127,7 @@ public abstract class AbstractInstancer implements Instancer // We definitely shouldn't consider the deleted instances as changed though, // else we might try some out of bounds accesses later. - clearChangedRange(newSize, oldSize); + changed.clear(newSize, oldSize); // Punch out the deleted instances, shifting over surviving instances to fill their place. for (int scanPos = writePos; (scanPos < oldSize) && (writePos < newSize); scanPos++, writePos++) { @@ -155,10 +155,6 @@ public abstract class AbstractInstancer implements Instancer .clear(); } - protected void clearChangedRange(int start, int end) { - changed.clear(start, end); - } - protected void setRangeChanged(int start, int end) { changed.set(start, end); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 1ce962eec..193efd004 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -83,13 +83,13 @@ public class IndirectBuffers { draw.ensureCapacity(drawCount); final long ptr = multiBindBlock.ptr(); - MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, pageFile.storage.handle()); + MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, pageFile.objects.handle()); MemoryUtil.memPutInt(ptr + TARGET_HANDLE_OFFSET, target.handle()); MemoryUtil.memPutInt(ptr + MODEL_INDEX_HANDLE_OFFSET, pageFile.pageTable.handle()); MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle()); 
MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); - MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, pageFile.storage.capacity()); + MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, pageFile.objects.capacity()); MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * instanceCount); MemoryUtil.memPutAddress(ptr + MODEL_INDEX_SIZE_OFFSET, pageFile.pageTable.capacity()); MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 8a25d5df5..da4f8cfe8 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -74,8 +74,7 @@ public class IndirectCullingGroup { continue; } - instancer.modelIndex(modelIndex); - instancer.baseInstance(instanceCountThisFrame); + instancer.postUpdate(modelIndex, instanceCountThisFrame); instanceCountThisFrame += instanceCount; modelIndex++; @@ -173,8 +172,8 @@ public class IndirectCullingGroup { } public void add(IndirectInstancer instancer, InstancerKey key, MeshPool meshPool) { - instancer.pageFile = buffers.pageFile.createPage(); - instancer.modelIndex(instancers.size()); + instancer.pageFile = buffers.pageFile.createAllocation(); + instancer.postUpdate(instancers.size(), -1); instancers.add(instancer); @@ -246,7 +245,7 @@ public class IndirectCullingGroup { private void uploadInstances(StagingBuffer stagingBuffer) { for (var instancer : instancers) { - instancer.uploadInstances(stagingBuffer, buffers.pageFile.storage.handle()); + instancer.uploadInstances(stagingBuffer, buffers.pageFile.objects.handle()); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index bbd78d88e..da825ed63 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -52,13 +52,6 @@ public class IndirectInstancer extends AbstractInstancer changedPages.set(InstancePager.object2Page(start), InstancePager.object2Page(end) + 1); } - @Override - protected void clearChangedRange(int start, int end) { - super.clearChangedRange(start, end); - - // changedPages.clear(pageFile.object2Page(start), pageFile); - } - public void addDraw(IndirectDraw draw) { associatedDraws.add(draw); } @@ -69,8 +62,12 @@ public class IndirectInstancer extends AbstractInstancer public void update() { removeDeletedInstances(); + } - pageFile.activeCount(instanceCount()); + public void postUpdate(int modelIndex, int baseInstance) { + this.modelIndex = modelIndex; + this.baseInstance = baseInstance; + pageFile.update(modelIndex, instanceCount()); } public void writeModel(long ptr) { @@ -117,21 +114,14 @@ public class IndirectInstancer extends AbstractInstancer for (IndirectDraw draw : draws()) { draw.delete(); } - } - public void modelIndex(int modelIndex) { - this.modelIndex = modelIndex; - pageFile.modelIndex(modelIndex); + pageFile.delete(); } public int modelIndex() { return modelIndex; } - public void baseInstance(int baseInstance) { - this.baseInstance = baseInstance; - } - public int baseInstance() { return baseInstance; } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java index 9c746be3d..a70b98321 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java @@ -1,8 +1,6 @@ package dev.engine_room.flywheel.backend.engine.indirect; -import java.util.ArrayList; import java.util.Arrays; -import java.util.List; import org.lwjgl.system.MemoryUtil; @@ -17,24 +15,20 @@ public class InstancePager extends AbstractArena { public static final int INITIAL_PAGES_ALLOCATED = 4; - private final long objectSizeBytes; - - private MemoryBlock pageData; - - public final ResizableStorageBuffer storage; + private MemoryBlock pageTableData; + public final ResizableStorageBuffer objects; public final ResizableStorageBuffer pageTable; - private final List allocations = new ArrayList<>(); + private boolean needsUpload = false; public InstancePager(long objectSizeBytes) { super(PAGE_SIZE * objectSizeBytes); - this.objectSizeBytes = objectSizeBytes; - this.storage = new ResizableStorageBuffer(); + this.objects = new ResizableStorageBuffer(); this.pageTable = new ResizableStorageBuffer(); - pageData = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); - storage.ensureCapacity(INITIAL_PAGES_ALLOCATED * elementSizeBytes); + pageTableData = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); + objects.ensureCapacity(INITIAL_PAGES_ALLOCATED * elementSizeBytes); pageTable.ensureCapacity(INITIAL_PAGES_ALLOCATED * Integer.BYTES); } @@ -46,79 +40,122 @@ public class InstancePager extends AbstractArena { return pageIndex << LOG_2_PAGE_SIZE; } - public Allocation createPage() { - var out = new Allocation(); - allocations.add(out); - return out; + public Allocation createAllocation() { + return new Allocation(); } @Override public long byteCapacity() { - return storage.capacity(); + return objects.capacity(); + } + + @Override + public void free(int i) { + super.free(i); + MemoryUtil.memPutInt(ptrForPage(i), 0); } @Override protected void grow() { - pageData = pageData.realloc(pageData.size() * 2); - storage.ensureCapacity(storage.capacity() * 
2); + pageTableData = pageTableData.realloc(pageTableData.size() * 2); + objects.ensureCapacity(objects.capacity() * 2); pageTable.ensureCapacity(pageTable.capacity() * 2); } public void uploadTable(StagingBuffer stagingBuffer) { - for (Allocation allocation : allocations) { - allocation.updatePageTable(); + if (!needsUpload) { + return; } - stagingBuffer.enqueueCopy(pageData.ptr(), pageData.size(), pageTable.handle(), 0); + // We could be smarter about which spans are uploaded but this thing is so small it's probably not worth it. + stagingBuffer.enqueueCopy(pageTableData.ptr(), pageTableData.size(), pageTable.handle(), 0); + needsUpload = false; } public void delete() { - storage.delete(); + objects.delete(); pageTable.delete(); - pageData.free(); + pageTableData.free(); + } + + private long ptrForPage(int page) { + return pageTableData.ptr() + (long) page * Integer.BYTES; } public class Allocation { - public int[] pages = new int[0]; + public static final int[] EMPTY_ALLOCATION = new int[0]; + public int[] pages = EMPTY_ALLOCATION; private int modelIndex = -1; - private int activeCount = 0; + private int objectCount = 0; - public void modelIndex(int modelIndex) { - if (this.modelIndex != modelIndex) { - this.modelIndex = modelIndex; + /** + * Calculates the page descriptor for the given page index. + * Runs under the assumption than all pages are full except maybe the last one. + */ + private int calculatePageDescriptor(int pageIndex) { + int countInPage; + if (objectCount % PAGE_SIZE != 0 && pageIndex == pages.length - 1) { + // Last page && it isn't full -> use the remainder. + countInPage = objectCount & PAGE_MASK; + } else if (objectCount > 0) { + // Full page. + countInPage = PAGE_SIZE; + } else { + // Empty page, this shouldn't be reachable because we eagerly free empty pages. 
+ countInPage = 0; } + return (modelIndex & 0x3FFFFF) | (countInPage << 26); } - private void updatePageTable() { - if (pages.length == 0) { + public void update(int modelIndex, int objectCount) { + boolean incremental = this.modelIndex == modelIndex; + + if (incremental && objectCount == this.objectCount) { + // Nothing will change. return; } - var ptr = pageData.ptr(); + InstancePager.this.needsUpload = true; - int fullPage = (modelIndex & 0x3FFFFF) | (32 << 26); - - int remainder = activeCount; - - for (int i = 0; i < pages.length - 1; i++) { - int page = pages[i]; - MemoryUtil.memPutInt(ptr + page * Integer.BYTES, fullPage); - remainder -= PAGE_SIZE; - } - - MemoryUtil.memPutInt(ptr + pages[pages.length - 1] * Integer.BYTES, (modelIndex & 0x3FFFFF) | (remainder << 26)); - } - - public void activeCount(int objectCount) { - var neededPages = object2Page((objectCount + PAGE_MASK)); - activeCount = objectCount; + this.modelIndex = modelIndex; + this.objectCount = objectCount; var oldLength = pages.length; + var newLength = object2Page((objectCount + PAGE_MASK)); - if (oldLength > neededPages) { - shrink(oldLength, neededPages); - } else if (oldLength < neededPages) { - grow(neededPages, oldLength); + if (oldLength > newLength) { + // Eagerly free the now unnecessary pages. + // shrink will zero out the pageTable entries for the freed pages. + shrink(oldLength, newLength); + + if (incremental) { + // Only update the last page, everything else is unchanged. + updateRange(newLength - 1, newLength); + } + } else if (oldLength < newLength) { + // Allocate new pages to fit the new object count. + grow(newLength, oldLength); + + if (incremental) { + // Update the old last page + all new pages + updateRange(oldLength - 1, newLength); + } + } else { + if (incremental) { + // Only update the last page. + updateRange(oldLength - 1, oldLength); + } + } + + if (!incremental) { + // Update all pages. 
+ updateRange(0, newLength); + } + } + + private void updateRange(int start, int oldLength) { + for (int i = start; i < oldLength; i++) { + MemoryUtil.memPutInt(ptrForPage(pages[i]), calculatePageDescriptor(i)); } } @@ -126,7 +163,8 @@ public class InstancePager extends AbstractArena { pages = Arrays.copyOf(pages, neededPages); for (int i = oldLength; i < neededPages; i++) { - pages[i] = InstancePager.this.alloc(); + var page = InstancePager.this.alloc(); + pages[i] = page; } } @@ -134,7 +172,6 @@ public class InstancePager extends AbstractArena { for (int i = oldLength - 1; i >= neededPages; i--) { var page = pages[i]; InstancePager.this.free(page); - MemoryUtil.memPutInt(pageData.ptr() + page * Integer.BYTES, 0); } pages = Arrays.copyOf(pages, neededPages); @@ -151,5 +188,14 @@ public class InstancePager extends AbstractArena { public long page2ByteOffset(int page) { return InstancePager.this.byteOffsetOf(pages[page]); } + + public void delete() { + for (int page : pages) { + InstancePager.this.free(page); + } + pages = EMPTY_ALLOCATION; + modelIndex = -1; + objectCount = 0; + } } } From 2537584a22dab9d25b6396ce38c96d3a6d12fa82 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Tue, 3 Sep 2024 11:23:21 -0500 Subject: [PATCH 04/17] The hardest problem - Rename most InstancePager terminology - Rename MODEL_INDEX buffer stuffs --- .../engine/indirect/BufferBindings.java | 2 +- .../engine/indirect/IndirectBuffers.java | 18 +- .../engine/indirect/IndirectCullingGroup.java | 8 +- .../engine/indirect/IndirectInstancer.java | 19 +- ...{InstancePager.java => ObjectStorage.java} | 167 ++++++++++-------- .../internal/indirect/buffer_bindings.glsl | 2 +- .../flywheel/internal/indirect/cull.glsl | 8 +- 7 files changed, 121 insertions(+), 103 deletions(-) rename common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/{InstancePager.java => ObjectStorage.java} (59%) diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java index 479eaed74..658096695 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java @@ -3,7 +3,7 @@ package dev.engine_room.flywheel.backend.engine.indirect; public final class BufferBindings { public static final int INSTANCE = 0; public static final int TARGET = 1; - public static final int MODEL_INDEX = 2; + public static final int PAGE_FRAME_DESCRIPTOR = 2; public static final int MODEL = 3; public static final int DRAW = 4; public static final int LIGHT_LUT = 5; diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 193efd004..498030fe7 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -32,14 +32,14 @@ public class IndirectBuffers { // Offsets to the vbos private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET; private static final long TARGET_HANDLE_OFFSET = INT_SIZE; - private static final long MODEL_INDEX_HANDLE_OFFSET = INT_SIZE * 2; + private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = INT_SIZE * 2; private static final long MODEL_HANDLE_OFFSET = INT_SIZE * 3; private static final long DRAW_HANDLE_OFFSET = INT_SIZE * 4; // Offsets to the sizes private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET; private static final long TARGET_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE; - private static final long MODEL_INDEX_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 2; + private static final long 
PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 2; private static final long MODEL_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 3; private static final long DRAW_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 4; @@ -62,7 +62,7 @@ public class IndirectBuffers { private final MemoryBlock multiBindBlock; private final long instanceStride; - public final InstancePager pageFile; + public final ObjectStorage objectStorage; public final ResizableStorageArray target; public final ResizableStorageArray model; public final ResizableStorageArray draw; @@ -71,7 +71,7 @@ public class IndirectBuffers { this.instanceStride = instanceStride; this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1); - pageFile = new InstancePager(instanceStride); + objectStorage = new ObjectStorage(instanceStride); target = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); model = new ResizableStorageArray(MODEL_STRIDE, MODEL_GROWTH_FACTOR); draw = new ResizableStorageArray(DRAW_COMMAND_STRIDE, DRAW_GROWTH_FACTOR); @@ -83,15 +83,15 @@ public class IndirectBuffers { draw.ensureCapacity(drawCount); final long ptr = multiBindBlock.ptr(); - MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, pageFile.objects.handle()); + MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle()); MemoryUtil.memPutInt(ptr + TARGET_HANDLE_OFFSET, target.handle()); - MemoryUtil.memPutInt(ptr + MODEL_INDEX_HANDLE_OFFSET, pageFile.pageTable.handle()); + MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle()); MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle()); MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); - MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, pageFile.objects.capacity()); + MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity()); MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * instanceCount); - MemoryUtil.memPutAddress(ptr + 
MODEL_INDEX_SIZE_OFFSET, pageFile.pageTable.capacity()); + MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount); MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount); } @@ -121,7 +121,7 @@ public class IndirectBuffers { public void delete() { multiBindBlock.free(); - pageFile.delete(); + objectStorage.delete(); target.delete(); model.delete(); draw.delete(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index da4f8cfe8..961d5b2ce 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -95,7 +95,7 @@ public class IndirectCullingGroup { // Upload only instances that have changed. uploadInstances(stagingBuffer); - buffers.pageFile.uploadTable(stagingBuffer); + buffers.objectStorage.uploadDescriptors(stagingBuffer); // We need to upload the models every frame to reset the instance count. 
uploadModels(stagingBuffer); @@ -119,7 +119,7 @@ public class IndirectCullingGroup { cullProgram.bind(); buffers.bindForCompute(); - glDispatchCompute(buffers.pageFile.capacity(), 1, 1); + glDispatchCompute(buffers.objectStorage.capacity(), 1, 1); } public void dispatchApply() { @@ -172,7 +172,7 @@ public class IndirectCullingGroup { } public void add(IndirectInstancer instancer, InstancerKey key, MeshPool meshPool) { - instancer.pageFile = buffers.pageFile.createAllocation(); + instancer.mapping = buffers.objectStorage.createMapping(); instancer.postUpdate(instancers.size(), -1); instancers.add(instancer); @@ -245,7 +245,7 @@ public class IndirectCullingGroup { private void uploadInstances(StagingBuffer stagingBuffer) { for (var instancer : instancers) { - instancer.uploadInstances(stagingBuffer, buffers.pageFile.objects.handle()); + instancer.uploadInstances(stagingBuffer, buffers.objectStorage.objectBuffer.handle()); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index da825ed63..fc8e1361e 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -3,6 +3,7 @@ package dev.engine_room.flywheel.backend.engine.indirect; import java.util.ArrayList; import java.util.List; +import org.jetbrains.annotations.UnknownNullability; import org.joml.Vector4fc; import org.lwjgl.system.MemoryUtil; @@ -23,7 +24,7 @@ public class IndirectInstancer extends AbstractInstancer private final AtomicBitSet changedPages = new AtomicBitSet(); - public InstancePager.Allocation pageFile; + public ObjectStorage.@UnknownNullability Mapping mapping; private int modelIndex = -1; private int baseInstance = -1; @@ -42,14 +43,14 @@ public class IndirectInstancer extends 
AbstractInstancer return; } changed.set(index); - changedPages.set(InstancePager.object2Page(index)); + changedPages.set(ObjectStorage.objectIndex2PageIndex(index)); } @Override protected void setRangeChanged(int start, int end) { super.setRangeChanged(start, end); - changedPages.set(InstancePager.object2Page(start), InstancePager.object2Page(end) + 1); + changedPages.set(ObjectStorage.objectIndex2PageIndex(start), ObjectStorage.objectIndex2PageIndex(end) + 1); } public void addDraw(IndirectDraw draw) { @@ -67,7 +68,7 @@ public class IndirectInstancer extends AbstractInstancer public void postUpdate(int modelIndex, int baseInstance) { this.modelIndex = modelIndex; this.baseInstance = baseInstance; - pageFile.update(modelIndex, instanceCount()); + mapping.update(modelIndex, instanceCount()); } public void writeModel(long ptr) { @@ -81,20 +82,20 @@ public class IndirectInstancer extends AbstractInstancer } public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) { - int numPages = pageFile.pageCount(); + int numPages = mapping.pageCount(); var instanceCount = instances.size(); for (int page = changedPages.nextSetBit(0); page >= 0 && page < numPages; page = changedPages.nextSetBit(page + 1)) { - int startObject = InstancePager.page2Object(page); + int startObject = ObjectStorage.pageIndex2ObjectIndex(page); if (startObject >= instanceCount) { break; } - int endObject = Math.min(instanceCount, InstancePager.page2Object(page + 1)); + int endObject = Math.min(instanceCount, ObjectStorage.pageIndex2ObjectIndex(page + 1)); - long baseByte = pageFile.page2ByteOffset(page); + long baseByte = mapping.page2ByteOffset(page); long size = (endObject - startObject) * instanceStride; stagingBuffer.enqueueCopy(size, instanceVbo, baseByte, ptr -> { @@ -115,7 +116,7 @@ public class IndirectInstancer extends AbstractInstancer draw.delete(); } - pageFile.delete(); + mapping.delete(); } public int modelIndex() { diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/ObjectStorage.java similarity index 59% rename from common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java rename to common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/ObjectStorage.java index a70b98321..69b0ddef4 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/InstancePager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/ObjectStorage.java @@ -7,7 +7,7 @@ import org.lwjgl.system.MemoryUtil; import dev.engine_room.flywheel.backend.engine.AbstractArena; import dev.engine_room.flywheel.lib.memory.MemoryBlock; -public class InstancePager extends AbstractArena { +public class ObjectStorage extends AbstractArena { // 32 objects per page. Allows for convenient bitsets on the gpu. public static final int LOG_2_PAGE_SIZE = 5; public static final int PAGE_SIZE = 1 << LOG_2_PAGE_SIZE; @@ -15,38 +15,39 @@ public class InstancePager extends AbstractArena { public static final int INITIAL_PAGES_ALLOCATED = 4; - private MemoryBlock pageTableData; - public final ResizableStorageBuffer objects; - public final ResizableStorageBuffer pageTable; + /** + * The GPU side buffer containing all the objects, logically divided into page frames. + */ + public final ResizableStorageBuffer objectBuffer; + /** + * The GPU side buffer containing 32 bit descriptors for each page frame. + */ + public final ResizableStorageBuffer frameDescriptorBuffer; + /** + * The CPU side memory block containing the page descriptors. 
+ */ + private MemoryBlock frameDescriptors; private boolean needsUpload = false; - public InstancePager(long objectSizeBytes) { + public ObjectStorage(long objectSizeBytes) { super(PAGE_SIZE * objectSizeBytes); - this.objects = new ResizableStorageBuffer(); - this.pageTable = new ResizableStorageBuffer(); + this.objectBuffer = new ResizableStorageBuffer(); + this.frameDescriptorBuffer = new ResizableStorageBuffer(); - pageTableData = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); - objects.ensureCapacity(INITIAL_PAGES_ALLOCATED * elementSizeBytes); - pageTable.ensureCapacity(INITIAL_PAGES_ALLOCATED * Integer.BYTES); + objectBuffer.ensureCapacity(INITIAL_PAGES_ALLOCATED * elementSizeBytes); + frameDescriptorBuffer.ensureCapacity(INITIAL_PAGES_ALLOCATED * Integer.BYTES); + frameDescriptors = MemoryBlock.malloc(INITIAL_PAGES_ALLOCATED * Integer.BYTES); } - public static int object2Page(int objectIndex) { - return objectIndex >> LOG_2_PAGE_SIZE; - } - - public static int page2Object(int pageIndex) { - return pageIndex << LOG_2_PAGE_SIZE; - } - - public Allocation createAllocation() { - return new Allocation(); + public Mapping createMapping() { + return new Mapping(); } @Override public long byteCapacity() { - return objects.capacity(); + return objectBuffer.capacity(); } @Override @@ -57,56 +58,57 @@ public class InstancePager extends AbstractArena { @Override protected void grow() { - pageTableData = pageTableData.realloc(pageTableData.size() * 2); - objects.ensureCapacity(objects.capacity() * 2); - pageTable.ensureCapacity(pageTable.capacity() * 2); + objectBuffer.ensureCapacity(objectBuffer.capacity() * 2); + frameDescriptorBuffer.ensureCapacity(frameDescriptorBuffer.capacity() * 2); + frameDescriptors = frameDescriptors.realloc(frameDescriptors.size() * 2); } - public void uploadTable(StagingBuffer stagingBuffer) { + public void uploadDescriptors(StagingBuffer stagingBuffer) { if (!needsUpload) { return; } // We could be smarter about which spans 
are uploaded but this thing is so small it's probably not worth it. - stagingBuffer.enqueueCopy(pageTableData.ptr(), pageTableData.size(), pageTable.handle(), 0); + stagingBuffer.enqueueCopy(frameDescriptors.ptr(), frameDescriptors.size(), frameDescriptorBuffer.handle(), 0); needsUpload = false; } public void delete() { - objects.delete(); - pageTable.delete(); - pageTableData.free(); + objectBuffer.delete(); + frameDescriptorBuffer.delete(); + frameDescriptors.free(); } private long ptrForPage(int page) { - return pageTableData.ptr() + (long) page * Integer.BYTES; + return frameDescriptors.ptr() + (long) page * Integer.BYTES; } - public class Allocation { - public static final int[] EMPTY_ALLOCATION = new int[0]; - public int[] pages = EMPTY_ALLOCATION; + public static int objectIndex2PageIndex(int objectIndex) { + return objectIndex >> LOG_2_PAGE_SIZE; + } + + public static int pageIndex2ObjectIndex(int pageIndex) { + return pageIndex << LOG_2_PAGE_SIZE; + } + + /** + * Maps serial object indices to pages, and manages the allocation of pages. + */ + public class Mapping { + private static final int[] EMPTY_ALLOCATION = new int[0]; + private int[] pages = EMPTY_ALLOCATION; private int modelIndex = -1; private int objectCount = 0; /** - * Calculates the page descriptor for the given page index. - * Runs under the assumption than all pages are full except maybe the last one. + * Adjust this allocation to the given model index and object count. + * + *

This method triggers eager resizing of the allocation to fit the new object count. + * If the model index is different from the current one, all frame descriptors will be updated. + * + * @param modelIndex The model index the objects in this allocation are associated with. + * @param objectCount The number of objects in this allocation. */ - private int calculatePageDescriptor(int pageIndex) { - int countInPage; - if (objectCount % PAGE_SIZE != 0 && pageIndex == pages.length - 1) { - // Last page && it isn't full -> use the remainder. - countInPage = objectCount & PAGE_MASK; - } else if (objectCount > 0) { - // Full page. - countInPage = PAGE_SIZE; - } else { - // Empty page, this shouldn't be reachable because we eagerly free empty pages. - countInPage = 0; - } - return (modelIndex & 0x3FFFFF) | (countInPage << 26); - } - public void update(int modelIndex, int objectCount) { boolean incremental = this.modelIndex == modelIndex; @@ -115,13 +117,13 @@ public class InstancePager extends AbstractArena { return; } - InstancePager.this.needsUpload = true; + ObjectStorage.this.needsUpload = true; this.modelIndex = modelIndex; this.objectCount = objectCount; var oldLength = pages.length; - var newLength = object2Page((objectCount + PAGE_MASK)); + var newLength = objectIndex2PageIndex((objectCount + PAGE_MASK)); if (oldLength > newLength) { // Eagerly free the now unnecessary pages. @@ -153,6 +155,42 @@ public class InstancePager extends AbstractArena { } } + public int pageCount() { + return pages.length; + } + + public long page2ByteOffset(int page) { + return ObjectStorage.this.byteOffsetOf(pages[page]); + } + + public void delete() { + for (int page : pages) { + ObjectStorage.this.free(page); + } + pages = EMPTY_ALLOCATION; + modelIndex = -1; + objectCount = 0; + } + + /** + * Calculates the page descriptor for the given page index. + * Runs under the assumption than all pages are full except maybe the last one. 
+ */ + private int calculatePageDescriptor(int pageIndex) { + int countInPage; + if (objectCount % PAGE_SIZE != 0 && pageIndex == pages.length - 1) { + // Last page && it isn't full -> use the remainder. + countInPage = objectCount & PAGE_MASK; + } else if (objectCount > 0) { + // Full page. + countInPage = PAGE_SIZE; + } else { + // Empty page, this shouldn't be reachable because we eagerly free empty pages. + countInPage = 0; + } + return (modelIndex & 0x3FFFFF) | (countInPage << 26); + } + private void updateRange(int start, int oldLength) { for (int i = start; i < oldLength; i++) { MemoryUtil.memPutInt(ptrForPage(pages[i]), calculatePageDescriptor(i)); @@ -163,7 +201,7 @@ public class InstancePager extends AbstractArena { pages = Arrays.copyOf(pages, neededPages); for (int i = oldLength; i < neededPages; i++) { - var page = InstancePager.this.alloc(); + var page = ObjectStorage.this.alloc(); pages[i] = page; } } @@ -171,31 +209,10 @@ public class InstancePager extends AbstractArena { private void shrink(int oldLength, int neededPages) { for (int i = oldLength - 1; i >= neededPages; i--) { var page = pages[i]; - InstancePager.this.free(page); + ObjectStorage.this.free(page); } pages = Arrays.copyOf(pages, neededPages); } - - public int capacity() { - return pages.length << LOG_2_PAGE_SIZE; - } - - public int pageCount() { - return pages.length; - } - - public long page2ByteOffset(int page) { - return InstancePager.this.byteOffsetOf(pages[page]); - } - - public void delete() { - for (int page : pages) { - InstancePager.this.free(page); - } - pages = EMPTY_ALLOCATION; - modelIndex = -1; - objectCount = 0; - } } } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl index 346adfa93..449836630 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl +++ 
b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl @@ -1,6 +1,6 @@ #define _FLW_INSTANCE_BUFFER_BINDING 0 #define _FLW_TARGET_BUFFER_BINDING 1 -#define _FLW_MODEL_INDEX_BUFFER_BINDING 2 +#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2 #define _FLW_MODEL_BUFFER_BINDING 3 #define _FLW_DRAW_BUFFER_BINDING 4 #define _FLW_LIGHT_LUT_BUFFER_BINDING 5 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 4186f470d..e128b0daf 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -15,8 +15,8 @@ const uint _FLW_PAGE_COUNT_OFFSET = 26u; // Bottom 26 bits for the model index. const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; -layout(std430, binding = _FLW_MODEL_INDEX_BUFFER_BINDING) restrict readonly buffer ModelIndexBuffer { - uint _flw_pageTable[]; +layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { + uint _flw_pageFrameDescriptors[]; }; layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { @@ -62,11 +62,11 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { void main() { uint pageIndex = gl_WorkGroupID.x; - if (pageIndex >= _flw_pageTable.length()) { + if (pageIndex >= _flw_pageFrameDescriptors.length()) { return; } - uint packedModelIndexAndCount = _flw_pageTable[pageIndex]; + uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex]; uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET; From ec45287cfa07869471951ba33d510fabfb2cd6c4 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Wed, 4 Sep 2024 11:05:04 -0500 Subject: [PATCH 05/17] Joining the occult - Implement hi-z occlusion culling - Generate depth pyramid just before issuing cull 
dispatches - Currently use raw texel fetches but this may be causing loss - Add _flw_cullData to frame uniforms --- .../backend/compile/IndirectPrograms.java | 13 ++- .../backend/engine/indirect/DepthPyramid.java | 106 ++++++++++++++++++ .../engine/indirect/IndirectDrawManager.java | 11 ++ .../backend/engine/uniform/FrameUniforms.java | 16 ++- .../flywheel/backend/gl/shader/GlProgram.java | 11 ++ .../flywheel/internal/indirect/cull.glsl | 59 +++++++++- .../internal/indirect/depth_reduce.glsl | 29 +++++ .../flywheel/internal/uniforms/frame.glsl | 12 ++ 8 files changed, 251 insertions(+), 6 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index 3b705754a..df1696657 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -30,6 +30,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl"); private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl"); private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl"); + private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl"); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -44,12 +45,14 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final Map, GlProgram> culling; private final 
GlProgram apply; private final GlProgram scatter; + private final GlProgram depthReduce; - private IndirectPrograms(Map pipeline, Map, GlProgram> culling, GlProgram apply, GlProgram scatter) { + private IndirectPrograms(Map pipeline, Map, GlProgram> culling, GlProgram apply, GlProgram scatter, GlProgram depthReduce) { this.pipeline = pipeline; this.culling = culling; this.apply = apply; this.scatter = scatter; + this.depthReduce = depthReduce; } private static List getExtensions(GlslVersion glslVersion) { @@ -94,10 +97,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { try { var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys); var cullingResult = cullingCompiler.compileAndReportErrors(createCullingKeys()); - var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN)); + var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN)); if (pipelineResult != null && cullingResult != null && utils != null) { - newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN)); + newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN)); } } catch (Throwable t) { FlwPrograms.LOGGER.error("Failed to compile indirect programs", t); @@ -184,6 +187,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { return scatter; } + public GlProgram getDepthReduceProgram() { + return depthReduce; + } + @Override protected void _delete() { pipeline.values() diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java new file mode 100644 index 000000000..30e9524c7 --- /dev/null +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -0,0 +1,106 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.GL32; +import org.lwjgl.opengl.GL46; + +import com.mojang.blaze3d.platform.GlStateManager; + +import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.math.MoreMath; +import net.minecraft.client.Minecraft; + +public class DepthPyramid { + private final GlProgram depthReduceProgram; + + public final int pyramidTextureId; + + private int lastWidth = -1; + private int lastHeight = -1; + + public DepthPyramid(GlProgram depthReduceProgram) { + this.depthReduceProgram = depthReduceProgram; + + pyramidTextureId = GL32.glGenTextures(); + + GlStateManager._bindTexture(pyramidTextureId); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + + } + + public void generate() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + int width = mainRenderTarget.width; + int height = mainRenderTarget.height; + + int mipLevels = getImageMipLevels(width, height); + + createPyramidMips(mipLevels, width, height); + + int depthBufferId = mainRenderTarget.getDepthTextureId(); + + GlStateManager._bindTexture(depthBufferId); + + GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); + + GL46.glActiveTexture(GL32.GL_TEXTURE1); + + depthReduceProgram.bind(); + + for (int i = 0; i < mipLevels; i++) { + int mipWidth = Math.max(1, width >> i); + int mipHeight = Math.max(1, height >> i); + + int srcTexture = (i 
== 0) ? depthBufferId : pyramidTextureId; + GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture); + + GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); + + depthReduceProgram.setUVec2("imageSize", mipWidth, mipHeight); + depthReduceProgram.setInt("lod", Math.max(0, i - 1)); + + GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1); + + GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT); + } + } + + public void delete() { + GL32.glDeleteTextures(pyramidTextureId); + } + + private void createPyramidMips(int mipLevels, int width, int height) { + if (lastWidth == width && lastHeight == height) { + return; + } + + lastWidth = width; + lastHeight = height; + + GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId); + + for (int i = 0; i < mipLevels; i++) { + int mipWidth = Math.max(1, width >> (i + 1)); + int mipHeight = Math.max(1, height >> (i + 1)); + + GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipWidth, mipHeight, 0, GL32.GL_RED, GL32.GL_FLOAT, 0); + } + } + + private static int getImageMipLevels(int width, int height) { + int result = 1; + + while (width > 2 && height > 2) { + result++; + width /= 2; + height /= 2; + } + + return result; + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index adabbf653..494403045 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -12,6 +12,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.lwjgl.opengl.GL46; + import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.instance.Instance; import 
dev.engine_room.flywheel.api.instance.InstanceType; @@ -46,6 +48,8 @@ public class IndirectDrawManager extends DrawManager> { private final LightBuffers lightBuffers; private final MatrixBuffer matrixBuffer; + private final DepthPyramid depthPyramid; + private boolean needsBarrier = false; public IndirectDrawManager(IndirectPrograms programs) { @@ -58,6 +62,8 @@ public class IndirectDrawManager extends DrawManager> { meshPool.bind(vertexArray); lightBuffers = new LightBuffers(); matrixBuffer = new MatrixBuffer(); + + depthPyramid = new DepthPyramid(programs.getDepthReduceProgram()); } @Override @@ -136,6 +142,8 @@ public class IndirectDrawManager extends DrawManager> { stagingBuffer.flush(); + depthPyramid.generate(); + // We could probably save some driver calls here when there are // actually zero instances, but that feels like a very rare case @@ -143,6 +151,9 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); + GL46.glActiveTexture(GL46.GL_TEXTURE0); + GL46.glBindTexture(GL46.GL_TEXTURE_2D, depthPyramid.pyramidTextureId); + for (var group : cullingGroups.values()) { group.dispatchCull(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java index 33bb81901..b19370099 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java @@ -17,7 +17,7 @@ import net.minecraft.world.level.Level; import net.minecraft.world.phys.Vec3; public final class FrameUniforms extends UniformWriter { - private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 10; + private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 16; static final UniformBuffer BUFFER = new UniformBuffer(Uniforms.FRAME_INDEX, SIZE); private static final Matrix4f VIEW = new 
Matrix4f(); @@ -112,6 +112,8 @@ public final class FrameUniforms extends UniformWriter { ptr = writeInt(ptr, debugMode); + ptr = writeCullData(ptr); + firstWrite = false; BUFFER.markDirty(); } @@ -179,6 +181,18 @@ public final class FrameUniforms extends UniformWriter { return writeInFluidAndBlock(ptr, level, blockPos, cameraPos); } + private static long writeCullData(long ptr) { + ptr = writeFloat(ptr, 0.05F); // zNear + ptr = writeFloat(ptr, Minecraft.getInstance().gameRenderer.getDepthFar()); // zFar + ptr = writeFloat(ptr, PROJECTION.m00()); // P00 + ptr = writeFloat(ptr, PROJECTION.m11()); // P11 + ptr = writeFloat(ptr, Minecraft.getInstance().getMainRenderTarget().width >> 1); // pyramidWidth + ptr = writeFloat(ptr, Minecraft.getInstance().getMainRenderTarget().height >> 1); // pyramidHeight + ptr = writeInt(ptr, 0); // useMin + + return ptr; + } + /** * Writes the frustum planes of the given projection matrix to the given buffer.

* Uses a different format that is friendly towards an optimized instruction-parallel diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java index b221fdddf..9438ef355 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/shader/GlProgram.java @@ -11,6 +11,7 @@ import static org.lwjgl.opengl.GL20.glUniform4f; import static org.lwjgl.opengl.GL20.glUniformMatrix3fv; import static org.lwjgl.opengl.GL20.glUniformMatrix4fv; import static org.lwjgl.opengl.GL30.glUniform1ui; +import static org.lwjgl.opengl.GL30.glUniform2ui; import static org.lwjgl.opengl.GL31.GL_INVALID_INDEX; import static org.lwjgl.opengl.GL31.glGetUniformBlockIndex; import static org.lwjgl.opengl.GL31.glUniformBlockBinding; @@ -118,6 +119,16 @@ public class GlProgram extends GlObject { glUniform1ui(uniform, value); } + public void setUVec2(String name, int x, int y) { + int uniform = getUniformLocation(name); + + if (uniform < 0) { + return; + } + + glUniform2ui(uniform, x, y); + } + public void setInt(String glslName, int value) { int uniform = getUniformLocation(glslName); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index e128b0daf..6d8be7aaf 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -23,10 +23,12 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer ModelDescriptor _flw_models[]; }; -layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer { +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer { Matrices 
_flw_matrices[]; }; +layout(binding = 0) uniform sampler2D _flw_depthPyramid; + // Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing. // Only uses 6 fmas and some boolean ops. // See also: @@ -40,6 +42,28 @@ bool _flw_testSphere(vec3 center, float radius) { return all(xyInside) && all(zInside); } +bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) { + if (c.z > r + znear) { + return false; + } + + vec3 cr = c * r; + float czr2 = c.z * c.z - r * r; + + float vx = sqrt(c.x * c.x + czr2); + float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x); + float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x); + + float vy = sqrt(c.y * c.y + czr2); + float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y); + float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y); + + aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11); + aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space + + return true; +} + bool _flw_isVisible(uint instanceIndex, uint modelIndex) { uint matrixIndex = _flw_models[modelIndex].matrixIndex; BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; @@ -56,7 +80,38 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); } - return _flw_testSphere(center, radius); + bool isVisible = _flw_testSphere(center, radius); + + if (isVisible) { + transformBoundingSphere(flw_view, center, radius); + + vec4 aabb; + if (projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb)) + { + float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth; + float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight; + + float level = floor(log2(max(width, height))); + + float depth01 = textureLod(_flw_depthPyramid, aabb.xw, level).r; + float depth11 = textureLod(_flw_depthPyramid, aabb.zw, level).r; + float depth10 = textureLod(_flw_depthPyramid, 
aabb.zy, level).r; + float depth00 = textureLod(_flw_depthPyramid, aabb.xy, level).r; + + float depth; + if (_flw_cullData.useMin == 0) { + depth = max(max(depth00, depth01), max(depth10, depth11)); + } else { + depth = min(min(depth00, depth01), min(depth10, depth11)); + } + + float depthSphere = 1. + _flw_cullData.znear / (center.z + radius); + + isVisible = isVisible && depthSphere <= depth; + } + } + + return isVisible; } void main() { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl new file mode 100644 index 000000000..42bcd7f4e --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl @@ -0,0 +1,29 @@ +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0, r32f) uniform writeonly image2D outImage; +layout(binding = 1) uniform sampler2D inImage; + +uniform uvec2 imageSize; +uniform int lod; + +uniform int useMin = 0; + +void main() { + uvec2 pos = gl_GlobalInvocationID.xy; + + ivec2 samplePos = ivec2(pos) * 2; + + float depth01 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 1)).r; + float depth11 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 1)).r; + float depth10 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 0)).r; + float depth00 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 0)).r; + + float depth; + if (useMin == 0) { + depth = max(max(depth00, depth01), max(depth10, depth11)); + } else { + depth = min(min(depth00, depth01), min(depth10, depth11)); + } + + imageStore(outImage, ivec2(pos), vec4(depth)); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl index 4b3cfe69f..4ce722400 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl +++ 
b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl @@ -9,6 +9,16 @@ struct FrustumPlanes { vec2 zW; // }; +struct _FlwCullData { + float znear; + float zfar; + float P00; + float P11; + float pyramidWidth; + float pyramidHeight; + uint useMin; +}; + layout(std140) uniform _FlwFrameUniforms { FrustumPlanes flw_frustumPlanes; @@ -47,6 +57,8 @@ layout(std140) uniform _FlwFrameUniforms { uint flw_cameraInBlock; uint _flw_debugMode; + + _FlwCullData _flw_cullData; }; #define flw_renderOrigin (_flw_renderOrigin.xyz) From 074ee34dd4106885f4b6dedfeb54c3060a449200 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 5 Sep 2024 12:38:05 -0500 Subject: [PATCH 06/17] The depths of the rabbit hole - Fix mip levels being half the size they should be - Use the next lowest po2 from the main render target size for mip 0 - Map from dst texel to src texel rather than naively multiply by 2 - Clamp the estimated mip level in the cull shader - Use texel fetches in the cull shader (not sure if necessary?) 
--- .../backend/engine/indirect/DepthPyramid.java | 28 ++++++++++++------- .../backend/engine/uniform/FrameUniforms.java | 17 ++++++++--- .../flywheel/internal/indirect/cull.glsl | 16 +++++++---- .../internal/indirect/depth_reduce.glsl | 6 ++-- .../flywheel/internal/uniforms/frame.glsl | 1 + 5 files changed, 47 insertions(+), 21 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java index 30e9524c7..cb17f5276 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -35,8 +35,8 @@ public class DepthPyramid { var mainRenderTarget = Minecraft.getInstance() .getMainRenderTarget(); - int width = mainRenderTarget.width; - int height = mainRenderTarget.height; + int width = mip0Size(mainRenderTarget.width); + int height = mip0Size(mainRenderTarget.height); int mipLevels = getImageMipLevels(width, height); @@ -53,15 +53,15 @@ public class DepthPyramid { depthReduceProgram.bind(); for (int i = 0; i < mipLevels; i++) { - int mipWidth = Math.max(1, width >> i); - int mipHeight = Math.max(1, height >> i); + int mipWidth = mipSize(width, i); + int mipHeight = mipSize(height, i); int srcTexture = (i == 0) ? 
depthBufferId : pyramidTextureId; GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture); GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); - depthReduceProgram.setUVec2("imageSize", mipWidth, mipHeight); + depthReduceProgram.setVec2("imageSize", mipWidth, mipHeight); depthReduceProgram.setInt("lod", Math.max(0, i - 1)); GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1); @@ -85,20 +85,28 @@ public class DepthPyramid { GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId); for (int i = 0; i < mipLevels; i++) { - int mipWidth = Math.max(1, width >> (i + 1)); - int mipHeight = Math.max(1, height >> (i + 1)); + int mipWidth = mipSize(width, i); + int mipHeight = mipSize(height, i); GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipWidth, mipHeight, 0, GL32.GL_RED, GL32.GL_FLOAT, 0); } } - private static int getImageMipLevels(int width, int height) { + public static int mipSize(int mip0Size, int level) { + return Math.max(1, mip0Size >> level); + } + + public static int mip0Size(int screenSize) { + return Integer.highestOneBit(screenSize); + } + + public static int getImageMipLevels(int width, int height) { int result = 1; while (width > 2 && height > 2) { result++; - width /= 2; - height /= 2; + width >>= 1; + height >>= 1; } return result; diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java index b19370099..3f7b8e871 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java @@ -8,6 +8,7 @@ import org.lwjgl.system.MemoryUtil; import dev.engine_room.flywheel.api.RenderContext; import dev.engine_room.flywheel.api.visualization.VisualizationManager; +import 
dev.engine_room.flywheel.backend.engine.indirect.DepthPyramid; import dev.engine_room.flywheel.backend.mixin.LevelRendererAccessor; import net.minecraft.client.Camera; import net.minecraft.client.Minecraft; @@ -17,7 +18,7 @@ import net.minecraft.world.level.Level; import net.minecraft.world.phys.Vec3; public final class FrameUniforms extends UniformWriter { - private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 16; + private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 17; static final UniformBuffer BUFFER = new UniformBuffer(Uniforms.FRAME_INDEX, SIZE); private static final Matrix4f VIEW = new Matrix4f(); @@ -182,12 +183,20 @@ public final class FrameUniforms extends UniformWriter { } private static long writeCullData(long ptr) { + var mc = Minecraft.getInstance(); + var mainRenderTarget = mc.getMainRenderTarget(); + + int pyramidWidth = DepthPyramid.mip0Size(mainRenderTarget.width); + int pyramidHeight = DepthPyramid.mip0Size(mainRenderTarget.height); + int pyramidDepth = DepthPyramid.getImageMipLevels(pyramidWidth, pyramidHeight); + ptr = writeFloat(ptr, 0.05F); // zNear - ptr = writeFloat(ptr, Minecraft.getInstance().gameRenderer.getDepthFar()); // zFar + ptr = writeFloat(ptr, mc.gameRenderer.getDepthFar()); // zFar ptr = writeFloat(ptr, PROJECTION.m00()); // P00 ptr = writeFloat(ptr, PROJECTION.m11()); // P11 - ptr = writeFloat(ptr, Minecraft.getInstance().getMainRenderTarget().width >> 1); // pyramidWidth - ptr = writeFloat(ptr, Minecraft.getInstance().getMainRenderTarget().height >> 1); // pyramidHeight + ptr = writeFloat(ptr, pyramidWidth); // pyramidWidth + ptr = writeFloat(ptr, pyramidHeight); // pyramidHeight + ptr = writeInt(ptr, pyramidDepth - 1); // pyramidLevels ptr = writeInt(ptr, 0); // useMin return ptr; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 6d8be7aaf..1b6436a42 
100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -91,12 +91,18 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth; float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight; - float level = floor(log2(max(width, height))); + int level = clamp(0, int(ceil(log2(max(width, height)))), _flw_cullData.pyramidLevels); - float depth01 = textureLod(_flw_depthPyramid, aabb.xw, level).r; - float depth11 = textureLod(_flw_depthPyramid, aabb.zw, level).r; - float depth10 = textureLod(_flw_depthPyramid, aabb.zy, level).r; - float depth00 = textureLod(_flw_depthPyramid, aabb.xy, level).r; + ivec2 levelSize = textureSize(_flw_depthPyramid, level); + + ivec4 levelSizePair = ivec4(levelSize, levelSize); + + ivec4 bounds = ivec4(aabb * vec4(levelSizePair)); + + float depth01 = texelFetch(_flw_depthPyramid, bounds.xw, level).r; + float depth11 = texelFetch(_flw_depthPyramid, bounds.zw, level).r; + float depth10 = texelFetch(_flw_depthPyramid, bounds.zy, level).r; + float depth00 = texelFetch(_flw_depthPyramid, bounds.xy, level).r; float depth; if (_flw_cullData.useMin == 0) { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl index 42bcd7f4e..49bbbf947 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl @@ -3,7 +3,7 @@ layout(local_size_x = 8, local_size_y = 8) in; layout(binding = 0, r32f) uniform writeonly image2D outImage; layout(binding = 1) uniform sampler2D inImage; -uniform uvec2 imageSize; +uniform vec2 imageSize; uniform int lod; uniform int useMin = 0; @@ -11,7 +11,9 @@ uniform 
int useMin = 0; void main() { uvec2 pos = gl_GlobalInvocationID.xy; - ivec2 samplePos = ivec2(pos) * 2; + // Map the output texel to an input texel. Properly do the division because generating mip0 maps from the actual + // full resolution depth buffer and the aspect ratio may be different from our Po2 pyramid. + ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(textureSize(inImage, lod)) / imageSize)); float depth01 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 1)).r; float depth11 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 1)).r; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl index 4ce722400..05b3110dc 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/uniforms/frame.glsl @@ -16,6 +16,7 @@ struct _FlwCullData { float P11; float pyramidWidth; float pyramidHeight; + int pyramidLevels; uint useMin; }; From ce51e1f5346fd10b32c4cc9d1588ec55acaa8dbe Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 5 Sep 2024 13:45:24 -0500 Subject: [PATCH 07/17] Near stability - Fix near plane rejection logic - Fix lod clamp --- .../assets/flywheel/flywheel/internal/indirect/cull.glsl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 1b6436a42..c1ab067bd 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -43,7 +43,8 @@ bool _flw_testSphere(vec3 center, float radius) { } bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) { - if (c.z > r + znear) { + // Closest point on 
the sphere is between the camera and the near plane, don't even attempt to cull. + if (c.z + r > -znear) { return false; } @@ -91,7 +92,7 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth; float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight; - int level = clamp(0, int(ceil(log2(max(width, height)))), _flw_cullData.pyramidLevels); + int level = clamp(int(ceil(log2(max(width, height)))), 0, _flw_cullData.pyramidLevels); ivec2 levelSize = textureSize(_flw_depthPyramid, level); From 77d64aa5a2e83037687c20217243a3c14086a5fe Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Fri, 6 Sep 2024 15:49:32 -0500 Subject: [PATCH 08/17] I can see you - Add visibility buffer fbo attachment - Write instance id to visbuffer - Move instance id in/out from common to impl shaders --- .../engine/indirect/IndirectDrawManager.java | 10 +++ .../engine/indirect/VisibilityBuffer.java | 87 +++++++++++++++++++ .../flywheel/flywheel/internal/common.frag | 8 +- .../flywheel/flywheel/internal/common.vert | 6 +- .../flywheel/internal/indirect/main.frag | 8 +- .../flywheel/internal/indirect/main.vert | 11 ++- .../internal/indirect/read_visibility.glsl | 25 ++++++ .../flywheel/internal/instancing/main.frag | 4 +- .../flywheel/internal/instancing/main.vert | 6 +- 9 files changed, 151 insertions(+), 14 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 494403045..7f71d72d0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -49,6 +49,7 @@ public class IndirectDrawManager extends DrawManager> { private final MatrixBuffer matrixBuffer; private final DepthPyramid depthPyramid; + private final VisibilityBuffer visibilityBuffer; private boolean needsBarrier = false; @@ -64,6 +65,7 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer = new MatrixBuffer(); depthPyramid = new DepthPyramid(programs.getDepthReduceProgram()); + visibilityBuffer = new VisibilityBuffer(); } @Override @@ -100,6 +102,8 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); Uniforms.bindAll(); + visibilityBuffer.attach(); + if (needsBarrier) { glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); needsBarrier = false; @@ -111,6 +115,8 @@ public class IndirectDrawManager extends DrawManager> { MaterialRenderState.reset(); TextureBinder.resetLightAndOverlay(); + + visibilityBuffer.detach(); } } @@ -185,6 +191,10 @@ public class IndirectDrawManager extends DrawManager> { crumblingDrawBuffer.delete(); programs.release(); + + depthPyramid.delete(); + + visibilityBuffer.delete(); } public void renderCrumbling(List crumblingBlocks) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java new file mode 100644 index 000000000..025d7bf1f --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -0,0 +1,87 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.GL30; +import org.lwjgl.opengl.GL32; +import org.lwjgl.opengl.GL46; + +import com.mojang.blaze3d.platform.GlStateManager; + +import dev.engine_room.flywheel.backend.FlwBackend; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; +import it.unimi.dsi.fastutil.ints.IntArraySet; +import 
it.unimi.dsi.fastutil.ints.IntSet; +import net.minecraft.client.Minecraft; + +public class VisibilityBuffer { + private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; + + private final int textureId; + + private int lastWidth = -1; + private int lastHeight = -1; + + private final IntSet attached = new IntArraySet(); + + public VisibilityBuffer() { + textureId = GL32.glGenTextures(); + + GlStateManager._bindTexture(textureId); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + } + + public void attach() { + // TODO: clear the vis buffer. maybe do this when we read it? + + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + setupTexture(mainRenderTarget.width, mainRenderTarget.height); + + if (attached.add(mainRenderTarget.frameBufferId)) { + GL46.glNamedFramebufferTexture(mainRenderTarget.frameBufferId, ATTACHMENT, textureId, 0); + + try { + mainRenderTarget.checkStatus(); + } catch (Exception e) { + FlwBackend.LOGGER.error("Error attaching visbuffer", e); + } + } + + // Enable writes + GL46.glNamedFramebufferDrawBuffers(mainRenderTarget.frameBufferId, new int[] { GL30.GL_COLOR_ATTACHMENT0, ATTACHMENT }); + } + + public void detach() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + // Disable writes + GL46.glNamedFramebufferDrawBuffers(mainRenderTarget.frameBufferId, new int[] { GL30.GL_COLOR_ATTACHMENT0 }); + } + + public void delete() { + GL32.glDeleteTextures(textureId); + } + + private void setupTexture(int width, int height) { + if (lastWidth == width && lastHeight == height) { + return; + } + + // Need to rebind to all fbos because an attachment becomes 
incomplete when it's resized + attached.clear(); + + lastWidth = width; + lastHeight = height; + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(textureId); + + // TODO: DSA texture storage? + GL32.glTexImage2D(GL32.GL_TEXTURE_2D, 0, GL32.GL_R32UI, width, height, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0); + GlStateManager._bindTexture(0); + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag index aac8334b3..6e367fd3e 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag @@ -13,9 +13,7 @@ uniform sampler2D _flw_crumblingTex; in vec2 _flw_crumblingTexCoord; #endif -flat in uint _flw_instanceID; - -out vec4 _flw_outputColor; +layout(location = 0) out vec4 _flw_outputColor; float _flw_diffuseFactor() { if (flw_material.diffuse) { @@ -29,7 +27,7 @@ float _flw_diffuseFactor() { } } -void _flw_main() { +void _flw_main(uint instanceID) { flw_sampleColor = texture(flw_diffuseTex, flw_vertexTexCoord); flw_fragColor = flw_vertexColor * flw_sampleColor; flw_fragOverlay = flw_vertexOverlay; @@ -72,7 +70,7 @@ void _flw_main() { color = vec4(flw_vertexNormal * .5 + .5, 1.); break; case 2u: - color = _flw_id2Color(_flw_instanceID); + color = _flw_id2Color(instanceID); break; case 3u: color = vec4(vec2((flw_fragLight * 15.0 + 0.5) / 16.), 0., 1.); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert index 214ab12d2..888114d5d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert @@ -71,9 +71,7 @@ mat4 _flw_modelMatrix; mat3 _flw_normalMatrix; #endif -flat out uint _flw_instanceID; - -void _flw_main(in 
FlwInstance instance, in uint stableInstanceID) { +void _flw_main(in FlwInstance instance) { _flw_layoutVertex(); flw_instanceVertex(instance); flw_materialVertex(); @@ -92,6 +90,4 @@ void _flw_main(in FlwInstance instance, in uint stableInstanceID) { flw_distance = fogDistance(flw_vertexPos.xyz, flw_cameraPos, flw_fogShape); gl_Position = flw_viewProjection * flw_vertexPos; - - _flw_instanceID = stableInstanceID; } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag index 7d528ce24..b0f246634 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag @@ -4,10 +4,16 @@ flat in uvec3 _flw_packedMaterial; +flat in uint _flw_instanceID; + +layout(location = 1) out uint _flw_out_instanceID; + void main() { _flw_uberMaterialFragmentIndex = _flw_packedMaterial.x; _flw_unpackUint2x16(_flw_packedMaterial.y, _flw_uberFogIndex, _flw_uberCutoutIndex); _flw_unpackMaterialProperties(_flw_packedMaterial.z, flw_material); - _flw_main(); + _flw_main(_flw_instanceID); + + _flw_out_instanceID = _flw_instanceID; } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert index e53dff313..9cc21ce97 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert @@ -21,8 +21,14 @@ layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffe uniform uint _flw_baseDraw; +// We read the visibility buffer for all culling groups into a single shared buffer. +// This offset is used to know where each culling group starts. 
+uniform uint _flw_globalInstanceIdOffset = 0; + flat out uvec3 _flw_packedMaterial; +flat out uint _flw_instanceID; + void main() { #if __VERSION__ < 460 uint drawIndex = gl_DrawIDARB + _flw_baseDraw; @@ -49,5 +55,8 @@ void main() { #endif FlwInstance instance = _flw_unpackInstance(instanceIndex); - _flw_main(instance, instanceIndex); + _flw_main(instance); + + // Add 1 because a 0 instance id means null. + _flw_instanceID = _flw_globalInstanceIdOffset + instanceIndex + 1; } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl new file mode 100644 index 000000000..dad817b98 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl @@ -0,0 +1,25 @@ +layout(local_size_x = 8, local_size_y = 8) in; + +layout(binding = 0) uniform usampler2D visBuffer; + +layout(std430) restrict buffer VisibleFlagBuffer { + uint _flw_visibleFlag[]; +}; + +void main() { + uint instanceID = texelFetch(visBuffer, ivec2(gl_GlobalInvocationID.xy), 0).r; + + // Null instance id. + if (instanceID == 0) { + return; + } + + // Adjust for null to find the actual index. 
+ instanceID = instanceID - 1; + + uint index = instanceID >> 5; + + uint mask = 1u << (instanceID & 31u); + + atomicOr(_flw_visibleFlag[index], mask); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag index cfd7dfea1..d88d748f2 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag @@ -3,10 +3,12 @@ uniform uvec4 _flw_packedMaterial; +flat in uint _flw_instanceID; + void main() { _flw_uberMaterialFragmentIndex = _flw_packedMaterial.y; _flw_unpackUint2x16(_flw_packedMaterial.z, _flw_uberFogIndex, _flw_uberCutoutIndex); _flw_unpackMaterialProperties(_flw_packedMaterial.w, flw_material); - _flw_main(); + _flw_main(_flw_instanceID); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert index 30a863917..bb0a9be65 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert @@ -10,6 +10,8 @@ uniform mat4 _flw_modelMatrixUniform; uniform mat3 _flw_normalMatrixUniform; #endif +flat out uint _flw_instanceID; + void main() { _flw_uberMaterialVertexIndex = _flw_packedMaterial.x; _flw_unpackMaterialProperties(_flw_packedMaterial.w, flw_material); @@ -21,5 +23,7 @@ void main() { _flw_normalMatrix = _flw_normalMatrixUniform; #endif - _flw_main(instance, uint(gl_InstanceID)); + _flw_main(instance); + + _flw_instanceID = gl_InstanceID; } From 9009bfe730f42f2f7330e1d3986f024ea3c7489f Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 8 Sep 2024 09:57:11 -0500 Subject: [PATCH 09/17] Observe - Actually compile and run visibility read shader - Clear the visbuffer and 
readbuffer each frame - Track culling group page counts between frames - Fix texture binding issues between visbuffer and depth pyramid - Add early and late cull shaders - Compile early and late shaders separately - Move util shader list to a static field --- .../backend/compile/IndirectPrograms.java | 43 ++++-- .../backend/engine/indirect/DepthPyramid.java | 4 +- .../engine/indirect/IndirectCullingGroup.java | 23 +++- .../engine/indirect/IndirectDrawManager.java | 21 ++- .../engine/indirect/VisibilityBuffer.java | 38 +++++- .../internal/indirect/buffer_bindings.glsl | 3 + .../internal/indirect/early_cull.glsl | 123 +++++++++++++++++ .../flywheel/internal/indirect/late_cull.glsl | 124 ++++++++++++++++++ .../flywheel/internal/indirect/main.vert | 4 +- 9 files changed, 360 insertions(+), 23 deletions(-) create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index df1696657..bb140cf56 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -27,10 +27,13 @@ import net.minecraft.resources.ResourceLocation; public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation CULL_SHADER_API_IMPL = Flywheel.rl("internal/indirect/cull_api_impl.glsl"); - private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl"); + private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/early_cull.glsl"); + private static final ResourceLocation PASS2_SHADER_MAIN = 
Flywheel.rl("internal/indirect/late_cull.glsl"); private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl"); private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl"); private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl"); + private static final ResourceLocation READ_VISIBILITY_SHADER_MAIN = Flywheel.rl("internal/indirect/read_visibility.glsl"); + public static final List UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -43,16 +46,20 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final Map pipeline; private final Map, GlProgram> culling; + private final Map, GlProgram> cullPassTwo; private final GlProgram apply; private final GlProgram scatter; private final GlProgram depthReduce; + private final GlProgram readVisibility; - private IndirectPrograms(Map pipeline, Map, GlProgram> culling, GlProgram apply, GlProgram scatter, GlProgram depthReduce) { + private IndirectPrograms(Map pipeline, Map, GlProgram> culling, Map, GlProgram> cullPassTwo, GlProgram apply, GlProgram scatter, GlProgram depthReduce, GlProgram readVisibility) { this.pipeline = pipeline; this.culling = culling; + this.cullPassTwo = cullPassTwo; this.apply = apply; this.scatter = scatter; this.depthReduce = depthReduce; + this.readVisibility = readVisibility; } private static List getExtensions(GlslVersion glslVersion) { @@ -91,23 +98,27 @@ public class IndirectPrograms extends AtomicReferenceCounted { IndirectPrograms newInstance = null; var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS); - var cullingCompiler = createCullingCompiler(sources); + var pass1Compiler = 
createCullingCompiler(sources, CULL_SHADER_MAIN, "early_cull"); + var pass2Compiler = createCullingCompiler(sources, PASS2_SHADER_MAIN, "late_cull"); var utilCompiler = createUtilCompiler(sources); + var cullingKeys = createCullingKeys(); try { var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys); - var cullingResult = cullingCompiler.compileAndReportErrors(createCullingKeys()); - var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN)); + var pass1Result = pass1Compiler.compileAndReportErrors(cullingKeys); + var pass2Result = pass2Compiler.compileAndReportErrors(cullingKeys); + var utils = utilCompiler.compileAndReportErrors(UTIL_SHADERS); - if (pipelineResult != null && cullingResult != null && utils != null) { - newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN)); + if (pipelineResult != null && pass1Result != null && pass2Result != null && utils != null) { + newInstance = new IndirectPrograms(pipelineResult, pass1Result, pass2Result, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN), utils.get(READ_VISIBILITY_SHADER_MAIN)); } } catch (Throwable t) { FlwPrograms.LOGGER.error("Failed to compile indirect programs", t); } pipelineCompiler.delete(); - cullingCompiler.delete(); + pass1Compiler.delete(); + pass2Compiler.delete(); utilCompiler.delete(); setInstance(newInstance); @@ -116,19 +127,19 @@ public class IndirectPrograms extends AtomicReferenceCounted { /** * A compiler for cull shaders, parameterized by the instance type. 
*/ - private static CompilationHarness> createCullingCompiler(ShaderSources sources) { + private static CompilationHarness> createCullingCompiler(ShaderSources sources, ResourceLocation main, String name) { return CULL.program() .link(CULL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.COMPUTE) - .nameMapper(instanceType -> "culling/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader())) + .nameMapper(instanceType -> name + "/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader())) .requireExtensions(COMPUTE_EXTENSIONS) .define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE) .withResource(CULL_SHADER_API_IMPL) .withComponent(InstanceStructComponent::new) .withResource(InstanceType::cullShader) .withComponent(SsboInstanceComponent::new) - .withResource(CULL_SHADER_MAIN)) + .withResource(main)) .postLink((key, program) -> Uniforms.setUniformBlockBindings(program)) - .harness("culling", sources); + .harness(name, sources); } /** @@ -179,6 +190,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { return culling.get(instanceType); } + public GlProgram getCullPassTwoProgram(InstanceType instanceType) { + return cullPassTwo.get(instanceType); + } + public GlProgram getApplyProgram() { return apply; } @@ -191,6 +206,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { return depthReduce; } + public GlProgram getReadVisibilityProgram() { + return readVisibility; + } + @Override protected void _delete() { pipeline.values() diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java index cb17f5276..49afe0776 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -5,6 +5,7 @@ import org.lwjgl.opengl.GL46; import 
com.mojang.blaze3d.platform.GlStateManager; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; import dev.engine_room.flywheel.lib.math.MoreMath; import net.minecraft.client.Minecraft; @@ -44,6 +45,7 @@ public class DepthPyramid { int depthBufferId = mainRenderTarget.getDepthTextureId(); + GlTextureUnit.T1.makeActive(); GlStateManager._bindTexture(depthBufferId); GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); @@ -57,7 +59,7 @@ public class DepthPyramid { int mipHeight = mipSize(height, i); int srcTexture = (i == 0) ? depthBufferId : pyramidTextureId; - GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture); + GlStateManager._bindTexture(srcTexture); GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 961d5b2ce..45b764404 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -48,7 +48,13 @@ public class IndirectCullingGroup { private boolean needsDrawBarrier; private boolean needsDrawSort; - private int instanceCountThisFrame; + public int instanceCountThisFrame; + + private int pagesLastFrame = 0; + private int pagesThisFrame = 0; + + private int visibilityWriteOffsetPages = 0; + private int visibilityReadOffsetPages = 0; IndirectCullingGroup(InstanceType instanceType, IndirectPrograms programs) { this.instanceType = instanceType; @@ -85,6 +91,17 @@ public class IndirectCullingGroup { } } + public int flipVisibilityOffsets(int visibilityWriteOffsetPages) { + this.visibilityReadOffsetPages = this.visibilityWriteOffsetPages; + this.visibilityWriteOffsetPages = 
visibilityWriteOffsetPages; + + pagesLastFrame = pagesThisFrame; + + pagesThisFrame = buffers.objectStorage.capacity(); + + return pagesThisFrame; + } + public void upload(StagingBuffer stagingBuffer) { if (nothingToDo()) { return; @@ -118,6 +135,8 @@ public class IndirectCullingGroup { Uniforms.bindAll(); cullProgram.bind(); + cullProgram.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages); + buffers.bindForCompute(); glDispatchCompute(buffers.objectStorage.capacity(), 1, 1); } @@ -211,6 +230,8 @@ public class IndirectCullingGroup { // Don't need to do this unless the program changes. drawProgram.bind(); baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw"); + + drawProgram.setUInt("_flw_visibilityWriteOffsetInstances", visibilityWriteOffsetPages << ObjectStorage.LOG_2_PAGE_SIZE); } glUniform1ui(baseDrawUniformLoc, multiDraw.start); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 7f71d72d0..b0083f241 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -12,7 +12,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.lwjgl.opengl.GL46; +import com.mojang.blaze3d.platform.GlStateManager; import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.instance.Instance; @@ -31,6 +31,7 @@ import dev.engine_room.flywheel.backend.engine.TextureBinder; import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlStateTracker; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import 
dev.engine_room.flywheel.backend.gl.array.GlVertexArray; import dev.engine_room.flywheel.backend.gl.buffer.GlBuffer; import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType; @@ -51,6 +52,8 @@ public class IndirectDrawManager extends DrawManager> { private final DepthPyramid depthPyramid; private final VisibilityBuffer visibilityBuffer; + private int totalPagesLastFrame = 0; + private boolean needsBarrier = false; public IndirectDrawManager(IndirectPrograms programs) { @@ -65,7 +68,7 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer = new MatrixBuffer(); depthPyramid = new DepthPyramid(programs.getDepthReduceProgram()); - visibilityBuffer = new VisibilityBuffer(); + visibilityBuffer = new VisibilityBuffer(programs.getReadVisibilityProgram()); } @Override @@ -128,12 +131,20 @@ public class IndirectDrawManager extends DrawManager> { group.flushInstancers(); } + visibilityBuffer.read(totalPagesLastFrame); + visibilityBuffer.clear(); + cullingGroups.values() .removeIf(IndirectCullingGroup::checkEmptyAndDelete); instancers.values() .removeIf(instancer -> instancer.instanceCount() == 0); + int totalPagesThisFrame = 0; + for (var group : cullingGroups.values()) { + totalPagesThisFrame += group.flipVisibilityOffsets(totalPagesThisFrame); + } + meshPool.flush(); stagingBuffer.reclaim(); @@ -157,8 +168,8 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); - GL46.glActiveTexture(GL46.GL_TEXTURE0); - GL46.glBindTexture(GL46.GL_TEXTURE_2D, depthPyramid.pyramidTextureId); + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(depthPyramid.pyramidTextureId); for (var group : cullingGroups.values()) { group.dispatchCull(); @@ -174,6 +185,8 @@ public class IndirectDrawManager extends DrawManager> { } needsBarrier = true; + + totalPagesLastFrame = totalPagesThisFrame; } @Override diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index 025d7bf1f..b0a600792 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -3,18 +3,24 @@ package dev.engine_room.flywheel.backend.engine.indirect; import org.lwjgl.opengl.GL30; import org.lwjgl.opengl.GL32; import org.lwjgl.opengl.GL46; +import org.lwjgl.opengl.GL46C; import com.mojang.blaze3d.platform.GlStateManager; import dev.engine_room.flywheel.backend.FlwBackend; import dev.engine_room.flywheel.backend.gl.GlTextureUnit; +import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.math.MoreMath; import it.unimi.dsi.fastutil.ints.IntArraySet; import it.unimi.dsi.fastutil.ints.IntSet; import net.minecraft.client.Minecraft; public class VisibilityBuffer { + private static final int READ_GROUP_SIZE = 16; private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; + private final GlProgram readVisibilityProgram; + private final ResizableStorageBuffer visibilityBitset; private final int textureId; private int lastWidth = -1; @@ -22,7 +28,9 @@ public class VisibilityBuffer { private final IntSet attached = new IntArraySet(); - public VisibilityBuffer() { + public VisibilityBuffer(GlProgram readVisibilityProgram) { + this.readVisibilityProgram = readVisibilityProgram; + visibilityBitset = new ResizableStorageBuffer(); textureId = GL32.glGenTextures(); GlStateManager._bindTexture(textureId); @@ -32,9 +40,29 @@ public class VisibilityBuffer { GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); } - public void attach() { - // TODO: clear the vis buffer. maybe do this when we read it? 
+ public void read(int pageCount) { + if (pageCount == 0) { + return; + } + visibilityBitset.ensureCapacity((long) pageCount << 2); + + GL46.nglClearNamedBufferData(visibilityBitset.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0); + + if (lastWidth == -1 || lastHeight == -1) { + return; + } + + readVisibilityProgram.bind(); + GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, 0, visibilityBitset.handle()); + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(textureId); + + GL46.glDispatchCompute(MoreMath.ceilingDiv(lastWidth, READ_GROUP_SIZE), MoreMath.ceilingDiv(lastHeight, READ_GROUP_SIZE), 1); + } + + public void attach() { var mainRenderTarget = Minecraft.getInstance() .getMainRenderTarget(); @@ -66,6 +94,10 @@ public class VisibilityBuffer { GL32.glDeleteTextures(textureId); } + public void clear() { + GL46C.nglClearTexImage(textureId, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0); + } + private void setupTexture(int width, int height) { if (lastWidth == width && lastHeight == height) { return; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl index 449836630..7e818f13c 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl @@ -6,3 +6,6 @@ #define _FLW_LIGHT_LUT_BUFFER_BINDING 5 #define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6 #define _FLW_MATRIX_BUFFER_BINDING 7 +#define _FLW_PASS_TWO_BUFFER_BINDING 8 +#define _FLW_LATE_CULL_BUFFER_BINDING 9 +#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 10 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl new file mode 100644 index 
000000000..404c29ca9 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -0,0 +1,123 @@ +#include "flywheel:internal/indirect/buffer_bindings.glsl" +#include "flywheel:internal/indirect/model_descriptor.glsl" +#include "flywheel:internal/uniforms/uniforms.glsl" +#include "flywheel:util/matrix.glsl" +#include "flywheel:internal/indirect/matrices.glsl" + +layout(local_size_x = 32) in; + +uniform uint _flw_visibilityReadOffsetPages; + +layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { + uint _flw_drawIndices[]; +}; + +layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer { + uint _flw_passTwoIndicies[]; +}; + +// High 6 bits for the number of instances in the page. +const uint _FLW_PAGE_COUNT_OFFSET = 26u; +// Bottom 26 bits for the model index. +const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; + +layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { + uint _flw_pageFrameDescriptors[]; +}; + +layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict readonly buffer LastFrameVisibilityBuffer { + uint _flw_visibleFlag[]; +}; + +layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { + ModelDescriptor _flw_models[]; +}; + +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; + +struct _FlwLateCullDispatch { + uint x; + uint y; + uint z; + uint threadCount; +}; + +layout(std430, binding = _FLW_LATE_CULL_BUFFER_BINDING) restrict buffer LateCullBuffer { + _FlwLateCullDispatch _flw_lateCullDispatch; +}; + +// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing. +// Only uses 6 fmas and some boolean ops. 
+// See also: +// flywheel:uniform/flywheel.glsl +// dev.engine_room.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes +// org.joml.FrustumIntersection.testSphere +bool _flw_testSphere(vec3 center, float radius) { + bvec4 xyInside = greaterThanEqual(fma(flw_frustumPlanes.xyX, center.xxxx, fma(flw_frustumPlanes.xyY, center.yyyy, fma(flw_frustumPlanes.xyZ, center.zzzz, flw_frustumPlanes.xyW))), -radius.xxxx); + bvec2 zInside = greaterThanEqual(fma(flw_frustumPlanes.zX, center.xx, fma(flw_frustumPlanes.zY, center.yy, fma(flw_frustumPlanes.zZ, center.zz, flw_frustumPlanes.zW))), -radius.xx); + + return all(xyInside) && all(zInside); +} + +bool _flw_isVisible(uint instanceIndex, uint modelIndex) { + uint matrixIndex = _flw_models[modelIndex].matrixIndex; + BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; + + vec3 center; + float radius; + _flw_unpackBoundingSphere(sphere, center, radius); + + FlwInstance instance = _flw_unpackInstance(instanceIndex); + + flw_transformBoundingSphere(instance, center, radius); + + if (matrixIndex > 0) { + transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); + } + + return _flw_testSphere(center, radius); +} + +void main() { + uint pageIndex = gl_WorkGroupID.x; + + if (pageIndex >= _flw_pageFrameDescriptors.length()) { + return; + } + + uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex]; + + uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET; + + if (gl_LocalInvocationID.x >= pageInstanceCount) { + return; + } + + uint instanceIndex = gl_GlobalInvocationID.x; + + uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK; + + if (!_flw_isVisible(instanceIndex, modelIndex)) { + return; + } + + uint pageVisibility = _flw_visibleFlag[_flw_visibilityReadOffsetPages + pageIndex]; + + if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) { + // This instance was visibile last frame, it should be rendered early. 
+ uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); + uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex; + _flw_drawIndices[targetIndex] = instanceIndex; + } else { + // Try again later to see if it's been disoccluded. + uint targetIndex = atomicAdd(_flw_lateCullDispatch.threadCount, 1); + _flw_passTwoIndices[targetIndex] = instanceIndex; + + if (targetIndex % 32u == 0u) { + // This thread wrote an index that will be at the start of a new workgroup later + atomicAdd(_flw_lateCullDispatch.x, 1); + } + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl new file mode 100644 index 000000000..4d32b81d1 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl @@ -0,0 +1,124 @@ +#include "flywheel:internal/indirect/buffer_bindings.glsl" +#include "flywheel:internal/indirect/model_descriptor.glsl" +#include "flywheel:internal/uniforms/uniforms.glsl" +#include "flywheel:util/matrix.glsl" +#include "flywheel:internal/indirect/matrices.glsl" + +layout(local_size_x = 32) in; + +layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { + uint _flw_drawIndices[]; +}; + +layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { + uint _flw_passTwoIndicies[]; +}; + +layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { + uint _flw_pageFrameDescriptors[]; +}; + +layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { + ModelDescriptor _flw_models[]; +}; + +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; + +layout(binding = 0) uniform sampler2D _flw_depthPyramid; + +bool projectSphere(vec3 c, float r, float 
znear, float P00, float P11, out vec4 aabb) { + // Closest point on the sphere is between the camera and the near plane, don't even attempt to cull. + if (c.z + r > -znear) { + return false; + } + + vec3 cr = c * r; + float czr2 = c.z * c.z - r * r; + + float vx = sqrt(c.x * c.x + czr2); + float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x); + float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x); + + float vy = sqrt(c.y * c.y + czr2); + float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y); + float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y); + + aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11); + aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space + + return true; +} + +bool _flw_isVisible(uint instanceIndex, uint modelIndex) { + uint matrixIndex = _flw_models[modelIndex].matrixIndex; + BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; + + vec3 center; + float radius; + _flw_unpackBoundingSphere(sphere, center, radius); + + FlwInstance instance = _flw_unpackInstance(instanceIndex); + + flw_transformBoundingSphere(instance, center, radius); + + if (matrixIndex > 0) { + transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); + } + + transformBoundingSphere(flw_view, center, radius); + + vec4 aabb; + if (projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb)) + { + float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth; + float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight; + + int level = clamp(int(ceil(log2(max(width, height)))), 0, _flw_cullData.pyramidLevels); + + ivec2 levelSize = textureSize(_flw_depthPyramid, level); + + ivec4 levelSizePair = ivec4(levelSize, levelSize); + + ivec4 bounds = ivec4(aabb * vec4(levelSizePair)); + + float depth01 = texelFetch(_flw_depthPyramid, bounds.xw, level).r; + float depth11 = texelFetch(_flw_depthPyramid, bounds.zw, level).r; + float depth10 = texelFetch(_flw_depthPyramid, bounds.zy, 
level).r; + float depth00 = texelFetch(_flw_depthPyramid, bounds.xy, level).r; + + float depth; + if (_flw_cullData.useMin == 0) { + depth = max(max(depth00, depth01), max(depth10, depth11)); + } else { + depth = min(min(depth00, depth01), min(depth10, depth11)); + } + + float depthSphere = 1. + _flw_cullData.znear / (center.z + radius); + + return depthSphere <= depth; + } + + return true; +} + +void main() { + if (gl_GlobalInvocationID.x >= _flw_passTwoIndicies.length()) { + return; + } + + uint instanceIndex = _flw_passTwoIndices[gl_GlobalInvocationID.x]; + + uint pageIndex = instanceIndex >> 5; + + uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex]; + + uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK; + + if (_flw_isVisible(instanceIndex, modelIndex)) { + uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); + uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex; + _flw_instanceIndices[targetIndex] = instanceIndex; + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert index 9cc21ce97..151e7d64a 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert @@ -23,7 +23,7 @@ uniform uint _flw_baseDraw; // We read the visibility buffer for all culling groups into a single shared buffer. // This offset is used to know where each culling group starts. -uniform uint _flw_globalInstanceIdOffset = 0; +uniform uint _flw_visibilityWriteOffsetInstances = 0; flat out uvec3 _flw_packedMaterial; @@ -58,5 +58,5 @@ void main() { _flw_main(instance); // Add 1 because a 0 instance id means null. 
- _flw_instanceID = _flw_globalInstanceIdOffset + instanceIndex + 1; + _flw_instanceID = _flw_visibilityWriteOffsetInstances + instanceIndex + 1; } From 1edb72ac1993ae7eb9e2f174a1dbc1db123419b7 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 8 Sep 2024 11:00:04 -0500 Subject: [PATCH 10/17] Buff to the buffers - Smarter multibind logic - Make offsets in IndirectBuffers dependent on BufferBindings - Organize buffer bindings based on where they're used to allow each pass to bind exactly which buffers it needs - Add stub dispatchCullPassTwo to IndirectCullingGroup - Add pass two buffers to IndirectBuffers --- .../engine/indirect/BufferBindings.java | 18 ++-- .../engine/indirect/IndirectBuffers.java | 83 ++++++++++++------- .../engine/indirect/IndirectCullingGroup.java | 27 ++++-- .../engine/indirect/VisibilityBuffer.java | 10 +-- .../internal/indirect/buffer_bindings.glsl | 26 +++--- .../internal/indirect/early_cull.glsl | 32 +++---- .../flywheel/internal/indirect/late_cull.glsl | 8 +- .../internal/indirect/read_visibility.glsl | 8 +- 8 files changed, 133 insertions(+), 79 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java index 658096695..67b0a9dc6 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java @@ -1,14 +1,18 @@ package dev.engine_room.flywheel.backend.engine.indirect; public final class BufferBindings { - public static final int INSTANCE = 0; - public static final int TARGET = 1; + public static final int PASS_TWO_DISPATCH = 0; + public static final int PASS_TWO_INSTANCE_INDEX = 1; public static final int PAGE_FRAME_DESCRIPTOR = 2; - public static final int MODEL = 3; - public static final int DRAW = 4; - public static final int LIGHT_LUT = 5; - 
public static final int LIGHT_SECTION = 6; - public static final int MATRICES = 7; + public static final int INSTANCE = 3; + public static final int DRAW_INSTANCE_INDEX = 4; + public static final int MODEL = 5; + public static final int DRAW = 6; + + public static final int LIGHT_LUT = 7; + public static final int LIGHT_SECTION = 8; + public static final int MATRICES = 9; + public static final int LAST_FRAME_VISIBILITY = 10; private BufferBindings() { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 498030fe7..82be8783a 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -11,7 +11,7 @@ import dev.engine_room.flywheel.lib.memory.MemoryBlock; public class IndirectBuffers { // Number of vbos created. 
- public static final int BUFFER_COUNT = 5; + public static final int BUFFER_COUNT = 7; public static final long INT_SIZE = Integer.BYTES; public static final long PTR_SIZE = Pointer.POINTER_SIZE; @@ -30,18 +30,23 @@ public class IndirectBuffers { private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE; // Offsets to the vbos - private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET; - private static final long TARGET_HANDLE_OFFSET = INT_SIZE; - private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = INT_SIZE * 2; - private static final long MODEL_HANDLE_OFFSET = INT_SIZE * 3; - private static final long DRAW_HANDLE_OFFSET = INT_SIZE * 4; + private static final long PASS_TWO_DISPATCH_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_DISPATCH * INT_SIZE; + private static final long PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * INT_SIZE; + private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * INT_SIZE; + private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.INSTANCE * INT_SIZE; + private static final long DRAW_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * INT_SIZE; + private static final long MODEL_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.MODEL * INT_SIZE; + private static final long DRAW_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW * INT_SIZE; // Offsets to the sizes - private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET; - private static final long TARGET_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE; - private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 2; - private static final long MODEL_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 3; - private static final long DRAW_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 4; + private static final long PASS_TWO_DISPATCH_SIZE_OFFSET = SIZE_OFFSET + 
BufferBindings.PASS_TWO_DISPATCH * PTR_SIZE; + private static final long PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * PTR_SIZE; + private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * PTR_SIZE; + private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.INSTANCE * PTR_SIZE; + private static final long DRAW_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * PTR_SIZE; + private static final long MODEL_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.MODEL * PTR_SIZE; + private static final long DRAW_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.DRAW * PTR_SIZE; + private static final float INSTANCE_GROWTH_FACTOR = 1.25f; private static final float MODEL_GROWTH_FACTOR = 2f; @@ -62,8 +67,10 @@ public class IndirectBuffers { private final MemoryBlock multiBindBlock; private final long instanceStride; + public final ResizableStorageBuffer passTwoDispatch; + public final ResizableStorageArray passTwoInstanceIndex; public final ObjectStorage objectStorage; - public final ResizableStorageArray target; + public final ResizableStorageArray drawInstanceIndex; public final ResizableStorageArray model; public final ResizableStorageArray draw; @@ -71,59 +78,79 @@ public class IndirectBuffers { this.instanceStride = instanceStride; this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1); + passTwoDispatch = new ResizableStorageBuffer(); + passTwoInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); objectStorage = new ObjectStorage(instanceStride); - target = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); + drawInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); model = new ResizableStorageArray(MODEL_STRIDE, MODEL_GROWTH_FACTOR); draw = new ResizableStorageArray(DRAW_COMMAND_STRIDE, DRAW_GROWTH_FACTOR); + + passTwoDispatch.ensureCapacity(INT_SIZE * 4); } void 
updateCounts(int instanceCount, int modelCount, int drawCount) { - target.ensureCapacity(instanceCount); + drawInstanceIndex.ensureCapacity(instanceCount); + passTwoDispatch.ensureCapacity(instanceCount); model.ensureCapacity(modelCount); draw.ensureCapacity(drawCount); final long ptr = multiBindBlock.ptr(); - MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle()); - MemoryUtil.memPutInt(ptr + TARGET_HANDLE_OFFSET, target.handle()); + + MemoryUtil.memPutInt(ptr + PASS_TWO_DISPATCH_HANDLE_OFFSET, passTwoDispatch.handle()); + MemoryUtil.memPutInt(ptr + PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle()); MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle()); + MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle()); + MemoryUtil.memPutInt(ptr + DRAW_INSTANCE_INDEX_HANDLE_OFFSET, drawInstanceIndex.handle()); MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle()); MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); - MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity()); - MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * instanceCount); + MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity()); + MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); + MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity()); + MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount); MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount); } - public void bindForCompute() { - multiBind(); 
+ public void bindForCullPassOne() { + multiBind(0, 6); + } + + public void bindForCullPassTwo() { + multiBind(1, 5); + GlBufferType.DISPATCH_INDIRECT_BUFFER.bind(passTwoDispatch.handle()); + } + + public void bindForApply() { + multiBind(5, 2); } public void bindForDraw() { - multiBind(); + multiBind(3, 4); GlBufferType.DRAW_INDIRECT_BUFFER.bind(draw.handle()); } - private void multiBind() { - final long ptr = multiBindBlock.ptr(); - nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, BufferBindings.INSTANCE, IndirectBuffers.BUFFER_COUNT, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET); - } - /** * Bind all buffers except the draw command buffer. */ public void bindForCrumbling() { + multiBind(3, 3); + } + + private void multiBind(int base, int count) { final long ptr = multiBindBlock.ptr(); - nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, BufferBindings.INSTANCE, 4, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET); + nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, base, count, ptr + base * INT_SIZE, ptr + OFFSET_OFFSET + base * PTR_SIZE, ptr + SIZE_OFFSET + base * PTR_SIZE); } public void delete() { multiBindBlock.free(); objectStorage.delete(); - target.delete(); + drawInstanceIndex.delete(); model.delete(); draw.delete(); + passTwoDispatch.delete(); + passTwoInstanceIndex.delete(); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 45b764404..0290f41cf 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -6,6 +6,7 @@ import static org.lwjgl.opengl.GL30.glUniform1ui; import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT; import static org.lwjgl.opengl.GL42.glMemoryBarrier; import static org.lwjgl.opengl.GL43.glDispatchCompute; +import 
static org.lwjgl.opengl.GL43.glDispatchComputeIndirect; import java.util.ArrayList; import java.util.Comparator; @@ -44,7 +45,8 @@ public class IndirectCullingGroup { private final Map> multiDraws = new EnumMap<>(VisualType.class); private final IndirectPrograms programs; - private final GlProgram cullProgram; + private final GlProgram earlyCull; + private final GlProgram lateCull; private boolean needsDrawBarrier; private boolean needsDrawSort; @@ -63,7 +65,8 @@ public class IndirectCullingGroup { buffers = new IndirectBuffers(instanceStride); this.programs = programs; - cullProgram = programs.getCullingProgram(instanceType); + earlyCull = programs.getCullingProgram(instanceType); + lateCull = programs.getCullPassTwoProgram(instanceType); } public void flushInstancers() { @@ -133,20 +136,32 @@ public class IndirectCullingGroup { } Uniforms.bindAll(); - cullProgram.bind(); + earlyCull.bind(); - cullProgram.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages); + earlyCull.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages); - buffers.bindForCompute(); + buffers.bindForCullPassOne(); glDispatchCompute(buffers.objectStorage.capacity(), 1, 1); } + public void dispatchCullPassTwo() { + if (nothingToDo()) { + return; + } + + Uniforms.bindAll(); + lateCull.bind(); + + buffers.bindForCullPassTwo(); + glDispatchComputeIndirect(0); + } + public void dispatchApply() { if (nothingToDo()) { return; } - buffers.bindForCompute(); + buffers.bindForApply(); glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index b0a600792..85a26e816 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -20,7 +20,7 @@ public class VisibilityBuffer { private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; private final GlProgram readVisibilityProgram; - private final ResizableStorageBuffer visibilityBitset; + private final ResizableStorageBuffer lastFrameVisibility; private final int textureId; private int lastWidth = -1; @@ -30,7 +30,7 @@ public class VisibilityBuffer { public VisibilityBuffer(GlProgram readVisibilityProgram) { this.readVisibilityProgram = readVisibilityProgram; - visibilityBitset = new ResizableStorageBuffer(); + lastFrameVisibility = new ResizableStorageBuffer(); textureId = GL32.glGenTextures(); GlStateManager._bindTexture(textureId); @@ -45,16 +45,16 @@ public class VisibilityBuffer { return; } - visibilityBitset.ensureCapacity((long) pageCount << 2); + lastFrameVisibility.ensureCapacity((long) pageCount << 2); - GL46.nglClearNamedBufferData(visibilityBitset.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0); + GL46.nglClearNamedBufferData(lastFrameVisibility.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0); if (lastWidth == -1 || lastHeight == -1) { return; } readVisibilityProgram.bind(); - GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, 0, visibilityBitset.handle()); + GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.LAST_FRAME_VISIBILITY, lastFrameVisibility.handle()); GlTextureUnit.T0.makeActive(); GlStateManager._bindTexture(textureId); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl index 7e818f13c..87eb99051 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl @@ 
-1,11 +1,17 @@ -#define _FLW_INSTANCE_BUFFER_BINDING 0 -#define _FLW_TARGET_BUFFER_BINDING 1 -#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2 -#define _FLW_MODEL_BUFFER_BINDING 3 -#define _FLW_DRAW_BUFFER_BINDING 4 -#define _FLW_LIGHT_LUT_BUFFER_BINDING 5 -#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6 -#define _FLW_MATRIX_BUFFER_BINDING 7 -#define _FLW_PASS_TWO_BUFFER_BINDING 8 -#define _FLW_LATE_CULL_BUFFER_BINDING 9 +// Per culling group +#define _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING 0 // cull1 +#define _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING 1 // cull1, cull2 +#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2 // cull1, cull2 +#define _FLW_INSTANCE_BUFFER_BINDING 3 // cull1, cull2, draw +#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 4 // cull1, cull2, draw +#define _FLW_MODEL_BUFFER_BINDING 5 // cull1, cull2, apply +#define _FLW_DRAW_BUFFER_BINDING 6 // apply, draw + + +// Global to the engine +#define _FLW_LIGHT_LUT_BUFFER_BINDING 7 +#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 8 + +#define _FLW_MATRIX_BUFFER_BINDING 9 + #define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 10 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl index 404c29ca9..4a69f709a 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -8,14 +8,25 @@ layout(local_size_x = 32) in; uniform uint _flw_visibilityReadOffsetPages; -layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { - uint _flw_drawIndices[]; +struct _FlwLateCullDispatch { + uint x; + uint y; + uint z; + uint threadCount; }; -layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer { +layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict 
buffer PassTwoDispatchBuffer { + _FlwLateCullDispatch _flw_lateCullDispatch; +}; + +layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { uint _flw_passTwoIndicies[]; }; +layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { + uint _flw_drawIndices[]; +}; + // High 6 bits for the number of instances in the page. const uint _FLW_PAGE_COUNT_OFFSET = 26u; // Bottom 26 bits for the model index. @@ -26,7 +37,7 @@ layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict rea }; layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict readonly buffer LastFrameVisibilityBuffer { - uint _flw_visibleFlag[]; + uint _flw_lastFrameVisibility[]; }; layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { @@ -37,17 +48,6 @@ layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer Ma Matrices _flw_matrices[]; }; -struct _FlwLateCullDispatch { - uint x; - uint y; - uint z; - uint threadCount; -}; - -layout(std430, binding = _FLW_LATE_CULL_BUFFER_BINDING) restrict buffer LateCullBuffer { - _FlwLateCullDispatch _flw_lateCullDispatch; -}; - // Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing. // Only uses 6 fmas and some boolean ops. // See also: @@ -103,7 +103,7 @@ void main() { return; } - uint pageVisibility = _flw_visibleFlag[_flw_visibilityReadOffsetPages + pageIndex]; + uint pageVisibility = _flw_lastFrameVisibility[_flw_visibilityReadOffsetPages + pageIndex]; if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) { // This instance was visibile last frame, it should be rendered early. 
diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl index 4d32b81d1..101e6328f 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl @@ -6,12 +6,12 @@ layout(local_size_x = 32) in; -layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { - uint _flw_drawIndices[]; +layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { + uint _flw_passTwoIndicies[]; }; -layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { - uint _flw_passTwoIndicies[]; +layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { + uint _flw_drawIndices[]; }; layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl index dad817b98..3a4d9d9dc 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl @@ -1,9 +1,11 @@ +#include "flywheel:internal/indirect/buffer_bindings.glsl" + layout(local_size_x = 8, local_size_y = 8) in; layout(binding = 0) uniform usampler2D visBuffer; -layout(std430) restrict buffer VisibleFlagBuffer { - uint _flw_visibleFlag[]; +layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buffer LastFrameVisibilityBuffer { + uint _flw_lastFrameVisibility[]; }; void main() { @@ -21,5 +23,5 @@ 
void main() { uint mask = 1u << (instanceID & 31u); - atomicOr(_flw_visibleFlag[index], mask); + atomicOr(_flw_lastFrameVisibility[index], mask); } From b6ed3cefda79716ad1acdb7451ba93e4b0e798ba Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 8 Sep 2024 11:28:41 -0500 Subject: [PATCH 11/17] Probably not rendering - Flesh out two pass pipeline - Shove everything into one visual type for now --- .../backend/compile/IndirectPrograms.java | 32 +++---- .../backend/engine/MaterialRenderState.java | 1 + .../engine/indirect/IndirectBuffers.java | 4 + .../engine/indirect/IndirectCullingGroup.java | 9 ++ .../engine/indirect/IndirectDrawManager.java | 85 +++++++++++++------ .../engine/indirect/VisibilityBuffer.java | 6 +- .../internal/indirect/zero_models.glsl | 18 ++++ 7 files changed, 110 insertions(+), 45 deletions(-) create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/zero_models.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index bb140cf56..47e381eff 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -33,7 +33,8 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl"); private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl"); private static final ResourceLocation READ_VISIBILITY_SHADER_MAIN = Flywheel.rl("internal/indirect/read_visibility.glsl"); - public static final List UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN); + private static final ResourceLocation ZERO_MODELS_SHADER_MAIN = 
Flywheel.rl("internal/indirect/zero_models.glsl"); + public static final List UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN, ZERO_MODELS_SHADER_MAIN); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -47,19 +48,13 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final Map pipeline; private final Map, GlProgram> culling; private final Map, GlProgram> cullPassTwo; - private final GlProgram apply; - private final GlProgram scatter; - private final GlProgram depthReduce; - private final GlProgram readVisibility; + private final Map utils; - private IndirectPrograms(Map pipeline, Map, GlProgram> culling, Map, GlProgram> cullPassTwo, GlProgram apply, GlProgram scatter, GlProgram depthReduce, GlProgram readVisibility) { + private IndirectPrograms(Map pipeline, Map, GlProgram> culling, Map, GlProgram> cullPassTwo, Map utils) { this.pipeline = pipeline; this.culling = culling; this.cullPassTwo = cullPassTwo; - this.apply = apply; - this.scatter = scatter; - this.depthReduce = depthReduce; - this.readVisibility = readVisibility; + this.utils = utils; } private static List getExtensions(GlslVersion glslVersion) { @@ -110,7 +105,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { var utils = utilCompiler.compileAndReportErrors(UTIL_SHADERS); if (pipelineResult != null && pass1Result != null && pass2Result != null && utils != null) { - newInstance = new IndirectPrograms(pipelineResult, pass1Result, pass2Result, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN), utils.get(READ_VISIBILITY_SHADER_MAIN)); + newInstance = new IndirectPrograms(pipelineResult, pass1Result, pass2Result, utils); } } catch (Throwable t) { FlwPrograms.LOGGER.error("Failed to compile indirect programs", t); @@ -195,19 +190,23 @@ public class IndirectPrograms extends AtomicReferenceCounted 
{ } public GlProgram getApplyProgram() { - return apply; + return utils.get(APPLY_SHADER_MAIN); + } + + public GlProgram getZeroModelProgram() { + return utils.get(ZERO_MODELS_SHADER_MAIN); } public GlProgram getScatterProgram() { - return scatter; + return utils.get(SCATTER_SHADER_MAIN); } public GlProgram getDepthReduceProgram() { - return depthReduce; + return utils.get(DEPTH_REDUCE_SHADER_MAIN); } public GlProgram getReadVisibilityProgram() { - return readVisibility; + return utils.get(READ_VISIBILITY_SHADER_MAIN); } @Override @@ -216,6 +215,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { .forEach(GlProgram::delete); culling.values() .forEach(GlProgram::delete); - apply.delete(); + utils.values() + .forEach(GlProgram::delete); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java index 0ddf8e1e7..d4fd52a41 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java @@ -35,6 +35,7 @@ public final class MaterialRenderState { setupBackfaceCulling(material.backfaceCulling()); setupPolygonOffset(material.polygonOffset()); setupDepthTest(material.depthTest()); +// setupDepthTest(DepthTest.OFF); setupTransparency(material.transparency()); setupWriteMask(material.writeMask()); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 82be8783a..2b0631525 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -126,6 +126,10 @@ public class IndirectBuffers { multiBind(5, 2); 
} + public void bindForModelReset() { + multiBind(5, 1); + } + public void bindForDraw() { multiBind(3, 4); GlBufferType.DRAW_INDIRECT_BUFFER.bind(draw.handle()); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 0290f41cf..278d95b6c 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -165,6 +165,15 @@ public class IndirectCullingGroup { glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1); } + public void dispatchModelReset() { + if (nothingToDo()) { + return; + } + + buffers.bindForModelReset(); + glDispatchCompute(GlCompat.getComputeGroupCount(instancers.size()), 1, 1); + } + private boolean nothingToDo() { return indirectDraws.isEmpty() || instanceCountThisFrame == 0; } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index b0083f241..799495a09 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -93,7 +93,8 @@ public class IndirectDrawManager extends DrawManager> { } public void render(VisualType visualType) { - if (!hasVisualType(visualType)) { + // FIXME: Two pass occlusion prefers to render everything at once + if (visualType != VisualType.BLOCK_ENTITY) { return; } @@ -105,17 +106,50 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); Uniforms.bindAll(); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + visibilityBuffer.bind(); + + for (var group1 : 
cullingGroups.values()) { + group1.dispatchCull(); + } + + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + dispatchApply(); + + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + visibilityBuffer.attach(); - if (needsBarrier) { - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); - needsBarrier = false; - } + submitDraws(); + + depthPyramid.generate(); + + programs.getZeroModelProgram() + .bind(); for (var group : cullingGroups.values()) { - group.submit(visualType); + group.dispatchModelReset(); } + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(depthPyramid.pyramidTextureId); + + for (var group1 : cullingGroups.values()) { + group1.dispatchCullPassTwo(); + } + + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + dispatchApply(); + + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + submitDraws(); + MaterialRenderState.reset(); TextureBinder.resetLightAndOverlay(); @@ -123,6 +157,23 @@ public class IndirectDrawManager extends DrawManager> { } } + private void dispatchApply() { + programs.getApplyProgram() + .bind(); + + for (var group1 : cullingGroups.values()) { + group1.dispatchApply(); + } + } + + private void submitDraws() { + for (var group : cullingGroups.values()) { + group.submit(VisualType.BLOCK_ENTITY); + group.submit(VisualType.ENTITY); + group.submit(VisualType.EFFECT); + } + } + @Override public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) { super.flush(lightStorage, environmentStorage); @@ -159,31 +210,9 @@ public class IndirectDrawManager extends DrawManager> { stagingBuffer.flush(); - depthPyramid.generate(); - // We could probably save some driver calls here when there are // actually zero instances, but that feels like a very rare case - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); - - matrixBuffer.bind(); - - GlTextureUnit.T0.makeActive(); - GlStateManager._bindTexture(depthPyramid.pyramidTextureId); - - for (var group : 
cullingGroups.values()) { - group.dispatchCull(); - } - - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); - - programs.getApplyProgram() - .bind(); - - for (var group : cullingGroups.values()) { - group.dispatchApply(); - } - needsBarrier = true; totalPagesLastFrame = totalPagesThisFrame; diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index 85a26e816..34ef71499 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -54,7 +54,7 @@ public class VisibilityBuffer { } readVisibilityProgram.bind(); - GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.LAST_FRAME_VISIBILITY, lastFrameVisibility.handle()); + bind(); GlTextureUnit.T0.makeActive(); GlStateManager._bindTexture(textureId); @@ -62,6 +62,10 @@ public class VisibilityBuffer { GL46.glDispatchCompute(MoreMath.ceilingDiv(lastWidth, READ_GROUP_SIZE), MoreMath.ceilingDiv(lastHeight, READ_GROUP_SIZE), 1); } + public void bind() { + GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.LAST_FRAME_VISIBILITY, lastFrameVisibility.handle()); + } + public void attach() { var mainRenderTarget = Minecraft.getInstance() .getMainRenderTarget(); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/zero_models.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/zero_models.glsl new file mode 100644 index 000000000..c8f7a0b9c --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/zero_models.glsl @@ -0,0 +1,18 @@ +#include "flywheel:internal/indirect/buffer_bindings.glsl" +#include "flywheel:internal/indirect/model_descriptor.glsl" + +layout(local_size_x = _FLW_SUBGROUP_SIZE) in; + +layout(std430, 
binding = _FLW_MODEL_BUFFER_BINDING) restrict writeonly buffer ModelBuffer { + ModelDescriptor models[]; +}; + +void main() { + uint modelIndex = gl_GlobalInvocationID.x; + + if (modelIndex >= models.length()) { + return; + } + + models[modelIndex].instanceCount = 0; +} From 4552716b74c3450ab2a34870e1298f89d46f10b4 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 8 Sep 2024 12:01:54 -0700 Subject: [PATCH 12/17] Error count decreasing - Mostly silly typos - Do not clear the visbuffer if it hasn't been generated yet - Grow pass two index buffer instead of pass two dispatch buffer --- .../backend/engine/indirect/IndirectBuffers.java | 11 +++++++---- .../backend/engine/indirect/VisibilityBuffer.java | 4 ++++ .../flywheel/internal/indirect/early_cull.glsl | 4 ++-- .../flywheel/internal/indirect/late_cull.glsl | 13 ++++++++++--- .../flywheel/flywheel/internal/indirect/main.vert | 8 ++++---- 5 files changed, 27 insertions(+), 13 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 2b0631525..cfc6cdcf6 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -90,14 +90,14 @@ public class IndirectBuffers { void updateCounts(int instanceCount, int modelCount, int drawCount) { drawInstanceIndex.ensureCapacity(instanceCount); - passTwoDispatch.ensureCapacity(instanceCount); + passTwoInstanceIndex.ensureCapacity(instanceCount); model.ensureCapacity(modelCount); draw.ensureCapacity(drawCount); final long ptr = multiBindBlock.ptr(); MemoryUtil.memPutInt(ptr + PASS_TWO_DISPATCH_HANDLE_OFFSET, passTwoDispatch.handle()); - MemoryUtil.memPutInt(ptr + PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle()); + MemoryUtil.memPutInt(ptr + 
PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET, passTwoInstanceIndex.handle()); MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle()); MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle()); MemoryUtil.memPutInt(ptr + DRAW_INSTANCE_INDEX_HANDLE_OFFSET, drawInstanceIndex.handle()); @@ -105,7 +105,7 @@ public class IndirectBuffers { MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity()); - MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); + MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, passTwoInstanceIndex.capacity()); MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity()); MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); @@ -144,7 +144,10 @@ public class IndirectBuffers { private void multiBind(int base, int count) { final long ptr = multiBindBlock.ptr(); - nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, base, count, ptr + base * INT_SIZE, ptr + OFFSET_OFFSET + base * PTR_SIZE, ptr + SIZE_OFFSET + base * PTR_SIZE); + long handlePtr = ptr + HANDLE_OFFSET + base * INT_SIZE; + long offsetPtr = ptr + OFFSET_OFFSET + base * PTR_SIZE; + long sizePtr = ptr + SIZE_OFFSET + base * PTR_SIZE; + nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, base, count, handlePtr, offsetPtr, sizePtr); } public void delete() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index 34ef71499..b3810a882 100644 --- 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -99,6 +99,10 @@ public class VisibilityBuffer { } public void clear() { + if (lastWidth == -1 || lastHeight == -1) { + return; + } + GL46C.nglClearTexImage(textureId, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl index 4a69f709a..e64869552 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -19,8 +19,8 @@ layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer _FlwLateCullDispatch _flw_lateCullDispatch; }; -layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { - uint _flw_passTwoIndicies[]; +layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer { + uint _flw_passTwoIndices[]; }; layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl index 101e6328f..574170cbe 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl @@ -7,13 +7,20 @@ layout(local_size_x = 32) in; layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { - uint _flw_passTwoIndicies[]; + uint 
_flw_passTwoIndices[]; }; layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { uint _flw_drawIndices[]; }; + +// High 6 bits for the number of instances in the page. +const uint _FLW_PAGE_COUNT_OFFSET = 26u; +// Bottom 26 bits for the model index. +const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; + + layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { uint _flw_pageFrameDescriptors[]; }; @@ -104,7 +111,7 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { } void main() { - if (gl_GlobalInvocationID.x >= _flw_passTwoIndicies.length()) { + if (gl_GlobalInvocationID.x >= _flw_passTwoIndices.length()) { return; } @@ -119,6 +126,6 @@ void main() { if (_flw_isVisible(instanceIndex, modelIndex)) { uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex; - _flw_instanceIndices[targetIndex] = instanceIndex; + _flw_drawIndices[targetIndex] = instanceIndex; } } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert index 151e7d64a..a8b05b6f5 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert @@ -5,8 +5,8 @@ #include "flywheel:internal/indirect/light.glsl" #include "flywheel:internal/indirect/matrices.glsl" -layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict readonly buffer TargetBuffer { - uint _flw_instanceIndices[]; +layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer DrawIndexBuffer { + uint _flw_drawIndices[]; }; layout(std430, binding = _FLW_DRAW_BUFFER_BINDING) restrict readonly buffer DrawBuffer { @@ -49,9 +49,9 @@ void main() { #endif #if __VERSION__ < 
460 - uint instanceIndex = _flw_instanceIndices[gl_BaseInstanceARB + gl_InstanceID]; + uint instanceIndex = _flw_drawIndices[gl_BaseInstanceARB + gl_InstanceID]; #else - uint instanceIndex = _flw_instanceIndices[gl_BaseInstance + gl_InstanceID]; + uint instanceIndex = _flw_drawIndices[gl_BaseInstance + gl_InstanceID]; #endif FlwInstance instance = _flw_unpackInstance(instanceIndex); From f12aa15daeef2055323001a285608700b5e58c12 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Mon, 9 Sep 2024 14:20:25 -0700 Subject: [PATCH 13/17] It's alive - Fix crash by resetting the indirect dispatch buffer each frame - Use DSA + immutable storage for depth pyramid and visibility buffer - In pass two, check against the thread count written out in pass one to early return - Require a draw barrier after each apply dispatch - Use a storage array for the last frame visibility buffer --- .../backend/engine/indirect/DepthPyramid.java | 36 +++++++----------- .../engine/indirect/IndirectBuffers.java | 4 +- .../engine/indirect/IndirectCullingGroup.java | 11 +++++- .../engine/indirect/VisibilityBuffer.java | 38 ++++++++++--------- .../flywheel/backend/gl/GlCompat.java | 3 ++ .../flywheel/internal/indirect/dispatch.glsl | 6 +++ .../internal/indirect/early_cull.glsl | 8 +--- .../flywheel/internal/indirect/late_cull.glsl | 8 +++- .../internal/indirect/read_visibility.glsl | 2 +- 9 files changed, 65 insertions(+), 51 deletions(-) create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java index 49afe0776..1ba12b86f 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -13,23 +13,13 @@ import 
net.minecraft.client.Minecraft; public class DepthPyramid { private final GlProgram depthReduceProgram; - public final int pyramidTextureId; + public int pyramidTextureId = -1; private int lastWidth = -1; private int lastHeight = -1; public DepthPyramid(GlProgram depthReduceProgram) { this.depthReduceProgram = depthReduceProgram; - - pyramidTextureId = GL32.glGenTextures(); - - GlStateManager._bindTexture(pyramidTextureId); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); - } public void generate() { @@ -45,12 +35,9 @@ public class DepthPyramid { int depthBufferId = mainRenderTarget.getDepthTextureId(); - GlTextureUnit.T1.makeActive(); - GlStateManager._bindTexture(depthBufferId); - GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); - GL46.glActiveTexture(GL32.GL_TEXTURE1); + GlTextureUnit.T1.makeActive(); depthReduceProgram.bind(); @@ -73,7 +60,10 @@ public class DepthPyramid { } public void delete() { - GL32.glDeleteTextures(pyramidTextureId); + if (pyramidTextureId != -1) { + GL32.glDeleteTextures(pyramidTextureId); + pyramidTextureId = -1; + } } private void createPyramidMips(int mipLevels, int width, int height) { @@ -84,14 +74,16 @@ public class DepthPyramid { lastWidth = width; lastHeight = height; - GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId); + delete(); - for (int i = 0; i < mipLevels; i++) { - int mipWidth = mipSize(width, i); - int mipHeight = mipSize(height, i); + pyramidTextureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + GL46.glTextureStorage2D(pyramidTextureId, mipLevels, GL32.GL_R32F, width, 
height); - GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipWidth, mipHeight, 0, GL32.GL_RED, GL32.GL_FLOAT, 0); - } + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GL46.glTextureParameteri(pyramidTextureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); } public static int mipSize(int mip0Size, int level) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index cfc6cdcf6..fdd97bc85 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -105,7 +105,7 @@ public class IndirectBuffers { MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity()); - MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, passTwoInstanceIndex.capacity()); + MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity()); MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); @@ -118,7 +118,7 @@ public class IndirectBuffers { } public void bindForCullPassTwo() { - multiBind(1, 5); + multiBind(0, 6); GlBufferType.DISPATCH_INDIRECT_BUFFER.bind(passTwoDispatch.handle()); } diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 278d95b6c..0d3693187 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -14,6 +14,8 @@ import java.util.EnumMap; import java.util.List; import java.util.Map; +import org.lwjgl.system.MemoryUtil; + import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.InstanceType; import dev.engine_room.flywheel.api.material.Material; @@ -127,7 +129,12 @@ public class IndirectCullingGroup { uploadDraws(stagingBuffer); - needsDrawBarrier = true; + stagingBuffer.enqueueCopy(4 * Integer.BYTES, buffers.passTwoDispatch.handle(), 0, ptr -> { + MemoryUtil.memPutInt(ptr, 0); + MemoryUtil.memPutInt(ptr + 4, 1); + MemoryUtil.memPutInt(ptr + 8, 1); + MemoryUtil.memPutInt(ptr + 12, 0); + }); } public void dispatchCull() { @@ -163,6 +170,8 @@ public class IndirectCullingGroup { buffers.bindForApply(); glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1); + + needsDrawBarrier = true; } public void dispatchModelReset() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index b3810a882..8cfeaecc9 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -20,8 +20,8 @@ public class VisibilityBuffer { private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; private final GlProgram readVisibilityProgram; - private final ResizableStorageBuffer 
lastFrameVisibility; - private final int textureId; + private final ResizableStorageArray lastFrameVisibility; + private int textureId = -1; private int lastWidth = -1; private int lastHeight = -1; @@ -30,14 +30,7 @@ public class VisibilityBuffer { public VisibilityBuffer(GlProgram readVisibilityProgram) { this.readVisibilityProgram = readVisibilityProgram; - lastFrameVisibility = new ResizableStorageBuffer(); - textureId = GL32.glGenTextures(); - - GlStateManager._bindTexture(textureId); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); - GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); + lastFrameVisibility = new ResizableStorageArray(Integer.BYTES, 1.25f); } public void read(int pageCount) { @@ -45,7 +38,7 @@ public class VisibilityBuffer { return; } - lastFrameVisibility.ensureCapacity((long) pageCount << 2); + lastFrameVisibility.ensureCapacity(pageCount); GL46.nglClearNamedBufferData(lastFrameVisibility.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0); @@ -95,7 +88,15 @@ public class VisibilityBuffer { } public void delete() { - GL32.glDeleteTextures(textureId); + deleteTexture(); + lastFrameVisibility.delete(); + } + + private void deleteTexture() { + if (textureId != -1) { + GL32.glDeleteTextures(textureId); + textureId = -1; + } } public void clear() { @@ -117,11 +118,14 @@ public class VisibilityBuffer { lastWidth = width; lastHeight = height; - GlTextureUnit.T0.makeActive(); - GlStateManager._bindTexture(textureId); + deleteTexture(); - // TODO: DSA texture storage? 
- GL32.glTexImage2D(GL32.GL_TEXTURE_2D, 0, GL32.GL_R32UI, width, height, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0); - GlStateManager._bindTexture(0); + textureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D); + GL46.glTextureStorage2D(textureId, 1, GL32.GL_R32UI, width, height); + + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST); + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE); + GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java index b7efa23ed..8d80254c3 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/gl/GlCompat.java @@ -10,6 +10,7 @@ import org.lwjgl.opengl.GL20C; import org.lwjgl.opengl.GL31C; import org.lwjgl.opengl.GL40; import org.lwjgl.opengl.GL43; +import org.lwjgl.opengl.GL46; import org.lwjgl.opengl.GLCapabilities; import org.lwjgl.opengl.KHRShaderSubgroup; import org.lwjgl.system.MemoryStack; @@ -42,6 +43,8 @@ public final class GlCompat { public static final boolean SUPPORTS_INSTANCING = isInstancingSupported(); public static final boolean SUPPORTS_INDIRECT = isIndirectSupported(); + public static final int MAX_SHADER_STORAGE_BUFFER_BINDINGS = GL46.glGetInteger(GL46.GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); + private GlCompat() { } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl new file mode 100644 index 000000000..b0989a7a7 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/dispatch.glsl @@ -0,0 +1,6 @@ +struct 
_FlwLateCullDispatch { + uint x; + uint y; + uint z; + uint threadCount; +}; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl index e64869552..840d3813d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -3,18 +3,12 @@ #include "flywheel:internal/uniforms/uniforms.glsl" #include "flywheel:util/matrix.glsl" #include "flywheel:internal/indirect/matrices.glsl" +#include "flywheel:internal/indirect/dispatch.glsl" layout(local_size_x = 32) in; uniform uint _flw_visibilityReadOffsetPages; -struct _FlwLateCullDispatch { - uint x; - uint y; - uint z; - uint threadCount; -}; - layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer { _FlwLateCullDispatch _flw_lateCullDispatch; }; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl index 574170cbe..4a32340b9 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl @@ -3,9 +3,15 @@ #include "flywheel:internal/uniforms/uniforms.glsl" #include "flywheel:util/matrix.glsl" #include "flywheel:internal/indirect/matrices.glsl" +#include "flywheel:internal/indirect/dispatch.glsl" layout(local_size_x = 32) in; + +layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer { + _FlwLateCullDispatch _flw_lateCullDispatch; +}; + layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { uint _flw_passTwoIndices[]; }; @@ -111,7 +117,7 @@ bool 
_flw_isVisible(uint instanceIndex, uint modelIndex) { } void main() { - if (gl_GlobalInvocationID.x >= _flw_passTwoIndices.length()) { + if (gl_GlobalInvocationID.x >= _flw_lateCullDispatch.threadCount) { return; } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl index 3a4d9d9dc..52d4c655f 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl @@ -1,6 +1,6 @@ #include "flywheel:internal/indirect/buffer_bindings.glsl" -layout(local_size_x = 8, local_size_y = 8) in; +layout(local_size_x = 16, local_size_y = 16) in; layout(binding = 0) uniform usampler2D visBuffer; From 0bfaac7154d527fd15f620b937d46bbd7ec84d0b Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Mon, 9 Sep 2024 20:39:10 -0700 Subject: [PATCH 14/17] Poking and prodding - Invert image size on CPU to avoid divisions on GPU - Increase depth reduce group size to 16x16 - Early-out in uploadInstances based on changed cardinality - Much faster to calculate cardinality than it is to clear an AtomicBitSet, so the check is worth it - Upload scatter list directly in the staging buffer if there's room --- .../backend/engine/MaterialRenderState.java | 1 - .../backend/engine/indirect/DepthPyramid.java | 6 +++-- .../engine/indirect/IndirectInstancer.java | 5 +++++ .../engine/indirect/StagingBuffer.java | 22 +++++++++++++++++-- .../internal/indirect/depth_reduce.glsl | 6 ++--- 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java index d4fd52a41..0ddf8e1e7 100644 --- 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java @@ -35,7 +35,6 @@ public final class MaterialRenderState { setupBackfaceCulling(material.backfaceCulling()); setupPolygonOffset(material.polygonOffset()); setupDepthTest(material.depthTest()); -// setupDepthTest(DepthTest.OFF); setupTransparency(material.transparency()); setupWriteMask(material.writeMask()); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java index 1ba12b86f..789891c57 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -11,6 +11,8 @@ import dev.engine_room.flywheel.lib.math.MoreMath; import net.minecraft.client.Minecraft; public class DepthPyramid { + private static final int GROUP_SIZE = 16; + private final GlProgram depthReduceProgram; public int pyramidTextureId = -1; @@ -50,10 +52,10 @@ public class DepthPyramid { GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); - depthReduceProgram.setVec2("imageSize", mipWidth, mipHeight); + depthReduceProgram.setVec2("oneOverImageSize", 1f / (float) mipWidth, 1f / (float) mipHeight); depthReduceProgram.setInt("lod", Math.max(0, i - 1)); - GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1); + GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, GROUP_SIZE), MoreMath.ceilingDiv(mipHeight, GROUP_SIZE), 1); GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index fc8e1361e..abdcf8b24 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -82,6 +82,11 @@ public class IndirectInstancer extends AbstractInstancer } public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) { + if (changedPages.cardinality() == 0) { + // Early return because checking the cardinality is faster than clearing. + return; + } + int numPages = mapping.pageCount(); var instanceCount = instances.size(); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java index c976dcfce..0bea2bdb8 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java @@ -23,6 +23,8 @@ public class StagingBuffer { private static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT; private static final int MAP_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_MAP_FLUSH_EXPLICIT_BIT | GL45C.GL_MAP_INVALIDATE_BUFFER_BIT; + private static final int SSBO_ALIGNMENT = GL45.glGetInteger(GL45.GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); + private final int vbo; private final long map; private final long capacity; @@ -254,7 +256,6 @@ public class StagingBuffer { scatterProgram.bind(); // These bindings don't change between dstVbos. 
- GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle()); GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 1, vbo); int dstVbo; @@ -276,7 +277,24 @@ public class StagingBuffer { } private void dispatchScatter(int dstVbo) { - scatterBuffer.upload(scatterList.ptr(), scatterList.usedBytes()); + var scatterSize = scatterList.usedBytes(); + + long alignedPos = pos + SSBO_ALIGNMENT - 1 - (pos + SSBO_ALIGNMENT - 1) % SSBO_ALIGNMENT; + + long remaining = capacity - alignedPos; + if (scatterSize <= remaining && scatterSize <= totalAvailable) { + MemoryUtil.memCopy(scatterList.ptr(), map + alignedPos, scatterSize); + GL45.glBindBufferRange(GL45C.GL_SHADER_STORAGE_BUFFER, 0, vbo, alignedPos, scatterSize); + + long alignmentCost = alignedPos - pos; + + usedCapacity += scatterSize + alignmentCost; + totalAvailable -= scatterSize + alignmentCost; + pos += scatterSize + alignmentCost; + } else { + scatterBuffer.upload(scatterList.ptr(), scatterSize); + GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle()); + } GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 2, dstVbo); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl index 49bbbf947..35b9b24ba 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/depth_reduce.glsl @@ -1,9 +1,9 @@ -layout(local_size_x = 8, local_size_y = 8) in; +layout(local_size_x = 16, local_size_y = 16) in; layout(binding = 0, r32f) uniform writeonly image2D outImage; layout(binding = 1) uniform sampler2D inImage; -uniform vec2 imageSize; +uniform vec2 oneOverImageSize; uniform int lod; uniform int useMin = 0; @@ -13,7 +13,7 @@ void main() { // Map the output texel to an input texel. 
Properly do the division because generating mip0 maps from the actual // full resolution depth buffer and the aspect ratio may be different from our Po2 pyramid. - ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(textureSize(inImage, lod)) / imageSize)); + ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(textureSize(inImage, lod)) * oneOverImageSize)); float depth01 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 1)).r; float depth11 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 1)).r; From 861009ed11f73627e6d77f3f465b737e18046e58 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 12 Sep 2024 21:32:13 -0700 Subject: [PATCH 15/17] Rapid descent - Implement single (but actually 2) pass downsampling --- .../backend/compile/IndirectPrograms.java | 12 +- .../backend/engine/indirect/DepthPyramid.java | 54 ++++++- .../engine/indirect/IndirectDrawManager.java | 5 +- .../internal/indirect/downsample.glsl | 31 ++++ .../internal/indirect/downsample_first.glsl | 150 ++++++++++++++++++ .../internal/indirect/downsample_second.glsl | 134 ++++++++++++++++ 6 files changed, 382 insertions(+), 4 deletions(-) create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample.glsl create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_first.glsl create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_second.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index 47e381eff..d352d3c03 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -34,7 +34,9 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation 
DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl"); private static final ResourceLocation READ_VISIBILITY_SHADER_MAIN = Flywheel.rl("internal/indirect/read_visibility.glsl"); private static final ResourceLocation ZERO_MODELS_SHADER_MAIN = Flywheel.rl("internal/indirect/zero_models.glsl"); - public static final List UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN, ZERO_MODELS_SHADER_MAIN); + private static final ResourceLocation DOWNSAMPLE_FIRST = Flywheel.rl("internal/indirect/downsample_first.glsl"); + private static final ResourceLocation DOWNSAMPLE_SECOND = Flywheel.rl("internal/indirect/downsample_second.glsl"); + public static final List UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN, ZERO_MODELS_SHADER_MAIN, DOWNSAMPLE_FIRST, DOWNSAMPLE_SECOND); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -205,6 +207,14 @@ public class IndirectPrograms extends AtomicReferenceCounted { return utils.get(DEPTH_REDUCE_SHADER_MAIN); } + public GlProgram getDownsampleFirstProgram() { + return utils.get(DOWNSAMPLE_FIRST); + } + + public GlProgram getDownsampleSecondProgram() { + return utils.get(DOWNSAMPLE_SECOND); + } + public GlProgram getReadVisibilityProgram() { return utils.get(READ_VISIBILITY_SHADER_MAIN); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java index 789891c57..fdfbadb86 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -14,14 +14,18 @@ public class DepthPyramid { private static final int GROUP_SIZE = 16; private final GlProgram 
depthReduceProgram; + private final GlProgram downsampleFirstProgram; + private final GlProgram downsampleSecondProgram; public int pyramidTextureId = -1; private int lastWidth = -1; private int lastHeight = -1; - public DepthPyramid(GlProgram depthReduceProgram) { + public DepthPyramid(GlProgram depthReduceProgram, GlProgram downsampleFirstProgram, GlProgram downsampleSecondProgram) { this.depthReduceProgram = depthReduceProgram; + this.downsampleFirstProgram = downsampleFirstProgram; + this.downsampleSecondProgram = downsampleSecondProgram; } public void generate() { @@ -61,6 +65,54 @@ public class DepthPyramid { } } + public void generateSPD() { + var mainRenderTarget = Minecraft.getInstance() + .getMainRenderTarget(); + + int width = mip0Size(mainRenderTarget.width); + int height = mip0Size(mainRenderTarget.height); + + int mipLevels = getImageMipLevels(width, height); + + createPyramidMips(mipLevels, width, height); + + int depthBufferId = mainRenderTarget.getDepthTextureId(); + + GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(depthBufferId); + + downsampleFirstProgram.bind(); + downsampleFirstProgram.setUInt("max_mip_level", mipLevels); + + for (int i = 0; i < Math.min(6, mipLevels); i++) { + GL46.glBindImageTexture(i + 1, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); + } + + GL46.glDispatchCompute(MoreMath.ceilingDiv(width << 1, 64), MoreMath.ceilingDiv(height << 1, 64), 1); + + if (mipLevels < 7) { + GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT); + + return; + } + + GL46.glMemoryBarrier(GL46.GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + + downsampleSecondProgram.bind(); + downsampleSecondProgram.setUInt("max_mip_level", mipLevels); + + GL46.glBindImageTexture(0, pyramidTextureId, 5, false, 0, GL32.GL_READ_ONLY, GL32.GL_R32F); + for (int i = 6; i < Math.min(12, mipLevels); i++) { + GL46.glBindImageTexture(i - 5, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, 
GL32.GL_R32F); + } + + GL46.glDispatchCompute(1, 1, 1); + + GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT); + } + public void delete() { if (pyramidTextureId != -1) { GL32.glDeleteTextures(pyramidTextureId); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 799495a09..de452a1e5 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -4,6 +4,7 @@ import static org.lwjgl.opengl.GL11.GL_TRIANGLES; import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT; import static org.lwjgl.opengl.GL30.glBindBufferRange; import static org.lwjgl.opengl.GL40.glDrawElementsIndirect; +import static org.lwjgl.opengl.GL42.GL_BUFFER_UPDATE_BARRIER_BIT; import static org.lwjgl.opengl.GL42.glMemoryBarrier; import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT; import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER; @@ -67,7 +68,7 @@ public class IndirectDrawManager extends DrawManager> { lightBuffers = new LightBuffers(); matrixBuffer = new MatrixBuffer(); - depthPyramid = new DepthPyramid(programs.getDepthReduceProgram()); + depthPyramid = new DepthPyramid(programs.getDepthReduceProgram(), programs.getDownsampleFirstProgram(), programs.getDownsampleSecondProgram()); visibilityBuffer = new VisibilityBuffer(programs.getReadVisibilityProgram()); } @@ -124,7 +125,7 @@ public class IndirectDrawManager extends DrawManager> { submitDraws(); - depthPyramid.generate(); + depthPyramid.generateSPD(); programs.getZeroModelProgram() .bind(); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample.glsl new file mode 100644 index 
000000000..c423431e6 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample.glsl @@ -0,0 +1,31 @@ +layout(local_size_x = 256) in; + +uniform uint max_mip_level; + +/// Generates a hierarchical depth buffer. +/// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528 +/// Based on Bevy's more readable implementation https://github.com/JMS55/bevy/blob/ca2c8e63b9562f88c8cd7e1d88a17a4eea20aaf4/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl + +shared float[16][16] intermediate_memory; + +uint extractBits(uint e, uint offset, uint count) { + return (e >> offset) & ((1u << count) - 1u); +} + +uint insertBits(uint e, uint newbits, uint offset, uint count) { + uint countMask = ((1u << count) - 1u); + // zero out the bits we're going to replace first + return (e & ~(countMask << offset)) | ((newbits & countMask) << offset); +} + +uvec2 remap_for_wave_reduction(uint a) { + return uvec2( + insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), + insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u) + ); +} + +float reduce_4(vec4 v) { + return max(max(v.x, v.y), max(v.z, v.w)); +} + diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_first.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_first.glsl new file mode 100644 index 000000000..e3951a45e --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_first.glsl @@ -0,0 +1,150 @@ +#include "flywheel:internal/indirect/downsample.glsl" + +layout(binding = 0) uniform sampler2D mip_0; +layout(binding = 1, r32f) uniform writeonly image2D mip_1; +layout(binding = 2, r32f) uniform writeonly image2D mip_2; +layout(binding = 3, r32f) uniform writeonly image2D mip_3; +layout(binding = 4, r32f) uniform writeonly image2D mip_4; 
+layout(binding = 5, r32f) uniform writeonly image2D mip_5; +layout(binding = 6, r32f) uniform writeonly image2D mip_6; + +float reduce_load_mip_0(uvec2 tex) { + vec2 uv = (vec2(tex) + 0.5) / vec2(imageSize(mip_1)) * 0.5; + return reduce_4(textureGather(mip_0, uv)); +} + +void downsample_mips_0_and_1(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) { + vec4 v; + + ivec2 tex = workgroup_id * 64 + ivec2(x * 2u, y * 2u); + ivec2 pix = workgroup_id * 32 + ivec2(x, y); + v[0] = reduce_load_mip_0(tex); + imageStore(mip_1, pix, vec4(v[0])); + + tex = workgroup_id * 64 + ivec2(x * 2u + 32u, y * 2u); + pix = workgroup_id * 32 + ivec2(x + 16u, y); + v[1] = reduce_load_mip_0(tex); + imageStore(mip_1, pix, vec4(v[1])); + + tex = workgroup_id * 64 + ivec2(x * 2u, y * 2u + 32u); + pix = workgroup_id * 32 + ivec2(x, y + 16u); + v[2] = reduce_load_mip_0(tex); + imageStore(mip_1, pix, vec4(v[2])); + + tex = workgroup_id * 64 + ivec2(x * 2u + 32u, y * 2u + 32u); + pix = workgroup_id * 32 + ivec2(x + 16u, y + 16u); + v[3] = reduce_load_mip_0(tex); + imageStore(mip_1, pix, vec4(v[3])); + + if (max_mip_level <= 1u) { return; } + + for (uint i = 0u; i < 4u; i++) { + intermediate_memory[x][y] = v[i]; + barrier(); + if (local_invocation_index < 64u) { + v[i] = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u] + )); + pix = (workgroup_id * 16) + ivec2( + x + (i % 2u) * 8u, + y + (i / 2u) * 8u + ); + imageStore(mip_2, pix, vec4(v[i])); + } + barrier(); + } + + if (local_invocation_index < 64u) { + intermediate_memory[x + 0u][y + 0u] = v[0]; + intermediate_memory[x + 8u][y + 0u] = v[1]; + intermediate_memory[x + 0u][y + 8u] = v[2]; + intermediate_memory[x + 8u][y + 8u] = v[3]; + } +} + + +void downsample_mip_2(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) { + if (local_invocation_index < 64u) { + 
float v = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u] + )); + imageStore(mip_3, (workgroup_id * 8) + ivec2(x, y), vec4(v)); + intermediate_memory[x * 2u + y % 2u][y * 2u] = v; + } +} + +void downsample_mip_3(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) { + if (local_invocation_index < 16u) { + float v = reduce_4(vec4( + intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u], + intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u] + )); + imageStore(mip_4, (workgroup_id * 4) + ivec2(x, y), vec4(v)); + intermediate_memory[x * 4u + y][y * 4u] = v; + } +} + +void downsample_mip_4(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) { + if (local_invocation_index < 4u) { + float v = reduce_4(vec4( + intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u], + intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u] + )); + imageStore(mip_5, (workgroup_id * 2) + ivec2(x, y), vec4(v)); + intermediate_memory[x + y * 2u][0u] = v; + } +} + +void downsample_mip_5(ivec2 workgroup_id, uint local_invocation_index) { + if (local_invocation_index < 1u) { + float v = reduce_4(vec4( + intermediate_memory[0u][0u], + intermediate_memory[1u][0u], + intermediate_memory[2u][0u], + intermediate_memory[3u][0u] + )); + imageStore(mip_6, workgroup_id, vec4(v)); + } +} + +void downsample_mips_2_to_5(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) { + if (max_mip_level <= 2u) { return; } + barrier(); + downsample_mip_2(x, y, workgroup_id, local_invocation_index); + + if (max_mip_level <= 3u) { return; } + barrier(); + downsample_mip_3(x, y, workgroup_id, 
local_invocation_index); + + if (max_mip_level <= 4u) { return; } + barrier(); + downsample_mip_4(x, y, workgroup_id, local_invocation_index); + + if (max_mip_level <= 5u) { return; } + barrier(); + downsample_mip_5(workgroup_id, local_invocation_index); +} + +void downsample_depth_first() { + uvec2 sub_xy = remap_for_wave_reduction(gl_LocalInvocationIndex % 64u); + uint x = sub_xy.x + 8u * ((gl_LocalInvocationIndex >> 6u) % 2u); + uint y = sub_xy.y + 8u * (gl_LocalInvocationIndex >> 7u); + + downsample_mips_0_and_1(x, y, ivec2(gl_WorkGroupID.xy), gl_LocalInvocationIndex); + + downsample_mips_2_to_5(x, y, ivec2(gl_WorkGroupID.xy), gl_LocalInvocationIndex); +} + +void main() { + downsample_depth_first(); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_second.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_second.glsl new file mode 100644 index 000000000..fe3b64f5a --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_second.glsl @@ -0,0 +1,134 @@ +#include "flywheel:internal/indirect/downsample.glsl" + +layout(binding = 0, r32f) uniform readonly image2D mip_6; +layout(binding = 1, r32f) uniform writeonly image2D mip_7; +layout(binding = 2, r32f) uniform writeonly image2D mip_8; +layout(binding = 3, r32f) uniform writeonly image2D mip_9; +layout(binding = 4, r32f) uniform writeonly image2D mip_10; +layout(binding = 5, r32f) uniform writeonly image2D mip_11; +layout(binding = 6, r32f) uniform writeonly image2D mip_12; + +float reduce_load_mip_6(ivec2 tex) { + return reduce_4(vec4( + imageLoad(mip_6, tex + ivec2(0u, 0u)).r, + imageLoad(mip_6, tex + ivec2(0u, 1u)).r, + imageLoad(mip_6, tex + ivec2(1u, 0u)).r, + imageLoad(mip_6, tex + ivec2(1u, 1u)).r + )); +} + +void downsample_mips_6_and_7(uint x, uint y) { + vec4 v; + + ivec2 tex = ivec2(x * 4u + 0u, y * 4u + 0u); + ivec2 pix = ivec2(x * 2u + 0u, y * 2u + 0u); + v[0] = 
reduce_load_mip_6(tex); + imageStore(mip_7, pix, vec4(v[0])); + + tex = ivec2(x * 4u + 2u, y * 4u + 0u); + pix = ivec2(x * 2u + 1u, y * 2u + 0u); + v[1] = reduce_load_mip_6(tex); + imageStore(mip_7, pix, vec4(v[1])); + + tex = ivec2(x * 4u + 0u, y * 4u + 2u); + pix = ivec2(x * 2u + 0u, y * 2u + 1u); + v[2] = reduce_load_mip_6(tex); + imageStore(mip_7, pix, vec4(v[2])); + + tex = ivec2(x * 4u + 2u, y * 4u + 2u); + pix = ivec2(x * 2u + 1u, y * 2u + 1u); + v[3] = reduce_load_mip_6(tex); + imageStore(mip_7, pix, vec4(v[3])); + + if (max_mip_level <= 7u) { return; } + + float vr = reduce_4(v); + imageStore(mip_8, ivec2(x, y), vec4(vr)); + intermediate_memory[x][y] = vr; +} + + +void downsample_mip_8(uint x, uint y, uint local_invocation_index) { + if (local_invocation_index < 64u) { + float v = reduce_4(vec4( + intermediate_memory[x * 2u + 0u][y * 2u + 0u], + intermediate_memory[x * 2u + 1u][y * 2u + 0u], + intermediate_memory[x * 2u + 0u][y * 2u + 1u], + intermediate_memory[x * 2u + 1u][y * 2u + 1u] + )); + imageStore(mip_9, ivec2(x, y), vec4(v)); + intermediate_memory[x * 2u + y % 2u][y * 2u] = v; + } +} + +void downsample_mip_9(uint x, uint y, uint local_invocation_index) { + if (local_invocation_index < 16u) { + float v = reduce_4(vec4( + intermediate_memory[x * 4u + 0u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 2u + 0u][y * 4u + 0u], + intermediate_memory[x * 4u + 0u + 1u][y * 4u + 2u], + intermediate_memory[x * 4u + 2u + 1u][y * 4u + 2u] + )); + imageStore(mip_10, ivec2(x, y), vec4(v)); + intermediate_memory[x * 4u + y][y * 4u] = v; + } +} + +void downsample_mip_10(uint x, uint y, uint local_invocation_index) { + if (local_invocation_index < 4u) { + float v = reduce_4(vec4( + intermediate_memory[x * 8u + 0u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 4u + 0u + y * 2u][y * 8u + 0u], + intermediate_memory[x * 8u + 0u + 1u + y * 2u][y * 8u + 4u], + intermediate_memory[x * 8u + 4u + 1u + y * 2u][y * 8u + 4u] + )); + imageStore(mip_11, ivec2(x, 
y), vec4(v)); + intermediate_memory[x + y * 2u][0u] = v; + } +} + +void downsample_mip_11(uint local_invocation_index) { + if (local_invocation_index < 1u) { + float v = reduce_4(vec4( + intermediate_memory[0u][0u], + intermediate_memory[1u][0u], + intermediate_memory[2u][0u], + intermediate_memory[3u][0u] + )); + + imageStore(mip_12, ivec2(0u, 0u), vec4(v)); + } +} + + +void downsample_mips_8_to_11(uint x, uint y, uint local_invocation_index) { + if (max_mip_level <= 8u) { return; } + barrier(); + downsample_mip_8(x, y, local_invocation_index); + + if (max_mip_level <= 9u) { return; } + barrier(); + downsample_mip_9(x, y, local_invocation_index); + + if (max_mip_level <= 10u) { return; } + barrier(); + downsample_mip_10(x, y, local_invocation_index); + + if (max_mip_level <= 11u) { return; } + barrier(); + downsample_mip_11(local_invocation_index); +} + +void downsample_depth_second() { + uvec2 sub_xy = remap_for_wave_reduction(gl_LocalInvocationIndex % 64u); + uint x = sub_xy.x + 8u * ((gl_LocalInvocationIndex >> 6u) % 2u); + uint y = sub_xy.y + 8u * (gl_LocalInvocationIndex >> 7u); + + downsample_mips_6_and_7(x, y); + + downsample_mips_8_to_11(x, y, gl_LocalInvocationIndex); +} + +void main() { + downsample_depth_second(); +} From 0151364b8a07021fb06da3aae96cfa4fc982ab06 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Fri, 13 Sep 2024 21:06:48 -0700 Subject: [PATCH 16/17] Clear for debugging - Nsight explodes with the scatter shader resetting the indirect dispatch buffer - Instead, issue a clear buffer and buffer update barrier --- .../backend/engine/indirect/IndirectCullingGroup.java | 9 ++------- .../backend/engine/indirect/IndirectDrawManager.java | 2 +- .../flywheel/flywheel/internal/indirect/early_cull.glsl | 5 +++++ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 0d3693187..72181d152 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -14,7 +14,7 @@ import java.util.EnumMap; import java.util.List; import java.util.Map; -import org.lwjgl.system.MemoryUtil; +import org.lwjgl.opengl.GL46; import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.InstanceType; @@ -129,12 +129,7 @@ public class IndirectCullingGroup { uploadDraws(stagingBuffer); - stagingBuffer.enqueueCopy(4 * Integer.BYTES, buffers.passTwoDispatch.handle(), 0, ptr -> { - MemoryUtil.memPutInt(ptr, 0); - MemoryUtil.memPutInt(ptr + 4, 1); - MemoryUtil.memPutInt(ptr + 8, 1); - MemoryUtil.memPutInt(ptr + 12, 0); - }); + GL46.nglClearNamedBufferData(buffers.passTwoDispatch.handle(), GL46.GL_R32UI, GL46.GL_RED, GL46.GL_UNSIGNED_INT, 0); } public void dispatchCull() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index de452a1e5..ab7d8e3a6 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -107,7 +107,7 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); Uniforms.bindAll(); - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); visibilityBuffer.bind(); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl 
b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl index 840d3813d..1c82a935d 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -112,6 +112,11 @@ void main() { if (targetIndex % 32u == 0u) { // This thread wrote an index that will be at the start of a new workgroup later atomicAdd(_flw_lateCullDispatch.x, 1); + + if (targetIndex == 0) { + _flw_lateCullDispatch.y = 1; + _flw_lateCullDispatch.z = 1; + } } } } From ba3d84b5ae201e5ec7e4590ff91e32d4cef591b4 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Fri, 13 Sep 2024 22:39:31 -0700 Subject: [PATCH 17/17] Seeing blue - Optimize read visibility by having each invocation read a 2x2 area and coalescing atomicOrs when all 4 texels are equal - Also use the fancy remap function for better texture cache locality --- .../engine/indirect/VisibilityBuffer.java | 2 +- .../internal/indirect/read_visibility.glsl | 43 +++++++++++++++++-- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index 8cfeaecc9..9b266b65a 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -16,7 +16,7 @@ import it.unimi.dsi.fastutil.ints.IntSet; import net.minecraft.client.Minecraft; public class VisibilityBuffer { - private static final int READ_GROUP_SIZE = 16; + private static final int READ_GROUP_SIZE = 32; private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; private final GlProgram readVisibilityProgram; diff --git 
a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl index 52d4c655f..b4d506f16 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/read_visibility.glsl @@ -1,6 +1,6 @@ #include "flywheel:internal/indirect/buffer_bindings.glsl" -layout(local_size_x = 16, local_size_y = 16) in; +layout(local_size_x = 256) in; layout(binding = 0) uniform usampler2D visBuffer; @@ -8,9 +8,24 @@ layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buf uint _flw_lastFrameVisibility[]; }; -void main() { - uint instanceID = texelFetch(visBuffer, ivec2(gl_GlobalInvocationID.xy), 0).r; +uint extractBits(uint e, uint offset, uint count) { + return (e >> offset) & ((1u << count) - 1u); +} +uint insertBits(uint e, uint newbits, uint offset, uint count) { + uint countMask = ((1u << count) - 1u); + // zero out the bits we're going to replace first + return (e & ~(countMask << offset)) | ((newbits & countMask) << offset); +} + +uvec2 remap_for_wave_reduction(uint a) { + return uvec2( + insertBits(extractBits(a, 2u, 3u), a, 0u, 1u), + insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u) + ); +} + +void emit(uint instanceID) { // Null instance id. 
if (instanceID == 0) { return; @@ -25,3 +40,25 @@ void main() { atomicOr(_flw_lastFrameVisibility[index], mask); } + +void main() { + uvec2 sub_xy = remap_for_wave_reduction(gl_LocalInvocationIndex % 64u); + uint x = sub_xy.x + 8u * ((gl_LocalInvocationIndex >> 6u) % 2u); + uint y = sub_xy.y + 8u * (gl_LocalInvocationIndex >> 7u); + + ivec2 tex = ivec2(gl_WorkGroupID.xy) * 32 + ivec2(x, y) * 2; + + uint instanceID01 = texelFetchOffset(visBuffer, tex, 0, ivec2(0, 1)).r; + uint instanceID11 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 1)).r; + uint instanceID10 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 0)).r; + uint instanceID00 = texelFetch(visBuffer, tex, 0).r; + + if (instanceID00 == instanceID01 && instanceID01 == instanceID10 && instanceID10 == instanceID11) { + emit(instanceID00); + } else { + emit(instanceID00); + emit(instanceID01); + emit(instanceID10); + emit(instanceID11); + } +}