From dfca589c5b42a9593056d013c5ea4f790cbb8eae Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 31 Mar 2024 12:56:01 -0700 Subject: [PATCH] Boundless changes - Fix exception thrown uploading indirect instances when an instance was added in the same frame that the tail instance was deleted. We'd leave behind some set changed bits out of bounds of the instance list, and try to access those indices later - Add out of bounds guards within forEachSetSpan, though I don't think they're actually necessary - Add AtomicBitset#set and #clear range methods - Update all changed bits at once when deleting instances rather than doing it in the loop - Start iterating from the first set bit rather than 0 to save potentially thousands of iterations - Smarter model index uploading --- .../backend/engine/AbstractInstancer.java | 21 +- .../engine/indirect/IndirectCullingGroup.java | 7 +- .../backend/engine/indirect/IndirectDraw.java | 14 +- .../engine/indirect/IndirectInstancer.java | 36 ++-- .../engine/instancing/InstancedInstancer.java | 9 +- .../flywheel/lib/util/AtomicBitset.java | 201 ++++++++++++++++-- 6 files changed, 226 insertions(+), 62 deletions(-) diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java index 95fece409..62c905e65 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/AbstractInstancer.java @@ -119,8 +119,20 @@ public abstract class AbstractInstancer implements Instancer final int newSize = oldSize - removeCount; + // Start from the first deleted index. + int writePos = deleted.nextSetBit(0); + + if (writePos < newSize) { + // Since we'll be shifting everything into this space we can consider it all changed. + changed.set(writePos, newSize); + } + + // We definitely shouldn't consider the deleted instances as changed though, + // else we might try some out of bounds accesses later. + changed.clear(newSize, oldSize); + // Punch out the deleted instances, shifting over surviving instances to fill their place. - for (int scanPos = 0, writePos = 0; (scanPos < oldSize) && (writePos < newSize); scanPos++, writePos++) { + for (int scanPos = writePos; (scanPos < oldSize) && (writePos < newSize); scanPos++, writePos++) { // Find next non-deleted element. scanPos = deleted.nextClearBit(scanPos); @@ -133,13 +145,8 @@ public abstract class AbstractInstancer implements Instancer handles.set(writePos, handle); instances.set(writePos, instance); - // Make sure the handle knows it's been moved... + // Make sure the handle knows it's been moved handle.index = writePos; - // ...and set it changed to force an upload. - changed.set(writePos); - - // Clear the old slot. There's nothing meaningful there that can be considered "changed". - changed.clear(scanPos); } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java index e705771bb..2f8969941 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -82,7 +82,7 @@ public class IndirectCullingGroup { continue; } - instancer.index = modelIndex; + instancer.modelIndex = modelIndex; instancer.baseInstance = instanceCountThisFrame; instanceCountThisFrame += instanceCount; @@ -177,7 +177,7 @@ public class IndirectCullingGroup { } public void add(IndirectInstancer instancer, Model model, RenderStage stage, MeshPool meshPool) { - instancer.index = instancers.size(); + instancer.modelIndex = instancers.size(); instancers.add(instancer); List meshes = model.meshes(); @@ -243,9 +243,6 @@ public class IndirectCullingGroup { for (var instancer : instancers) { instancer.uploadModelIndices(stagingBuffer, buffers.modelIndex.handle()); - } - - for (var instancer : instancers) { instancer.resetChanged(); } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java index 77e1111b4..b96892c1d 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java @@ -9,7 +9,7 @@ import com.jozufozu.flywheel.backend.engine.MaterialEncoder; import com.jozufozu.flywheel.backend.engine.MeshPool; public class IndirectDraw { - private final IndirectInstancer model; + private final IndirectInstancer instancer; private final Material material; private final MeshPool.PooledMesh mesh; private final RenderStage stage; @@ -21,8 +21,8 @@ public class IndirectDraw { private final int packedMaterialProperties; private boolean deleted; - public IndirectDraw(IndirectInstancer model, Material material, MeshPool.PooledMesh mesh, RenderStage stage, int indexOfMeshInModel) { - this.model = model; + public IndirectDraw(IndirectInstancer instancer, Material material, MeshPool.PooledMesh mesh, RenderStage stage, int indexOfMeshInModel) { + this.instancer = instancer; this.material = material; this.mesh = mesh; this.stage = stage; @@ -61,9 +61,9 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount - to be set by the apply shader MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex()); // firstIndex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex()); // baseVertex - MemoryUtil.memPutInt(ptr + 16, model.baseInstance); // baseInstance + MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance); // baseInstance - MemoryUtil.memPutInt(ptr + 20, model.index); // modelIndex + MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex MemoryUtil.memPutInt(ptr + 24, materialVertexIndex); // materialVertexIndex MemoryUtil.memPutInt(ptr + 28, materialFragmentIndex); // materialFragmentIndex @@ -76,9 +76,9 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 4, 1); // instanceCount - only drawing one instance MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex()); // firstIndex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex()); // baseVertex - MemoryUtil.memPutInt(ptr + 16, model.baseInstance + instanceIndex); // baseInstance + MemoryUtil.memPutInt(ptr + 16, instancer.baseInstance + instanceIndex); // baseInstance - MemoryUtil.memPutInt(ptr + 20, model.index); // modelIndex + MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex MemoryUtil.memPutInt(ptr + 24, ShaderIndices.getVertexShaderIndex(materialOverride.shaders())); // materialVertexIndex MemoryUtil.memPutInt(ptr + 28, ShaderIndices.getFragmentShaderIndex(materialOverride.shaders())); // materialFragmentIndex diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java index 90f53e446..07109caa3 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -19,10 +19,11 @@ public class IndirectInstancer extends AbstractInstancer private final List associatedDraws = new ArrayList<>(); private final Vector4fc boundingSphere; - public int index = -1; + public int modelIndex = -1; public int baseInstance = -1; private int lastModelIndex = -1; private int lastBaseInstance = -1; + private int lastInstanceCount = -1; public IndirectInstancer(InstanceType type, Environment environment, Model model) { super(type, environment); @@ -66,26 +67,31 @@ public class IndirectInstancer extends AbstractInstancer public void uploadModelIndices(StagingBuffer stagingBuffer, int modelIndexVbo) { long modelIndexBaseByte = baseInstance * IndirectBuffers.INT_SIZE; - if (baseInstance != lastBaseInstance || index != lastModelIndex) { + if (baseInstance != lastBaseInstance || modelIndex != lastModelIndex || instances.size() > lastInstanceCount) { uploadAllModelIndices(stagingBuffer, modelIndexBaseByte, modelIndexVbo); - } else { - uploadChangedModelIndices(stagingBuffer, modelIndexBaseByte, modelIndexVbo); } } public void resetChanged() { - lastModelIndex = index; + lastModelIndex = modelIndex; lastBaseInstance = baseInstance; + lastInstanceCount = instances.size(); changed.clear(); } private void uploadChangedInstances(StagingBuffer stagingBuffer, long baseByte, int instanceVbo) { changed.forEachSetSpan((startInclusive, endInclusive) -> { - int instanceCount = endInclusive - startInclusive + 1; + // Generally we're good about ensuring we don't have changed bits set out of bounds, but check just in case + if (startInclusive >= instances.size()) { + return; + } + int actualEnd = Math.min(endInclusive, instances.size() - 1); + + int instanceCount = actualEnd - startInclusive + 1; long totalSize = instanceCount * instanceStride; stagingBuffer.enqueueCopy(totalSize, instanceVbo, baseByte + startInclusive * instanceStride, ptr -> { - for (int i = startInclusive; i <= endInclusive; i++) { + for (int i = startInclusive; i <= actualEnd; i++) { var instance = instances.get(i); writer.write(ptr, instance); ptr += instanceStride; @@ -94,20 +100,6 @@ public class IndirectInstancer extends AbstractInstancer }); } - private void uploadChangedModelIndices(StagingBuffer stagingBuffer, long modelIndexBaseByte, int modelIndexVbo) { - changed.forEachSetSpan((startInclusive, endInclusive) -> { - int instanceCount = endInclusive - startInclusive + 1; - long modelIndexTotalSize = instanceCount * IndirectBuffers.INT_SIZE; - - stagingBuffer.enqueueCopy(modelIndexTotalSize, modelIndexVbo, modelIndexBaseByte + startInclusive * IndirectBuffers.INT_SIZE, ptr -> { - for (int i = startInclusive; i <= endInclusive; i++) { - MemoryUtil.memPutInt(ptr, index); - ptr += IndirectBuffers.INT_SIZE; - } - }); - }); - } - private void uploadAllInstances(StagingBuffer stagingBuffer, long baseByte, int instanceVbo) { long totalSize = instances.size() * instanceStride; @@ -124,7 +116,7 @@ public class IndirectInstancer extends AbstractInstancer stagingBuffer.enqueueCopy(modelIndexTotalSize, modelIndexVbo, modelIndexBaseByte, ptr -> { for (int i = 0; i < instances.size(); i++) { - MemoryUtil.memPutInt(ptr, index); + MemoryUtil.memPutInt(ptr, modelIndex); ptr += IndirectBuffers.INT_SIZE; } }); diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedInstancer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedInstancer.java index ae148a137..afc225ef5 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedInstancer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/instancing/InstancedInstancer.java @@ -74,9 +74,14 @@ public class InstancedInstancer extends AbstractInstancer private void writeChanged() { changed.forEachSetSpan((startInclusive, endInclusive) -> { - var temp = MemoryBlock.malloc((long) instanceStride * (endInclusive - startInclusive + 1)); + // Generally we're good about ensuring we don't have changed bits set out of bounds, but check just in case + if (startInclusive >= instances.size()) { + return; + } + int actualEnd = Math.min(endInclusive, instances.size() - 1); + var temp = MemoryBlock.malloc((long) instanceStride * (actualEnd - startInclusive + 1)); long ptr = temp.ptr(); - for (int i = startInclusive; i <= endInclusive; i++) { + for (int i = startInclusive; i <= actualEnd; i++) { writer.write(ptr, instances.get(i)); ptr += instanceStride; } diff --git a/src/main/java/com/jozufozu/flywheel/lib/util/AtomicBitset.java b/src/main/java/com/jozufozu/flywheel/lib/util/AtomicBitset.java index 9d1a30b89..7a17a62e6 100644 --- a/src/main/java/com/jozufozu/flywheel/lib/util/AtomicBitset.java +++ b/src/main/java/com/jozufozu/flywheel/lib/util/AtomicBitset.java @@ -5,12 +5,16 @@ import java.util.BitSet; import java.util.concurrent.atomic.AtomicLongArray; import java.util.concurrent.atomic.AtomicReference; +import org.jetbrains.annotations.NotNull; + // https://github.com/Netflix/hollow/blob/master/hollow/src/main/java/com/netflix/hollow/core/memory/ThreadSafeBitSet.java // Refactored to remove unused methods, deduplicate some code segments, and add extra functionality with #forEachSetSpan public class AtomicBitset { // 1024 bits, 128 bytes, 16 longs per segment public static final int DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS = 10; + private static final long WORD_MASK = 0xffffffffffffffffL; + private final int numLongsPerSegment; private final int log2SegmentSize; private final int segmentMask; @@ -44,19 +48,7 @@ public class AtomicBitset { AtomicLongArray segment = getSegmentForPosition(position); - long mask = maskForPosition(position); - - // Thread safety: we need to loop until we win the race to set the long value. - while (true) { - // determine what the new long value will be after we set the appropriate bit. - long currentLongValue = segment.get(longPosition); - long newLongValue = currentLongValue | mask; - - // if no other thread has modified the value since we read it, we won the race and we are done. - if (segment.compareAndSet(longPosition, currentLongValue, newLongValue)) { - break; - } - } + setOr(segment, longPosition, maskForPosition(position)); } public void clear(int position) { @@ -64,16 +56,184 @@ public class AtomicBitset { AtomicLongArray segment = getSegmentForPosition(position); - long mask = ~maskForPosition(position); + setAnd(segment, longPosition, ~maskForPosition(position)); + } + public void set(int fromIndex, int toIndex) { + if (fromIndex == toIndex) { + return; + } + + int firstSegmentIndex = segmentIndexForPosition(fromIndex); + int toSegmentIndex = segmentIndexForPosition(toIndex); + + var segments = expandToFit(toSegmentIndex); + + int fromLongIndex = longIndexInSegmentForPosition(fromIndex); + int toLongIndex = longIndexInSegmentForPosition(toIndex); + + long fromLongMask = WORD_MASK << fromIndex; + long toLongMask = WORD_MASK >>> -toIndex; + + var segment = segments.getSegment(firstSegmentIndex); + + if (firstSegmentIndex == toSegmentIndex) { + // Case 1: One segment + + if (fromLongIndex == toLongIndex) { + // Case 1A: One Long + setOr(segment, fromLongIndex, (fromLongMask & toLongMask)); + } else { + // Case 1B: Multiple Longs + // Handle first word + setOr(segment, fromLongIndex, fromLongMask); + + // Handle intermediate words, if any + for (int i = fromLongIndex + 1; i < toLongIndex; i++) { + segment.set(i, WORD_MASK); + } + + // Handle last word + setOr(segment, toLongIndex, toLongMask); + } + } else { + // Case 2: Multiple Segments + // Handle first segment + + // Handle first word + setOr(segment, fromLongIndex, fromLongMask); + + // Handle trailing words, if any + for (int i = fromLongIndex + 1; i < numLongsPerSegment; i++) { + segment.set(i, WORD_MASK); + } + + // Nuke intermediate segments, if any + for (int i = firstSegmentIndex + 1; i < toSegmentIndex; i++) { + segment = segments.getSegment(i); + + for (int j = 0; j < segment.length(); j++) { + segment.set(j, WORD_MASK); + } + } + + // Handle last segment + segment = segments.getSegment(toSegmentIndex); + + // Handle leading words, if any + for (int i = 0; i < toLongIndex; i++) { + segment.set(i, WORD_MASK); + } + + // Handle last word + setOr(segment, toLongIndex, toLongMask); + } + } + + public void clear(int fromIndex, int toIndex) { + if (fromIndex == toIndex) { + return; + } + + var segments = this.segments.get(); + int numSegments = segments.numSegments(); + + int firstSegmentIndex = segmentIndexForPosition(fromIndex); + + if (firstSegmentIndex >= numSegments) { + return; + } + + int toSegmentIndex = segmentIndexForPosition(toIndex); + + if (toSegmentIndex >= numSegments) { + toSegmentIndex = numSegments - 1; + } + + int fromLongIndex = longIndexInSegmentForPosition(fromIndex); + int toLongIndex = longIndexInSegmentForPosition(toIndex); + + long fromLongMask = WORD_MASK << fromIndex; + long toLongMask = WORD_MASK >>> -toIndex; + + var segment = segments.getSegment(firstSegmentIndex); + + if (firstSegmentIndex == toSegmentIndex) { + // Case 1: One segment + + if (fromLongIndex == toLongIndex) { + // Case 1A: One Long + setAnd(segment, fromLongIndex, ~(fromLongMask & toLongMask)); + } else { + // Case 1B: Multiple Longs + // Handle first word + setAnd(segment, fromLongIndex, ~fromLongMask); + + // Handle intermediate words, if any + for (int i = fromLongIndex + 1; i < toLongIndex; i++) { + segment.set(i, 0); + } + + // Handle last word + setAnd(segment, toLongIndex, ~toLongMask); + } + } else { + // Case 2: Multiple Segments + // Handle first segment + + // Handle first word + setAnd(segment, fromLongIndex, ~fromLongMask); + + // Handle trailing words, if any + for (int i = fromLongIndex + 1; i < numLongsPerSegment; i++) { + segment.set(i, 0); + } + + // Nuke intermediate segments, if any + for (int i = firstSegmentIndex + 1; i < toSegmentIndex; i++) { + segment = segments.getSegment(i); + + for (int j = 0; j < segment.length(); j++) { + segment.set(j, 0); + } + } + + // Handle last segment + segment = segments.getSegment(toSegmentIndex); + + // Handle leading words, if any + for (int i = 0; i < toLongIndex; i++) { + segment.set(i, 0); + } + + // Handle last word + setAnd(segment, toLongIndex, ~toLongMask); + } + } + + private void setOr(AtomicLongArray segment, int indexInSegment, long mask) { // Thread safety: we need to loop until we win the race to set the long value. while (true) { // determine what the new long value will be after we set the appropriate bit. - long currentLongValue = segment.get(longPosition); + long currentLongValue = segment.get(indexInSegment); + long newLongValue = currentLongValue | mask; + + // if no other thread has modified the value since we read it, we won the race and we are done. + if (segment.compareAndSet(indexInSegment, currentLongValue, newLongValue)) { + break; + } + } + } + + private void setAnd(AtomicLongArray segment, int indexInSegment, long mask) { + // Thread safety: we need to loop until we win the race to set the long value. + while (true) { + // determine what the new long value will be after we set the appropriate bit. + long currentLongValue = segment.get(indexInSegment); long newLongValue = currentLongValue & mask; // if no other thread has modified the value since we read it, we won the race and we are done. - if (segment.compareAndSet(longPosition, currentLongValue, newLongValue)) { + if (segment.compareAndSet(indexInSegment, currentLongValue, newLongValue)) { break; } } @@ -113,7 +273,6 @@ public class AtomicBitset { throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex); } - AtomicBitsetSegments segments = this.segments.get(); int segmentPosition = segmentIndexForPosition(fromIndex); @@ -315,6 +474,11 @@ public class AtomicBitset { * @return the segment */ private AtomicLongArray segmentForPosition(int segmentIndex) { + return expandToFit(segmentIndex).getSegment(segmentIndex); + } + + @NotNull + private AtomicBitsetSegments expandToFit(int segmentIndex) { AtomicBitsetSegments visibleSegments = segments.get(); while (visibleSegments.numSegments() <= segmentIndex) { @@ -333,8 +497,7 @@ public class AtomicBitset { visibleSegments = segments.get(); } } - - return visibleSegments.getSegment(segmentIndex); + return visibleSegments; } private static class AtomicBitsetSegments {