From e1b594ac479a0b9d3b1f0ad9e7e7718a9e89a7f1 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 15 Sep 2024 21:29:00 -0700 Subject: [PATCH] Ctrl + Alt + N - IndirectInstancer#uploadInstances: 46% of render thread to 26% - Inline #enqueueCopy to avoid allocating LongConsumers - Do not even bother to track individual changed indices, instead rely on just the changedPage set --- .../engine/indirect/IndirectInstancer.java | 30 +++++++++++++++---- .../engine/indirect/StagingBuffer.java | 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index 2209069bb..7e3ef62c3 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -42,7 +42,6 @@ public class IndirectInstancer extends AbstractInstancer if (index < 0 || index >= instanceCount()) { return; } - changed.set(index); changedPages.set(ObjectStorage.objectIndex2PageIndex(index)); } @@ -102,15 +101,34 @@ public class IndirectInstancer extends AbstractInstancer long baseByte = mapping.page2ByteOffset(page); long size = (endObject - startObject) * instanceStride; - stagingBuffer.enqueueCopy(size, instanceVbo, baseByte, ptr -> { + // Because writes are broken into pages, we end up with significantly more calls into + // StagingBuffer#enqueueCopy and the allocations for the writer got out of hand. Here + // we've inlined the enqueueCopy call and do not allocate the write lambda at all. + // Doing so cut upload times in half. + + // Try to write directly into the staging buffer if there is enough contiguous space. + long direct = stagingBuffer.reserveForCopy(size, instanceVbo, baseByte); + + if (direct != MemoryUtil.NULL) { for (int i = startObject; i < endObject; i++) { - writer.write(ptr, instances.get(i)); - ptr += instanceStride; + var instance = instances.get(i); + writer.write(direct, instance); + direct += instanceStride; } - }); + continue; + } + + // Otherwise, write to a scratch buffer and enqueue a copy. + var block = stagingBuffer.getScratch(size); + var ptr = block.ptr(); + for (int i = startObject; i < endObject; i++) { + var instance = instances.get(i); + writer.write(ptr, instance); + ptr += instanceStride; + } + stagingBuffer.enqueueCopy(block.ptr(), size, instanceVbo, baseByte); } - changed.clear(); changedPages.clear(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java index c976dcfce..308f97af4 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/StagingBuffer.java @@ -223,7 +223,7 @@ public class StagingBuffer { FlwMemoryTracker._freeCpuMemory(capacity); } - private MemoryBlock getScratch(long size) { + public MemoryBlock getScratch(long size) { if (scratch == null) { scratch = MemoryBlock.malloc(size); } else if (scratch.size() < size) {