Ctrl + Alt + N

- IndirectInstancer#uploadInstances: reduced from 46% of render thread time to 26%
- Inline #enqueueCopy to avoid allocating LongConsumers
- Do not even bother to track individual changed indices; instead, rely
  on just the changedPages set
This commit is contained in:
Jozufozu 2024-09-15 21:29:00 -07:00
parent c658b2bfe3
commit e1b594ac47
2 changed files with 25 additions and 7 deletions

View File

@ -42,7 +42,6 @@ public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I>
if (index < 0 || index >= instanceCount()) {
return;
}
changed.set(index);
changedPages.set(ObjectStorage.objectIndex2PageIndex(index));
}
@ -102,15 +101,34 @@ public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I>
long baseByte = mapping.page2ByteOffset(page);
long size = (endObject - startObject) * instanceStride;
stagingBuffer.enqueueCopy(size, instanceVbo, baseByte, ptr -> {
// Because writes are broken into pages, we end up with significantly more calls into
// StagingBuffer#enqueueCopy and the allocations for the writer got out of hand. Here
// we've inlined the enqueueCopy call and do not allocate the write lambda at all.
// Doing so cut upload times in half.
// Try to write directly into the staging buffer if there is enough contiguous space.
long direct = stagingBuffer.reserveForCopy(size, instanceVbo, baseByte);
if (direct != MemoryUtil.NULL) {
for (int i = startObject; i < endObject; i++) {
writer.write(ptr, instances.get(i));
ptr += instanceStride;
var instance = instances.get(i);
writer.write(direct, instance);
direct += instanceStride;
}
});
continue;
}
// Otherwise, write to a scratch buffer and enqueue a copy.
var block = stagingBuffer.getScratch(size);
var ptr = block.ptr();
for (int i = startObject; i < endObject; i++) {
var instance = instances.get(i);
writer.write(ptr, instance);
ptr += instanceStride;
}
stagingBuffer.enqueueCopy(block.ptr(), size, instanceVbo, baseByte);
}
changed.clear();
changedPages.clear();
}

View File

@ -223,7 +223,7 @@ public class StagingBuffer {
FlwMemoryTracker._freeCpuMemory(capacity);
}
private MemoryBlock getScratch(long size) {
public MemoryBlock getScratch(long size) {
if (scratch == null) {
scratch = MemoryBlock.malloc(size);
} else if (scratch.size() < size) {