From 7a7d58adf2892634944c3b72bfe9e20fde13cbca Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 15 Aug 2024 11:41:33 -0700 Subject: [PATCH] Embeds your embeddings - Optimize embeddings on indirect backend by uploading all matrices in an SSBO - Allocate matrices in an arena - Flatten IndirectCullingGroups to only be parameterized by InstanceType, so now all instances from all embeddings get culled in the same dispatch - Sort indirect draws by whether they're embedded before anything else - Include an "embedded" boolean in the MultiDraw record to decide which shader to use - Include "matrixIndex" field in model descriptor and indirect draw structs - Use matrixIndex == 0 to indicate that a matrix is the identity to avoid unnecessary work in the cull shader - Add helper to write a mat3 as 3 vec4s --- .../flywheel/backend/engine/DrawManager.java | 3 +- .../flywheel/backend/engine/EngineImpl.java | 3 +- .../engine/embed/EmbeddedEnvironment.java | 19 ++++++---- .../engine/embed/EmbeddingUniforms.java | 8 +--- .../backend/engine/embed/Environment.java | 4 +- .../engine/embed/EnvironmentStorage.java | 38 ++++++++++++++++--- .../engine/embed/GlobalEnvironment.java | 6 +-- .../engine/indirect/BufferBindings.java | 1 + .../engine/indirect/IndirectBuffers.java | 4 +- .../engine/indirect/IndirectCullingGroup.java | 19 +++++----- .../backend/engine/indirect/IndirectDraw.java | 25 ++++++++---- .../engine/indirect/IndirectDrawManager.java | 17 ++++++--- .../engine/indirect/IndirectInstancer.java | 9 +++-- .../backend/engine/indirect/MatrixBuffer.java | 33 ++++++++++++++++ .../instancing/InstancedDrawManager.java | 5 ++- .../flywheel/flywheel/internal/common.vert | 4 +- .../internal/indirect/buffer_bindings.glsl | 1 + .../flywheel/internal/indirect/cull.glsl | 11 ++++-- .../internal/indirect/draw_command.glsl | 1 + .../flywheel/internal/indirect/main.vert | 15 +++++++- .../flywheel/internal/indirect/matrices.glsl | 11 ++++++ .../internal/indirect/model_descriptor.glsl | 1 + 
.../flywheel/internal/instancing/main.vert | 10 +++++ .../flywheel/lib/util/ExtraMemoryOps.java | 15 ++++++++ 24 files changed, 199 insertions(+), 64 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java index cf07a4597..476a9abc4 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java @@ -16,6 +16,7 @@ import dev.engine_room.flywheel.api.model.Model; import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.FlwBackend; import dev.engine_room.flywheel.backend.engine.embed.Environment; +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.lib.util.Pair; import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; @@ -40,7 +41,7 @@ public abstract class DrawManager> { return (Instancer) instancers.computeIfAbsent(new InstancerKey<>(environment, type, model, visualType, bias), this::createAndDeferInit); } - public void flush(LightStorage lightStorage) { + public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) { // Thread safety: flush is called from the render thread after all visual updates have been made, // so there are no:tm: threads we could be racing with. 
for (var instancer : initializationQueue) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java index 36ab2b11f..394085a70 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java @@ -89,7 +89,7 @@ public class EngineImpl implements Engine { try (var state = GlStateTracker.getRestoreState()) { Uniforms.update(context); environmentStorage.flush(); - drawManager.flush(lightStorage); + drawManager.flush(lightStorage, environmentStorage); } } @@ -107,6 +107,7 @@ public class EngineImpl implements Engine { public void delete() { drawManager.delete(); lightStorage.delete(); + environmentStorage.delete(); } public Instancer instancer(Environment environment, InstanceType type, Model model, VisualType visualType, int bias) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java index 89957dce9..09dff65c8 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java @@ -16,6 +16,7 @@ import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.engine.EngineImpl; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.util.ExtraMemoryOps; import net.minecraft.core.Vec3i; public class EmbeddedEnvironment implements VisualEmbedding, Environment { @@ -31,6 +32,8 @@ public class EmbeddedEnvironment implements VisualEmbedding, Environment { private final Matrix4f poseComposed = new Matrix4f(); private 
final Matrix3f normalComposed = new Matrix3f(); + public int matrixIndex = 0; + private boolean deleted = false; public EmbeddedEnvironment(EngineImpl engine, VisualType visualType, Vec3i renderOrigin, @Nullable EmbeddedEnvironment parent) { @@ -81,23 +84,25 @@ public class EmbeddedEnvironment implements VisualEmbedding, Environment { return ContextShader.EMBEDDED; } - @Override - public void setupCull(GlProgram program) { - program.setBool(EmbeddingUniforms.USE_MODEL_MATRIX, true); - program.setMat4(EmbeddingUniforms.MODEL_MATRIX, poseComposed); - } - @Override public void setupDraw(GlProgram program) { program.setMat4(EmbeddingUniforms.MODEL_MATRIX, poseComposed); program.setMat3(EmbeddingUniforms.NORMAL_MATRIX, normalComposed); } - public void flush() { + @Override + public int matrixIndex() { + return matrixIndex; + } + + public void flush(long ptr) { poseComposed.identity(); normalComposed.identity(); composeMatrices(poseComposed, normalComposed); + + ExtraMemoryOps.putMatrix4f(ptr, poseComposed); + ExtraMemoryOps.putMatrix3fPadded(ptr + 16 * Float.BYTES, normalComposed); } private void composeMatrices(Matrix4f pose, Matrix3f normal) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java index e62f0b018..e1c4c1978 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java @@ -1,12 +1,8 @@ package dev.engine_room.flywheel.backend.engine.embed; public final class EmbeddingUniforms { - /** - * Only used by cull shaders. 
- */ - public static final String USE_MODEL_MATRIX = "_flw_useModelMatrix"; - public static final String MODEL_MATRIX = "_flw_modelMatrix"; - public static final String NORMAL_MATRIX = "_flw_normalMatrix"; + public static final String MODEL_MATRIX = "_flw_modelMatrixUniform"; + public static final String NORMAL_MATRIX = "_flw_normalMatrixUniform"; private EmbeddingUniforms() { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java index 83fa60d62..8afcf6125 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java @@ -6,7 +6,7 @@ import dev.engine_room.flywheel.backend.gl.shader.GlProgram; public interface Environment { ContextShader contextShader(); - void setupCull(GlProgram cullProgram); - void setupDraw(GlProgram drawProgram); + + int matrixIndex(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java index 9206b9cf3..942fc3623 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java @@ -1,18 +1,46 @@ package dev.engine_room.flywheel.backend.engine.embed; +import dev.engine_room.flywheel.backend.engine.Arena; import it.unimi.dsi.fastutil.objects.ReferenceLinkedOpenHashSet; import it.unimi.dsi.fastutil.objects.ReferenceSet; -import it.unimi.dsi.fastutil.objects.ReferenceSets; public class EnvironmentStorage { - protected final ReferenceSet environments = ReferenceSets.synchronize(new ReferenceLinkedOpenHashSet<>()); + public static final int MATRIX_SIZE_BYTES = (16 + 12) * Float.BYTES; + + protected 
final Object lock = new Object(); + + protected final ReferenceSet environments = new ReferenceLinkedOpenHashSet<>(); + + // Note that the arena starts indexing at zero, but we reserve zero for the identity matrix. + // Any time an ID from the arena is written we want to add one to it. + public final Arena arena = new Arena(MATRIX_SIZE_BYTES, 32); + + { + arena.alloc(); // Reserve the identity matrix. + } public void track(EmbeddedEnvironment environment) { - environments.add(environment); + synchronized (lock) { + if (environments.add(environment)) { + environment.matrixIndex = arena.alloc(); + } + } } public void flush() { - environments.removeIf(EmbeddedEnvironment::isDeleted); - environments.forEach(EmbeddedEnvironment::flush); + environments.removeIf(embeddedEnvironment -> { + var deleted = embeddedEnvironment.isDeleted(); + if (deleted && embeddedEnvironment.matrixIndex > 0) { + arena.free(embeddedEnvironment.matrixIndex); + } + return deleted; + }); + for (EmbeddedEnvironment environment : environments) { + environment.flush(arena.indexToPointer(environment.matrixIndex)); + } + } + + public void delete() { + arena.delete(); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java index aaac1ca35..7b9919baf 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java @@ -15,11 +15,11 @@ public class GlobalEnvironment implements Environment { } @Override - public void setupCull(GlProgram cullProgram) { - cullProgram.setBool(EmbeddingUniforms.USE_MODEL_MATRIX, false); + public void setupDraw(GlProgram drawProgram) { } @Override - public void setupDraw(GlProgram drawProgram) { + public int matrixIndex() { + return 0; } } diff --git
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java index a0ae93a28..479eaed74 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java @@ -8,6 +8,7 @@ public final class BufferBindings { public static final int DRAW = 4; public static final int LIGHT_LUT = 5; public static final int LIGHT_SECTION = 6; + public static final int MATRICES = 7; private BufferBindings() { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 90ecd4149..b0766e171 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -16,10 +16,10 @@ public class IndirectBuffers { public static final long INT_SIZE = Integer.BYTES; public static final long PTR_SIZE = Pointer.POINTER_SIZE; - public static final long MODEL_STRIDE = 24; + public static final long MODEL_STRIDE = 28; // Byte size of a draw command, plus our added mesh data. 
- public static final long DRAW_COMMAND_STRIDE = 40; + public static final long DRAW_COMMAND_STRIDE = 44; public static final long DRAW_COMMAND_OFFSET = 0; // Offsets to the 3 segments diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 213b9016c..5afa7c46d 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -24,7 +24,6 @@ import dev.engine_room.flywheel.backend.compile.IndirectPrograms; import dev.engine_room.flywheel.backend.engine.InstancerKey; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; -import dev.engine_room.flywheel.backend.engine.embed.Environment; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; @@ -33,6 +32,7 @@ import dev.engine_room.flywheel.lib.math.MoreMath; public class IndirectCullingGroup { private static final Comparator DRAW_COMPARATOR = Comparator.comparing(IndirectDraw::visualType) + .thenComparing(IndirectDraw::isEmbedded) .thenComparing(IndirectDraw::bias) .thenComparing(IndirectDraw::indexOfMeshInModel) .thenComparing(IndirectDraw::material, MaterialRenderState.COMPARATOR); @@ -40,7 +40,6 @@ public class IndirectCullingGroup { private static final int DRAW_BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT; private final InstanceType instanceType; - private final Environment environment; private final long instanceStride; private final IndirectBuffers buffers; private final List> instancers = new ArrayList<>(); @@ -55,9 +54,8 @@ public class IndirectCullingGroup { private boolean 
needsDrawSort; private int instanceCountThisFrame; - IndirectCullingGroup(InstanceType instanceType, Environment environment, IndirectPrograms programs) { + IndirectCullingGroup(InstanceType instanceType, IndirectPrograms programs) { this.instanceType = instanceType; - this.environment = environment; instanceStride = MoreMath.align4(instanceType.layout() .byteSize()); buffers = new IndirectBuffers(instanceStride); @@ -124,8 +122,6 @@ public class IndirectCullingGroup { Uniforms.bindAll(); cullProgram.bind(); - environment.setupCull(cullProgram); - buffers.bindForCompute(); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDispatchCompute(GlCompat.getComputeGroupCount(instanceCountThisFrame), 1, 1); @@ -161,7 +157,7 @@ public class IndirectCullingGroup { // if the next draw call has a different VisualType or Material, start a new MultiDraw if (i == indirectDraws.size() - 1 || incompatibleDraws(draw1, indirectDraws.get(i + 1))) { multiDraws.computeIfAbsent(draw1.visualType(), s -> new ArrayList<>()) - .add(new MultiDraw(draw1.material(), start, i + 1)); + .add(new MultiDraw(draw1.material(), draw1.isEmbedded(), start, i + 1)); start = i + 1; } } @@ -171,6 +167,10 @@ public class IndirectCullingGroup { if (draw1.visualType() != draw2.visualType()) { return true; } + + if (draw1.isEmbedded() != draw2.isEmbedded()) { + return true; + } return !MaterialRenderState.materialEquals(draw1.material(), draw2.material()); } @@ -209,13 +209,12 @@ public class IndirectCullingGroup { int baseDrawUniformLoc = -1; for (var multiDraw : multiDraws.get(visualType)) { - var drawProgram = programs.getIndirectProgram(instanceType, environment.contextShader(), multiDraw.material.light()); + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material.light()); if (drawProgram != lastProgram) { lastProgram = drawProgram; // Don't need to do this unless the program changes. 
drawProgram.bind(); - environment.setupDraw(drawProgram); baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw"); } @@ -300,7 +299,7 @@ public class IndirectCullingGroup { return out; } - private record MultiDraw(Material material, int start, int end) { + private record MultiDraw(Material material, boolean embedded, int start, int end) { private void submit() { GlCompat.safeMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, this.start * IndirectBuffers.DRAW_COMMAND_STRIDE, this.end - this.start, (int) IndirectBuffers.DRAW_COMMAND_STRIDE); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java index 9494b6ade..fb763d006 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java @@ -7,6 +7,7 @@ import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.MaterialShaderIndices; import dev.engine_room.flywheel.backend.engine.MaterialEncoder; import dev.engine_room.flywheel.backend.engine.MeshPool; +import dev.engine_room.flywheel.backend.engine.embed.EmbeddedEnvironment; public class IndirectDraw { private final IndirectInstancer instancer; @@ -46,6 +47,10 @@ public class IndirectDraw { return material; } + public boolean isEmbedded() { + return instancer.environment instanceof EmbeddedEnvironment; + } + public MeshPool.PooledMesh mesh() { return mesh; } @@ -71,10 +76,12 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex - MemoryUtil.memPutInt(ptr + 24, materialVertexIndex); // materialVertexIndex - MemoryUtil.memPutInt(ptr + 28, materialFragmentIndex); // materialFragmentIndex - MemoryUtil.memPutInt(ptr + 32, packedFogAndCutout); // packedFogAndCutout - MemoryUtil.memPutInt(ptr + 
36, packedMaterialProperties); // packedMaterialProperties + MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex + + MemoryUtil.memPutInt(ptr + 28, materialVertexIndex); // materialVertexIndex + MemoryUtil.memPutInt(ptr + 32, materialFragmentIndex); // materialFragmentIndex + MemoryUtil.memPutInt(ptr + 36, packedFogAndCutout); // packedFogAndCutout + MemoryUtil.memPutInt(ptr + 40, packedMaterialProperties); // packedMaterialProperties } public void writeWithOverrides(long ptr, int instanceIndex, Material materialOverride) { @@ -86,10 +93,12 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex - MemoryUtil.memPutInt(ptr + 24, MaterialShaderIndices.vertexIndex(materialOverride.shaders())); // materialVertexIndex - MemoryUtil.memPutInt(ptr + 28, MaterialShaderIndices.fragmentIndex(materialOverride.shaders())); // materialFragmentIndex - MemoryUtil.memPutInt(ptr + 32, MaterialEncoder.packUberShader(materialOverride)); // packedFogAndCutout - MemoryUtil.memPutInt(ptr + 36, MaterialEncoder.packProperties(materialOverride)); // packedMaterialProperties + MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex + + MemoryUtil.memPutInt(ptr + 28, MaterialShaderIndices.vertexIndex(materialOverride.shaders())); // materialVertexIndex + MemoryUtil.memPutInt(ptr + 32, MaterialShaderIndices.fragmentIndex(materialOverride.shaders())); // materialFragmentIndex + MemoryUtil.memPutInt(ptr + 36, MaterialEncoder.packUberShader(materialOverride)); // packedFogAndCutout + MemoryUtil.memPutInt(ptr + 40, MaterialEncoder.packProperties(materialOverride)); // packedMaterialProperties } public void delete() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 2066554f0..073850193 100644 --- 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -12,18 +12,19 @@ import java.util.Map; import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.instance.Instance; +import dev.engine_room.flywheel.api.instance.InstanceType; import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; import dev.engine_room.flywheel.backend.engine.CommonCrumbling; import dev.engine_room.flywheel.backend.engine.DrawManager; -import dev.engine_room.flywheel.backend.engine.GroupKey; import dev.engine_room.flywheel.backend.engine.InstancerKey; import dev.engine_room.flywheel.backend.engine.LightStorage; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; import dev.engine_room.flywheel.backend.engine.TextureBinder; +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlStateTracker; import dev.engine_room.flywheel.backend.gl.array.GlVertexArray; @@ -38,9 +39,10 @@ public class IndirectDrawManager extends DrawManager> { private final StagingBuffer stagingBuffer; private final MeshPool meshPool; private final GlVertexArray vertexArray; - private final Map, IndirectCullingGroup> cullingGroups = new HashMap<>(); + private final Map, IndirectCullingGroup> cullingGroups = new HashMap<>(); private final GlBuffer crumblingDrawBuffer = new GlBuffer(); private final LightBuffers lightBuffers; + private final MatrixBuffer matrixBuffer; public IndirectDrawManager(IndirectPrograms programs) { this.programs = programs; @@ -51,6 +53,7 @@ 
public class IndirectDrawManager extends DrawManager> { vertexArray = GlVertexArray.create(); meshPool.bind(vertexArray); lightBuffers = new LightBuffers(); + matrixBuffer = new MatrixBuffer(); } @Override @@ -61,8 +64,7 @@ public class IndirectDrawManager extends DrawManager> { @SuppressWarnings("unchecked") @Override protected void initialize(InstancerKey key, IndirectInstancer instancer) { - var groupKey = new GroupKey<>(key.type(), key.environment()); - var group = (IndirectCullingGroup) cullingGroups.computeIfAbsent(groupKey, t -> new IndirectCullingGroup<>(t.instanceType(), t.environment(), programs)); + var group = (IndirectCullingGroup) cullingGroups.computeIfAbsent(key.type(), t -> new IndirectCullingGroup<>(t, programs)); group.add((IndirectInstancer) instancer, key, meshPool); } @@ -85,6 +87,7 @@ public class IndirectDrawManager extends DrawManager> { vertexArray.bindForDraw(); lightBuffers.bind(); + matrixBuffer.bind(); Uniforms.bindAll(); for (var group : cullingGroups.values()) { @@ -97,8 +100,8 @@ public class IndirectDrawManager extends DrawManager> { } @Override - public void flush(LightStorage lightStorage) { - super.flush(lightStorage); + public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) { + super.flush(lightStorage, environmentStorage); for (var group : cullingGroups.values()) { group.flushInstancers(); @@ -116,6 +119,8 @@ public class IndirectDrawManager extends DrawManager> { lightBuffers.flush(stagingBuffer, lightStorage); + matrixBuffer.flush(stagingBuffer, environmentStorage); + for (var group : cullingGroups.values()) { group.upload(stagingBuffer); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index b0ac9bc98..75dc2b8e3 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -49,10 +49,11 @@ public class IndirectInstancer extends AbstractInstancer public void writeModel(long ptr) { MemoryUtil.memPutInt(ptr, 0); // instanceCount - to be incremented by the cull shader MemoryUtil.memPutInt(ptr + 4, baseInstance); // baseInstance - MemoryUtil.memPutFloat(ptr + 8, boundingSphere.x()); // boundingSphere - MemoryUtil.memPutFloat(ptr + 12, boundingSphere.y()); - MemoryUtil.memPutFloat(ptr + 16, boundingSphere.z()); - MemoryUtil.memPutFloat(ptr + 20, boundingSphere.w()); + MemoryUtil.memPutInt(ptr + 8, environment.matrixIndex()); // matrixIndex + MemoryUtil.memPutFloat(ptr + 12, boundingSphere.x()); // boundingSphere + MemoryUtil.memPutFloat(ptr + 16, boundingSphere.y()); + MemoryUtil.memPutFloat(ptr + 20, boundingSphere.z()); + MemoryUtil.memPutFloat(ptr + 24, boundingSphere.w()); } public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java new file mode 100644 index 000000000..3e17d386d --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java @@ -0,0 +1,33 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.GL46; +import org.lwjgl.system.MemoryUtil; + +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; + +public class MatrixBuffer { + private final ResizableStorageArray matrices = new ResizableStorageArray(EnvironmentStorage.MATRIX_SIZE_BYTES); + + public void flush(StagingBuffer stagingBuffer, EnvironmentStorage environmentStorage) { + var arena = environmentStorage.arena; + var capacity = arena.capacity(); + + if (capacity == 0) { + return; + } + + matrices.ensureCapacity(capacity); + + stagingBuffer.enqueueCopy((long) 
arena.capacity() * EnvironmentStorage.MATRIX_SIZE_BYTES, matrices.handle(), 0, ptr -> { + MemoryUtil.memCopy(arena.indexToPointer(0), ptr, (long) arena.capacity() * EnvironmentStorage.MATRIX_SIZE_BYTES); + }); + } + + public void bind() { + if (matrices.capacity() == 0) { + return; + } + + GL46.glBindBufferRange(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.MATRICES, matrices.handle(), 0, matrices.byteCapacity()); + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java index 93d68e11d..8b241f6c0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java @@ -23,6 +23,7 @@ import dev.engine_room.flywheel.backend.engine.MaterialEncoder; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; import dev.engine_room.flywheel.backend.engine.TextureBinder; +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlStateTracker; import dev.engine_room.flywheel.backend.gl.TextureBuffer; @@ -59,8 +60,8 @@ public class InstancedDrawManager extends DrawManager> { } @Override - public void flush(LightStorage lightStorage) { - super.flush(lightStorage); + public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) { + super.flush(lightStorage, environmentStorage); this.instancers.values() .removeIf(instancer -> { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert index 075dae3f6..214ab12d2 100644 --- 
a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert @@ -67,8 +67,8 @@ vec2 getCrumblingTexCoord() { #endif #ifdef FLW_EMBEDDED -uniform mat4 _flw_modelMatrix; -uniform mat3 _flw_normalMatrix; +mat4 _flw_modelMatrix; +mat3 _flw_normalMatrix; #endif flat out uint _flw_instanceID; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl index c37db3502..346adfa93 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl @@ -5,3 +5,4 @@ #define _FLW_DRAW_BUFFER_BINDING 4 #define _FLW_LIGHT_LUT_BUFFER_BINDING 5 #define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6 +#define _FLW_MATRIX_BUFFER_BINDING 7 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 76b75bff1..58a893cbc 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -2,6 +2,7 @@ #include "flywheel:internal/indirect/model_descriptor.glsl" #include "flywheel:internal/uniforms/uniforms.glsl" #include "flywheel:util/matrix.glsl" +#include "flywheel:internal/indirect/matrices.glsl" layout(local_size_x = _FLW_SUBGROUP_SIZE) in; @@ -17,8 +18,9 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer ModelDescriptor _flw_models[]; }; -uniform mat4 _flw_modelMatrix; -uniform bool _flw_useModelMatrix = false; +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; // Disgustingly vectorized 
sphere frustum intersection taking advantage of ahead of time packing. // Only uses 6 fmas and some boolean ops. @@ -35,6 +37,7 @@ bool _flw_testSphere(vec3 center, float radius) { bool _flw_isVisible(uint instanceIndex, uint modelIndex) { BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; + uint matrixIndex = _flw_models[modelIndex].matrixIndex; vec3 center; float radius; @@ -44,8 +47,8 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { flw_transformBoundingSphere(instance, center, radius); - if (_flw_useModelMatrix) { - transformBoundingSphere(_flw_modelMatrix, center, radius); + if (matrixIndex > 0) { + transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); } return _flw_testSphere(center, radius); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl index e8575930c..40ed9119b 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl @@ -6,6 +6,7 @@ struct MeshDrawCommand { uint baseInstance; uint modelIndex; + uint matrixIndex; uint materialVertexIndex; uint materialFragmentIndex; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert index a95d31662..e53dff313 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert @@ -3,6 +3,7 @@ #include "flywheel:internal/indirect/buffer_bindings.glsl" #include "flywheel:internal/indirect/draw_command.glsl" #include "flywheel:internal/indirect/light.glsl" +#include "flywheel:internal/indirect/matrices.glsl" layout(std430, binding = 
_FLW_TARGET_BUFFER_BINDING) restrict readonly buffer TargetBuffer { uint _flw_instanceIndices[]; @@ -12,6 +13,12 @@ layout(std430, binding = _FLW_DRAW_BUFFER_BINDING) restrict readonly buffer Draw MeshDrawCommand _flw_drawCommands[]; }; +#ifdef FLW_EMBEDDED +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; +#endif + uniform uint _flw_baseDraw; flat out uvec3 _flw_packedMaterial; @@ -29,7 +36,13 @@ void main() { _flw_unpackMaterialProperties(packedMaterialProperties, flw_material); _flw_packedMaterial = uvec3(draw.materialFragmentIndex, draw.packedFogAndCutout, packedMaterialProperties); -#if __VERSION__ < 460 + #ifdef FLW_EMBEDDED + _flw_unpackMatrices(_flw_matrices[draw.matrixIndex], _flw_modelMatrix, _flw_normalMatrix); + // _flw_modelMatrix = mat4(1.); + // _flw_normalMatrix = mat3(1.); + #endif + + #if __VERSION__ < 460 uint instanceIndex = _flw_instanceIndices[gl_BaseInstanceARB + gl_InstanceID]; #else uint instanceIndex = _flw_instanceIndices[gl_BaseInstance + gl_InstanceID]; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl new file mode 100644 index 000000000..efbc80b8d --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl @@ -0,0 +1,11 @@ +struct Matrices { + mat4 pose; + vec4 normalA; + vec4 normalB; + vec4 normalC; +}; + +void _flw_unpackMatrices(in Matrices mats, out mat4 pose, out mat3 normal) { + pose = mats.pose; + normal = mat3(mats.normalA.xyz, mats.normalB.xyz, mats.normalC.xyz); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl index a873a92a8..c5416f9dd 100644 --- 
a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl @@ -8,6 +8,7 @@ struct BoundingSphere { struct ModelDescriptor { uint instanceCount; uint baseInstance; + uint matrixIndex; BoundingSphere boundingSphere; }; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert index 4b6914eae..30a863917 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert @@ -5,11 +5,21 @@ uniform uvec4 _flw_packedMaterial; uniform int _flw_baseInstance = 0; +#ifdef FLW_EMBEDDED +uniform mat4 _flw_modelMatrixUniform; +uniform mat3 _flw_normalMatrixUniform; +#endif + void main() { _flw_uberMaterialVertexIndex = _flw_packedMaterial.x; _flw_unpackMaterialProperties(_flw_packedMaterial.w, flw_material); FlwInstance instance = _flw_unpackInstance(_flw_baseInstance + gl_InstanceID); + #ifdef FLW_EMBEDDED + _flw_modelMatrix = _flw_modelMatrixUniform; + _flw_normalMatrix = _flw_normalMatrixUniform; + #endif + _flw_main(instance, uint(gl_InstanceID)); } diff --git a/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java b/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java index 3840129be..45b752640 100644 --- a/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java +++ b/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java @@ -61,6 +61,21 @@ public final class ExtraMemoryOps { MemoryUtil.memPutFloat(ptr + 32, matrix.m22()); } + public static void putMatrix3fPadded(long ptr, Matrix3fc matrix) { + MemoryUtil.memPutFloat(ptr, matrix.m00()); + MemoryUtil.memPutFloat(ptr + 4, matrix.m01()); + MemoryUtil.memPutFloat(ptr + 8, 
matrix.m02()); + MemoryUtil.memPutFloat(ptr + 12, 0.0f); + MemoryUtil.memPutFloat(ptr + 16, matrix.m10()); + MemoryUtil.memPutFloat(ptr + 20, matrix.m11()); + MemoryUtil.memPutFloat(ptr + 24, matrix.m12()); + MemoryUtil.memPutFloat(ptr + 28, 0.0f); + MemoryUtil.memPutFloat(ptr + 32, matrix.m20()); + MemoryUtil.memPutFloat(ptr + 36, matrix.m21()); + MemoryUtil.memPutFloat(ptr + 40, matrix.m22()); + MemoryUtil.memPutFloat(ptr + 44, 0.0f); + } + public static void putMatrix4f(long ptr, Matrix4fc matrix) { MemoryUtil.memPutFloat(ptr, matrix.m00()); MemoryUtil.memPutFloat(ptr + 4, matrix.m01());