From 59a03ad8111cfab65f5151d75f11797f058c6776 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 30 Nov 2023 13:38:46 -0800 Subject: [PATCH] More direct buffers - Merge object and batch ID buffer. - ShaderCompiler accepts a Compilation callback. - Use callback to enable the conservative depth extension only in fragment shaders. - Query subgroup size if available and use callback to set a compile definition in compute shaders. --- .../flywheel/backend/compile/Compile.java | 19 ++++++++++- .../backend/compile/IndirectPrograms.java | 2 ++ .../backend/compile/PipelineCompiler.java | 1 + .../backend/compile/core/Compilation.java | 8 +++++ .../backend/compile/core/ShaderCompiler.java | 5 +-- .../engine/indirect/IndirectBuffers.java | 32 +++++-------------- .../engine/indirect/IndirectCullingGroup.java | 16 ++++------ .../backend/engine/indirect/IndirectDraw.java | 6 ++-- .../engine/indirect/IndirectInstancer.java | 22 +++++++------ .../com/jozufozu/flywheel/gl/GlCompat.java | 24 ++++++++++---- .../flywheel/internal/indirect_cull.glsl | 18 +++++------ .../flywheel/internal/indirect_draw.vert | 18 ++++++----- 12 files changed, 99 insertions(+), 72 deletions(-) diff --git a/src/main/java/com/jozufozu/flywheel/backend/compile/Compile.java b/src/main/java/com/jozufozu/flywheel/backend/compile/Compile.java index a58fd3763..61f620f01 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/compile/Compile.java +++ b/src/main/java/com/jozufozu/flywheel/backend/compile/Compile.java @@ -6,11 +6,13 @@ import java.util.List; import java.util.Map; import java.util.function.BiConsumer; import java.util.function.BiFunction; +import java.util.function.Consumer; import java.util.function.Function; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import com.jozufozu.flywheel.backend.compile.core.Compilation; import com.jozufozu.flywheel.backend.compile.core.ProgramLinker; import com.jozufozu.flywheel.backend.compile.core.ShaderCompiler; import com.jozufozu.flywheel.gl.shader.GlProgram; @@ -83,6 +85,8 @@ public class Compile { public static class ShaderCompilerBuilder { private final GLSLVersion glslVersion; private final ShaderType shaderType; + private Consumer compilationCallbacks = $ -> { + }; private final List> fetchers = new ArrayList<>(); public ShaderCompilerBuilder(GLSLVersion glslVersion, ShaderType shaderType) { @@ -111,6 +115,19 @@ public class Compile { return withResource($ -> resourceLocation); } + public ShaderCompilerBuilder onCompile(Consumer cb) { + compilationCallbacks = compilationCallbacks.andThen(cb); + return this; + } + + public ShaderCompilerBuilder define(String def, int value) { + return onCompile(ctx -> ctx.define(def, String.valueOf(value))); + } + + public ShaderCompilerBuilder enableExtension(String extension) { + return onCompile(ctx -> ctx.enableExtension(extension)); + } + @Nullable private GlShader compile(K key, ShaderCompiler compiler, SourceLoader loader) { var components = new ArrayList(); @@ -127,7 +144,7 @@ public class Compile { return null; } - return compiler.compile(glslVersion, shaderType, components); + return compiler.compile(glslVersion, shaderType, compilationCallbacks, components); } } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/compile/IndirectPrograms.java b/src/main/java/com/jozufozu/flywheel/backend/compile/IndirectPrograms.java index 650016caf..f2a98200e 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/compile/IndirectPrograms.java +++ b/src/main/java/com/jozufozu/flywheel/backend/compile/IndirectPrograms.java @@ -12,6 +12,7 @@ import com.jozufozu.flywheel.api.vertex.VertexType; import com.jozufozu.flywheel.backend.compile.component.IndirectComponent; import com.jozufozu.flywheel.backend.compile.component.MaterialAdapterComponent; import com.jozufozu.flywheel.backend.compile.component.UniformComponent; +import com.jozufozu.flywheel.gl.GlCompat; import com.jozufozu.flywheel.gl.shader.GlProgram; import com.jozufozu.flywheel.gl.shader.ShaderType; import com.jozufozu.flywheel.glsl.GLSLVersion; @@ -75,6 +76,7 @@ public class IndirectPrograms { private static CompilationHarness> createCullingCompiler(UniformComponent uniformComponent, ShaderSources sources) { return new CompilationHarness<>(sources, createCullingKeys(), Compile.>program() .link(Compile.>shader(GLSLVersion.V460, ShaderType.COMPUTE) + .define("FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE) .withComponent(uniformComponent) .withComponent(IndirectComponent::create) .withResource(InstanceType::instanceShader) diff --git a/src/main/java/com/jozufozu/flywheel/backend/compile/PipelineCompiler.java b/src/main/java/com/jozufozu/flywheel/backend/compile/PipelineCompiler.java index 63393874b..cd2a9ac86 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/compile/PipelineCompiler.java +++ b/src/main/java/com/jozufozu/flywheel/backend/compile/PipelineCompiler.java @@ -23,6 +23,7 @@ public class PipelineCompiler { .vertexShader()) .withResource(pipeline.vertexShader())) .link(Compile.shader(pipeline.glslVersion(), ShaderType.FRAGMENT) + .enableExtension("GL_ARB_conservative_depth") .withComponent(uniformComponent) .withComponent(fragmentMaterialComponent) .withResource(key -> key.contextShader() diff --git a/src/main/java/com/jozufozu/flywheel/backend/compile/core/Compilation.java b/src/main/java/com/jozufozu/flywheel/backend/compile/core/Compilation.java index 36f7780d9..99c403004 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/compile/core/Compilation.java +++ b/src/main/java/com/jozufozu/flywheel/backend/compile/core/Compilation.java @@ -70,6 +70,14 @@ public class Compilation { .append(" : enable\n"); } + public void define(String key, String value) { + fullSource.append("#define ") + .append(key) + .append(' ') + .append(value) + .append('\n'); + } + public void appendComponent(SourceComponent component) { var source = component.source(); diff --git a/src/main/java/com/jozufozu/flywheel/backend/compile/core/ShaderCompiler.java b/src/main/java/com/jozufozu/flywheel/backend/compile/core/ShaderCompiler.java index 16803c88e..8ee9551f3 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/compile/core/ShaderCompiler.java +++ b/src/main/java/com/jozufozu/flywheel/backend/compile/core/ShaderCompiler.java @@ -24,7 +24,7 @@ public class ShaderCompiler { } @Nullable - public GlShader compile(GLSLVersion glslVersion, ShaderType shaderType, List sourceComponents) { + public GlShader compile(GLSLVersion glslVersion, ShaderType shaderType, Consumer callback, List sourceComponents) { var key = new ShaderKey(glslVersion, shaderType, sourceComponents); var cached = shaderCache.get(key); if (cached != null) { @@ -32,7 +32,8 @@ public class ShaderCompiler { } Compilation ctx = new Compilation(glslVersion, shaderType); - ctx.enableExtension("GL_ARB_conservative_depth"); + + callback.accept(ctx); expand(sourceComponents, ctx::appendComponent); diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java index 30f9b7538..e7b1b8bfd 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -24,7 +24,7 @@ import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker; import com.jozufozu.flywheel.lib.memory.MemoryBlock; public class IndirectBuffers { - public static final int BUFFER_COUNT = 4; + public static final int BUFFER_COUNT = 3; public static final long INT_SIZE = Integer.BYTES; public static final long PTR_SIZE = Pointer.POINTER_SIZE; @@ -45,18 +45,15 @@ public class IndirectBuffers { private static final long OBJECT_SIZE_OFFSET = SIZE_OFFSET; private static final long TARGET_SIZE_OFFSET = OBJECT_SIZE_OFFSET + PTR_SIZE; - private static final long BATCH_SIZE_OFFSET = TARGET_SIZE_OFFSET + PTR_SIZE; - private static final long DRAW_SIZE_OFFSET = BATCH_SIZE_OFFSET + PTR_SIZE; + private static final long DRAW_SIZE_OFFSET = TARGET_SIZE_OFFSET + PTR_SIZE; private final MemoryBlock buffers; private final long objectStride; private int object; private int target; - private int batch; private int draw; long objectPtr; - long batchPtr; long drawPtr; private int maxObjectCount = 0; @@ -72,11 +69,10 @@ public class IndirectBuffers { void createBuffers() { final long ptr = buffers.ptr(); - nglCreateBuffers(4, ptr); + nglCreateBuffers(BUFFER_COUNT, ptr); object = MemoryUtil.memGetInt(ptr); target = MemoryUtil.memGetInt(ptr + 4); - batch = MemoryUtil.memGetInt(ptr + 8); - draw = MemoryUtil.memGetInt(ptr + 12); + draw = MemoryUtil.memGetInt(ptr + 8); } void updateCounts(int objectCount, int drawCount) { @@ -94,7 +90,6 @@ public class IndirectBuffers { final long ptr = buffers.ptr(); MemoryUtil.memPutAddress(ptr + OBJECT_SIZE_OFFSET, objectSize); MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, targetSize); - MemoryUtil.memPutAddress(ptr + BATCH_SIZE_OFFSET, targetSize); MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, drawSize); } @@ -105,38 +100,31 @@ public class IndirectBuffers { if (maxObjectCount > 0) { final long ptr = buffers.ptr(); - nglCreateBuffers(3, ptr); + nglCreateBuffers(BUFFER_COUNT - 1, ptr); int objectNew = MemoryUtil.memGetInt(ptr); int targetNew = MemoryUtil.memGetInt(ptr + 4); - int batchNew = MemoryUtil.memGetInt(ptr + 8); glNamedBufferStorage(objectNew, objectSize, PERSISTENT_BITS); glNamedBufferStorage(targetNew, targetSize, GPU_ONLY_BITS); - glNamedBufferStorage(batchNew, targetSize, PERSISTENT_BITS); glCopyNamedBufferSubData(object, objectNew, 0, 0, objectStride * maxObjectCount); glCopyNamedBufferSubData(target, targetNew, 0, 0, INT_SIZE * maxObjectCount); - glCopyNamedBufferSubData(batch, batchNew, 0, 0, INT_SIZE * maxObjectCount); glDeleteBuffers(object); glDeleteBuffers(target); - glDeleteBuffers(batch); object = objectNew; target = targetNew; - batch = batchNew; } else { glNamedBufferStorage(object, objectSize, PERSISTENT_BITS); glNamedBufferStorage(target, targetSize, GPU_ONLY_BITS); - glNamedBufferStorage(batch, targetSize, PERSISTENT_BITS); } objectPtr = nglMapNamedBufferRange(object, 0, objectSize, MAP_BITS); - batchPtr = nglMapNamedBufferRange(batch, 0, targetSize, MAP_BITS); maxObjectCount = objectCount; - FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride + maxObjectCount * INT_SIZE); + FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride); } void createDrawStorage(int drawCount) { @@ -150,7 +138,7 @@ public class IndirectBuffers { glDeleteBuffers(draw); - MemoryUtil.memPutInt(buffers.ptr() + INT_SIZE * 3, drawNew); + MemoryUtil.memPutInt(buffers.ptr() + INT_SIZE * 2, drawNew); draw = drawNew; drawPtr = MemoryUtil.nmemRealloc(drawPtr, drawSize); } else { @@ -163,7 +151,7 @@ public class IndirectBuffers { } private void freeObjectStogare() { - FlwMemoryTracker._freeGPUMemory(maxObjectCount * objectStride + maxObjectCount * INT_SIZE); + FlwMemoryTracker._freeGPUMemory(maxObjectCount * objectStride); } private void freeDrawStorage() { @@ -184,10 +172,6 @@ public class IndirectBuffers { nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, 0, IndirectBuffers.BUFFER_COUNT, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET); } - void flushBatchIDs(long length) { - glFlushMappedNamedBufferRange(batch, 0, length); - } - void flushObjects(long length) { glFlushMappedNamedBufferRange(object, 0, length); } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 8ad9fbec7..05109caf5 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -21,7 +21,7 @@ public class IndirectCullingGroup { private final GlProgram compute; private final GlProgram draw; - private final long instanceStride; + private final long objectStride; private final IndirectBuffers buffers; public final IndirectMeshPool meshPool; public final IndirectDrawSet drawSet = new IndirectDrawSet<>(); @@ -30,9 +30,10 @@ public class IndirectCullingGroup { private int instanceCountThisFrame; IndirectCullingGroup(InstanceType instanceType, VertexType vertexType) { - instanceStride = instanceType.getLayout() - .getStride(); - buffers = new IndirectBuffers(instanceStride); + objectStride = instanceType.getLayout() + .getStride() + IndirectBuffers.INT_SIZE; + + buffers = new IndirectBuffers(objectStride); buffers.createBuffers(); buffers.createObjectStorage(128); buffers.createDrawStorage(2); @@ -108,20 +109,17 @@ public class IndirectCullingGroup { private void uploadInstances() { long objectPtr = buffers.objectPtr; - long batchIDPtr = buffers.batchPtr; for (int i = 0, batchesSize = drawSet.indirectDraws.size(); i < batchesSize; i++) { var batch = drawSet.indirectDraws.get(i); var instanceCount = batch.instancer() .getInstanceCount(); - batch.writeObjects(objectPtr, batchIDPtr, i); + batch.writeObjects(objectPtr, i); - objectPtr += instanceCount * instanceStride; - batchIDPtr += instanceCount * IndirectBuffers.INT_SIZE; + objectPtr += instanceCount * objectStride; } buffers.flushObjects(objectPtr - buffers.objectPtr); - buffers.flushBatchIDs(batchIDPtr - buffers.batchPtr); } private void uploadIndirectCommands() { diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java index 760d390c9..bbfcc6f7a 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectDraw.java @@ -55,11 +55,11 @@ public class IndirectDraw { needsFullWrite = true; } - public void writeObjects(long objectPtr, long batchIDPtr, int batchID) { + public void writeObjects(long objectPtr, int batchID) { if (needsFullWrite) { - instancer.writeFull(objectPtr, batchIDPtr, batchID); + instancer.writeFull(objectPtr, batchID); } else { - instancer.writeSparse(objectPtr, batchIDPtr, batchID); + instancer.writeSparse(objectPtr, batchID); } } diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java index b5ebe9624..48b274623 100644 --- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java +++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -9,40 +9,42 @@ import com.jozufozu.flywheel.backend.engine.AbstractInstancer; public class IndirectInstancer extends AbstractInstancer { private final long instanceStride; + private final long objectStride; public IndirectInstancer(InstanceType type) { super(type); this.instanceStride = type.getLayout() .getStride(); + this.objectStride = instanceStride + IndirectBuffers.INT_SIZE; } public void update() { removeDeletedInstances(); } - public void writeSparse(long objectPtr, long batchIDPtr, int batchID) { + public void writeSparse(long objectPtr, int batchID) { int count = instances.size(); InstanceWriter writer = type.getWriter(); for (int i = changed.nextSetBit(0); i >= 0 && i < count; i = changed.nextSetBit(i + 1)) { - // write object - writer.write(objectPtr + instanceStride * i, instances.get(i)); - + long ptr = objectPtr + objectStride * i; // write batchID - MemoryUtil.memPutInt(batchIDPtr + IndirectBuffers.INT_SIZE * i, batchID); + MemoryUtil.memPutInt(ptr, batchID); + // write object + writer.write(ptr + IndirectBuffers.INT_SIZE, instances.get(i)); } changed.clear(); } - public void writeFull(long objectPtr, long batchIDPtr, int batchID) { + public void writeFull(long objectPtr, int batchID) { InstanceWriter writer = type.getWriter(); for (I object : instances) { + // write batchID + MemoryUtil.memPutInt(objectPtr, batchID); + objectPtr += IndirectBuffers.INT_SIZE; + // write object writer.write(objectPtr, object); objectPtr += instanceStride; - - // write batchID - MemoryUtil.memPutInt(batchIDPtr, batchID); - batchIDPtr += IndirectBuffers.INT_SIZE; } changed.clear(); } diff --git a/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java b/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java index 040819bf5..d54189a50 100644 --- a/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java +++ b/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java @@ -5,7 +5,9 @@ import java.nio.ByteBuffer; import org.lwjgl.PointerBuffer; import org.lwjgl.opengl.GL; import org.lwjgl.opengl.GL20C; +import org.lwjgl.opengl.GL31C; import org.lwjgl.opengl.GLCapabilities; +import org.lwjgl.opengl.KHRShaderSubgroup; import org.lwjgl.system.MemoryStack; import net.minecraft.Util; @@ -19,14 +21,16 @@ import net.minecraft.Util; public class GlCompat { public static final boolean ALLOW_DSA = true; public static final GLCapabilities CAPABILITIES = GL.createCapabilities(); - private static final boolean amd = _decideIfWeAreAMDWindows(); + private static final boolean amd = _decideIfWeAreAMD(); + private static final boolean windows = _decideIfWeAreWindows(); private static final boolean supportsIndirect = _decideIfWeSupportIndirect(); + public static final int SUBGROUP_SIZE = _subgroupSize(); private GlCompat() { } public static boolean onAMDWindows() { - return amd; + return amd && windows; } public static boolean supportsInstancing() { @@ -41,6 +45,14 @@ public class GlCompat { return CAPABILITIES.OpenGL46 || (CAPABILITIES.GL_ARB_compute_shader && CAPABILITIES.GL_ARB_shader_draw_parameters && CAPABILITIES.GL_ARB_base_instance && CAPABILITIES.GL_ARB_multi_draw_indirect && CAPABILITIES.GL_ARB_direct_state_access); } + private static int _subgroupSize() { + if (CAPABILITIES.GL_KHR_shader_subgroup) { + return GL31C.glGetInteger(KHRShaderSubgroup.GL_SUBGROUP_SIZE_KHR); + } + // try to guess + return amd ? 64 : 32; + } + /** * Modified from: *
canvas @@ -62,11 +74,11 @@ public class GlCompat { } } - private static boolean _decideIfWeAreAMDWindows() { - if (Util.getPlatform() != Util.OS.WINDOWS) { - return false; - } + private static boolean _decideIfWeAreWindows() { + return Util.getPlatform() == Util.OS.WINDOWS; + } + private static boolean _decideIfWeAreAMD() { String vendor = GL20C.glGetString(GL20C.GL_VENDOR); if (vendor == null) { diff --git a/src/main/resources/assets/flywheel/flywheel/internal/indirect_cull.glsl b/src/main/resources/assets/flywheel/flywheel/internal/indirect_cull.glsl index ff728d42b..660d49fe3 100644 --- a/src/main/resources/assets/flywheel/flywheel/internal/indirect_cull.glsl +++ b/src/main/resources/assets/flywheel/flywheel/internal/indirect_cull.glsl @@ -1,22 +1,22 @@ -#define FLW_SUBGROUP_SIZE 32 layout(local_size_x = FLW_SUBGROUP_SIZE) in; #include "flywheel:internal/indirect_draw_command.glsl" +struct Object { + uint batchID; + FlwPackedInstance instance; +}; + // populated by instancers layout(std430, binding = 0) restrict readonly buffer ObjectBuffer { - FlwPackedInstance objects[]; + Object objects[]; }; layout(std430, binding = 1) restrict writeonly buffer TargetBuffer { uint objectIDs[]; }; -layout(std430, binding = 2) restrict readonly buffer BatchBuffer { - uint batchIDs[]; -}; - -layout(std430, binding = 3) restrict buffer DrawCommands { +layout(std430, binding = 2) restrict buffer DrawCommands { MeshDrawCommand drawCommands[]; }; @@ -38,7 +38,7 @@ bool isVisible() { float radius; unpackBoundingSphere(sphere, center, radius); - FlwInstance object = _flw_unpackInstance(objects[flw_objectID]); + FlwInstance object = _flw_unpackInstance(objects[flw_objectID].instance); flw_transformBoundingSphere(object, center, radius); return testSphere(center, radius); @@ -51,7 +51,7 @@ void main() { return; } - flw_batchID = batchIDs[flw_objectID]; + flw_batchID = objects[flw_objectID].batchID; if (isVisible()) { uint batchIndex = atomicAdd(drawCommands[flw_batchID].instanceCount, 1); diff --git a/src/main/resources/assets/flywheel/flywheel/internal/indirect_draw.vert b/src/main/resources/assets/flywheel/flywheel/internal/indirect_draw.vert index c0ff96619..d4f774983 100644 --- a/src/main/resources/assets/flywheel/flywheel/internal/indirect_draw.vert +++ b/src/main/resources/assets/flywheel/flywheel/internal/indirect_draw.vert @@ -1,26 +1,28 @@ #include "flywheel:api/vertex.glsl" #include "flywheel:internal/indirect_draw_command.glsl" +struct Object { + uint batchID; + FlwPackedInstance instance; +}; + + layout(std430, binding = 0) restrict readonly buffer ObjectBuffer { - FlwPackedInstance objects[]; + Object objects[]; }; layout(std430, binding = 1) restrict readonly buffer TargetBuffer { uint objectIDs[]; }; -layout(std430, binding = 2) restrict readonly buffer BatchBuffer { - uint batchIDs[]; -}; - -layout(std430, binding = 3) restrict readonly buffer DrawCommands { +layout(std430, binding = 2) restrict readonly buffer DrawCommands { MeshDrawCommand drawCommands[]; }; void main() { uint instanceIndex = objectIDs[gl_BaseInstance + gl_InstanceID]; - uint batchID = batchIDs[instanceIndex]; - FlwInstance i = _flw_unpackInstance(objects[instanceIndex]); + uint batchID = objects[instanceIndex].batchID; + FlwInstance i = _flw_unpackInstance(objects[instanceIndex].instance); _flw_materialVertexID = drawCommands[batchID].vertexMaterialID; _flw_materialFragmentID = drawCommands[batchID].fragmentMaterialID;