From 02321c52efa63f1556cae0d096ecfdaa02ac5538 Mon Sep 17 00:00:00 2001
From: Jozufozu
Date: Sun, 24 Dec 2023 11:55:33 -0800
Subject: [PATCH] Consecutive normal draws

- Don't use MDI on intel, instead submit multiple draw indirect commands
- Initialize GlCompat.DRIVER before SUBGROUP_SIZE so the subgroup size
  fallback sees the detected driver instead of null
---
 .../engine/indirect/IndirectCullingGroup.java | 12 +++++++-
 .../java/com/jozufozu/flywheel/gl/Driver.java |  9 ++++++
 .../com/jozufozu/flywheel/gl/GlCompat.java    | 29 ++++++++++++++-----
 3 files changed, 42 insertions(+), 8 deletions(-)
 create mode 100644 src/main/java/com/jozufozu/flywheel/gl/Driver.java

diff --git a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java
index b671f2cfc..99609b419 100644
--- a/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java
+++ b/src/main/java/com/jozufozu/flywheel/backend/engine/indirect/IndirectCullingGroup.java
@@ -3,6 +3,7 @@ package com.jozufozu.flywheel.backend.engine.indirect;
 import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
 import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
 import static org.lwjgl.opengl.GL30.glUniform1ui;
+import static org.lwjgl.opengl.GL40.glDrawElementsIndirect;
 import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT;
 import static org.lwjgl.opengl.GL42.glMemoryBarrier;
 import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
@@ -24,6 +25,7 @@ import com.jozufozu.flywheel.api.model.Model;
 import com.jozufozu.flywheel.backend.compile.IndirectPrograms;
 import com.jozufozu.flywheel.backend.engine.MaterialRenderState;
 import com.jozufozu.flywheel.backend.engine.UniformBuffer;
+import com.jozufozu.flywheel.gl.Driver;
 import com.jozufozu.flywheel.gl.GlCompat;
 import com.jozufozu.flywheel.gl.shader.GlProgram;
 import com.jozufozu.flywheel.lib.context.Contexts;
@@ -267,7 +269,15 @@ public class IndirectCullingGroup {
 	private record MultiDraw(Material material, int start, int end) {
 		void submit() {
 			MaterialRenderState.setup(material);
-			glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, start * IndirectBuffers.DRAW_COMMAND_STRIDE, end - start, (int) IndirectBuffers.DRAW_COMMAND_STRIDE);
+
+			if (GlCompat.DRIVER == Driver.INTEL) {
+				// Intel renders garbage with MDI, but Consecutive Normal Draws works fine.
+				for (int i = start; i < end; i++) {
+					glDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, i * IndirectBuffers.DRAW_COMMAND_STRIDE);
+				}
+			} else {
+				glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, start * IndirectBuffers.DRAW_COMMAND_STRIDE, end - start, (int) IndirectBuffers.DRAW_COMMAND_STRIDE);
+			}
 		}
 	}
 }
diff --git a/src/main/java/com/jozufozu/flywheel/gl/Driver.java b/src/main/java/com/jozufozu/flywheel/gl/Driver.java
new file mode 100644
index 000000000..54fbc87fe
--- /dev/null
+++ b/src/main/java/com/jozufozu/flywheel/gl/Driver.java
@@ -0,0 +1,9 @@
+package com.jozufozu.flywheel.gl;
+
+public enum Driver {
+	NVIDIA,
+	AMD,
+	INTEL,
+	MESA,
+	UNKNOWN,
+}
diff --git a/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java b/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java
index 4ab492405..6305b4666 100644
--- a/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java
+++ b/src/main/java/com/jozufozu/flywheel/gl/GlCompat.java
@@ -22,17 +22,18 @@ import net.minecraft.Util;
  */
 public final class GlCompat {
 	public static final GLCapabilities CAPABILITIES = GL.createCapabilities();
-	public static final boolean AMD = _decideIfWeAreAMD();
+	// Must stay above SUBGROUP_SIZE: static fields initialize in textual order and _subgroupSize() reads DRIVER.
+	public static final Driver DRIVER = _readVendorString();
 	public static final boolean WINDOWS = _decideIfWeAreWindows();
 	public static final boolean ALLOW_DSA = true;
 	public static final boolean SUPPORTS_INDIRECT = _decideIfWeSupportIndirect();
 	public static final int SUBGROUP_SIZE = _subgroupSize();
 
 	private GlCompat() {
 	}
 
 	public static boolean onAMDWindows() {
-		return AMD && WINDOWS;
+		return DRIVER == Driver.AMD && WINDOWS;
 	}
 
 	public static boolean supportsInstancing() {
@@ -43,15 +44,25 @@ public final class GlCompat {
 		return SUPPORTS_INDIRECT;
 	}
 
-	private static boolean _decideIfWeAreAMD() {
+	private static Driver _readVendorString() {
 		String vendor = GL20C.glGetString(GL20C.GL_VENDOR);
 
 		if (vendor == null) {
-			return false;
+			return Driver.UNKNOWN;
 		}
 
 		// vendor string I got was "ATI Technologies Inc."
-		return vendor.contains("ATI") || vendor.contains("AMD");
+		if (vendor.contains("ATI") || vendor.contains("AMD")) {
+			return Driver.AMD;
+		} else if (vendor.contains("NVIDIA")) {
+			return Driver.NVIDIA;
+		} else if (vendor.contains("Intel")) {
+			return Driver.INTEL;
+		} else if (vendor.contains("Mesa")) {
+			return Driver.MESA;
+		}
+
+		return Driver.UNKNOWN;
 	}
 
 	private static boolean _decideIfWeAreWindows() {
@@ -66,8 +77,12 @@ public final class GlCompat {
 		if (CAPABILITIES.GL_KHR_shader_subgroup) {
 			return GL31C.glGetInteger(KHRShaderSubgroup.GL_SUBGROUP_SIZE_KHR);
 		}
-		// try to guess
-		return AMD ? 64 : 32;
+		// Try to guess.
+		// Newer (RDNA) AMD cards have 32 threads in a wavefront, older ones have 64.
+		// I assume the newer drivers will implement the above extension, so 64 is a
+		// reasonable guess for AMD hardware. In the worst case we'll just spread
+		// load across multiple SIMDs
+		return DRIVER == Driver.AMD || DRIVER == Driver.MESA ? 64 : 32;
 	}
 
 	public static int getComputeGroupCount(int invocations) {