Consecutive normal draws

- Don't use MDI (multi-draw indirect) on Intel; instead, submit consecutive single draw-indirect commands
This commit is contained in:
Jozufozu 2023-12-24 11:55:33 -08:00
parent 18e375418f
commit 56c0e51e54
3 changed files with 41 additions and 8 deletions

View file

@ -3,6 +3,7 @@ package com.jozufozu.flywheel.backend.engine.indirect;
import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
import static org.lwjgl.opengl.GL30.glUniform1ui;
import static org.lwjgl.opengl.GL40.glDrawElementsIndirect;
import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT;
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
@ -24,6 +25,7 @@ import com.jozufozu.flywheel.api.model.Model;
import com.jozufozu.flywheel.backend.compile.IndirectPrograms;
import com.jozufozu.flywheel.backend.engine.MaterialRenderState;
import com.jozufozu.flywheel.backend.engine.UniformBuffer;
import com.jozufozu.flywheel.gl.Driver;
import com.jozufozu.flywheel.gl.GlCompat;
import com.jozufozu.flywheel.gl.shader.GlProgram;
import com.jozufozu.flywheel.lib.context.Contexts;
@ -267,7 +269,15 @@ public class IndirectCullingGroup<I extends Instance> {
/**
 * A run of indirect draw commands, covering command indices {@code [start, end)},
 * that is rendered with a single material.
 */
private record MultiDraw(Material material, int start, int end) {
	/**
	 * Sets up the render state for {@link #material} and then issues every
	 * draw command in the range.
	 */
	void submit() {
		MaterialRenderState.setup(material);

		if (GlCompat.DRIVER != Driver.INTEL) {
			// A single multi-draw-indirect call covers the whole range of commands.
			glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, start * IndirectBuffers.DRAW_COMMAND_STRIDE, end - start, (int) IndirectBuffers.DRAW_COMMAND_STRIDE);
			return;
		}

		// Intel renders garbage with MDI, but issuing the same commands one at a time
		// ("Consecutive Normal Draws") works fine.
		int command = start;
		while (command < end) {
			glDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, command * IndirectBuffers.DRAW_COMMAND_STRIDE);
			command++;
		}
	}
}
}

View file

@ -0,0 +1,9 @@
package com.jozufozu.flywheel.gl;
/**
 * The graphics driver vendors that are told apart when inspecting the
 * OpenGL vendor string.
 */
public enum Driver {
	/** The vendor string mentions NVIDIA. */
	NVIDIA,

	/** The vendor string mentions AMD or ATI. */
	AMD,

	/** The vendor string mentions Intel. */
	INTEL,

	/** The vendor string mentions Mesa. */
	MESA,

	/** The vendor string was unavailable or matched none of the known vendors. */
	UNKNOWN
}

View file

@ -22,17 +22,17 @@ import net.minecraft.Util;
*/
public final class GlCompat {
public static final GLCapabilities CAPABILITIES = GL.createCapabilities();
public static final boolean AMD = _decideIfWeAreAMD();
public static final boolean WINDOWS = _decideIfWeAreWindows();
public static final boolean ALLOW_DSA = true;
public static final boolean SUPPORTS_INDIRECT = _decideIfWeSupportIndirect();
public static final int SUBGROUP_SIZE = _subgroupSize();
public static final Driver DRIVER = _readVendorString();
private GlCompat() {
}
public static boolean onAMDWindows() {
return AMD && WINDOWS;
return DRIVER == Driver.AMD && WINDOWS;
}
public static boolean supportsInstancing() {
@ -43,15 +43,25 @@ public final class GlCompat {
return SUPPORTS_INDIRECT;
}
private static boolean _decideIfWeAreAMD() {
private static Driver _readVendorString() {
String vendor = GL20C.glGetString(GL20C.GL_VENDOR);
if (vendor == null) {
return false;
return Driver.UNKNOWN;
}
// vendor string I got was "ATI Technologies Inc."
return vendor.contains("ATI") || vendor.contains("AMD");
if (vendor.contains("ATI") || vendor.contains("AMD")) {
return Driver.AMD;
} else if (vendor.contains("NVIDIA")) {
return Driver.NVIDIA;
} else if (vendor.contains("Intel")) {
return Driver.INTEL;
} else if (vendor.contains("Mesa")) {
return Driver.MESA;
}
return Driver.UNKNOWN;
}
private static boolean _decideIfWeAreWindows() {
@ -66,8 +76,12 @@ public final class GlCompat {
if (CAPABILITIES.GL_KHR_shader_subgroup) {
return GL31C.glGetInteger(KHRShaderSubgroup.GL_SUBGROUP_SIZE_KHR);
}
// try to guess
return AMD ? 64 : 32;
// Try to guess.
// Newer (RDNA) AMD cards have 32 threads in a wavefront, while older ones have 64.
// I assume the newer drivers will implement the above extension, so 64 is a
// reasonable guess for AMD hardware. In the worst case, we'll just spread
// the load across multiple SIMDs.
return DRIVER == Driver.AMD || DRIVER == Driver.MESA ? 64 : 32;
}
public static int getComputeGroupCount(int invocations) {