More direct buffers

- Merge the object and batch ID buffers into a single buffer.
- ShaderCompiler accepts a Compilation callback.
- Use callback to enable the conservative depth extension only in
  fragment shaders.
- Query subgroup size if available and use callback to set a compile
  definition in compute shaders.
This commit is contained in:
Jozufozu 2023-11-30 13:38:46 -08:00
parent 3a581bac79
commit 59a03ad811
12 changed files with 99 additions and 72 deletions

View File

@ -6,11 +6,13 @@ import java.util.List;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import com.jozufozu.flywheel.backend.compile.core.Compilation;
import com.jozufozu.flywheel.backend.compile.core.ProgramLinker;
import com.jozufozu.flywheel.backend.compile.core.ShaderCompiler;
import com.jozufozu.flywheel.gl.shader.GlProgram;
@ -83,6 +85,8 @@ public class Compile {
public static class ShaderCompilerBuilder<K> {
private final GLSLVersion glslVersion;
private final ShaderType shaderType;
private Consumer<Compilation> compilationCallbacks = $ -> {
};
private final List<BiFunction<K, SourceLoader, SourceComponent>> fetchers = new ArrayList<>();
public ShaderCompilerBuilder(GLSLVersion glslVersion, ShaderType shaderType) {
@ -111,6 +115,19 @@ public class Compile {
return withResource($ -> resourceLocation);
}
/**
 * Appends a callback to this builder's chain of compilation callbacks.
 * <p>
 * The new callback is composed after the ones already registered via
 * {@link Consumer#andThen}, so callbacks are invoked in registration order.
 *
 * @param cb the callback to add to the chain
 * @return this builder, for chaining
 */
public ShaderCompilerBuilder<K> onCompile(Consumer<Compilation> cb) {
    final Consumer<Compilation> existing = compilationCallbacks;
    compilationCallbacks = existing.andThen(cb);
    return this;
}
/**
 * Registers a compilation callback that defines {@code def} with the decimal
 * string form of {@code value}.
 *
 * @param def the name to define
 * @param value the integer value to define it as
 * @return this builder, for chaining
 */
public ShaderCompilerBuilder<K> define(String def, int value) {
    final String rendered = Integer.toString(value);
    return onCompile(compilation -> compilation.define(def, rendered));
}
/**
 * Registers a compilation callback that enables the given extension on the
 * {@link Compilation} it receives.
 *
 * @param extension the extension name, e.g. {@code GL_ARB_conservative_depth}
 * @return this builder, for chaining
 */
public ShaderCompilerBuilder<K> enableExtension(String extension) {
    final Consumer<Compilation> enabler = compilation -> compilation.enableExtension(extension);
    return onCompile(enabler);
}
@Nullable
private GlShader compile(K key, ShaderCompiler compiler, SourceLoader loader) {
var components = new ArrayList<SourceComponent>();
@ -127,7 +144,7 @@ public class Compile {
return null;
}
return compiler.compile(glslVersion, shaderType, components);
return compiler.compile(glslVersion, shaderType, compilationCallbacks, components);
}
}
}

View File

@ -12,6 +12,7 @@ import com.jozufozu.flywheel.api.vertex.VertexType;
import com.jozufozu.flywheel.backend.compile.component.IndirectComponent;
import com.jozufozu.flywheel.backend.compile.component.MaterialAdapterComponent;
import com.jozufozu.flywheel.backend.compile.component.UniformComponent;
import com.jozufozu.flywheel.gl.GlCompat;
import com.jozufozu.flywheel.gl.shader.GlProgram;
import com.jozufozu.flywheel.gl.shader.ShaderType;
import com.jozufozu.flywheel.glsl.GLSLVersion;
@ -75,6 +76,7 @@ public class IndirectPrograms {
private static CompilationHarness<InstanceType<?>> createCullingCompiler(UniformComponent uniformComponent, ShaderSources sources) {
return new CompilationHarness<>(sources, createCullingKeys(), Compile.<InstanceType<?>>program()
.link(Compile.<InstanceType<?>>shader(GLSLVersion.V460, ShaderType.COMPUTE)
.define("FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE)
.withComponent(uniformComponent)
.withComponent(IndirectComponent::create)
.withResource(InstanceType::instanceShader)

View File

@ -23,6 +23,7 @@ public class PipelineCompiler {
.vertexShader())
.withResource(pipeline.vertexShader()))
.link(Compile.<PipelineProgramKey>shader(pipeline.glslVersion(), ShaderType.FRAGMENT)
.enableExtension("GL_ARB_conservative_depth")
.withComponent(uniformComponent)
.withComponent(fragmentMaterialComponent)
.withResource(key -> key.contextShader()

View File

@ -70,6 +70,14 @@ public class Compilation {
.append(" : enable\n");
}
/**
 * Appends a {@code #define <key> <value>} line, terminated by a newline, to
 * the source text being assembled.
 *
 * @param key the name to define
 * @param value the text the name is defined as
 */
public void define(String key, String value) {
    final String directive = "#define " + key + ' ' + value + '\n';
    fullSource.append(directive);
}
public void appendComponent(SourceComponent component) {
var source = component.source();

View File

@ -24,7 +24,7 @@ public class ShaderCompiler {
}
@Nullable
public GlShader compile(GLSLVersion glslVersion, ShaderType shaderType, List<SourceComponent> sourceComponents) {
public GlShader compile(GLSLVersion glslVersion, ShaderType shaderType, Consumer<Compilation> callback, List<SourceComponent> sourceComponents) {
var key = new ShaderKey(glslVersion, shaderType, sourceComponents);
var cached = shaderCache.get(key);
if (cached != null) {
@ -32,7 +32,8 @@ public class ShaderCompiler {
}
Compilation ctx = new Compilation(glslVersion, shaderType);
ctx.enableExtension("GL_ARB_conservative_depth");
callback.accept(ctx);
expand(sourceComponents, ctx::appendComponent);

View File

@ -24,7 +24,7 @@ import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
public class IndirectBuffers {
public static final int BUFFER_COUNT = 4;
public static final int BUFFER_COUNT = 3;
public static final long INT_SIZE = Integer.BYTES;
public static final long PTR_SIZE = Pointer.POINTER_SIZE;
@ -45,18 +45,15 @@ public class IndirectBuffers {
private static final long OBJECT_SIZE_OFFSET = SIZE_OFFSET;
private static final long TARGET_SIZE_OFFSET = OBJECT_SIZE_OFFSET + PTR_SIZE;
private static final long BATCH_SIZE_OFFSET = TARGET_SIZE_OFFSET + PTR_SIZE;
private static final long DRAW_SIZE_OFFSET = BATCH_SIZE_OFFSET + PTR_SIZE;
private static final long DRAW_SIZE_OFFSET = TARGET_SIZE_OFFSET + PTR_SIZE;
private final MemoryBlock buffers;
private final long objectStride;
private int object;
private int target;
private int batch;
private int draw;
long objectPtr;
long batchPtr;
long drawPtr;
private int maxObjectCount = 0;
@ -72,11 +69,10 @@ public class IndirectBuffers {
void createBuffers() {
final long ptr = buffers.ptr();
nglCreateBuffers(4, ptr);
nglCreateBuffers(BUFFER_COUNT, ptr);
object = MemoryUtil.memGetInt(ptr);
target = MemoryUtil.memGetInt(ptr + 4);
batch = MemoryUtil.memGetInt(ptr + 8);
draw = MemoryUtil.memGetInt(ptr + 12);
draw = MemoryUtil.memGetInt(ptr + 8);
}
void updateCounts(int objectCount, int drawCount) {
@ -94,7 +90,6 @@ public class IndirectBuffers {
final long ptr = buffers.ptr();
MemoryUtil.memPutAddress(ptr + OBJECT_SIZE_OFFSET, objectSize);
MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, targetSize);
MemoryUtil.memPutAddress(ptr + BATCH_SIZE_OFFSET, targetSize);
MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, drawSize);
}
@ -105,38 +100,31 @@ public class IndirectBuffers {
if (maxObjectCount > 0) {
final long ptr = buffers.ptr();
nglCreateBuffers(3, ptr);
nglCreateBuffers(BUFFER_COUNT - 1, ptr);
int objectNew = MemoryUtil.memGetInt(ptr);
int targetNew = MemoryUtil.memGetInt(ptr + 4);
int batchNew = MemoryUtil.memGetInt(ptr + 8);
glNamedBufferStorage(objectNew, objectSize, PERSISTENT_BITS);
glNamedBufferStorage(targetNew, targetSize, GPU_ONLY_BITS);
glNamedBufferStorage(batchNew, targetSize, PERSISTENT_BITS);
glCopyNamedBufferSubData(object, objectNew, 0, 0, objectStride * maxObjectCount);
glCopyNamedBufferSubData(target, targetNew, 0, 0, INT_SIZE * maxObjectCount);
glCopyNamedBufferSubData(batch, batchNew, 0, 0, INT_SIZE * maxObjectCount);
glDeleteBuffers(object);
glDeleteBuffers(target);
glDeleteBuffers(batch);
object = objectNew;
target = targetNew;
batch = batchNew;
} else {
glNamedBufferStorage(object, objectSize, PERSISTENT_BITS);
glNamedBufferStorage(target, targetSize, GPU_ONLY_BITS);
glNamedBufferStorage(batch, targetSize, PERSISTENT_BITS);
}
objectPtr = nglMapNamedBufferRange(object, 0, objectSize, MAP_BITS);
batchPtr = nglMapNamedBufferRange(batch, 0, targetSize, MAP_BITS);
maxObjectCount = objectCount;
FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride + maxObjectCount * INT_SIZE);
FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride);
}
void createDrawStorage(int drawCount) {
@ -150,7 +138,7 @@ public class IndirectBuffers {
glDeleteBuffers(draw);
MemoryUtil.memPutInt(buffers.ptr() + INT_SIZE * 3, drawNew);
MemoryUtil.memPutInt(buffers.ptr() + INT_SIZE * 2, drawNew);
draw = drawNew;
drawPtr = MemoryUtil.nmemRealloc(drawPtr, drawSize);
} else {
@ -163,7 +151,7 @@ public class IndirectBuffers {
}
private void freeObjectStogare() {
FlwMemoryTracker._freeGPUMemory(maxObjectCount * objectStride + maxObjectCount * INT_SIZE);
FlwMemoryTracker._freeGPUMemory(maxObjectCount * objectStride);
}
private void freeDrawStorage() {
@ -184,10 +172,6 @@ public class IndirectBuffers {
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, 0, IndirectBuffers.BUFFER_COUNT, ptr, ptr + OFFSET_OFFSET, ptr + SIZE_OFFSET);
}
void flushBatchIDs(long length) {
glFlushMappedNamedBufferRange(batch, 0, length);
}
/**
 * Flushes the mapped range of the object buffer, starting at offset 0.
 *
 * @param length the length of the range to flush, measured from the start of the buffer
 */
void flushObjects(long length) {
    final int objectBuffer = object;
    glFlushMappedNamedBufferRange(objectBuffer, 0, length);
}

View File

@ -21,7 +21,7 @@ public class IndirectCullingGroup<I extends Instance> {
private final GlProgram compute;
private final GlProgram draw;
private final long instanceStride;
private final long objectStride;
private final IndirectBuffers buffers;
public final IndirectMeshPool meshPool;
public final IndirectDrawSet<I> drawSet = new IndirectDrawSet<>();
@ -30,9 +30,10 @@ public class IndirectCullingGroup<I extends Instance> {
private int instanceCountThisFrame;
IndirectCullingGroup(InstanceType<I> instanceType, VertexType vertexType) {
instanceStride = instanceType.getLayout()
.getStride();
buffers = new IndirectBuffers(instanceStride);
objectStride = instanceType.getLayout()
.getStride() + IndirectBuffers.INT_SIZE;
buffers = new IndirectBuffers(objectStride);
buffers.createBuffers();
buffers.createObjectStorage(128);
buffers.createDrawStorage(2);
@ -108,20 +109,17 @@ public class IndirectCullingGroup<I extends Instance> {
private void uploadInstances() {
long objectPtr = buffers.objectPtr;
long batchIDPtr = buffers.batchPtr;
for (int i = 0, batchesSize = drawSet.indirectDraws.size(); i < batchesSize; i++) {
var batch = drawSet.indirectDraws.get(i);
var instanceCount = batch.instancer()
.getInstanceCount();
batch.writeObjects(objectPtr, batchIDPtr, i);
batch.writeObjects(objectPtr, i);
objectPtr += instanceCount * instanceStride;
batchIDPtr += instanceCount * IndirectBuffers.INT_SIZE;
objectPtr += instanceCount * objectStride;
}
buffers.flushObjects(objectPtr - buffers.objectPtr);
buffers.flushBatchIDs(batchIDPtr - buffers.batchPtr);
}
private void uploadIndirectCommands() {

View File

@ -55,11 +55,11 @@ public class IndirectDraw<I extends Instance> {
needsFullWrite = true;
}
public void writeObjects(long objectPtr, long batchIDPtr, int batchID) {
public void writeObjects(long objectPtr, int batchID) {
if (needsFullWrite) {
instancer.writeFull(objectPtr, batchIDPtr, batchID);
instancer.writeFull(objectPtr, batchID);
} else {
instancer.writeSparse(objectPtr, batchIDPtr, batchID);
instancer.writeSparse(objectPtr, batchID);
}
}

View File

@ -9,40 +9,42 @@ import com.jozufozu.flywheel.backend.engine.AbstractInstancer;
public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I> {
private final long instanceStride;
private final long objectStride;
public IndirectInstancer(InstanceType<I> type) {
super(type);
this.instanceStride = type.getLayout()
.getStride();
this.objectStride = instanceStride + IndirectBuffers.INT_SIZE;
}
/**
 * Updates this instancer by delegating to {@code removeDeletedInstances()}.
 */
public void update() {
    this.removeDeletedInstances();
}
public void writeSparse(long objectPtr, long batchIDPtr, int batchID) {
public void writeSparse(long objectPtr, int batchID) {
int count = instances.size();
InstanceWriter<I> writer = type.getWriter();
for (int i = changed.nextSetBit(0); i >= 0 && i < count; i = changed.nextSetBit(i + 1)) {
// write object
writer.write(objectPtr + instanceStride * i, instances.get(i));
long ptr = objectPtr + objectStride * i;
// write batchID
MemoryUtil.memPutInt(batchIDPtr + IndirectBuffers.INT_SIZE * i, batchID);
MemoryUtil.memPutInt(ptr, batchID);
// write object
writer.write(ptr + IndirectBuffers.INT_SIZE, instances.get(i));
}
changed.clear();
}
public void writeFull(long objectPtr, long batchIDPtr, int batchID) {
public void writeFull(long objectPtr, int batchID) {
InstanceWriter<I> writer = type.getWriter();
for (I object : instances) {
// write batchID
MemoryUtil.memPutInt(objectPtr, batchID);
objectPtr += IndirectBuffers.INT_SIZE;
// write object
writer.write(objectPtr, object);
objectPtr += instanceStride;
// write batchID
MemoryUtil.memPutInt(batchIDPtr, batchID);
batchIDPtr += IndirectBuffers.INT_SIZE;
}
changed.clear();
}

View File

@ -5,7 +5,9 @@ import java.nio.ByteBuffer;
import org.lwjgl.PointerBuffer;
import org.lwjgl.opengl.GL;
import org.lwjgl.opengl.GL20C;
import org.lwjgl.opengl.GL31C;
import org.lwjgl.opengl.GLCapabilities;
import org.lwjgl.opengl.KHRShaderSubgroup;
import org.lwjgl.system.MemoryStack;
import net.minecraft.Util;
@ -19,14 +21,16 @@ import net.minecraft.Util;
public class GlCompat {
public static final boolean ALLOW_DSA = true;
public static final GLCapabilities CAPABILITIES = GL.createCapabilities();
private static final boolean amd = _decideIfWeAreAMDWindows();
private static final boolean amd = _decideIfWeAreAMD();
private static final boolean windows = _decideIfWeAreWindows();
private static final boolean supportsIndirect = _decideIfWeSupportIndirect();
public static final int SUBGROUP_SIZE = _subgroupSize();
private GlCompat() {
}
public static boolean onAMDWindows() {
return amd;
return amd && windows;
}
public static boolean supportsInstancing() {
@ -41,6 +45,14 @@ public class GlCompat {
return CAPABILITIES.OpenGL46 || (CAPABILITIES.GL_ARB_compute_shader && CAPABILITIES.GL_ARB_shader_draw_parameters && CAPABILITIES.GL_ARB_base_instance && CAPABILITIES.GL_ARB_multi_draw_indirect && CAPABILITIES.GL_ARB_direct_state_access);
}
/**
 * Determines the value exposed as {@code SUBGROUP_SIZE}.
 * <p>
 * When the capabilities report {@code GL_KHR_shader_subgroup}, the size is
 * queried via {@code GL_SUBGROUP_SIZE_KHR}. Otherwise it is guessed: 64 when
 * the {@code amd} flag is set, 32 if not.
 *
 * @return the queried or guessed subgroup size
 */
private static int _subgroupSize() {
    if (!CAPABILITIES.GL_KHR_shader_subgroup) {
        // The extension is unavailable, so the size cannot be queried; guess instead.
        if (amd) {
            return 64;
        }
        return 32;
    }
    return GL31C.glGetInteger(KHRShaderSubgroup.GL_SUBGROUP_SIZE_KHR);
}
/**
* Modified from:
* <br> <a href="https://github.com/grondag/canvas/commit/820bf754092ccaf8d0c169620c2ff575722d7d96">canvas</a>
@ -62,11 +74,11 @@ public class GlCompat {
}
}
private static boolean _decideIfWeAreAMDWindows() {
if (Util.getPlatform() != Util.OS.WINDOWS) {
return false;
}
/**
 * Reports whether the platform returned by {@code Util.getPlatform()} is
 * {@code Util.OS.WINDOWS}.
 *
 * @return {@code true} when running on Windows
 */
private static boolean _decideIfWeAreWindows() {
    var platform = Util.getPlatform();
    return platform == Util.OS.WINDOWS;
}
private static boolean _decideIfWeAreAMD() {
String vendor = GL20C.glGetString(GL20C.GL_VENDOR);
if (vendor == null) {

View File

@ -1,22 +1,22 @@
#define FLW_SUBGROUP_SIZE 32
layout(local_size_x = FLW_SUBGROUP_SIZE) in;
#include "flywheel:internal/indirect_draw_command.glsl"
struct Object {
uint batchID;
FlwPackedInstance instance;
};
// populated by instancers
layout(std430, binding = 0) restrict readonly buffer ObjectBuffer {
FlwPackedInstance objects[];
Object objects[];
};
layout(std430, binding = 1) restrict writeonly buffer TargetBuffer {
uint objectIDs[];
};
layout(std430, binding = 2) restrict readonly buffer BatchBuffer {
uint batchIDs[];
};
layout(std430, binding = 3) restrict buffer DrawCommands {
layout(std430, binding = 2) restrict buffer DrawCommands {
MeshDrawCommand drawCommands[];
};
@ -38,7 +38,7 @@ bool isVisible() {
float radius;
unpackBoundingSphere(sphere, center, radius);
FlwInstance object = _flw_unpackInstance(objects[flw_objectID]);
FlwInstance object = _flw_unpackInstance(objects[flw_objectID].instance);
flw_transformBoundingSphere(object, center, radius);
return testSphere(center, radius);
@ -51,7 +51,7 @@ void main() {
return;
}
flw_batchID = batchIDs[flw_objectID];
flw_batchID = objects[flw_objectID].batchID;
if (isVisible()) {
uint batchIndex = atomicAdd(drawCommands[flw_batchID].instanceCount, 1);

View File

@ -1,26 +1,28 @@
#include "flywheel:api/vertex.glsl"
#include "flywheel:internal/indirect_draw_command.glsl"
struct Object {
uint batchID;
FlwPackedInstance instance;
};
layout(std430, binding = 0) restrict readonly buffer ObjectBuffer {
FlwPackedInstance objects[];
Object objects[];
};
layout(std430, binding = 1) restrict readonly buffer TargetBuffer {
uint objectIDs[];
};
layout(std430, binding = 2) restrict readonly buffer BatchBuffer {
uint batchIDs[];
};
layout(std430, binding = 3) restrict readonly buffer DrawCommands {
layout(std430, binding = 2) restrict readonly buffer DrawCommands {
MeshDrawCommand drawCommands[];
};
void main() {
uint instanceIndex = objectIDs[gl_BaseInstance + gl_InstanceID];
uint batchID = batchIDs[instanceIndex];
FlwInstance i = _flw_unpackInstance(objects[instanceIndex]);
uint batchID = objects[instanceIndex].batchID;
FlwInstance i = _flw_unpackInstance(objects[instanceIndex].instance);
_flw_materialVertexID = drawCommands[batchID].vertexMaterialID;
_flw_materialFragmentID = drawCommands[batchID].fragmentMaterialID;