More indirecter

- Indirect now supports multiple meshes per model.
- Cull entire models at once and then apply the accumulated instance
  count to each draw command in a separate compute pass.
- Add utility to calculate the bounding sphere for multiple meshes.
- Inline IndirectDrawSet into IndirectCullingGroup.
- Use MemoryBlocks for draw command and model descriptor storage.
- Fix leaked draw commands.
- Add IndirectModel to track the bounding sphere and instancer-related state.
- IndirectDrawCommand now references IndirectModel instead of Instancer.
This commit is contained in:
Jozufozu 2023-12-04 00:25:20 -08:00
parent 4f8e6af3d0
commit e405c41ade
19 changed files with 627 additions and 290 deletions

View file

@ -17,31 +17,37 @@ import com.jozufozu.flywheel.gl.shader.ShaderType;
import com.jozufozu.flywheel.glsl.GLSLVersion; import com.jozufozu.flywheel.glsl.GLSLVersion;
import com.jozufozu.flywheel.glsl.ShaderSources; import com.jozufozu.flywheel.glsl.ShaderSources;
import com.jozufozu.flywheel.glsl.SourceComponent; import com.jozufozu.flywheel.glsl.SourceComponent;
import com.jozufozu.flywheel.lib.util.Unit;
import net.minecraft.resources.ResourceLocation; import net.minecraft.resources.ResourceLocation;
public class IndirectPrograms { public class IndirectPrograms {
public static IndirectPrograms instance; public static IndirectPrograms instance;
private static final Compile<InstanceType<?>> CULL = new Compile<>(); private static final Compile<InstanceType<?>> CULL = new Compile<>();
private static final Compile<Unit> APPLY = new Compile<>();
private final Map<PipelineProgramKey, GlProgram> pipeline; private final Map<PipelineProgramKey, GlProgram> pipeline;
private final Map<InstanceType<?>, GlProgram> culling; private final Map<InstanceType<?>, GlProgram> culling;
private final GlProgram apply;
public IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling) { public IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, GlProgram apply) {
this.pipeline = pipeline; this.pipeline = pipeline;
this.culling = culling; this.culling = culling;
this.apply = apply;
} }
static void reload(ShaderSources sources, ImmutableList<PipelineProgramKey> pipelineKeys, UniformComponent uniformComponent, List<SourceComponent> vertexComponents, List<SourceComponent> fragmentComponents) { static void reload(ShaderSources sources, ImmutableList<PipelineProgramKey> pipelineKeys, UniformComponent uniformComponent, List<SourceComponent> vertexComponents, List<SourceComponent> fragmentComponents) {
_delete(); _delete();
var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, pipelineKeys, uniformComponent, vertexComponents, fragmentComponents); var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, pipelineKeys, uniformComponent, vertexComponents, fragmentComponents);
var cullingCompiler = createCullingCompiler(uniformComponent, sources); var cullingCompiler = createCullingCompiler(uniformComponent, sources);
var stage2Compiler = createStage2Compiler(sources);
try { try {
var pipelineResult = pipelineCompiler.compileAndReportErrors(); var pipelineResult = pipelineCompiler.compileAndReportErrors();
var cullingResult = cullingCompiler.compileAndReportErrors(); var cullingResult = cullingCompiler.compileAndReportErrors();
var stage2Result = stage2Compiler.compileAndReportErrors();
if (pipelineResult != null && cullingResult != null) { if (pipelineResult != null && cullingResult != null && stage2Result != null) {
instance = new IndirectPrograms(pipelineResult, cullingResult); instance = new IndirectPrograms(pipelineResult, cullingResult, stage2Result.get(Unit.INSTANCE));
} }
} catch (Throwable e) { } catch (Throwable e) {
Flywheel.LOGGER.error("Failed to compile indirect programs", e); Flywheel.LOGGER.error("Failed to compile indirect programs", e);
@ -88,6 +94,16 @@ public class IndirectPrograms {
.build(); .build();
} }
/**
 * Builds the compilation harness for the "apply" compute program.
 *
 * <p>The harness has a single key, {@link Unit#INSTANCE}. Its program links one
 * GLSL 4.60 compute shader loaded from {@code Files.INDIRECT_APPLY}, with
 * {@code FLW_SUBGROUP_SIZE} defined as {@code GlCompat.SUBGROUP_SIZE}.
 *
 * @param sources the shader sources handed to the harness
 * @return the built harness
 */
private static CompilationHarness<Unit> createStage2Compiler(ShaderSources sources) {
    // The one compute shader that makes up the apply program.
    var applyShader = APPLY.shader(GLSLVersion.V460, ShaderType.COMPUTE)
            .define("FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE)
            .withResource(Files.INDIRECT_APPLY);

    var applyProgram = APPLY.program()
            .link(applyShader);

    return APPLY.harness(sources)
            .keys(ImmutableList.of(Unit.INSTANCE))
            .compiler(applyProgram)
            .build();
}
public GlProgram getIndirectProgram(InstanceType<?> instanceType, Context contextShader) { public GlProgram getIndirectProgram(InstanceType<?> instanceType, Context contextShader) {
return pipeline.get(new PipelineProgramKey(instanceType, contextShader)); return pipeline.get(new PipelineProgramKey(instanceType, contextShader));
} }
@ -96,14 +112,20 @@ public class IndirectPrograms {
return culling.get(instanceType); return culling.get(instanceType);
} }
/**
 * Returns the program held in the {@code apply} field.
 *
 * @return the apply program
 */
public GlProgram getApplyProgram() {
    final GlProgram applyProgram = this.apply;
    return applyProgram;
}
public void delete() { public void delete() {
pipeline.values() pipeline.values()
.forEach(GlProgram::delete); .forEach(GlProgram::delete);
culling.values() culling.values()
.forEach(GlProgram::delete); .forEach(GlProgram::delete);
apply.delete();
} }
private static final class Files { private static final class Files {
public static final ResourceLocation INDIRECT_CULL = Flywheel.rl("internal/indirect/cull.glsl"); public static final ResourceLocation INDIRECT_CULL = Flywheel.rl("internal/indirect/cull.glsl");
public static final ResourceLocation INDIRECT_APPLY = Flywheel.rl("internal/indirect/apply.glsl");
} }
} }

View file

@ -24,31 +24,41 @@ import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
import com.jozufozu.flywheel.lib.memory.MemoryBlock; import com.jozufozu.flywheel.lib.memory.MemoryBlock;
public class IndirectBuffers { public class IndirectBuffers {
public static final int BUFFER_COUNT = 3; // Number of vbos created.
public static final int BUFFER_COUNT = 4;
public static final long INT_SIZE = Integer.BYTES; public static final long INT_SIZE = Integer.BYTES;
public static final long PTR_SIZE = Pointer.POINTER_SIZE; public static final long PTR_SIZE = Pointer.POINTER_SIZE;
// DRAW COMMAND // Byte size of a draw command, plus our added mesh data.
public static final long DRAW_COMMAND_STRIDE = 52; public static final long DRAW_COMMAND_STRIDE = 40;
public static final long DRAW_COMMAND_OFFSET = 0; public static final long DRAW_COMMAND_OFFSET = 0;
public static final long MODEL_STRIDE = 24;
// BITS // BITS
private static final int SUB_DATA_BITS = GL_DYNAMIC_STORAGE_BIT; private static final int SUB_DATA_BITS = GL_DYNAMIC_STORAGE_BIT;
private static final int PERSISTENT_BITS = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT; private static final int PERSISTENT_BITS = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT;
private static final int MAP_BITS = PERSISTENT_BITS | GL_MAP_FLUSH_EXPLICIT_BIT; private static final int MAP_BITS = PERSISTENT_BITS | GL_MAP_FLUSH_EXPLICIT_BIT;
private static final int GPU_ONLY_BITS = 0; private static final int GPU_ONLY_BITS = 0;
// OFFSETS // Offsets to the vbos
private static final long OBJECT_OFFSET = 0; private static final long VBO_OFFSET = 0;
private static final long OBJECT_OFFSET = VBO_OFFSET;
private static final long TARGET_OFFSET = INT_SIZE; private static final long TARGET_OFFSET = INT_SIZE;
private static final long DRAW_OFFSET = INT_SIZE * 2; private static final long MODEL_OFFSET = INT_SIZE * 2;
private static final long DRAW_OFFSET = INT_SIZE * 3;
// Offsets to the 3 segments
private static final long OFFSET_OFFSET = BUFFER_COUNT * INT_SIZE; private static final long OFFSET_OFFSET = BUFFER_COUNT * INT_SIZE;
private static final long SIZE_OFFSET = OFFSET_OFFSET + BUFFER_COUNT * PTR_SIZE; private static final long SIZE_OFFSET = OFFSET_OFFSET + BUFFER_COUNT * PTR_SIZE;
private static final long OBJECT_SIZE_OFFSET = SIZE_OFFSET;
private static final long TARGET_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE;
private static final long MODEL_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 2;
private static final long DRAW_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 3;
// Total size of the buffer.
private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE; private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE;
private static final long OBJECT_SIZE_OFFSET = SIZE_OFFSET;
private static final long TARGET_SIZE_OFFSET = OBJECT_SIZE_OFFSET + PTR_SIZE;
private static final long DRAW_SIZE_OFFSET = TARGET_SIZE_OFFSET + PTR_SIZE;
/** /**
* A small block of memory divided into 3 contiguous segments: * A small block of memory divided into 3 contiguous segments:
@ -60,21 +70,25 @@ public class IndirectBuffers {
* {@code sizes}: an array of {@link IndirectBuffers#PTR_SIZE} byte lengths of the buffers. * {@code sizes}: an array of {@link IndirectBuffers#PTR_SIZE} byte lengths of the buffers.
* <br> * <br>
* Each segment stores {@link IndirectBuffers#BUFFER_COUNT} elements, * Each segment stores {@link IndirectBuffers#BUFFER_COUNT} elements,
* one for the object buffer, one for the target buffer, and one for the draw buffer. * one for the object buffer, target buffer, model buffer, and draw buffer.
*/ */
private final MemoryBlock buffers; private final MemoryBlock buffers;
private final long objectStride; private final long objectStride;
private int object; private int object;
private int target; private int target;
private int model;
private int draw; private int draw;
long objectPtr; long objectPtr;
long drawPtr; MemoryBlock modelPtr;
MemoryBlock drawPtr;
private int maxObjectCount = 0; private int maxObjectCount = 0;
private int maxModelCount = 0;
private int maxDrawCount = 0; private int maxDrawCount = 0;
private static final float OBJECT_GROWTH_FACTOR = 2f; private static final float OBJECT_GROWTH_FACTOR = 1.25f;
private static final float MODEL_GROWTH_FACTOR = 2f;
private static final float DRAW_GROWTH_FACTOR = 2f; private static final float DRAW_GROWTH_FACTOR = 2f;
IndirectBuffers(long objectStride) { IndirectBuffers(long objectStride) {
@ -87,25 +101,26 @@ public class IndirectBuffers {
nglCreateBuffers(BUFFER_COUNT, ptr); nglCreateBuffers(BUFFER_COUNT, ptr);
object = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET); object = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET);
target = MemoryUtil.memGetInt(ptr + TARGET_OFFSET); target = MemoryUtil.memGetInt(ptr + TARGET_OFFSET);
model = MemoryUtil.memGetInt(ptr + MODEL_OFFSET);
draw = MemoryUtil.memGetInt(ptr + DRAW_OFFSET); draw = MemoryUtil.memGetInt(ptr + DRAW_OFFSET);
} }
void updateCounts(int objectCount, int drawCount) { void updateCounts(int objectCount, int drawCount, int modelCount) {
if (objectCount > maxObjectCount) { if (objectCount > maxObjectCount) {
createObjectStorage((int) (objectCount * OBJECT_GROWTH_FACTOR)); createObjectStorage((int) (objectCount * OBJECT_GROWTH_FACTOR));
} }
if (modelCount > maxModelCount) {
createModelStorage((int) (modelCount * MODEL_GROWTH_FACTOR));
}
if (drawCount > maxDrawCount) { if (drawCount > maxDrawCount) {
createDrawStorage((int) (drawCount * DRAW_GROWTH_FACTOR)); createDrawStorage((int) (drawCount * DRAW_GROWTH_FACTOR));
} }
final long objectSize = objectStride * objectCount;
final long targetSize = INT_SIZE * objectCount;
final long drawSize = DRAW_COMMAND_STRIDE * drawCount;
final long ptr = buffers.ptr(); final long ptr = buffers.ptr();
MemoryUtil.memPutAddress(ptr + OBJECT_SIZE_OFFSET, objectSize); MemoryUtil.memPutAddress(ptr + OBJECT_SIZE_OFFSET, objectStride * objectCount);
MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, targetSize); MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * objectCount);
MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, drawSize); MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount);
MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount);
} }
void createObjectStorage(int objectCount) { void createObjectStorage(int objectCount) {
@ -115,7 +130,7 @@ public class IndirectBuffers {
if (maxObjectCount > 0) { if (maxObjectCount > 0) {
final long ptr = buffers.ptr(); final long ptr = buffers.ptr();
nglCreateBuffers(BUFFER_COUNT - 1, ptr); nglCreateBuffers(2, ptr);
int objectNew = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET); int objectNew = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET);
int targetNew = MemoryUtil.memGetInt(ptr + TARGET_OFFSET); int targetNew = MemoryUtil.memGetInt(ptr + TARGET_OFFSET);
@ -142,6 +157,28 @@ public class IndirectBuffers {
FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride); FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride);
} }
/**
 * (Re)creates the GPU buffer storage and the CPU-side staging block for model descriptors.
 *
 * <p>On the first call ({@code maxModelCount == 0}) storage is attached to the existing
 * {@code model} buffer and a new staging block is allocated. On later calls a new buffer
 * is created and given storage, the old buffer is deleted, the new name is recorded both in
 * {@code buffers} and in the {@code model} field, and the staging block is reallocated.
 * This method does not copy the old GPU buffer's contents into the new one.
 *
 * @param modelCount number of model descriptors to reserve space for
 */
void createModelStorage(int modelCount) {
    // Un-track the previous allocation before maxModelCount is overwritten below.
    freeModelStorage();

    final long byteSize = MODEL_STRIDE * modelCount;

    if (maxModelCount <= 0) {
        // First allocation: the buffer name already exists and has no storage yet.
        glNamedBufferStorage(model, byteSize, SUB_DATA_BITS);
        modelPtr = MemoryBlock.malloc(byteSize);
    } else {
        // Growing: swap in a fresh buffer and drop the old one.
        final int replacement = glCreateBuffers();

        glNamedBufferStorage(replacement, byteSize, SUB_DATA_BITS);

        glDeleteBuffers(model);

        MemoryUtil.memPutInt(buffers.ptr() + MODEL_OFFSET, replacement);
        model = replacement;
        modelPtr = modelPtr.realloc(byteSize);
    }

    maxModelCount = modelCount;
    FlwMemoryTracker._allocGPUMemory(maxModelCount * MODEL_STRIDE);
}
void createDrawStorage(int drawCount) { void createDrawStorage(int drawCount) {
freeDrawStorage(); freeDrawStorage();
@ -155,11 +192,10 @@ public class IndirectBuffers {
MemoryUtil.memPutInt(buffers.ptr() + DRAW_OFFSET, drawNew); MemoryUtil.memPutInt(buffers.ptr() + DRAW_OFFSET, drawNew);
draw = drawNew; draw = drawNew;
drawPtr = MemoryUtil.nmemRealloc(drawPtr, drawSize); drawPtr = drawPtr.realloc(drawSize);
} else { } else {
glNamedBufferStorage(draw, drawSize, SUB_DATA_BITS); glNamedBufferStorage(draw, drawSize, SUB_DATA_BITS);
drawPtr = MemoryUtil.nmemAlloc(drawSize); drawPtr = MemoryBlock.malloc(drawSize);
} }
maxDrawCount = drawCount; maxDrawCount = drawCount;
FlwMemoryTracker._allocGPUMemory(maxDrawCount * DRAW_COMMAND_STRIDE); FlwMemoryTracker._allocGPUMemory(maxDrawCount * DRAW_COMMAND_STRIDE);
@ -169,6 +205,10 @@ public class IndirectBuffers {
FlwMemoryTracker._freeGPUMemory(maxObjectCount * objectStride); FlwMemoryTracker._freeGPUMemory(maxObjectCount * objectStride);
} }
/**
 * Reports the currently tracked model storage size
 * ({@code maxModelCount * MODEL_STRIDE} bytes) to {@link FlwMemoryTracker} as freed.
 * Only the tracker is updated here; no GL call is made by this method.
 */
private void freeModelStorage() {
    final long trackedBytes = maxModelCount * MODEL_STRIDE;
    FlwMemoryTracker._freeGPUMemory(trackedBytes);
}
private void freeDrawStorage() { private void freeDrawStorage() {
FlwMemoryTracker._freeGPUMemory(maxDrawCount * DRAW_COMMAND_STRIDE); FlwMemoryTracker._freeGPUMemory(maxDrawCount * DRAW_COMMAND_STRIDE);
} }
@ -191,15 +231,25 @@ public class IndirectBuffers {
glFlushMappedNamedBufferRange(object, 0, length); glFlushMappedNamedBufferRange(object, 0, length);
} }
/**
 * Uploads the first {@code length} bytes of the CPU-side model block into the
 * {@code model} buffer, starting at offset 0.
 *
 * @param length number of bytes to upload
 */
void flushModels(long length) {
    final long sourceAddress = modelPtr.ptr();
    nglNamedBufferSubData(model, 0, length, sourceAddress);
}
void flushDrawCommands(long length) { void flushDrawCommands(long length) {
nglNamedBufferSubData(draw, 0, length, drawPtr); nglNamedBufferSubData(draw, 0, length, drawPtr.ptr());
// glFlushMappedNamedBufferRange(this.draw, 0, length);
} }
public void delete() { public void delete() {
nglDeleteBuffers(BUFFER_COUNT, buffers.ptr()); nglDeleteBuffers(BUFFER_COUNT, buffers.ptr());
buffers.free(); buffers.free();
if (modelPtr != null) {
modelPtr.free();
}
if (drawPtr != null) {
drawPtr.free();
}
freeObjectStorage(); freeObjectStorage();
freeModelStorage();
freeDrawStorage(); freeDrawStorage();
} }
} }

View file

@ -1,32 +1,49 @@
package com.jozufozu.flywheel.backend.engine.indirect; package com.jozufozu.flywheel.backend.engine.indirect;
import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
import static org.lwjgl.opengl.GL30.glUniform1ui;
import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT; import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT;
import static org.lwjgl.opengl.GL42.glMemoryBarrier; import static org.lwjgl.opengl.GL42.glMemoryBarrier;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT; import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
import static org.lwjgl.opengl.GL43.glDispatchCompute; import static org.lwjgl.opengl.GL43.glDispatchCompute;
import static org.lwjgl.opengl.GL43.glMultiDrawElementsIndirect;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.jozufozu.flywheel.api.event.RenderStage; import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance; import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.instance.InstanceType; import com.jozufozu.flywheel.api.instance.InstanceType;
import com.jozufozu.flywheel.api.material.Material; import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.model.Mesh; import com.jozufozu.flywheel.api.model.Mesh;
import com.jozufozu.flywheel.api.model.Model;
import com.jozufozu.flywheel.backend.MaterialUtil;
import com.jozufozu.flywheel.backend.compile.IndirectPrograms; import com.jozufozu.flywheel.backend.compile.IndirectPrograms;
import com.jozufozu.flywheel.backend.engine.UniformBuffer; import com.jozufozu.flywheel.backend.engine.UniformBuffer;
import com.jozufozu.flywheel.gl.GlCompat;
import com.jozufozu.flywheel.gl.shader.GlProgram; import com.jozufozu.flywheel.gl.shader.GlProgram;
import com.jozufozu.flywheel.lib.context.Contexts; import com.jozufozu.flywheel.lib.context.Contexts;
import com.jozufozu.flywheel.lib.model.ModelUtil;
public class IndirectCullingGroup<I extends Instance> { public class IndirectCullingGroup<I extends Instance> {
private static final int BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT; private static final int DRAW_BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT;
private final GlProgram compute; private final GlProgram cull;
private final GlProgram draw; private final GlProgram draw;
private final long objectStride; private final long objectStride;
private final IndirectBuffers buffers; private final IndirectBuffers buffers;
public final IndirectMeshPool meshPool; public final IndirectMeshPool meshPool;
public final IndirectDrawSet<I> drawSet = new IndirectDrawSet<>(); private final List<IndirectModel> indirectModels = new ArrayList<>();
private boolean hasCulledThisFrame; private final List<IndirectDraw> indirectDraws = new ArrayList<>();
private boolean needsMemoryBarrier; private final Map<RenderStage, List<MultiDraw>> multiDraws = new EnumMap<>(RenderStage.class);
private boolean needsDrawBarrier;
private boolean needsSortDraws;
private int instanceCountThisFrame; private int instanceCountThisFrame;
private final GlProgram apply;
IndirectCullingGroup(InstanceType<I> instanceType) { IndirectCullingGroup(InstanceType<I> instanceType) {
objectStride = instanceType.getLayout() objectStride = instanceType.getLayout()
@ -34,59 +51,104 @@ public class IndirectCullingGroup<I extends Instance> {
buffers = new IndirectBuffers(objectStride); buffers = new IndirectBuffers(objectStride);
buffers.createBuffers(); buffers.createBuffers();
buffers.createObjectStorage(128);
buffers.createDrawStorage(2);
meshPool = new IndirectMeshPool(); meshPool = new IndirectMeshPool();
var indirectPrograms = IndirectPrograms.get(); var indirectPrograms = IndirectPrograms.get();
compute = indirectPrograms.getCullingProgram(instanceType); cull = indirectPrograms.getCullingProgram(instanceType);
apply = indirectPrograms.getApplyProgram();
draw = indirectPrograms.getIndirectProgram(instanceType, Contexts.WORLD); draw = indirectPrograms.getIndirectProgram(instanceType, Contexts.WORLD);
} }
public void add(IndirectInstancer<I> instancer, RenderStage stage, Material material, Mesh mesh) { public void add(IndirectInstancer<I> instancer, RenderStage stage, Model model) {
drawSet.add(instancer, material, stage, meshPool.alloc(mesh)); var meshes = model.getMeshes();
var boundingSphere = ModelUtil.computeBoundingSphere(meshes.values());
int modelID = indirectModels.size();
var indirectModel = new IndirectModel(instancer, modelID, boundingSphere);
indirectModels.add(indirectModel);
for (Map.Entry<Material, Mesh> materialMeshEntry : meshes.entrySet()) {
IndirectMeshPool.BufferedMesh bufferedMesh = meshPool.alloc(materialMeshEntry.getValue());
indirectDraws.add(new IndirectDraw(indirectModel, materialMeshEntry.getKey(), bufferedMesh, stage));
} }
public void beginFrame() { needsSortDraws = true;
hasCulledThisFrame = false; }
needsMemoryBarrier = true;
/**
 * Rebuilds {@code multiDraws} from {@code indirectDraws}.
 *
 * <p>The draws are sorted by stage and then by material (using
 * {@code MaterialUtil.BY_STATE}). Each maximal run of consecutive draws that share a
 * stage and have {@code equals} materials becomes one {@code MultiDraw} covering the
 * half-open index range {@code [start, end)}.
 */
private void sortDraws() {
    multiDraws.clear();

    indirectDraws.sort(Comparator.comparing(IndirectDraw::stage)
            .thenComparing(IndirectDraw::material, MaterialUtil.BY_STATE));

    int runStart = 0;
    for (int index = 0; index < indirectDraws.size(); index++) {
        var current = indirectDraws.get(index);
        var runMaterial = current.material();
        var runStage = current.stage();
        int next = index + 1;

        // The run continues only if there is a next draw with the same stage and material.
        boolean continuesRun = next < indirectDraws.size()
                && runStage == indirectDraws.get(next).stage()
                && runMaterial.equals(indirectDraws.get(next).material());
        if (continuesRun) {
            continue;
        }

        multiDraws.computeIfAbsent(runStage, ignored -> new ArrayList<>())
                .add(new MultiDraw(runMaterial, runStart, next));
        runStart = next;
    }
}
public void flush() {
needsDrawBarrier = true;
instanceCountThisFrame = calculateTotalInstanceCountAndPrepareBatches(); instanceCountThisFrame = calculateTotalInstanceCountAndPrepareBatches();
if (nothingToDo()) {
return;
}
buffers.updateCounts(instanceCountThisFrame, indirectDraws.size(), indirectModels.size());
if (needsSortDraws) {
sortDraws();
needsSortDraws = false;
}
meshPool.flush();
uploadInstances();
uploadModels();
uploadIndirectCommands();
}
/**
 * Dispatches the culling compute program for this group.
 *
 * <p>Does nothing when {@link #nothingToDo()} is true. Otherwise it syncs and binds the
 * uniform buffer for the {@code cull} program, binds the buffers for compute, and
 * dispatches enough work groups to cover {@code instanceCountThisFrame} threads.
 */
public void dispatchCull() {
    if (!nothingToDo()) {
        UniformBuffer.syncAndBind(cull);
        buffers.bindForCompute();

        final int groupCount = getGroupCount(instanceCountThisFrame);
        glDispatchCompute(groupCount, 1, 1);
    }
}
/**
 * Dispatches the "apply" compute program for this group.
 *
 * <p>Does nothing when {@link #nothingToDo()} is true. Otherwise it binds the
 * {@code apply} program and the compute buffers, issues a shader-storage memory barrier,
 * and dispatches enough work groups to cover one thread per entry of {@code indirectDraws}.
 */
public void dispatchApply() {
    if (!nothingToDo()) {
        apply.bind();
        buffers.bindForCompute();

        // NOTE(review): presumably makes the cull pass's storage writes visible
        // to this dispatch - confirm against the shaders.
        glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

        final int groupCount = getGroupCount(indirectDraws.size());
        glDispatchCompute(groupCount, 1, 1);
    }
}
/**
 * Tells whether there is no work for this group.
 *
 * @return {@code true} if there are no indirect draws, or if
 *         {@code instanceCountThisFrame} is zero
 */
private boolean nothingToDo() {
    if (indirectDraws.isEmpty()) {
        return true;
    }
    return instanceCountThisFrame == 0;
}
private boolean nothingToDo(RenderStage stage) {
return nothingToDo() || !multiDraws.containsKey(stage);
} }
public void submit(RenderStage stage) { public void submit(RenderStage stage) {
if (drawSet.isEmpty()) { if (nothingToDo(stage)) {
return;
}
if (instanceCountThisFrame == 0) {
return;
}
cull();
dispatchDraw(stage);
}
private void cull() {
if (hasCulledThisFrame) {
return;
}
buffers.updateCounts(instanceCountThisFrame, drawSet.size());
meshPool.flush();
uploadInstances();
uploadIndirectCommands();
UniformBuffer.syncAndBind(compute);
buffers.bindForCompute();
var groupCount = (instanceCountThisFrame + 31) >> 5; // ceil(instanceCount / 32)
glDispatchCompute(groupCount, 1, 1);
hasCulledThisFrame = true;
}
private void dispatchDraw(RenderStage stage) {
if (!drawSet.contains(stage)) {
return; return;
} }
@ -94,26 +156,30 @@ public class IndirectCullingGroup<I extends Instance> {
meshPool.bindForDraw(); meshPool.bindForDraw();
buffers.bindForDraw(); buffers.bindForDraw();
memoryBarrier(); drawBarrier();
drawSet.submit(stage); var flwBaseDraw = draw.getUniformLocation("_flw_baseDraw");
for (var multiDraw : multiDraws.get(stage)) {
glUniform1ui(flwBaseDraw, multiDraw.start);
multiDraw.submit();
}
MaterialUtil.reset();
} }
private void memoryBarrier() { private void drawBarrier() {
if (needsMemoryBarrier) { if (needsDrawBarrier) {
glMemoryBarrier(BARRIER_BITS); glMemoryBarrier(DRAW_BARRIER_BITS);
needsMemoryBarrier = false; needsDrawBarrier = false;
} }
} }
private void uploadInstances() { private void uploadInstances() {
long objectPtr = buffers.objectPtr; long objectPtr = buffers.objectPtr;
for (int i = 0, batchesSize = drawSet.indirectDraws.size(); i < batchesSize; i++) { for (IndirectModel batch : indirectModels) {
var batch = drawSet.indirectDraws.get(i); var instanceCount = batch.instancer.getInstanceCount();
var instanceCount = batch.instancer() batch.writeObjects(objectPtr);
.getInstanceCount();
batch.writeObjects(objectPtr, i);
objectPtr += instanceCount * objectStride; objectPtr += instanceCount * objectStride;
} }
@ -121,20 +187,29 @@ public class IndirectCullingGroup<I extends Instance> {
buffers.flushObjects(objectPtr - buffers.objectPtr); buffers.flushObjects(objectPtr - buffers.objectPtr);
} }
/**
 * Writes every model in {@code indirectModels} into the CPU-side model block, one
 * {@code IndirectBuffers.MODEL_STRIDE}-byte slot per model in list order, then flushes
 * the written byte range through {@code buffers.flushModels}.
 */
private void uploadModels() {
    final long base = buffers.modelPtr.ptr();
    long cursor = base;

    for (int i = 0, count = indirectModels.size(); i < count; i++) {
        indirectModels.get(i)
                .writeModel(cursor);
        cursor += IndirectBuffers.MODEL_STRIDE;
    }

    // cursor - base is the total number of bytes written above.
    buffers.flushModels(cursor - base);
}
private void uploadIndirectCommands() { private void uploadIndirectCommands() {
long writePtr = buffers.drawPtr; long writePtr = buffers.drawPtr.ptr();
for (var batch : drawSet.indirectDraws) { for (var batch : indirectDraws) {
batch.writeIndirectCommand(writePtr); batch.writeIndirectCommand(writePtr);
writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE; writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE;
} }
buffers.flushDrawCommands(writePtr - buffers.drawPtr); buffers.flushDrawCommands(writePtr - buffers.drawPtr.ptr());
} }
private int calculateTotalInstanceCountAndPrepareBatches() { private int calculateTotalInstanceCountAndPrepareBatches() {
int baseInstance = 0; int baseInstance = 0;
for (var batch : drawSet.indirectDraws) { for (var batch : indirectModels) {
batch.prepare(baseInstance); batch.prepare(baseInstance);
baseInstance += batch.instancer().getInstanceCount(); baseInstance += batch.instancer.getInstanceCount();
} }
return baseInstance; return baseInstance;
} }
@ -145,6 +220,21 @@ public class IndirectCullingGroup<I extends Instance> {
} }
public boolean hasStage(RenderStage stage) { public boolean hasStage(RenderStage stage) {
return drawSet.contains(stage); return multiDraws.containsKey(stage);
}
/**
 * Computes how many work groups are needed to cover {@code threadCount} threads.
 *
 * <p>Groups are taken to be 64 threads wide when {@code GlCompat.amd} is set and
 * 32 threads wide otherwise; the result is the thread count divided by that width,
 * rounded up.
 *
 * @param threadCount number of threads to cover
 * @return the number of work groups
 */
private static int getGroupCount(int threadCount) {
    // log2 of the group width: 6 -> 64 threads, 5 -> 32 threads.
    final int shift = GlCompat.amd ? 6 : 5;
    final int roundUp = (1 << shift) - 1;
    return (threadCount + roundUp) >> shift;
}
private record MultiDraw(Material material, int start, int end) {
void submit() {
MaterialUtil.setup(material);
glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, start * IndirectBuffers.DRAW_COMMAND_STRIDE, end - start, (int) IndirectBuffers.DRAW_COMMAND_STRIDE);
}
} }
} }

View file

@ -3,13 +3,12 @@ package com.jozufozu.flywheel.backend.engine.indirect;
import org.lwjgl.system.MemoryUtil; import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.event.RenderStage; import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.material.Material; import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.backend.MaterialUtil; import com.jozufozu.flywheel.backend.MaterialUtil;
import com.jozufozu.flywheel.backend.ShaderIndices; import com.jozufozu.flywheel.backend.ShaderIndices;
public class IndirectDraw<I extends Instance> { public class IndirectDraw {
private final IndirectInstancer<I> instancer; private final IndirectModel model;
private final IndirectMeshPool.BufferedMesh mesh; private final IndirectMeshPool.BufferedMesh mesh;
private final Material material; private final Material material;
private final RenderStage stage; private final RenderStage stage;
@ -19,11 +18,8 @@ public class IndirectDraw<I extends Instance> {
private final int packedFogAndCutout; private final int packedFogAndCutout;
private final int packedMaterialProperties; private final int packedMaterialProperties;
private int baseInstance = -1; public IndirectDraw(IndirectModel model, Material material, IndirectMeshPool.BufferedMesh mesh, RenderStage stage) {
private boolean needsFullWrite = true; this.model = model;
public IndirectDraw(IndirectInstancer<I> instancer, Material material, IndirectMeshPool.BufferedMesh mesh, RenderStage stage) {
this.instancer = instancer;
this.material = material; this.material = material;
this.mesh = mesh; this.mesh = mesh;
this.stage = stage; this.stage = stage;
@ -34,10 +30,6 @@ public class IndirectDraw<I extends Instance> {
this.packedMaterialProperties = MaterialUtil.packProperties(material); this.packedMaterialProperties = MaterialUtil.packProperties(material);
} }
public IndirectInstancer<I> instancer() {
return instancer;
}
public Material material() { public Material material() {
return material; return material;
} }
@ -50,37 +42,17 @@ public class IndirectDraw<I extends Instance> {
return stage; return stage;
} }
public void prepare(int baseInstance) {
instancer.update();
if (baseInstance == this.baseInstance) {
needsFullWrite = false;
return;
}
this.baseInstance = baseInstance;
needsFullWrite = true;
}
public void writeObjects(long objectPtr, int batchID) {
if (needsFullWrite) {
instancer.writeFull(objectPtr, batchID);
} else {
instancer.writeSparse(objectPtr, batchID);
}
}
public void writeIndirectCommand(long ptr) { public void writeIndirectCommand(long ptr) {
var boundingSphere = mesh.boundingSphere();
MemoryUtil.memPutInt(ptr, mesh.indexCount()); // count MemoryUtil.memPutInt(ptr, mesh.indexCount()); // count
MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount - to be incremented by the compute shader MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount
MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex); // firstIndex MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex); // firstIndex
MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex); // baseVertex MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex); // baseVertex
MemoryUtil.memPutInt(ptr + 16, baseInstance); // baseInstance MemoryUtil.memPutInt(ptr + 16, model.baseInstance); // baseInstance
boundingSphere.getToAddress(ptr + 20); // boundingSphere MemoryUtil.memPutInt(ptr + 20, model.id); // modelID
MemoryUtil.memPutInt(ptr + 36, vertexMaterialID); // vertexMaterialID MemoryUtil.memPutInt(ptr + 24, vertexMaterialID); // vertexMaterialID
MemoryUtil.memPutInt(ptr + 40, fragmentMaterialID); // fragmentMaterialID MemoryUtil.memPutInt(ptr + 28, fragmentMaterialID); // fragmentMaterialID
MemoryUtil.memPutInt(ptr + 44, packedFogAndCutout); // packedFogAndCutout MemoryUtil.memPutInt(ptr + 32, packedFogAndCutout); // packedFogAndCutout
MemoryUtil.memPutInt(ptr + 48, packedMaterialProperties); // packedMaterialProperties MemoryUtil.memPutInt(ptr + 36, packedMaterialProperties); // packedMaterialProperties
} }
} }

View file

@ -20,17 +20,9 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
@Override @Override
protected <I extends Instance> void add(InstancerKey<I> key, IndirectInstancer<?> instancer, Model model, RenderStage stage) { protected <I extends Instance> void add(InstancerKey<I> key, IndirectInstancer<?> instancer, Model model, RenderStage stage) {
var meshes = model.getMeshes();
for (var entry : meshes.entrySet()) {
var material = entry.getKey();
var mesh = entry.getValue();
var indirectList = (IndirectCullingGroup<I>) renderLists.computeIfAbsent(key.type(), IndirectCullingGroup::new); var indirectList = (IndirectCullingGroup<I>) renderLists.computeIfAbsent(key.type(), IndirectCullingGroup::new);
indirectList.add((IndirectInstancer<I>) instancer, stage, material, mesh); indirectList.add((IndirectInstancer<I>) instancer, stage, model);
break; // TODO: support multiple meshes per model
}
} }
public boolean hasStage(RenderStage stage) { public boolean hasStage(RenderStage stage) {
@ -46,8 +38,16 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
public void flush() { public void flush() {
super.flush(); super.flush();
for (IndirectCullingGroup<?> value : renderLists.values()) { for (var group : renderLists.values()) {
value.beginFrame(); group.flush();
}
for (var group : renderLists.values()) {
group.dispatchCull();
}
for (var group : renderLists.values()) {
group.dispatchApply();
} }
} }

View file

@ -1,80 +0,0 @@
package com.jozufozu.flywheel.backend.engine.indirect;
import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
import static org.lwjgl.opengl.GL43.glMultiDrawElementsIndirect;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.backend.MaterialUtil;
/**
 * Holds every {@link IndirectDraw} of one group and partitions them into
 * per-stage multi-draw ranges that can each be submitted with a single
 * {@code glMultiDrawElementsIndirect} call.
 *
 * @param <I> the instance type drawn by this set
 */
public class IndirectDrawSet<I extends Instance> {
    /** All draws, kept sorted by stage and then by material state. */
    final List<IndirectDraw<I>> indirectDraws = new ArrayList<>();
    /** Contiguous ranges of {@link #indirectDraws} sharing a stage and material, keyed by stage. */
    final Map<RenderStage, List<MultiDraw>> multiDraws = new EnumMap<>(RenderStage.class);

    /** @return {@code true} when no draws have been added */
    public boolean isEmpty() {
        return indirectDraws.isEmpty();
    }

    /** @return the number of individual draws in this set */
    public int size() {
        return indirectDraws.size();
    }

    /**
     * Registers a new draw and rebuilds the multi-draw ranges.
     */
    public void add(IndirectInstancer<I> instancer, Material material, RenderStage stage, IndirectMeshPool.BufferedMesh bufferedMesh) {
        var draw = new IndirectDraw<>(instancer, material, bufferedMesh, stage);
        indirectDraws.add(draw);
        determineMultiDraws();
    }

    /**
     * Issues every multi-draw recorded for {@code stage}, then resets material state.
     * Does nothing when the stage has no draws.
     */
    public void submit(RenderStage stage) {
        var stageDraws = multiDraws.get(stage);
        if (stageDraws == null) {
            return;
        }

        for (var multiDraw : stageDraws) {
            multiDraw.submit();
        }

        MaterialUtil.reset();
    }

    /**
     * Re-sorts the draws by stage and material, then groups each run of
     * consecutive draws with the same stage and material into one {@link MultiDraw}.
     */
    public void determineMultiDraws() {
        multiDraws.clear();

        // sort by stage, then material
        indirectDraws.sort(Comparator.comparing(IndirectDraw<I>::stage)
                .thenComparing(IndirectDraw::material, MaterialUtil.BY_STATE));

        int drawCount = indirectDraws.size();
        int runStart = 0;
        for (int index = 0; index < drawCount; index++) {
            var current = indirectDraws.get(index);
            var material = current.material();
            var stage = current.stage();

            // A run ends at the last draw, or when the following draw differs in stage or material.
            boolean endsRun;
            if (index == drawCount - 1) {
                endsRun = true;
            } else if (stage != indirectDraws.get(index + 1).stage()) {
                endsRun = true;
            } else {
                endsRun = !material.equals(indirectDraws.get(index + 1).material());
            }

            if (!endsRun) {
                continue;
            }

            multiDraws.computeIfAbsent(stage, s -> new ArrayList<>())
                    .add(new MultiDraw(material, runStart, index + 1));
            runStart = index + 1;
        }
    }

    /** @return {@code true} when at least one multi-draw exists for {@code stage} */
    public boolean contains(RenderStage stage) {
        return multiDraws.containsKey(stage);
    }

    /**
     * A half-open range {@code [start, end)} of draw commands that share one material.
     */
    private record MultiDraw(Material material, int start, int end) {
        /** Binds the material state and submits the whole range in one indirect multi-draw. */
        void submit() {
            MaterialUtil.setup(material);
            var byteOffset = start * IndirectBuffers.DRAW_COMMAND_STRIDE;
            int drawCount = end - start;
            glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, byteOffset, drawCount, (int) IndirectBuffers.DRAW_COMMAND_STRIDE);
        }
    }
}

View file

@ -35,11 +35,11 @@ public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I>
changed.clear(); changed.clear();
} }
public void writeFull(long objectPtr, int batchID) { public void writeFull(long objectPtr, int modelID) {
InstanceWriter<I> writer = type.getWriter(); InstanceWriter<I> writer = type.getWriter();
for (I object : instances) { for (I object : instances) {
// write batchID // write modelID
MemoryUtil.memPutInt(objectPtr, batchID); MemoryUtil.memPutInt(objectPtr, modelID);
objectPtr += IndirectBuffers.INT_SIZE; objectPtr += IndirectBuffers.INT_SIZE;
// write object // write object

View file

@ -0,0 +1,45 @@
package com.jozufozu.flywheel.backend.engine.indirect;
import org.joml.Vector4f;
import org.joml.Vector4fc;
import org.lwjgl.system.MemoryUtil;
/**
 * Pairs an {@link IndirectInstancer} with the numeric id, base instance and
 * bounding sphere that are written into the model descriptor buffer.
 */
public class IndirectModel {
    public final IndirectInstancer<?> instancer;
    public final int id;

    /** Index of this model's first object; {@code -1} until {@link #prepare(int)} assigns one. */
    public int baseInstance = -1;
    /** Whether the next {@link #writeObjects(long)} must rewrite every object. */
    private boolean needsFullWrite = true;

    private final Vector4fc boundingSphere;

    public IndirectModel(IndirectInstancer<?> instancer, int id, Vector4f boundingSphere) {
        this.instancer = instancer;
        this.id = id;
        this.boundingSphere = boundingSphere;
    }

    /**
     * Writes this model's descriptor at {@code ptr}: a zeroed instance count,
     * the base instance, then the bounding sphere.
     *
     * @param ptr native address of the descriptor slot
     */
    public void writeModel(long ptr) {
        long instanceCountPtr = ptr;
        long baseInstancePtr = ptr + 4;
        long boundingSpherePtr = ptr + 8;

        // instanceCount starts at zero; it is to be incremented by the compute shader
        MemoryUtil.memPutInt(instanceCountPtr, 0);
        MemoryUtil.memPutInt(baseInstancePtr, baseInstance);
        boundingSphere.getToAddress(boundingSpherePtr);
    }

    /**
     * Updates the instancer and records the base instance for this frame.
     * A full object rewrite is requested only when the base instance moved.
     *
     * @param baseInstance index of this model's first object in the object buffer
     */
    public void prepare(int baseInstance) {
        instancer.update();

        needsFullWrite = this.baseInstance != baseInstance;
        this.baseInstance = baseInstance;
    }

    /**
     * Writes this model's objects starting at {@code objectPtr}, using a full
     * write when one was requested by {@link #prepare(int)} and a sparse write otherwise.
     *
     * @param objectPtr native address to write objects to
     */
    public void writeObjects(long objectPtr) {
        if (!needsFullWrite) {
            instancer.writeSparse(objectPtr, id);
            return;
        }

        instancer.writeFull(objectPtr, id);
    }
}

View file

@ -21,9 +21,9 @@ import net.minecraft.Util;
public class GlCompat { public class GlCompat {
public static final boolean ALLOW_DSA = true; public static final boolean ALLOW_DSA = true;
public static final GLCapabilities CAPABILITIES = GL.createCapabilities(); public static final GLCapabilities CAPABILITIES = GL.createCapabilities();
private static final boolean amd = _decideIfWeAreAMD(); public static final boolean amd = _decideIfWeAreAMD();
private static final boolean windows = _decideIfWeAreWindows(); public static final boolean windows = _decideIfWeAreWindows();
private static final boolean supportsIndirect = _decideIfWeSupportIndirect(); public static final boolean supportsIndirect = _decideIfWeSupportIndirect();
public static final int SUBGROUP_SIZE = _subgroupSize(); public static final int SUBGROUP_SIZE = _subgroupSize();
private GlCompat() { private GlCompat() {

View file

@ -2,6 +2,7 @@ package com.jozufozu.flywheel.lib.model;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.Collection;
import org.jetbrains.annotations.Nullable; import org.jetbrains.annotations.Nullable;
import org.joml.Vector4f; import org.joml.Vector4f;
@ -11,12 +12,14 @@ import org.slf4j.Logger;
import com.dreizak.miniball.highdim.Miniball; import com.dreizak.miniball.highdim.Miniball;
import com.dreizak.miniball.model.PointSet; import com.dreizak.miniball.model.PointSet;
import com.jozufozu.flywheel.api.material.Material; import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.model.Mesh;
import com.jozufozu.flywheel.api.vertex.ReusableVertexList; import com.jozufozu.flywheel.api.vertex.ReusableVertexList;
import com.jozufozu.flywheel.api.vertex.VertexList; import com.jozufozu.flywheel.api.vertex.VertexList;
import com.jozufozu.flywheel.api.vertex.VertexListProviderRegistry; import com.jozufozu.flywheel.api.vertex.VertexListProviderRegistry;
import com.jozufozu.flywheel.api.vertex.VertexType; import com.jozufozu.flywheel.api.vertex.VertexType;
import com.jozufozu.flywheel.lib.material.Materials; import com.jozufozu.flywheel.lib.material.Materials;
import com.jozufozu.flywheel.lib.memory.MemoryBlock; import com.jozufozu.flywheel.lib.memory.MemoryBlock;
import com.jozufozu.flywheel.lib.vertex.PositionOnlyVertexList;
import com.mojang.blaze3d.vertex.BufferBuilder; import com.mojang.blaze3d.vertex.BufferBuilder;
import com.mojang.blaze3d.vertex.BufferBuilder.DrawState; import com.mojang.blaze3d.vertex.BufferBuilder.DrawState;
import com.mojang.blaze3d.vertex.VertexFormat; import com.mojang.blaze3d.vertex.VertexFormat;
@ -98,6 +101,31 @@ public final class ModelUtil {
return null; return null;
} }
/**
 * Computes a single bounding sphere that encloses the vertices of every given mesh.
 *
 * <p>The positions of all meshes are written back to back into one temporary
 * native buffer, which is then handed to {@link #computeBoundingSphere(VertexList)}.
 * The buffer is always released, even if writing a mesh or computing the sphere throws.
 *
 * @param values the meshes to enclose
 * @return the bounding sphere computed over the combined vertex positions
 */
public static Vector4f computeBoundingSphere(Collection<Mesh> values) {
    int totalVertices = 0;
    for (Mesh value : values) {
        totalVertices += value.vertexCount();
    }

    var block = MemoryBlock.malloc((long) totalVertices * PositionOnlyVertexList.STRIDE);
    try {
        var vertexList = new PositionOnlyVertexList();

        // Point the list at each mesh's slice of the buffer in turn so the meshes end up contiguous.
        int baseVertex = 0;
        for (Mesh value : values) {
            vertexList.ptr(block.ptr() + (long) baseVertex * PositionOnlyVertexList.STRIDE);
            value.write(vertexList);
            baseVertex += value.vertexCount();
        }

        // Rewind to the start and expose the combined range as one list.
        vertexList.ptr(block.ptr());
        vertexList.vertexCount(totalVertices);

        return computeBoundingSphere(vertexList);
    } finally {
        // Native memory is not garbage collected; free it on every exit path.
        block.free();
    }
}
public static Vector4f computeBoundingSphere(VertexList vertexList) { public static Vector4f computeBoundingSphere(VertexList vertexList) {
return computeBoundingSphere(new PointSet() { return computeBoundingSphere(new PointSet() {
@Override @Override

View file

@ -0,0 +1,171 @@
package com.jozufozu.flywheel.lib.vertex;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.vertex.MutableVertexList;
import net.minecraft.client.renderer.LightTexture;
import net.minecraft.client.renderer.texture.OverlayTexture;
/**
 * A vertex list that stores only positions: three consecutive floats
 * (x, y, z) per vertex, {@link #STRIDE} bytes apart.
 *
 * <p>All other attributes are not stored. Their getters return fixed values
 * and their setters do nothing.
 */
public class PositionOnlyVertexList extends AbstractVertexList {
    /** Size of one vertex in bytes (three 4-byte floats). */
    public static final int STRIDE = 12;

    /**
     * Native address of the vertex at {@code index}. The multiplication is done
     * in {@code long} so large indices cannot overflow {@code int}.
     */
    private long vertexPtr(int index) {
        return ptr + (long) index * STRIDE;
    }

    @Override
    public float x(int index) {
        return MemoryUtil.memGetFloat(vertexPtr(index));
    }

    @Override
    public float y(int index) {
        return MemoryUtil.memGetFloat(vertexPtr(index) + 4);
    }

    @Override
    public float z(int index) {
        return MemoryUtil.memGetFloat(vertexPtr(index) + 8);
    }

    // Attributes below are not stored; getters return constants.

    @Override
    public float r(int index) {
        return 1;
    }

    @Override
    public float g(int index) {
        return 1;
    }

    @Override
    public float b(int index) {
        return 1;
    }

    @Override
    public float a(int index) {
        return 1;
    }

    @Override
    public float u(int index) {
        return 0;
    }

    @Override
    public float v(int index) {
        return 0;
    }

    @Override
    public int overlay(int index) {
        return OverlayTexture.NO_OVERLAY;
    }

    @Override
    public int light(int index) {
        return LightTexture.FULL_BRIGHT;
    }

    @Override
    public float normalX(int index) {
        return 0;
    }

    @Override
    public float normalY(int index) {
        return 1;
    }

    @Override
    public float normalZ(int index) {
        return 0;
    }

    @Override
    public void x(int index, float x) {
        MemoryUtil.memPutFloat(vertexPtr(index), x);
    }

    @Override
    public void y(int index, float y) {
        MemoryUtil.memPutFloat(vertexPtr(index) + 4, y);
    }

    @Override
    public void z(int index, float z) {
        MemoryUtil.memPutFloat(vertexPtr(index) + 8, z);
    }

    // Attributes below are not stored; setters intentionally discard their input.

    @Override
    public void r(int index, float r) {
    }

    @Override
    public void g(int index, float g) {
    }

    @Override
    public void b(int index, float b) {
    }

    @Override
    public void a(int index, float a) {
    }

    @Override
    public void u(int index, float u) {
    }

    @Override
    public void v(int index, float v) {
    }

    @Override
    public void overlay(int index, int overlay) {
    }

    @Override
    public void light(int index, int light) {
    }

    @Override
    public void normalX(int index, float normalX) {
    }

    @Override
    public void normalY(int index, float normalY) {
    }

    @Override
    public void normalZ(int index, float normalZ) {
    }

    /**
     * Copies one vertex into {@code dst}. When {@code dst} is exactly this
     * class the bytes are copied directly; otherwise the generic
     * attribute-by-attribute path of the superclass is used.
     */
    @Override
    public void write(MutableVertexList dst, int srcIndex, int dstIndex) {
        if (getClass() == dst.getClass()) {
            PositionOnlyVertexList target = (PositionOnlyVertexList) dst;
            MemoryUtil.memCopy(vertexPtr(srcIndex), target.vertexPtr(dstIndex), STRIDE);
        } else {
            super.write(dst, srcIndex, dstIndex);
        }
    }

    /**
     * Copies {@code vertexCount} consecutive vertices into {@code dst}, using
     * a single memory copy when {@code dst} is exactly this class.
     */
    @Override
    public void write(MutableVertexList dst, int srcStartIndex, int dstStartIndex, int vertexCount) {
        if (getClass() == dst.getClass()) {
            PositionOnlyVertexList target = (PositionOnlyVertexList) dst;
            // Widen before multiplying so the byte count cannot overflow int.
            MemoryUtil.memCopy(vertexPtr(srcStartIndex), target.vertexPtr(dstStartIndex), (long) vertexCount * STRIDE);
        } else {
            super.write(dst, srcStartIndex, dstStartIndex, vertexCount);
        }
    }

    /**
     * Copies every vertex of this list into {@code dst}, using a single
     * memory copy when {@code dst} is exactly this class.
     */
    @Override
    public void writeAll(MutableVertexList dst) {
        if (getClass() == dst.getClass()) {
            long dstPtr = ((PositionOnlyVertexList) dst).ptr;
            // Widen before multiplying so the byte count cannot overflow int.
            MemoryUtil.memCopy(ptr, dstPtr, (long) vertexCount * STRIDE);
        } else {
            super.writeAll(dst);
        }
    }
}

View file

@ -0,0 +1,28 @@
#include "flywheel:internal/indirect/buffers.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/indirect/draw_command.glsl"
// One invocation per draw command along x; the work group width is FLW_SUBGROUP_SIZE.
layout(local_size_x = FLW_SUBGROUP_SIZE) in;
// Per-model data. Only read here: main() copies instanceCount out of it.
layout(std430, binding = MODEL_BINDING) restrict readonly buffer ModelDescriptors {
ModelDescriptor models[];
};
// Draw commands. Read for modelID and written with the resulting instanceCount.
layout(std430, binding = DRAW_BINDING) restrict buffer MeshDrawCommands {
MeshDrawCommand drawCommands[];
};
// Propagate culling results: every draw command receives the instance count
// accumulated on the model it references.
void main() {
    uint drawID = gl_GlobalInvocationID.x;

    // Invocations past the end of the command buffer have nothing to do.
    if (drawID < drawCommands.length()) {
        drawCommands[drawID].instanceCount = models[drawCommands[drawID].modelID].instanceCount;
    }
}

View file

@ -0,0 +1,4 @@
// Shader storage buffer binding points shared by the indirect shaders.
// Binding of the object buffer (Object objects[]).
#define OBJECT_BINDING 0
// Binding of the target buffer (uint objectIDs[]).
#define TARGET_BINDING 1
// Binding of the model descriptor buffer (ModelDescriptor models[]).
#define MODEL_BINDING 2
// Binding of the draw command buffer (MeshDrawCommand drawCommands[]).
#define DRAW_BINDING 3

View file

@ -1,6 +1,8 @@
layout(local_size_x = FLW_SUBGROUP_SIZE) in; #include "flywheel:internal/indirect/buffers.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/indirect/object.glsl"
#include "flywheel:internal/indirect/mesh.glsl" layout(local_size_x = FLW_SUBGROUP_SIZE) in;
// need to add stubs so the instance shader compiles. // need to add stubs so the instance shader compiles.
vec4 flw_vertexPos; vec4 flw_vertexPos;
@ -17,28 +19,24 @@ vec4 flw_var3;
void flw_transformBoundingSphere(in FlwInstance i, inout vec3 center, inout float radius); void flw_transformBoundingSphere(in FlwInstance i, inout vec3 center, inout float radius);
struct Object { layout(std430, binding = OBJECT_BINDING) restrict readonly buffer ObjectBuffer {
uint batchID;
FlwPackedInstance instance;
};
// populated by instancers
layout(std430, binding = 0) restrict readonly buffer ObjectBuffer {
Object objects[]; Object objects[];
}; };
layout(std430, binding = 1) restrict writeonly buffer TargetBuffer { layout(std430, binding = TARGET_BINDING) restrict writeonly buffer TargetBuffer {
uint objectIDs[]; uint objectIDs[];
}; };
layout(std430, binding = 2) restrict buffer DrawCommands { layout(std430, binding = MODEL_BINDING) restrict buffer ModelDescriptors {
MeshDrawCommand drawCommands[]; ModelDescriptor models[];
}; };
uint flw_objectID; // Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
uint flw_batchID; // Only uses 6 fmas and some boolean ops.
// See also:
// 83 - 27 = 56 spirv instruction results // flywheel:uniform/flywheel.glsl
// com.jozufozu.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes
// org.joml.FrustumIntersection.testSphere
bool testSphere(vec3 center, float radius) { bool testSphere(vec3 center, float radius) {
bvec4 xyInside = greaterThanEqual(fma(flywheel.planes.xyX, center.xxxx, fma(flywheel.planes.xyY, center.yyyy, fma(flywheel.planes.xyZ, center.zzzz, flywheel.planes.xyW))), -radius.xxxx); bvec4 xyInside = greaterThanEqual(fma(flywheel.planes.xyX, center.xxxx, fma(flywheel.planes.xyY, center.yyyy, fma(flywheel.planes.xyZ, center.zzzz, flywheel.planes.xyW))), -radius.xxxx);
bvec2 zInside = greaterThanEqual(fma(flywheel.planes.zX, center.xx, fma(flywheel.planes.zY, center.yy, fma(flywheel.planes.zZ, center.zz, flywheel.planes.zW))), -radius.xx); bvec2 zInside = greaterThanEqual(fma(flywheel.planes.zX, center.xx, fma(flywheel.planes.zY, center.yy, fma(flywheel.planes.zZ, center.zz, flywheel.planes.zW))), -radius.xx);
@ -46,32 +44,33 @@ bool testSphere(vec3 center, float radius) {
return all(xyInside) && all(zInside); return all(xyInside) && all(zInside);
} }
bool isVisible() { bool isVisible(uint objectID, uint modelID) {
BoundingSphere sphere = drawCommands[flw_batchID].boundingSphere; BoundingSphere sphere = models[modelID].boundingSphere;
vec3 center; vec3 center;
float radius; float radius;
unpackBoundingSphere(sphere, center, radius); unpackBoundingSphere(sphere, center, radius);
FlwInstance object = _flw_unpackInstance(objects[flw_objectID].instance); FlwInstance instance = _flw_unpackInstance(objects[objectID].instance);
flw_transformBoundingSphere(object, center, radius);
flw_transformBoundingSphere(instance, center, radius);
return testSphere(center, radius); return testSphere(center, radius);
} }
void main() { void main() {
flw_objectID = gl_GlobalInvocationID.x; uint objectID = gl_GlobalInvocationID.x;
if (flw_objectID >= objects.length()) { if (objectID >= objects.length()) {
return; return;
} }
flw_batchID = objects[flw_objectID].batchID; uint modelID = objects[objectID].modelID;
if (isVisible()) { if (isVisible(objectID, modelID)) {
uint batchIndex = atomicAdd(drawCommands[flw_batchID].instanceCount, 1); uint batchIndex = atomicAdd(models[modelID].instanceCount, 1);
uint globalIndex = drawCommands[flw_batchID].baseInstance + batchIndex; uint globalIndex = models[modelID].baseInstance + batchIndex;
objectIDs[globalIndex] = flw_objectID; objectIDs[globalIndex] = objectID;
} }
} }

View file

@ -1,31 +1,30 @@
#include "flywheel:internal/indirect/api/vertex.glsl" #include "flywheel:internal/indirect/api/vertex.glsl"
#include "flywheel:internal/indirect/mesh.glsl" #include "flywheel:internal/indirect/buffers.glsl"
#include "flywheel:internal/indirect/draw_command.glsl"
#include "flywheel:internal/indirect/object.glsl"
#include "flywheel:internal/material.glsl" #include "flywheel:internal/material.glsl"
#include "flywheel:internal/block.vert" #include "flywheel:internal/block.vert"
#include "flywheel:util/diffuse.glsl" #include "flywheel:util/diffuse.glsl"
flat out uvec3 _flw_material; flat out uvec3 _flw_material;
struct Object { layout(std430, binding = OBJECT_BINDING) restrict readonly buffer ObjectBuffer {
uint batchID;
FlwPackedInstance instance;
};
layout(std430, binding = 0) restrict readonly buffer ObjectBuffer {
Object objects[]; Object objects[];
}; };
layout(std430, binding = 1) restrict readonly buffer TargetBuffer { layout(std430, binding = TARGET_BINDING) restrict readonly buffer TargetBuffer {
uint objectIDs[]; uint objectIDs[];
}; };
layout(std430, binding = 2) restrict readonly buffer DrawCommands { layout(std430, binding = DRAW_BINDING) restrict readonly buffer DrawCommands {
MeshDrawCommand drawCommands[]; MeshDrawCommand drawCommands[];
}; };
uniform uint _flw_baseDraw;
void main() { void main() {
uint instanceIndex = objectIDs[gl_BaseInstance + gl_InstanceID]; uint instanceIndex = objectIDs[gl_BaseInstance + gl_InstanceID];
uint batchID = objects[instanceIndex].batchID; uint batchID = gl_DrawID + _flw_baseDraw;
FlwInstance i = _flw_unpackInstance(objects[instanceIndex].instance); FlwInstance i = _flw_unpackInstance(objects[instanceIndex].instance);
_flw_materialVertexID = drawCommands[batchID].vertexMaterialID; _flw_materialVertexID = drawCommands[batchID].vertexMaterialID;

View file

@ -0,0 +1,13 @@
// One indirect draw command plus the per-draw data read by the shaders.
// NOTE(review): the first five fields appear to mirror OpenGL's
// DrawElementsIndirectCommand (count, instanceCount, firstIndex, baseVertex,
// baseInstance) - confirm against the CPU-side writer.
struct MeshDrawCommand {
uint indexCount;
// Filled in by the apply pass from models[modelID].instanceCount.
uint instanceCount;
uint firstIndex;
uint vertexOffset;
uint baseInstance;
// Index into the model descriptor buffer of the model this draw belongs to.
uint modelID;
uint vertexMaterialID;
uint fragmentMaterialID;
uint packedFogAndCutout;
uint packedMaterialProperties;
};

View file

@ -1,25 +0,0 @@
// A sphere stored as four scalar floats: center (x, y, z) followed by radius.
// Use unpackBoundingSphere to obtain the center as a vec3.
struct BoundingSphere {
float x;
float y;
float z;
float radius;
};
// Splits a BoundingSphere into its center position and its radius.
void unpackBoundingSphere(in BoundingSphere sphere, out vec3 center, out float radius) {
    radius = sphere.radius;
    center = vec3(sphere.x, sphere.y, sphere.z);
}
// Draw command layout in which each command carries its own bounding sphere.
// The culling pass reads boundingSphere and baseInstance and atomically
// increments instanceCount for every visible object.
struct MeshDrawCommand {
uint indexCount;
uint instanceCount;
uint firstIndex;
uint vertexOffset;
uint baseInstance;
BoundingSphere boundingSphere;
uint vertexMaterialID;
uint fragmentMaterialID;
uint packedFogAndCutout;
uint packedMaterialProperties;
};

View file

@ -0,0 +1,17 @@
// A sphere stored as four scalar floats: center (x, y, z) followed by radius.
// Use unpackBoundingSphere to obtain the center as a vec3.
struct BoundingSphere {
float x;
float y;
float z;
float radius;
};
// Splits a BoundingSphere into its center position and its radius.
void unpackBoundingSphere(in BoundingSphere sphere, out vec3 center, out float radius) {
    radius = sphere.radius;
    center = vec3(sphere.x, sphere.y, sphere.z);
}
// Per-model data shared by the culling and apply passes.
struct ModelDescriptor {
// Number of visible instances; incremented atomically by the culling pass
// and copied into each draw command by the apply pass.
uint instanceCount;
// Offset added to the per-model index when the culling pass writes objectIDs.
uint baseInstance;
// Model-space bounds that the culling pass transforms per instance and tests against the frustum.
BoundingSphere boundingSphere;
};

View file

@ -0,0 +1,4 @@
// One entry of the object buffer: a packed instance tagged with its model.
struct Object {
// Index into the model descriptor buffer for the model this instance belongs to.
uint modelID;
// Packed instance data; unpacked with _flw_unpackInstance.
FlwPackedInstance instance;
};