More indirecter

- Indirect now supports multiple meshes per model.
- Cull entire models at once and then apply the accumulated instance
  count to each draw command in a separate compute pass.
- Add utility to calculate the bounding sphere for multiple meshes.
- Inline IndirectDrawSet into IndirectCullingGroup.
- Use MemoryBlocks for draw command and model descriptor storage.
- Fix leaked draw commands.
- Add IndirectModel to track the bounding sphere and per-instancer state.
- IndirectDrawCommand now references IndirectModel instead of Instancer.
This commit is contained in:
Jozufozu 2023-12-04 00:25:20 -08:00
parent 4f8e6af3d0
commit e405c41ade
19 changed files with 627 additions and 290 deletions

View file

@ -17,31 +17,37 @@ import com.jozufozu.flywheel.gl.shader.ShaderType;
import com.jozufozu.flywheel.glsl.GLSLVersion;
import com.jozufozu.flywheel.glsl.ShaderSources;
import com.jozufozu.flywheel.glsl.SourceComponent;
import com.jozufozu.flywheel.lib.util.Unit;
import net.minecraft.resources.ResourceLocation;
public class IndirectPrograms {
public static IndirectPrograms instance;
private static final Compile<InstanceType<?>> CULL = new Compile<>();
private static final Compile<Unit> APPLY = new Compile<>();
private final Map<PipelineProgramKey, GlProgram> pipeline;
private final Map<InstanceType<?>, GlProgram> culling;
private final GlProgram apply;
public IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling) {
public IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, GlProgram apply) {
this.pipeline = pipeline;
this.culling = culling;
this.apply = apply;
}
static void reload(ShaderSources sources, ImmutableList<PipelineProgramKey> pipelineKeys, UniformComponent uniformComponent, List<SourceComponent> vertexComponents, List<SourceComponent> fragmentComponents) {
_delete();
var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, pipelineKeys, uniformComponent, vertexComponents, fragmentComponents);
var cullingCompiler = createCullingCompiler(uniformComponent, sources);
var stage2Compiler = createStage2Compiler(sources);
try {
var pipelineResult = pipelineCompiler.compileAndReportErrors();
var cullingResult = cullingCompiler.compileAndReportErrors();
var stage2Result = stage2Compiler.compileAndReportErrors();
if (pipelineResult != null && cullingResult != null) {
instance = new IndirectPrograms(pipelineResult, cullingResult);
if (pipelineResult != null && cullingResult != null && stage2Result != null) {
instance = new IndirectPrograms(pipelineResult, cullingResult, stage2Result.get(Unit.INSTANCE));
}
} catch (Throwable e) {
Flywheel.LOGGER.error("Failed to compile indirect programs", e);
@ -88,6 +94,16 @@ public class IndirectPrograms {
.build();
}
/**
 * Builds the harness that compiles the single "apply" compute program.
 *
 * <p>The harness has exactly one key, {@link Unit#INSTANCE}. The program links one
 * GLSL 4.60 compute shader loaded from {@link Files#INDIRECT_APPLY}, with
 * {@code FLW_SUBGROUP_SIZE} defined as {@link GlCompat#SUBGROUP_SIZE}.
 *
 * @param sources the shader sources handed to the harness
 * @return the built harness
 */
private static CompilationHarness<Unit> createStage2Compiler(ShaderSources sources) {
	// Sub-expressions are evaluated in the same order as a single fluent chain would.
	var harnessBuilder = APPLY.harness(sources)
			.keys(ImmutableList.of(Unit.INSTANCE));
	var programBuilder = APPLY.program();
	var computeShader = APPLY.shader(GLSLVersion.V460, ShaderType.COMPUTE)
			.define("FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE)
			.withResource(Files.INDIRECT_APPLY);
	return harnessBuilder.compiler(programBuilder.link(computeShader))
			.build();
}
/**
 * Looks up the pipeline program compiled for an instance type / context pair.
 *
 * @param instanceType the instance type half of the key
 * @param contextShader the context half of the key
 * @return the program stored under that key, or whatever the backing map returns when it is absent
 */
public GlProgram getIndirectProgram(InstanceType<?> instanceType, Context contextShader) {
	var key = new PipelineProgramKey(instanceType, contextShader);
	return pipeline.get(key);
}
@ -96,14 +112,20 @@ public class IndirectPrograms {
return culling.get(instanceType);
}
/**
 * @return the compute program built from {@code internal/indirect/apply.glsl}
 */
public GlProgram getApplyProgram() {
	return this.apply;
}
/**
 * Deletes every program held by this set: all pipeline programs, all culling
 * programs, and finally the apply program.
 */
public void delete() {
	for (GlProgram pipelineProgram : pipeline.values()) {
		pipelineProgram.delete();
	}
	for (GlProgram cullingProgram : culling.values()) {
		cullingProgram.delete();
	}
	apply.delete();
}
/**
 * Resource locations of the compute shader sources used by the indirect backend.
 */
private static final class Files {
	/** Source of the per-instance culling compute shader. */
	public static final ResourceLocation INDIRECT_CULL = Flywheel.rl("internal/indirect/cull.glsl");
	/** Source of the compute shader used by the apply program. */
	public static final ResourceLocation INDIRECT_APPLY = Flywheel.rl("internal/indirect/apply.glsl");

	// Constant holder only; never instantiated.
	private Files() {
	}
}
}

View file

@ -24,31 +24,41 @@ import com.jozufozu.flywheel.lib.memory.FlwMemoryTracker;
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
public class IndirectBuffers {
public static final int BUFFER_COUNT = 3;
// Number of vbos created.
public static final int BUFFER_COUNT = 4;
public static final long INT_SIZE = Integer.BYTES;
public static final long PTR_SIZE = Pointer.POINTER_SIZE;
// DRAW COMMAND
public static final long DRAW_COMMAND_STRIDE = 52;
// Byte size of a draw command, plus our added mesh data.
public static final long DRAW_COMMAND_STRIDE = 40;
public static final long DRAW_COMMAND_OFFSET = 0;
public static final long MODEL_STRIDE = 24;
// BITS
private static final int SUB_DATA_BITS = GL_DYNAMIC_STORAGE_BIT;
private static final int PERSISTENT_BITS = GL_MAP_PERSISTENT_BIT | GL_MAP_WRITE_BIT;
private static final int MAP_BITS = PERSISTENT_BITS | GL_MAP_FLUSH_EXPLICIT_BIT;
private static final int GPU_ONLY_BITS = 0;
// OFFSETS
private static final long OBJECT_OFFSET = 0;
// Offsets to the vbos
private static final long VBO_OFFSET = 0;
private static final long OBJECT_OFFSET = VBO_OFFSET;
private static final long TARGET_OFFSET = INT_SIZE;
private static final long DRAW_OFFSET = INT_SIZE * 2;
private static final long MODEL_OFFSET = INT_SIZE * 2;
private static final long DRAW_OFFSET = INT_SIZE * 3;
// Offsets to the 3 segments
private static final long OFFSET_OFFSET = BUFFER_COUNT * INT_SIZE;
private static final long SIZE_OFFSET = OFFSET_OFFSET + BUFFER_COUNT * PTR_SIZE;
private static final long OBJECT_SIZE_OFFSET = SIZE_OFFSET;
private static final long TARGET_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE;
private static final long MODEL_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 2;
private static final long DRAW_SIZE_OFFSET = SIZE_OFFSET + PTR_SIZE * 3;
// Total size of the buffer.
private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE;
private static final long OBJECT_SIZE_OFFSET = SIZE_OFFSET;
private static final long TARGET_SIZE_OFFSET = OBJECT_SIZE_OFFSET + PTR_SIZE;
private static final long DRAW_SIZE_OFFSET = TARGET_SIZE_OFFSET + PTR_SIZE;
/**
* A small block of memory divided into 3 contiguous segments:
@ -60,21 +70,25 @@ public class IndirectBuffers {
* {@code sizes}: an array of {@link IndirectBuffers#PTR_SIZE} byte lengths of the buffers.
* <br>
* Each segment stores {@link IndirectBuffers#BUFFER_COUNT} elements,
* one for the object buffer, one for the target buffer, and one for the draw buffer.
* one for the object buffer, target buffer, model buffer, and draw buffer.
*/
private final MemoryBlock buffers;
private final long objectStride;
private int object;
private int target;
private int model;
private int draw;
long objectPtr;
long drawPtr;
MemoryBlock modelPtr;
MemoryBlock drawPtr;
private int maxObjectCount = 0;
private int maxModelCount = 0;
private int maxDrawCount = 0;
private static final float OBJECT_GROWTH_FACTOR = 2f;
private static final float OBJECT_GROWTH_FACTOR = 1.25f;
private static final float MODEL_GROWTH_FACTOR = 2f;
private static final float DRAW_GROWTH_FACTOR = 2f;
IndirectBuffers(long objectStride) {
@ -87,25 +101,26 @@ public class IndirectBuffers {
nglCreateBuffers(BUFFER_COUNT, ptr);
object = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET);
target = MemoryUtil.memGetInt(ptr + TARGET_OFFSET);
model = MemoryUtil.memGetInt(ptr + MODEL_OFFSET);
draw = MemoryUtil.memGetInt(ptr + DRAW_OFFSET);
}
void updateCounts(int objectCount, int drawCount) {
void updateCounts(int objectCount, int drawCount, int modelCount) {
if (objectCount > maxObjectCount) {
createObjectStorage((int) (objectCount * OBJECT_GROWTH_FACTOR));
}
if (modelCount > maxModelCount) {
createModelStorage((int) (modelCount * MODEL_GROWTH_FACTOR));
}
if (drawCount > maxDrawCount) {
createDrawStorage((int) (drawCount * DRAW_GROWTH_FACTOR));
}
final long objectSize = objectStride * objectCount;
final long targetSize = INT_SIZE * objectCount;
final long drawSize = DRAW_COMMAND_STRIDE * drawCount;
final long ptr = buffers.ptr();
MemoryUtil.memPutAddress(ptr + OBJECT_SIZE_OFFSET, objectSize);
MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, targetSize);
MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, drawSize);
MemoryUtil.memPutAddress(ptr + OBJECT_SIZE_OFFSET, objectStride * objectCount);
MemoryUtil.memPutAddress(ptr + TARGET_SIZE_OFFSET, INT_SIZE * objectCount);
MemoryUtil.memPutAddress(ptr + MODEL_SIZE_OFFSET, MODEL_STRIDE * modelCount);
MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount);
}
void createObjectStorage(int objectCount) {
@ -115,7 +130,7 @@ public class IndirectBuffers {
if (maxObjectCount > 0) {
final long ptr = buffers.ptr();
nglCreateBuffers(BUFFER_COUNT - 1, ptr);
nglCreateBuffers(2, ptr);
int objectNew = MemoryUtil.memGetInt(ptr + OBJECT_OFFSET);
int targetNew = MemoryUtil.memGetInt(ptr + TARGET_OFFSET);
@ -142,6 +157,28 @@ public class IndirectBuffers {
FlwMemoryTracker._allocGPUMemory(maxObjectCount * objectStride);
}
/**
 * (Re)allocates the model descriptor storage, both the GL buffer and the CPU-side
 * staging block, so that it can hold {@code modelCount} descriptors of
 * {@link #MODEL_STRIDE} bytes each.
 *
 * <p>The first allocation gives storage to the buffer name that already exists.
 * Later calls create a fresh buffer, delete the old one, and record the new name
 * in the {@code buffers} block. The old GL buffer's contents are not copied over.
 *
 * @param modelCount the new capacity, in model descriptors
 */
void createModelStorage(int modelCount) {
	// Un-track the previous allocation before accounting for the new one.
	freeModelStorage();

	final long byteSize = MODEL_STRIDE * modelCount;
	final boolean firstAllocation = maxModelCount <= 0;

	if (firstAllocation) {
		glNamedBufferStorage(model, byteSize, SUB_DATA_BITS);
		modelPtr = MemoryBlock.malloc(byteSize);
	} else {
		int replacement = glCreateBuffers();
		glNamedBufferStorage(replacement, byteSize, SUB_DATA_BITS);

		glDeleteBuffers(model);

		// Keep the buffer-name array in sync with the new handle.
		MemoryUtil.memPutInt(buffers.ptr() + MODEL_OFFSET, replacement);
		model = replacement;
		modelPtr = modelPtr.realloc(byteSize);
	}

	maxModelCount = modelCount;
	FlwMemoryTracker._allocGPUMemory(byteSize);
}
void createDrawStorage(int drawCount) {
freeDrawStorage();
@ -155,11 +192,10 @@ public class IndirectBuffers {
MemoryUtil.memPutInt(buffers.ptr() + DRAW_OFFSET, drawNew);
draw = drawNew;
drawPtr = MemoryUtil.nmemRealloc(drawPtr, drawSize);
drawPtr = drawPtr.realloc(drawSize);
} else {
glNamedBufferStorage(draw, drawSize, SUB_DATA_BITS);
drawPtr = MemoryUtil.nmemAlloc(drawSize);
drawPtr = MemoryBlock.malloc(drawSize);
}
maxDrawCount = drawCount;
FlwMemoryTracker._allocGPUMemory(maxDrawCount * DRAW_COMMAND_STRIDE);
@ -169,6 +205,10 @@ public class IndirectBuffers {
FlwMemoryTracker._freeGPUMemory(maxObjectCount * objectStride);
}
/**
 * Reports the current model storage size ({@code maxModelCount} descriptors) as
 * freed to the memory tracker. This only updates the tracker; no buffer is deleted here.
 */
private void freeModelStorage() {
	final long trackedBytes = maxModelCount * MODEL_STRIDE;
	FlwMemoryTracker._freeGPUMemory(trackedBytes);
}
/**
 * Reports the current draw command storage size ({@code maxDrawCount} commands) as
 * freed to the memory tracker. This only updates the tracker; no buffer is deleted here.
 */
private void freeDrawStorage() {
	final long trackedBytes = maxDrawCount * DRAW_COMMAND_STRIDE;
	FlwMemoryTracker._freeGPUMemory(trackedBytes);
}
@ -191,15 +231,25 @@ public class IndirectBuffers {
glFlushMappedNamedBufferRange(object, 0, length);
}
/**
 * Uploads the first {@code length} bytes of the CPU-side model block to the start
 * of the model buffer.
 *
 * @param length number of bytes to upload
 */
void flushModels(long length) {
	final long sourceAddress = modelPtr.ptr();
	nglNamedBufferSubData(model, 0, length, sourceAddress);
}
void flushDrawCommands(long length) {
nglNamedBufferSubData(draw, 0, length, drawPtr);
// glFlushMappedNamedBufferRange(this.draw, 0, length);
nglNamedBufferSubData(draw, 0, length, drawPtr.ptr());
}
/**
 * Deletes all {@link #BUFFER_COUNT} GL buffers, frees the bookkeeping block and
 * the CPU-side model/draw staging blocks (if they were ever allocated), and
 * un-tracks the object, model and draw storage sizes.
 */
public void delete() {
	nglDeleteBuffers(BUFFER_COUNT, buffers.ptr());
	buffers.free();

	freeIfAllocated(modelPtr);
	freeIfAllocated(drawPtr);

	freeObjectStorage();
	freeModelStorage();
	freeDrawStorage();
}

/** Frees {@code block} unless it is {@code null}. */
private static void freeIfAllocated(MemoryBlock block) {
	if (block != null) {
		block.free();
	}
}
}

View file

@ -1,32 +1,49 @@
package com.jozufozu.flywheel.backend.engine.indirect;
import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
import static org.lwjgl.opengl.GL30.glUniform1ui;
import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT;
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
import static org.lwjgl.opengl.GL43.glDispatchCompute;
import static org.lwjgl.opengl.GL43.glMultiDrawElementsIndirect;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.instance.InstanceType;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.model.Mesh;
import com.jozufozu.flywheel.api.model.Model;
import com.jozufozu.flywheel.backend.MaterialUtil;
import com.jozufozu.flywheel.backend.compile.IndirectPrograms;
import com.jozufozu.flywheel.backend.engine.UniformBuffer;
import com.jozufozu.flywheel.gl.GlCompat;
import com.jozufozu.flywheel.gl.shader.GlProgram;
import com.jozufozu.flywheel.lib.context.Contexts;
import com.jozufozu.flywheel.lib.model.ModelUtil;
public class IndirectCullingGroup<I extends Instance> {
private static final int BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT;
private static final int DRAW_BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT;
private final GlProgram compute;
private final GlProgram cull;
private final GlProgram draw;
private final long objectStride;
private final IndirectBuffers buffers;
public final IndirectMeshPool meshPool;
public final IndirectDrawSet<I> drawSet = new IndirectDrawSet<>();
private boolean hasCulledThisFrame;
private boolean needsMemoryBarrier;
private final List<IndirectModel> indirectModels = new ArrayList<>();
private final List<IndirectDraw> indirectDraws = new ArrayList<>();
private final Map<RenderStage, List<MultiDraw>> multiDraws = new EnumMap<>(RenderStage.class);
private boolean needsDrawBarrier;
private boolean needsSortDraws;
private int instanceCountThisFrame;
private final GlProgram apply;
IndirectCullingGroup(InstanceType<I> instanceType) {
objectStride = instanceType.getLayout()
@ -34,59 +51,104 @@ public class IndirectCullingGroup<I extends Instance> {
buffers = new IndirectBuffers(objectStride);
buffers.createBuffers();
buffers.createObjectStorage(128);
buffers.createDrawStorage(2);
meshPool = new IndirectMeshPool();
var indirectPrograms = IndirectPrograms.get();
compute = indirectPrograms.getCullingProgram(instanceType);
cull = indirectPrograms.getCullingProgram(instanceType);
apply = indirectPrograms.getApplyProgram();
draw = indirectPrograms.getIndirectProgram(instanceType, Contexts.WORLD);
}
public void add(IndirectInstancer<I> instancer, RenderStage stage, Material material, Mesh mesh) {
drawSet.add(instancer, material, stage, meshPool.alloc(mesh));
public void add(IndirectInstancer<I> instancer, RenderStage stage, Model model) {
var meshes = model.getMeshes();
var boundingSphere = ModelUtil.computeBoundingSphere(meshes.values());
int modelID = indirectModels.size();
var indirectModel = new IndirectModel(instancer, modelID, boundingSphere);
indirectModels.add(indirectModel);
for (Map.Entry<Material, Mesh> materialMeshEntry : meshes.entrySet()) {
IndirectMeshPool.BufferedMesh bufferedMesh = meshPool.alloc(materialMeshEntry.getValue());
indirectDraws.add(new IndirectDraw(indirectModel, materialMeshEntry.getKey(), bufferedMesh, stage));
}
needsSortDraws = true;
}
public void beginFrame() {
hasCulledThisFrame = false;
needsMemoryBarrier = true;
/**
 * Rebuilds {@code multiDraws} from {@code indirectDraws}.
 *
 * <p>The draws are sorted by stage, then by material state. Each maximal run of
 * consecutive draws that share a stage and have equal materials becomes one
 * {@link MultiDraw} covering the half-open index range {@code [start, end)}.
 */
private void sortDraws() {
	multiDraws.clear();

	// Order by stage first, then material.
	indirectDraws.sort(Comparator.comparing(IndirectDraw::stage)
			.thenComparing(IndirectDraw::material, MaterialUtil.BY_STATE));

	final int drawCount = indirectDraws.size();
	int runStart = 0;
	for (int index = 0; index < drawCount; index++) {
		var current = indirectDraws.get(index);
		var currentMaterial = current.material();
		var currentStage = current.stage();
		int next = index + 1;

		// A run continues only while the following draw has the same stage and an equal material.
		boolean runContinues = next < drawCount
				&& currentStage == indirectDraws.get(next).stage()
				&& currentMaterial.equals(indirectDraws.get(next).material());
		if (runContinues) {
			continue;
		}

		multiDraws.computeIfAbsent(currentStage, s -> new ArrayList<>())
				.add(new MultiDraw(currentMaterial, runStart, next));
		runStart = next;
	}
}
/**
 * Prepares this group's buffers for the frame: recomputes the total instance
 * count, resizes storage if needed, re-sorts draws when flagged, and uploads
 * meshes, instances, model descriptors and draw commands.
 * Returns early, uploading nothing, when {@link #nothingToDo()} is true.
 */
public void flush() {
// Arm the barrier so that drawBarrier() issues one memory barrier.
needsDrawBarrier = true;
// Also calls prepare() on every model, assigning base instances.
instanceCountThisFrame = calculateTotalInstanceCountAndPrepareBatches();
if (nothingToDo()) {
return;
}
// Argument order is (objectCount, drawCount, modelCount).
buffers.updateCounts(instanceCountThisFrame, indirectDraws.size(), indirectModels.size());
// Draws are only re-sorted after add() has flagged a change.
if (needsSortDraws) {
sortDraws();
needsSortDraws = false;
}
meshPool.flush();
uploadInstances();
uploadModels();
uploadIndirectCommands();
}
/**
 * Dispatches the culling compute program with one thread per instance counted
 * this frame. Does nothing when {@link #nothingToDo()} is true.
 */
public void dispatchCull() {
	if (!nothingToDo()) {
		UniformBuffer.syncAndBind(cull);
		buffers.bindForCompute();
		final int groupCount = getGroupCount(instanceCountThisFrame);
		glDispatchCompute(groupCount, 1, 1);
	}
}
/**
 * Dispatches the apply compute program with one thread per draw command.
 * Does nothing when {@link #nothingToDo()} is true.
 */
public void dispatchApply() {
	if (!nothingToDo()) {
		apply.bind();
		buffers.bindForCompute();
		// NOTE(review): presumably makes the cull pass's storage writes visible to this pass; confirm.
		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
		final int groupCount = getGroupCount(indirectDraws.size());
		glDispatchCompute(groupCount, 1, 1);
	}
}
/**
 * @return {@code true} when there are no draws registered or no instances were counted this frame
 */
private boolean nothingToDo() {
	if (indirectDraws.isEmpty()) {
		return true;
	}
	return instanceCountThisFrame == 0;
}
/**
 * @param stage the render stage about to be drawn
 * @return {@code true} when the group has no work at all, or has no multi-draws for {@code stage}
 */
private boolean nothingToDo(RenderStage stage) {
	if (nothingToDo()) {
		return true;
	}
	return !multiDraws.containsKey(stage);
}
public void submit(RenderStage stage) {
if (drawSet.isEmpty()) {
return;
}
if (instanceCountThisFrame == 0) {
return;
}
cull();
dispatchDraw(stage);
}
private void cull() {
if (hasCulledThisFrame) {
return;
}
buffers.updateCounts(instanceCountThisFrame, drawSet.size());
meshPool.flush();
uploadInstances();
uploadIndirectCommands();
UniformBuffer.syncAndBind(compute);
buffers.bindForCompute();
var groupCount = (instanceCountThisFrame + 31) >> 5; // ceil(instanceCount / 32)
glDispatchCompute(groupCount, 1, 1);
hasCulledThisFrame = true;
}
private void dispatchDraw(RenderStage stage) {
if (!drawSet.contains(stage)) {
if (nothingToDo(stage)) {
return;
}
@ -94,26 +156,30 @@ public class IndirectCullingGroup<I extends Instance> {
meshPool.bindForDraw();
buffers.bindForDraw();
memoryBarrier();
drawBarrier();
drawSet.submit(stage);
var flwBaseDraw = draw.getUniformLocation("_flw_baseDraw");
for (var multiDraw : multiDraws.get(stage)) {
glUniform1ui(flwBaseDraw, multiDraw.start);
multiDraw.submit();
}
MaterialUtil.reset();
}
private void memoryBarrier() {
if (needsMemoryBarrier) {
glMemoryBarrier(BARRIER_BITS);
needsMemoryBarrier = false;
private void drawBarrier() {
if (needsDrawBarrier) {
glMemoryBarrier(DRAW_BARRIER_BITS);
needsDrawBarrier = false;
}
}
private void uploadInstances() {
long objectPtr = buffers.objectPtr;
for (int i = 0, batchesSize = drawSet.indirectDraws.size(); i < batchesSize; i++) {
var batch = drawSet.indirectDraws.get(i);
var instanceCount = batch.instancer()
.getInstanceCount();
batch.writeObjects(objectPtr, i);
for (IndirectModel batch : indirectModels) {
var instanceCount = batch.instancer.getInstanceCount();
batch.writeObjects(objectPtr);
objectPtr += instanceCount * objectStride;
}
@ -121,20 +187,29 @@ public class IndirectCullingGroup<I extends Instance> {
buffers.flushObjects(objectPtr - buffers.objectPtr);
}
/**
 * Writes every model descriptor into the CPU-side model block, packed back to
 * back at {@link IndirectBuffers#MODEL_STRIDE} intervals, then uploads the written range.
 */
private void uploadModels() {
	final long base = buffers.modelPtr.ptr();
	long cursor = base;
	for (IndirectModel indirectModel : indirectModels) {
		indirectModel.writeModel(cursor);
		cursor += IndirectBuffers.MODEL_STRIDE;
	}
	final long bytesWritten = cursor - base;
	buffers.flushModels(bytesWritten);
}
private void uploadIndirectCommands() {
long writePtr = buffers.drawPtr;
for (var batch : drawSet.indirectDraws) {
long writePtr = buffers.drawPtr.ptr();
for (var batch : indirectDraws) {
batch.writeIndirectCommand(writePtr);
writePtr += IndirectBuffers.DRAW_COMMAND_STRIDE;
}
buffers.flushDrawCommands(writePtr - buffers.drawPtr);
buffers.flushDrawCommands(writePtr - buffers.drawPtr.ptr());
}
private int calculateTotalInstanceCountAndPrepareBatches() {
int baseInstance = 0;
for (var batch : drawSet.indirectDraws) {
for (var batch : indirectModels) {
batch.prepare(baseInstance);
baseInstance += batch.instancer().getInstanceCount();
baseInstance += batch.instancer.getInstanceCount();
}
return baseInstance;
}
@ -145,6 +220,21 @@ public class IndirectCullingGroup<I extends Instance> {
}
public boolean hasStage(RenderStage stage) {
return drawSet.contains(stage);
return multiDraws.containsKey(stage);
}
/**
 * Computes how many work groups are needed to cover {@code threadCount} threads,
 * using groups of 64 when {@link GlCompat#amd} is set and 32 otherwise.
 *
 * @param threadCount total number of threads required
 * @return {@code ceil(threadCount / groupSize)}
 */
private static int getGroupCount(int threadCount) {
	// log2 of the group size: 64 -> 6, 32 -> 5.
	final int shift = GlCompat.amd ? 6 : 5;
	final int roundUp = (1 << shift) - 1;
	return (threadCount + roundUp) >> shift;
}
/**
 * A contiguous range {@code [start, end)} of draw commands that share one material
 * and are submitted with a single multi-draw call.
 */
private record MultiDraw(Material material, int start, int end) {
	/** Sets up the material, then issues one indirect multi-draw over this range. */
	void submit() {
		MaterialUtil.setup(material);

		final long firstCommandOffset = start * IndirectBuffers.DRAW_COMMAND_STRIDE;
		final int commandCount = end - start;
		final int stride = (int) IndirectBuffers.DRAW_COMMAND_STRIDE;
		glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, firstCommandOffset, commandCount, stride);
	}
}
}

View file

@ -3,13 +3,12 @@ package com.jozufozu.flywheel.backend.engine.indirect;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.backend.MaterialUtil;
import com.jozufozu.flywheel.backend.ShaderIndices;
public class IndirectDraw<I extends Instance> {
private final IndirectInstancer<I> instancer;
public class IndirectDraw {
private final IndirectModel model;
private final IndirectMeshPool.BufferedMesh mesh;
private final Material material;
private final RenderStage stage;
@ -19,11 +18,8 @@ public class IndirectDraw<I extends Instance> {
private final int packedFogAndCutout;
private final int packedMaterialProperties;
private int baseInstance = -1;
private boolean needsFullWrite = true;
public IndirectDraw(IndirectInstancer<I> instancer, Material material, IndirectMeshPool.BufferedMesh mesh, RenderStage stage) {
this.instancer = instancer;
public IndirectDraw(IndirectModel model, Material material, IndirectMeshPool.BufferedMesh mesh, RenderStage stage) {
this.model = model;
this.material = material;
this.mesh = mesh;
this.stage = stage;
@ -34,10 +30,6 @@ public class IndirectDraw<I extends Instance> {
this.packedMaterialProperties = MaterialUtil.packProperties(material);
}
public IndirectInstancer<I> instancer() {
return instancer;
}
public Material material() {
return material;
}
@ -50,37 +42,17 @@ public class IndirectDraw<I extends Instance> {
return stage;
}
public void prepare(int baseInstance) {
instancer.update();
if (baseInstance == this.baseInstance) {
needsFullWrite = false;
return;
}
this.baseInstance = baseInstance;
needsFullWrite = true;
}
public void writeObjects(long objectPtr, int batchID) {
if (needsFullWrite) {
instancer.writeFull(objectPtr, batchID);
} else {
instancer.writeSparse(objectPtr, batchID);
}
}
public void writeIndirectCommand(long ptr) {
var boundingSphere = mesh.boundingSphere();
MemoryUtil.memPutInt(ptr, mesh.indexCount()); // count
MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount - to be incremented by the compute shader
MemoryUtil.memPutInt(ptr + 4, 0); // instanceCount
MemoryUtil.memPutInt(ptr + 8, mesh.firstIndex); // firstIndex
MemoryUtil.memPutInt(ptr + 12, mesh.baseVertex); // baseVertex
MemoryUtil.memPutInt(ptr + 16, baseInstance); // baseInstance
MemoryUtil.memPutInt(ptr + 16, model.baseInstance); // baseInstance
boundingSphere.getToAddress(ptr + 20); // boundingSphere
MemoryUtil.memPutInt(ptr + 36, vertexMaterialID); // vertexMaterialID
MemoryUtil.memPutInt(ptr + 40, fragmentMaterialID); // fragmentMaterialID
MemoryUtil.memPutInt(ptr + 44, packedFogAndCutout); // packedFogAndCutout
MemoryUtil.memPutInt(ptr + 48, packedMaterialProperties); // packedMaterialProperties
MemoryUtil.memPutInt(ptr + 20, model.id); // modelID
MemoryUtil.memPutInt(ptr + 24, vertexMaterialID); // vertexMaterialID
MemoryUtil.memPutInt(ptr + 28, fragmentMaterialID); // fragmentMaterialID
MemoryUtil.memPutInt(ptr + 32, packedFogAndCutout); // packedFogAndCutout
MemoryUtil.memPutInt(ptr + 36, packedMaterialProperties); // packedMaterialProperties
}
}

View file

@ -20,17 +20,9 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
@Override
protected <I extends Instance> void add(InstancerKey<I> key, IndirectInstancer<?> instancer, Model model, RenderStage stage) {
var meshes = model.getMeshes();
for (var entry : meshes.entrySet()) {
var material = entry.getKey();
var mesh = entry.getValue();
var indirectList = (IndirectCullingGroup<I>) renderLists.computeIfAbsent(key.type(), IndirectCullingGroup::new);
var indirectList = (IndirectCullingGroup<I>) renderLists.computeIfAbsent(key.type(), IndirectCullingGroup::new);
indirectList.add((IndirectInstancer<I>) instancer, stage, material, mesh);
break; // TODO: support multiple meshes per model
}
indirectList.add((IndirectInstancer<I>) instancer, stage, model);
}
public boolean hasStage(RenderStage stage) {
@ -46,8 +38,16 @@ public class IndirectDrawManager extends InstancerStorage<IndirectInstancer<?>>
public void flush() {
super.flush();
for (IndirectCullingGroup<?> value : renderLists.values()) {
value.beginFrame();
for (var group : renderLists.values()) {
group.flush();
}
for (var group : renderLists.values()) {
group.dispatchCull();
}
for (var group : renderLists.values()) {
group.dispatchApply();
}
}

View file

@ -1,80 +0,0 @@
package com.jozufozu.flywheel.backend.engine.indirect;
import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
import static org.lwjgl.opengl.GL43.glMultiDrawElementsIndirect;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.backend.MaterialUtil;
/**
 * Holds the indirect draws of one culling group and groups them into per-stage
 * multi-draws of consecutive draws that share a material.
 */
public class IndirectDrawSet<I extends Instance> {
	final List<IndirectDraw<I>> indirectDraws = new ArrayList<>();

	final Map<RenderStage, List<MultiDraw>> multiDraws = new EnumMap<>(RenderStage.class);

	/** @return {@code true} when no draws have been added */
	public boolean isEmpty() {
		return indirectDraws.isEmpty();
	}

	/** @return the number of draws in this set */
	public int size() {
		return indirectDraws.size();
	}

	/**
	 * Appends a draw for the given instancer/material/mesh/stage and immediately
	 * regroups all draws into multi-draws.
	 */
	public void add(IndirectInstancer<I> instancer, Material material, RenderStage stage, IndirectMeshPool.BufferedMesh bufferedMesh) {
		var indirectDraw = new IndirectDraw<>(instancer, material, bufferedMesh, stage);
		indirectDraws.add(indirectDraw);
		determineMultiDraws();
	}

	/**
	 * Submits every multi-draw registered for {@code stage}, then resets material
	 * state. Does nothing when the stage has no multi-draws.
	 */
	public void submit(RenderStage stage) {
		// Entries are only created through computeIfAbsent, so a null lookup means the key is absent.
		List<MultiDraw> stageDraws = multiDraws.get(stage);
		if (stageDraws == null) {
			return;
		}

		for (MultiDraw stageDraw : stageDraws) {
			stageDraw.submit();
		}

		MaterialUtil.reset();
	}

	/**
	 * Sorts the draws by stage and then material state, and rebuilds
	 * {@code multiDraws} with one entry per run of equal stage and material.
	 */
	public void determineMultiDraws() {
		multiDraws.clear();

		indirectDraws.sort(Comparator.comparing(IndirectDraw<I>::stage)
				.thenComparing(IndirectDraw::material, MaterialUtil.BY_STATE));

		final int drawCount = indirectDraws.size();
		int runStart = 0;
		for (int index = 0; index < drawCount; index++) {
			var current = indirectDraws.get(index);
			var currentMaterial = current.material();
			var currentStage = current.stage();
			int next = index + 1;

			// The run goes on only while the following draw matches in stage and material.
			boolean runContinues = next < drawCount
					&& currentStage == indirectDraws.get(next).stage()
					&& currentMaterial.equals(indirectDraws.get(next).material());
			if (runContinues) {
				continue;
			}

			multiDraws.computeIfAbsent(currentStage, s -> new ArrayList<>())
					.add(new MultiDraw(currentMaterial, runStart, next));
			runStart = next;
		}
	}

	/** @return {@code true} when at least one multi-draw exists for {@code stage} */
	public boolean contains(RenderStage stage) {
		return multiDraws.containsKey(stage);
	}

	/** A range {@code [start, end)} of draw commands sharing one material. */
	private record MultiDraw(Material material, int start, int end) {
		void submit() {
			MaterialUtil.setup(material);

			final long firstCommandOffset = start * IndirectBuffers.DRAW_COMMAND_STRIDE;
			final int commandCount = end - start;
			final int stride = (int) IndirectBuffers.DRAW_COMMAND_STRIDE;
			glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, firstCommandOffset, commandCount, stride);
		}
	}
}

View file

@ -35,11 +35,11 @@ public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I>
changed.clear();
}
public void writeFull(long objectPtr, int batchID) {
public void writeFull(long objectPtr, int modelID) {
InstanceWriter<I> writer = type.getWriter();
for (I object : instances) {
// write batchID
MemoryUtil.memPutInt(objectPtr, batchID);
// write modelID
MemoryUtil.memPutInt(objectPtr, modelID);
objectPtr += IndirectBuffers.INT_SIZE;
// write object

View file

@ -0,0 +1,45 @@
package com.jozufozu.flywheel.backend.engine.indirect;
import org.joml.Vector4f;
import org.joml.Vector4fc;
import org.lwjgl.system.MemoryUtil;
/**
 * Per-model state for indirect rendering: the instancer that supplies the
 * instances, the model's id, its base instance and its bounding sphere.
 */
public class IndirectModel {
	// Byte offsets of the fields written by writeModel.
	private static final long INSTANCE_COUNT_OFFSET = 0;
	private static final long BASE_INSTANCE_OFFSET = 4;
	private static final long BOUNDING_SPHERE_OFFSET = 8;

	public final IndirectInstancer<?> instancer;
	public final int id;

	/** Base instance last passed to {@link #prepare(int)}; {@code -1} until then. */
	public int baseInstance = -1;
	private boolean needsFullWrite = true;

	private final Vector4fc boundingSphere;

	/**
	 * @param instancer the instancer whose instances belong to this model
	 * @param id the id written alongside each instance and into draw commands
	 * @param boundingSphere the model's bounding sphere
	 */
	public IndirectModel(IndirectInstancer<?> instancer, int id, Vector4f boundingSphere) {
		this.instancer = instancer;
		this.id = id;
		this.boundingSphere = boundingSphere;
	}

	/**
	 * Writes this model's descriptor at {@code ptr}: a zeroed instance count,
	 * the base instance, and then the bounding sphere.
	 */
	public void writeModel(long ptr) {
		MemoryUtil.memPutInt(ptr + INSTANCE_COUNT_OFFSET, 0); // instanceCount - to be incremented by the compute shader
		MemoryUtil.memPutInt(ptr + BASE_INSTANCE_OFFSET, baseInstance); // baseInstance
		boundingSphere.getToAddress(ptr + BOUNDING_SPHERE_OFFSET); // boundingSphere
	}

	/**
	 * Updates the instancer and records the base instance. A full object write
	 * is requested only when the base instance differs from the previous one.
	 */
	public void prepare(int baseInstance) {
		instancer.update();

		needsFullWrite = baseInstance != this.baseInstance;
		if (needsFullWrite) {
			this.baseInstance = baseInstance;
		}
	}

	/**
	 * Writes this model's instances at {@code objectPtr}, either in full or
	 * sparsely, depending on the outcome of the last {@link #prepare(int)}.
	 */
	public void writeObjects(long objectPtr) {
		if (!needsFullWrite) {
			instancer.writeSparse(objectPtr, id);
			return;
		}
		instancer.writeFull(objectPtr, id);
	}
}

View file

@ -21,9 +21,9 @@ import net.minecraft.Util;
public class GlCompat {
public static final boolean ALLOW_DSA = true;
public static final GLCapabilities CAPABILITIES = GL.createCapabilities();
private static final boolean amd = _decideIfWeAreAMD();
private static final boolean windows = _decideIfWeAreWindows();
private static final boolean supportsIndirect = _decideIfWeSupportIndirect();
public static final boolean amd = _decideIfWeAreAMD();
public static final boolean windows = _decideIfWeAreWindows();
public static final boolean supportsIndirect = _decideIfWeSupportIndirect();
public static final int SUBGROUP_SIZE = _subgroupSize();
private GlCompat() {

View file

@ -2,6 +2,7 @@ package com.jozufozu.flywheel.lib.model;
import java.lang.reflect.Field;
import java.nio.ByteBuffer;
import java.util.Collection;
import org.jetbrains.annotations.Nullable;
import org.joml.Vector4f;
@ -11,12 +12,14 @@ import org.slf4j.Logger;
import com.dreizak.miniball.highdim.Miniball;
import com.dreizak.miniball.model.PointSet;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.model.Mesh;
import com.jozufozu.flywheel.api.vertex.ReusableVertexList;
import com.jozufozu.flywheel.api.vertex.VertexList;
import com.jozufozu.flywheel.api.vertex.VertexListProviderRegistry;
import com.jozufozu.flywheel.api.vertex.VertexType;
import com.jozufozu.flywheel.lib.material.Materials;
import com.jozufozu.flywheel.lib.memory.MemoryBlock;
import com.jozufozu.flywheel.lib.vertex.PositionOnlyVertexList;
import com.mojang.blaze3d.vertex.BufferBuilder;
import com.mojang.blaze3d.vertex.BufferBuilder.DrawState;
import com.mojang.blaze3d.vertex.VertexFormat;
@ -98,6 +101,31 @@ public final class ModelUtil {
return null;
}
/**
 * Computes one bounding sphere that encloses the vertices of all given meshes.
 *
 * <p>Every mesh is written into a single temporary position-only vertex list,
 * and the sphere is computed over that combined list. The temporary native
 * block is always released, even if writing a mesh or computing the sphere throws.
 *
 * @param values the meshes to enclose
 * @return the bounding sphere computed over the combined vertices
 */
public static Vector4f computeBoundingSphere(Collection<Mesh> values) {
	int totalVertices = 0;
	for (Mesh value : values) {
		totalVertices += value.vertexCount();
	}

	var block = MemoryBlock.malloc((long) totalVertices * PositionOnlyVertexList.STRIDE);
	try {
		var vertexList = new PositionOnlyVertexList();

		// Write each mesh directly after the previous one.
		int baseVertex = 0;
		for (Mesh value : values) {
			vertexList.ptr(block.ptr() + (long) baseVertex * PositionOnlyVertexList.STRIDE);
			value.write(vertexList);
			baseVertex += value.vertexCount();
		}

		// Rewind to the start so the list spans every vertex written above.
		vertexList.ptr(block.ptr());
		vertexList.vertexCount(totalVertices);

		return computeBoundingSphere(vertexList);
	} finally {
		// Native memory is not garbage collected; free it on every exit path.
		block.free();
	}
}
public static Vector4f computeBoundingSphere(VertexList vertexList) {
return computeBoundingSphere(new PointSet() {
@Override

View file

@ -0,0 +1,171 @@
package com.jozufozu.flywheel.lib.vertex;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.vertex.MutableVertexList;
import net.minecraft.client.renderer.LightTexture;
import net.minecraft.client.renderer.texture.OverlayTexture;
/**
 * A vertex list that stores only positions: three floats (x, y, z) per vertex,
 * {@link #STRIDE} bytes apart, starting at {@code ptr}.
 *
 * <p>Every other attribute is not stored. Its getter returns a constant
 * (color 1, uv 0, {@link OverlayTexture#NO_OVERLAY}, {@link LightTexture#FULL_BRIGHT},
 * normal (0, 1, 0)) and its setter does nothing.
 */
public class PositionOnlyVertexList extends AbstractVertexList {
	public static final int STRIDE = 12;

	/**
	 * Address of the vertex at {@code index}. The multiplication is done in
	 * {@code long} so that large indices cannot overflow {@code int}.
	 */
	private long address(int index) {
		return ptr + (long) index * STRIDE;
	}

	@Override
	public float x(int index) {
		return MemoryUtil.memGetFloat(address(index));
	}

	@Override
	public float y(int index) {
		return MemoryUtil.memGetFloat(address(index) + 4);
	}

	@Override
	public float z(int index) {
		return MemoryUtil.memGetFloat(address(index) + 8);
	}

	@Override
	public float r(int index) {
		return 1;
	}

	@Override
	public float g(int index) {
		return 1;
	}

	@Override
	public float b(int index) {
		return 1;
	}

	@Override
	public float a(int index) {
		return 1;
	}

	@Override
	public float u(int index) {
		return 0;
	}

	@Override
	public float v(int index) {
		return 0;
	}

	@Override
	public int overlay(int index) {
		return OverlayTexture.NO_OVERLAY;
	}

	@Override
	public int light(int index) {
		return LightTexture.FULL_BRIGHT;
	}

	@Override
	public float normalX(int index) {
		return 0;
	}

	@Override
	public float normalY(int index) {
		return 1;
	}

	@Override
	public float normalZ(int index) {
		return 0;
	}

	@Override
	public void x(int index, float x) {
		MemoryUtil.memPutFloat(address(index), x);
	}

	@Override
	public void y(int index, float y) {
		MemoryUtil.memPutFloat(address(index) + 4, y);
	}

	@Override
	public void z(int index, float z) {
		MemoryUtil.memPutFloat(address(index) + 8, z);
	}

	// The setters below are intentionally empty: only positions are stored.

	@Override
	public void r(int index, float r) {
	}

	@Override
	public void g(int index, float g) {
	}

	@Override
	public void b(int index, float b) {
	}

	@Override
	public void a(int index, float a) {
	}

	@Override
	public void u(int index, float u) {
	}

	@Override
	public void v(int index, float v) {
	}

	@Override
	public void overlay(int index, int overlay) {
	}

	@Override
	public void light(int index, int light) {
	}

	@Override
	public void normalX(int index, float normalX) {
	}

	@Override
	public void normalY(int index, float normalY) {
	}

	@Override
	public void normalZ(int index, float normalZ) {
	}

	/**
	 * Copies one vertex to {@code dst}. When {@code dst} is exactly this class
	 * the bytes are copied directly; otherwise the superclass implementation is used.
	 */
	@Override
	public void write(MutableVertexList dst, int srcIndex, int dstIndex) {
		if (getClass() == dst.getClass()) {
			PositionOnlyVertexList target = (PositionOnlyVertexList) dst;
			MemoryUtil.memCopy(address(srcIndex), target.address(dstIndex), STRIDE);
		} else {
			super.write(dst, srcIndex, dstIndex);
		}
	}

	/**
	 * Copies {@code vertexCount} consecutive vertices to {@code dst}, with the
	 * same direct-copy fast path as the single-vertex overload.
	 */
	@Override
	public void write(MutableVertexList dst, int srcStartIndex, int dstStartIndex, int vertexCount) {
		if (getClass() == dst.getClass()) {
			PositionOnlyVertexList target = (PositionOnlyVertexList) dst;
			MemoryUtil.memCopy(address(srcStartIndex), target.address(dstStartIndex), (long) vertexCount * STRIDE);
		} else {
			super.write(dst, srcStartIndex, dstStartIndex, vertexCount);
		}
	}

	/**
	 * Copies all of this list's vertices to the start of {@code dst}, with the
	 * same direct-copy fast path as {@code write}.
	 */
	@Override
	public void writeAll(MutableVertexList dst) {
		if (getClass() == dst.getClass()) {
			PositionOnlyVertexList target = (PositionOnlyVertexList) dst;
			MemoryUtil.memCopy(ptr, target.ptr, (long) vertexCount * STRIDE);
		} else {
			super.writeAll(dst);
		}
	}
}

View file

@ -0,0 +1,28 @@
#include "flywheel:internal/indirect/buffers.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/indirect/draw_command.glsl"
// Work group size along x; main() maps one invocation to one draw command.
layout(local_size_x = FLW_SUBGROUP_SIZE) in;
// Per-model data. This pass only reads it (instanceCount).
layout(std430, binding = MODEL_BINDING) restrict readonly buffer ModelDescriptors {
ModelDescriptor models[];
};
// Draw commands. This pass reads each command's modelID and writes its instanceCount.
layout(std430, binding = DRAW_BINDING) restrict buffer MeshDrawCommands {
MeshDrawCommand drawCommands[];
};
// Propagate culling results: each draw command takes the instance count
// of the model it references.
void main() {
    uint commandIndex = gl_GlobalInvocationID.x;

    // Invocations beyond the end of the command buffer do nothing.
    if (commandIndex < drawCommands.length()) {
        drawCommands[commandIndex].instanceCount = models[drawCommands[commandIndex].modelID].instanceCount;
    }
}

View file

@ -0,0 +1,4 @@
// Shader storage buffer binding points shared by the indirect shaders.
// Per-object instance data (ObjectBuffer).
#define OBJECT_BINDING 0
// Object IDs that passed culling (TargetBuffer).
#define TARGET_BINDING 1
// Per-model descriptors (ModelDescriptors).
#define MODEL_BINDING 2
// Mesh draw commands.
#define DRAW_BINDING 3

View file

@ -1,6 +1,8 @@
layout(local_size_x = FLW_SUBGROUP_SIZE) in;
#include "flywheel:internal/indirect/buffers.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/indirect/object.glsl"
#include "flywheel:internal/indirect/mesh.glsl"
layout(local_size_x = FLW_SUBGROUP_SIZE) in;
// need to add stubs so the instance shader compiles.
vec4 flw_vertexPos;
@ -17,28 +19,24 @@ vec4 flw_var3;
void flw_transformBoundingSphere(in FlwInstance i, inout vec3 center, inout float radius);
struct Object {
uint batchID;
FlwPackedInstance instance;
};
// populated by instancers
layout(std430, binding = 0) restrict readonly buffer ObjectBuffer {
layout(std430, binding = OBJECT_BINDING) restrict readonly buffer ObjectBuffer {
Object objects[];
};
layout(std430, binding = 1) restrict writeonly buffer TargetBuffer {
layout(std430, binding = TARGET_BINDING) restrict writeonly buffer TargetBuffer {
uint objectIDs[];
};
layout(std430, binding = 2) restrict buffer DrawCommands {
MeshDrawCommand drawCommands[];
layout(std430, binding = MODEL_BINDING) restrict buffer ModelDescriptors {
ModelDescriptor models[];
};
uint flw_objectID;
uint flw_batchID;
// 83 - 27 = 56 spirv instruction results
// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
// Only uses 6 fmas and some boolean ops.
// See also:
// flywheel:uniform/flywheel.glsl
// com.jozufozu.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes
// org.joml.FrustumIntersection.testSphere
bool testSphere(vec3 center, float radius) {
bvec4 xyInside = greaterThanEqual(fma(flywheel.planes.xyX, center.xxxx, fma(flywheel.planes.xyY, center.yyyy, fma(flywheel.planes.xyZ, center.zzzz, flywheel.planes.xyW))), -radius.xxxx);
bvec2 zInside = greaterThanEqual(fma(flywheel.planes.zX, center.xx, fma(flywheel.planes.zY, center.yy, fma(flywheel.planes.zZ, center.zz, flywheel.planes.zW))), -radius.xx);
@ -46,32 +44,33 @@ bool testSphere(vec3 center, float radius) {
return all(xyInside) && all(zInside);
}
bool isVisible() {
BoundingSphere sphere = drawCommands[flw_batchID].boundingSphere;
bool isVisible(uint objectID, uint modelID) {
BoundingSphere sphere = models[modelID].boundingSphere;
vec3 center;
float radius;
unpackBoundingSphere(sphere, center, radius);
FlwInstance object = _flw_unpackInstance(objects[flw_objectID].instance);
flw_transformBoundingSphere(object, center, radius);
FlwInstance instance = _flw_unpackInstance(objects[objectID].instance);
flw_transformBoundingSphere(instance, center, radius);
return testSphere(center, radius);
}
void main() {
flw_objectID = gl_GlobalInvocationID.x;
uint objectID = gl_GlobalInvocationID.x;
if (flw_objectID >= objects.length()) {
if (objectID >= objects.length()) {
return;
}
flw_batchID = objects[flw_objectID].batchID;
uint modelID = objects[objectID].modelID;
if (isVisible()) {
uint batchIndex = atomicAdd(drawCommands[flw_batchID].instanceCount, 1);
uint globalIndex = drawCommands[flw_batchID].baseInstance + batchIndex;
if (isVisible(objectID, modelID)) {
uint batchIndex = atomicAdd(models[modelID].instanceCount, 1);
uint globalIndex = models[modelID].baseInstance + batchIndex;
objectIDs[globalIndex] = flw_objectID;
objectIDs[globalIndex] = objectID;
}
}

View file

@ -1,31 +1,30 @@
#include "flywheel:internal/indirect/api/vertex.glsl"
#include "flywheel:internal/indirect/mesh.glsl"
#include "flywheel:internal/indirect/buffers.glsl"
#include "flywheel:internal/indirect/draw_command.glsl"
#include "flywheel:internal/indirect/object.glsl"
#include "flywheel:internal/material.glsl"
#include "flywheel:internal/block.vert"
#include "flywheel:util/diffuse.glsl"
flat out uvec3 _flw_material;
struct Object {
uint batchID;
FlwPackedInstance instance;
};
layout(std430, binding = 0) restrict readonly buffer ObjectBuffer {
layout(std430, binding = OBJECT_BINDING) restrict readonly buffer ObjectBuffer {
Object objects[];
};
layout(std430, binding = 1) restrict readonly buffer TargetBuffer {
layout(std430, binding = TARGET_BINDING) restrict readonly buffer TargetBuffer {
uint objectIDs[];
};
layout(std430, binding = 2) restrict readonly buffer DrawCommands {
layout(std430, binding = DRAW_BINDING) restrict readonly buffer DrawCommands {
MeshDrawCommand drawCommands[];
};
uniform uint _flw_baseDraw;
void main() {
uint instanceIndex = objectIDs[gl_BaseInstance + gl_InstanceID];
uint batchID = objects[instanceIndex].batchID;
uint batchID = gl_DrawID + _flw_baseDraw;
FlwInstance i = _flw_unpackInstance(objects[instanceIndex].instance);
_flw_materialVertexID = drawCommands[batchID].vertexMaterialID;

View file

@ -0,0 +1,13 @@
// One draw command for a single mesh of a model.
// NOTE(review): the first five fields presumably mirror OpenGL's
// DrawElementsIndirectCommand layout; confirm before reordering or resizing them.
struct MeshDrawCommand {
uint indexCount;
// Overwritten by the apply pass with the instance count of the referenced model.
uint instanceCount;
uint firstIndex;
uint vertexOffset;
uint baseInstance;
// Index into the model descriptor buffer of the model this mesh belongs to.
uint modelID;
// Material identifiers and packed material state consumed by the draw shaders.
uint vertexMaterialID;
uint fragmentMaterialID;
uint packedFogAndCutout;
uint packedMaterialProperties;
};

View file

@ -1,25 +0,0 @@
struct BoundingSphere {
float x;
float y;
float z;
float radius;
};
void unpackBoundingSphere(in BoundingSphere sphere, out vec3 center, out float radius) {
center = vec3(sphere.x, sphere.y, sphere.z);
radius = sphere.radius;
}
struct MeshDrawCommand {
uint indexCount;
uint instanceCount;
uint firstIndex;
uint vertexOffset;
uint baseInstance;
BoundingSphere boundingSphere;
uint vertexMaterialID;
uint fragmentMaterialID;
uint packedFogAndCutout;
uint packedMaterialProperties;
};

View file

@ -0,0 +1,17 @@
// A sphere stored as four scalar floats: center (x, y, z) followed by radius.
// Use unpackBoundingSphere to get a vec3 center and a float radius.
struct BoundingSphere {
float x;
float y;
float z;
float radius;
};
// Splits a BoundingSphere into a vec3 center and a scalar radius.
void unpackBoundingSphere(in BoundingSphere sphere, out vec3 center, out float radius) {
    radius = sphere.radius;
    center.x = sphere.x;
    center.y = sphere.y;
    center.z = sphere.z;
}
// Per-model state shared by the culling and apply passes.
struct ModelDescriptor {
// Number of instances that passed culling; incremented atomically by the culling
// shader and copied into draw commands by the apply shader.
uint instanceCount;
// Offset into the target buffer at which this model's visible object IDs start.
uint baseInstance;
// Bounding sphere of the model, transformed per instance before the frustum test.
BoundingSphere boundingSphere;
};

View file

@ -0,0 +1,4 @@
// One entry of the object buffer: a packed instance plus the model it belongs to.
struct Object {
// Index of this object's model in the model descriptor buffer.
uint modelID;
// Packed instance data; unpacked with _flw_unpackInstance before use.
FlwPackedInstance instance;
};