Culling experiments 2: not testing the GPU

- Skeleton for compute shader culling/indirect rendering
This commit is contained in:
Jozufozu 2022-07-29 16:06:25 -07:00
parent 4ec1f8eaf3
commit 6234df6440
38 changed files with 951 additions and 50 deletions

View file

@ -31,6 +31,8 @@ version = mod_version + (dev && buildNumber != null ? "-${buildNumber}" : '')
java.toolchain.languageVersion = JavaLanguageVersion.of(17)
jarJar.enable()
println('Java: ' + System.getProperty('java.version') + ' JVM: ' + System.getProperty('java.vm.version') + '(' + System.getProperty('java.vendor') + ') Arch: ' + System.getProperty('os.arch'))
minecraft {
mappings channel: 'parchment', version: "${parchment_version}-${minecraft_version}"
@ -111,6 +113,11 @@ repositories {
dependencies {
minecraft "net.minecraftforge:forge:${minecraft_version}-${forge_version}"
implementation "com.dreizak:miniball:1.0.3"
jarJar(group: 'com.dreizak', name: 'miniball', version: '[1.0,2.0)') {
jarJar.pin(it, "[1.0,2.0)")
}
// switch to implementation for debugging
compileOnly fg.deobf("maven.modrinth:starlight-forge:1.0.2+1.18.2")
@ -171,7 +178,9 @@ void addLicense(jarTask) {
}
jar.finalizedBy('reobfJar')
tasks.jarJar.finalizedBy('reobfJarJar')
addLicense(jar)
addLicense(tasks.jarJar)
publishing {
publications {
@ -180,6 +189,7 @@ publishing {
from components.java
fg.component(it)
jarJar.component(it)
}
}

View file

@ -16,7 +16,7 @@ import com.jozufozu.flywheel.core.DebugRender;
import com.jozufozu.flywheel.core.PartialModel;
import com.jozufozu.flywheel.core.QuadConverter;
import com.jozufozu.flywheel.core.StitchedSprite;
import com.jozufozu.flywheel.core.compile.InstancedArraysCompiler;
import com.jozufozu.flywheel.backend.instancing.instancing.InstancedArraysCompiler;
import com.jozufozu.flywheel.core.crumbling.CrumblingRenderer;
import com.jozufozu.flywheel.core.model.Models;
import com.jozufozu.flywheel.event.EntityWorldHandler;

View file

@ -1,4 +1,4 @@
package com.jozufozu.flywheel.core.compile;
package com.jozufozu.flywheel.api.context;
import com.jozufozu.flywheel.backend.gl.shader.GlProgram;
import com.jozufozu.flywheel.core.source.FileResolution;

View file

@ -25,12 +25,9 @@ public abstract class InstancedPart {
}
public final boolean checkDirtyAndClear() {
if (dirty) {
dirty = false;
return true;
} else {
return false;
}
boolean wasDirty = dirty;
dirty = false;
return wasDirty;
}
public final boolean isRemoved() {

View file

@ -95,6 +95,7 @@ public class Backend {
case OFF -> true;
case BATCHING -> !usingShaders;
case INSTANCING -> !usingShaders && GlCompat.getInstance().instancedArraysSupported();
case INDIRECT -> !usingShaders && GlCompat.getInstance().supportsIndirect();
};
return canUseEngine ? preferredChoice : BackendType.OFF;

View file

@ -5,8 +5,8 @@ import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.api.vertex.VertexType;
import com.jozufozu.flywheel.backend.instancing.InstancedRenderDispatcher;
import com.jozufozu.flywheel.core.ComponentRegistry;
import com.jozufozu.flywheel.core.compile.ContextShader;
import com.jozufozu.flywheel.core.compile.InstancedArraysCompiler;
import com.jozufozu.flywheel.api.context.ContextShader;
import com.jozufozu.flywheel.backend.instancing.instancing.InstancedArraysCompiler;
import com.jozufozu.flywheel.core.crumbling.CrumblingRenderer;
import com.jozufozu.flywheel.core.source.FileResolution;
import com.jozufozu.flywheel.core.source.ShaderLoadingException;

View file

@ -1,10 +1,12 @@
package com.jozufozu.flywheel.backend.gl.shader;
import org.lwjgl.opengl.GL20;
import org.lwjgl.opengl.GL43;
public enum ShaderType {
VERTEX("vertex", "VERTEX_SHADER", "vert", GL20.GL_VERTEX_SHADER),
FRAGMENT("fragment", "FRAGMENT_SHADER", "frag", GL20.GL_FRAGMENT_SHADER),
COMPUTE("compute", "COMPUTE_SHADER", "glsl", GL43.GL_COMPUTE_SHADER),
;
public final String name;

View file

@ -32,12 +32,15 @@ public class GlCompat {
public final InstancedArrays instancedArrays;
public final BufferStorage bufferStorage;
public final boolean amd;
public final boolean supportsIndirect;
private GlCompat() {
GLCapabilities caps = GL.createCapabilities();
instancedArrays = getLatest(InstancedArrays.class, caps);
bufferStorage = getLatest(BufferStorage.class, caps);
supportsIndirect = caps.OpenGL46;
amd = _isAmdWindows();
}
@ -116,5 +119,9 @@ public class GlCompat {
// vendor string I got was "ATI Technologies Inc."
return vendor.contains("ATI") || vendor.contains("AMD");
}
/**
 * Reports whether the indirect (compute culling) backend can be used.
 *
 * @return the {@code supportsIndirect} flag, which the constructor sets from
 *         {@code caps.OpenGL46}.
 */
public boolean supportsIndirect() {
return supportsIndirect;
}
}

View file

@ -6,6 +6,7 @@ import java.util.BitSet;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.instancer.Instancer;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.api.struct.StructWriter;
public abstract class AbstractInstancer<D extends InstancedPart> implements Instancer<D> {
@ -28,7 +29,7 @@ public abstract class AbstractInstancer<D extends InstancedPart> implements Inst
/**
* Copy data from another Instancer to this one.
*
* <p>
* This has the effect of swapping out one model for another.
* @param inOther the data associated with a different model.
*/
@ -104,6 +105,22 @@ public abstract class AbstractInstancer<D extends InstancedPart> implements Inst
return instanceData;
}
/**
 * Writes every dirty element of {@code data} through the given writer, clearing
 * each element's dirty flag as it goes.
 *
 * <p>No emptiness or "anything to update" checks are performed here; callers are
 * expected to have done those already.
 *
 * @param writer the writer that receives the changed elements. It is only
 *               re-positioned with {@code seek} after a run of unchanged elements;
 *               presumably {@code write} advances it by one element and it starts
 *               at element 0 - TODO confirm against StructWriter.
 */
protected void writeChangedUnchecked(StructWriter<D> writer) {
    // True while the previously visited element was written (or nothing has been
    // visited yet), i.e. no seek is needed before the next write.
    boolean cursorAligned = true;

    for (int index = 0; index < data.size(); index++) {
        final D part = data.get(index);

        if (!part.checkDirtyAndClear()) {
            // Skipping an element leaves the writer behind the current index.
            cursorAligned = false;
            continue;
        }

        if (!cursorAligned) {
            writer.seek(index);
        }
        writer.write(part);
        cursorAligned = true;
    }
}
public abstract void delete();
@Override

View file

@ -9,6 +9,7 @@ import com.jozufozu.flywheel.backend.instancing.blockentity.BlockEntityInstanceM
import com.jozufozu.flywheel.backend.instancing.effect.Effect;
import com.jozufozu.flywheel.backend.instancing.effect.EffectInstanceManager;
import com.jozufozu.flywheel.backend.instancing.entity.EntityInstanceManager;
import com.jozufozu.flywheel.backend.instancing.indirect.IndirectEngine;
import com.jozufozu.flywheel.backend.instancing.instancing.InstancingEngine;
import com.jozufozu.flywheel.core.Components;
import com.jozufozu.flywheel.core.RenderContext;
@ -37,6 +38,7 @@ public class InstanceWorld {
public static InstanceWorld create(LevelAccessor level) {
var engine = switch (Backend.getBackendType()) {
case INDIRECT -> new IndirectEngine(Components.WORLD);
case INSTANCING -> new InstancingEngine(Components.WORLD);
case BATCHING -> new BatchingEngine();
case OFF -> throw new IllegalStateException("Cannot create instance world when backend is off.");

View file

@ -3,7 +3,7 @@ package com.jozufozu.flywheel.backend.instancing.batching;
import java.util.HashSet;
import java.util.Set;
import com.jozufozu.flywheel.util.RenderTypeExtension;
import com.jozufozu.flywheel.util.extension.RenderTypeExtension;
import com.mojang.blaze3d.vertex.BufferBuilder;
import net.minecraft.client.renderer.RenderType;

View file

@ -0,0 +1,42 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import com.google.common.collect.ImmutableList;
import com.jozufozu.flywheel.backend.gl.shader.GlProgram;
import com.jozufozu.flywheel.backend.gl.shader.GlShader;
import com.jozufozu.flywheel.backend.gl.shader.ShaderType;
import com.jozufozu.flywheel.core.compile.Memoizer;
import com.jozufozu.flywheel.core.compile.ProgramAssembler;
import com.jozufozu.flywheel.core.source.CompilationContext;
import com.jozufozu.flywheel.core.source.FileResolution;
import com.jozufozu.flywheel.event.ReloadRenderersEvent;
/**
 * Memoizing compiler that turns a compute shader source file into a linked
 * {@link GlProgram}.
 *
 * <p>Programs are keyed by their {@link FileResolution} and are dropped whenever
 * {@link #invalidateAll(ReloadRenderersEvent)} is invoked.
 */
public class ComputeCompiler extends Memoizer<FileResolution, GlProgram> {

    /** The single shared compiler instance. */
    public static final ComputeCompiler INSTANCE = new ComputeCompiler();

    private ComputeCompiler() {
    }

    /**
     * Generates the final source for {@code file}, compiles it as a compute shader
     * and links it into a program containing only that shader.
     *
     * @param file the compute shader to compile.
     * @return the linked program.
     */
    @Override
    protected GlProgram _create(FileResolution file) {
        var finalSource = file.getFile()
                .generateFinalSource(new CompilationContext());
        var location = file.getFileLoc();

        var computeShader = new GlShader(finalSource, ShaderType.COMPUTE, ImmutableList.of(location));

        var assembler = new ProgramAssembler(location);
        return assembler.attachShader(computeShader)
                .link()
                .build(GlProgram::new);
    }

    /**
     * Releases a program that is being evicted from the cache.
     *
     * @param value the program to delete.
     */
    @Override
    protected void _destroy(GlProgram value) {
        value.delete();
    }

    /**
     * Event hook that invalidates the shared instance's cache.
     *
     * @param ignored the reload event; its contents are not used.
     */
    public static void invalidateAll(ReloadRenderersEvent ignored) {
        INSTANCE.invalidate();
    }
}

View file

@ -0,0 +1,10 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
/**
 * Minimal holder for a GL buffer object name and a byte size.
 *
 * <p>This class performs no GL calls itself; it only stores the values it is given.
 */
public class DSABuffer {

    /** The buffer object name this holder wraps. */
    int id;

    /** Size in bytes associated with the buffer; stays 0 until something assigns it. */
    int byteSize;

    /**
     * @param id the buffer object name to wrap.
     */
    public DSABuffer(int id) {
        this.id = id;
    }
}

View file

@ -0,0 +1,170 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jetbrains.annotations.NotNull;
import org.lwjgl.opengl.GL32;
import com.jozufozu.flywheel.api.RenderStage;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.api.vertex.VertexType;
import com.jozufozu.flywheel.backend.gl.GlTextureUnit;
import com.jozufozu.flywheel.backend.instancing.Engine;
import com.jozufozu.flywheel.backend.instancing.InstanceManager;
import com.jozufozu.flywheel.backend.instancing.TaskEngine;
import com.jozufozu.flywheel.backend.instancing.instancing.MeshPool;
import com.jozufozu.flywheel.core.RenderContext;
import com.jozufozu.flywheel.api.context.ContextShader;
import com.jozufozu.flywheel.backend.instancing.instancing.InstancedArraysCompiler;
import com.jozufozu.flywheel.core.source.FileResolution;
import com.jozufozu.flywheel.core.uniform.UniformBuffer;
import com.jozufozu.flywheel.util.WeakHashSet;
import com.mojang.blaze3d.systems.RenderSystem;
import net.minecraft.client.Camera;
import net.minecraft.client.Minecraft;
import net.minecraft.core.BlockPos;
import net.minecraft.core.Vec3i;
import net.minecraft.util.Mth;
import net.minecraft.world.phys.Vec3;
/**
 * {@link Engine} implementation backing the {@code INDIRECT} backend, which culls
 * instances in a compute shader and renders them with indirect draws.
 *
 * <p>Models requested through {@link #factory(StructType)} are queued in
 * {@link #uninitializedModels} and registered with {@link #renderLists} on the next
 * {@link #beginFrame(TaskEngine, RenderContext)}.
 */
public class IndirectEngine implements Engine {

    /** Distance from the origin coordinate beyond which the origin is moved to the camera. */
    public static int MAX_ORIGIN_DISTANCE = 100;

    protected BlockPos originCoordinate = BlockPos.ZERO;

    protected final ContextShader context;

    protected final Map<StructType<?>, IndirectFactory<?>> factories = new HashMap<>();

    /** Models created since the last frame that still need to be added to the render lists. */
    protected final List<InstancedModel<?>> uninitializedModels = new ArrayList<>();

    protected final RenderLists renderLists = new RenderLists();

    /**
     * The set of instance managers that are attached to this engine.
     */
    private final WeakHashSet<InstanceManager<?>> instanceManagers;

    /**
     * @param context the context shader used when building shader programs in
     *                {@link #setup(ShaderState)}.
     */
    public IndirectEngine(ContextShader context) {
        this.context = context;
        this.instanceManagers = new WeakHashSet<>();
    }

    /**
     * Returns the factory for the given struct type, creating it on first use.
     *
     * @param type the struct type to get a factory for.
     * @return the factory associated with {@code type}.
     */
    @SuppressWarnings("unchecked")
    @NotNull
    @Override
    public <D extends InstancedPart> IndirectFactory<D> factory(StructType<D> type) {
        // The map is only ever populated through createFactory, so the factory stored
        // under a StructType<D> key is always an IndirectFactory<D>.
        return (IndirectFactory<D>) factories.computeIfAbsent(type, this::createFactory);
    }

    @NotNull
    private <D extends InstancedPart> IndirectFactory<D> createFactory(StructType<D> type) {
        return new IndirectFactory<>(type, uninitializedModels::add);
    }

    /**
     * Sets up render state and submits every indirect list registered for {@code stage}.
     */
    @Override
    public void renderStage(TaskEngine taskEngine, RenderContext context, RenderStage stage) {
        var groups = renderLists.get(stage);

        setup();

        for (var group : groups) {
            group.submit();
        }
    }

    /** Applies the fixed render state used for every stage. */
    private void setup() {
        GlTextureUnit.T2.makeActive();
        Minecraft.getInstance().gameRenderer.lightTexture().turnOnLightLayer();

        RenderSystem.depthMask(true);
        RenderSystem.colorMask(true, true, true, true);
        RenderSystem.enableDepthTest();
        RenderSystem.depthFunc(GL32.GL_LEQUAL);
        RenderSystem.enableCull();
    }

    /**
     * Binds the instanced-arrays program matching {@code desc} and syncs the uniform buffer.
     *
     * @param desc the material / vertex type / struct type combination to bind a program for.
     */
    protected void setup(ShaderState desc) {
        VertexType vertexType = desc.vertex();
        FileResolution instanceShader = desc.instance()
                .getInstanceShader();
        Material material = desc.material();

        var ctx = new InstancedArraysCompiler.Context(vertexType, material, instanceShader, context);

        InstancedArraysCompiler.INSTANCE.getProgram(ctx)
                .bind();
        UniformBuffer.getInstance().sync();
    }

    /** Clears the instance data of every factory without deleting the factories. */
    public void clearAll() {
        factories.values().forEach(IndirectFactory::clear);
    }

    /** Deletes every factory and forgets them. */
    @Override
    public void delete() {
        factories.values()
                .forEach(IndirectFactory::delete);

        factories.clear();
    }

    @Override
    public Vec3i getOriginCoordinate() {
        return originCoordinate;
    }

    @Override
    public void attachManagers(InstanceManager<?>... listener) {
        instanceManagers.addAll(List.of(listener));
    }

    /**
     * Moves the origin coordinate to the camera's block position once the camera is
     * farther than {@link #MAX_ORIGIN_DISTANCE} from the current origin.
     *
     * @param camera the camera to measure against.
     * @return {@code true} if the origin was shifted.
     */
    @Override
    public boolean maintainOriginCoordinate(Camera camera) {
        Vec3 cameraPos = camera.getPosition();

        double distanceSqr = Vec3.atLowerCornerOf(originCoordinate)
                .subtract(cameraPos)
                .lengthSqr();

        if (distanceSqr > MAX_ORIGIN_DISTANCE * MAX_ORIGIN_DISTANCE) {
            shiftListeners(Mth.floor(cameraPos.x), Mth.floor(cameraPos.y), Mth.floor(cameraPos.z));
            return true;
        }
        return false;
    }

    /**
     * Registers all models created since the previous frame with the render lists,
     * then flushes the shared mesh pool.
     */
    @Override
    public void beginFrame(TaskEngine taskEngine, RenderContext context) {
        for (var model : uninitializedModels) {
            model.init(renderLists);
        }
        uninitializedModels.clear();

        MeshPool.getInstance()
                .flush();
    }

    /** Moves the origin, drops all instance data and notifies the attached managers. */
    private void shiftListeners(int cX, int cY, int cZ) {
        originCoordinate = new BlockPos(cX, cY, cZ);

        clearAll();

        instanceManagers.forEach(InstanceManager::onOriginShift);
    }

    /**
     * Appends this engine's debug lines: the backend name and the current origin.
     *
     * @param info the list to append to.
     */
    @Override
    public void addDebugInfo(List<String> info) {
        // Matches the display name of BackendType.INDIRECT rather than the
        // instancing engine's "GL33 Instanced Arrays" label.
        info.add("GL46 Compute Culling");
        info.add("Origin: " + originCoordinate.getX() + ", " + originCoordinate.getY() + ", " + originCoordinate.getZ());
    }
}

View file

@ -0,0 +1,73 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Consumer;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.instancer.Instancer;
import com.jozufozu.flywheel.api.instancer.InstancerFactory;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.backend.instancing.AbstractInstancer;
import com.jozufozu.flywheel.core.model.Model;
/**
 * {@link InstancerFactory} for the indirect backend: hands out one
 * {@link InstancedModel} (and its instancer) per {@link Model} for a single struct type.
 */
public class IndirectFactory<D extends InstancedPart> implements InstancerFactory<D> {

    protected final Map<Model, InstancedModel<D>> models = new HashMap<>();
    protected final StructType<D> type;
    /** Notified once for every newly created {@link InstancedModel}. */
    private final Consumer<InstancedModel<D>> creationListener;

    /**
     * @param type             the struct type all models of this factory share.
     * @param creationListener callback invoked with each model right after it is created.
     */
    public IndirectFactory(StructType<D> type, Consumer<InstancedModel<D>> creationListener) {
        this.type = type;
        this.creationListener = creationListener;
    }

    /**
     * Returns the instancer for {@code modelKey}, creating the backing model on first request.
     */
    @Override
    public Instancer<D> model(Model modelKey) {
        InstancedModel<D> instanced = models.computeIfAbsent(modelKey, this::createInstancer);
        return instanced.getInstancer();
    }

    /**
     * @return the sum of the instance counts of all instancers owned by this factory.
     */
    public int getInstanceCount() {
        int total = 0;
        for (InstancedModel<D> instanced : models.values()) {
            total += instanced.getInstancer()
                    .getInstanceCount();
        }
        return total;
    }

    /**
     * @return the sum of {@link InstancedModel#getVertexCount()} over all models.
     */
    public int getVertexCount() {
        int total = 0;
        for (InstancedModel<D> instanced : models.values()) {
            total += instanced.getVertexCount();
        }
        return total;
    }

    /** Deletes every model and removes them from this factory. */
    public void delete() {
        for (InstancedModel<D> instanced : models.values()) {
            instanced.delete();
        }
        models.clear();
    }

    /**
     * Clear all instance data without freeing resources.
     */
    public void clear() {
        for (InstancedModel<D> instanced : models.values()) {
            instanced.getInstancer()
                    .clear();
        }
    }

    /** Builds the model for {@code model} and reports it to the creation listener. */
    private InstancedModel<D> createInstancer(Model model) {
        var created = new InstancedModel<>(type, model);
        this.creationListener.accept(created);
        return created;
    }
}

View file

@ -0,0 +1,72 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.api.struct.StructWriter;
import com.jozufozu.flywheel.backend.instancing.AbstractInstancer;
import com.jozufozu.flywheel.core.layout.BufferLayout;
/**
 * Instancer used by the indirect backend. Tracks whether any instance needs
 * re-uploading and how many instances existed at the last {@link #update()}.
 */
public class IndirectInstancer<D extends InstancedPart> extends AbstractInstancer<D> {

    public final BufferLayout instanceFormat;
    public final StructType<D> structType;
    public final InstancedModel<D> parent;

    /** Number of instances recorded by the most recent {@link #update()}. */
    int maxInstanceCount = 0;

    /** Set by {@link #notifyDirty()}, cleared when instance data is written out. */
    boolean anyToUpdate;

    /**
     * @param parent the model this instancer belongs to.
     * @param type   the struct type of the instances; also supplies the buffer layout.
     */
    public IndirectInstancer(InstancedModel<D> parent, StructType<D> type) {
        super(type);
        this.structType = type;
        this.instanceFormat = type.getLayout();
        this.parent = parent;
    }

    @Override
    public void notifyDirty() {
        anyToUpdate = true;
    }

    /**
     * @return {@code true} when nothing is pending (no updates, no removals) and the
     *         last recorded instance count is zero.
     */
    public boolean isEmpty() {
        return !(anyToUpdate || anyToRemove || maxInstanceCount != 0);
    }

    /** Drops removed instances if any are pending, then records the current instance count. */
    void update() {
        if (anyToRemove) {
            removeDeletedInstances();
        }

        maxInstanceCount = data.size();

        anyToRemove = false;
    }

    /**
     * Writes every instance, in order, regardless of its dirty state.
     *
     * @param writer destination for the instance data.
     */
    void writeAll(final StructWriter<D> writer) {
        anyToUpdate = false;

        data.forEach(writer::write);
    }

    /**
     * Writes only the instances that changed since the last write; does nothing when
     * no change was signalled or there are no instances.
     *
     * @param writer destination for the instance data.
     */
    void writeChanged(final StructWriter<D> writer) {
        if (!anyToUpdate) {
            return;
        }
        anyToUpdate = false;

        if (data.size() > 0) {
            writeChangedUnchecked(writer);
        }
    }

    @Override
    public void delete() {
        // noop
    }
}

View file

@ -0,0 +1,121 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.lwjgl.opengl.GL46;
import org.lwjgl.opengl.GL46C;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.backend.gl.shader.GlProgram;
import com.jozufozu.flywheel.backend.instancing.instancing.MeshPool;
import com.jozufozu.flywheel.core.Components;
import com.jozufozu.flywheel.core.model.Mesh;
/**
 * One indirect draw list: a set of (instancer, mesh) batches that are culled by a
 * compute program and then drawn with a single multi-draw-indirect call.
 *
 * <p>Four buffer objects are created up front and bound together as shader storage
 * buffers at binding points 0-3, in the order object, target, bounding sphere, draw.
 */
public class IndirectList<T extends InstancedPart> {

    /** Size in bytes of one {@code DrawElementsIndirectCommand}: five 32-bit fields. */
    private static final int INDIRECT_COMMAND_STRIDE = 20;

    final GlProgram compute;
    /** Program used for the draw pass; not wired up yet, so currently always {@code null}. */
    final GlProgram draw;

    /**
     * Stores raw instance data per-object.
     */
    DSABuffer objectBuffer;

    /**
     * Stores bounding spheres
     */
    DSABuffer boundingSphereBuffer;

    /**
     * Stores drawIndirect structs.
     */
    DSABuffer drawBuffer;

    DSABuffer targetBuffer;

    /** Raw buffer names, in shader storage binding order. */
    final int[] buffers = new int[4];

    final List<Batch<T>> batches = new ArrayList<>();

    /**
     * Creates the four backing buffers and fetches the culling compute program.
     *
     * @param structType the struct type of the instances in this list (currently unused).
     */
    IndirectList(StructType<T> structType) {
        GL46.glCreateBuffers(buffers);
        objectBuffer = new DSABuffer(buffers[0]);
        targetBuffer = new DSABuffer(buffers[1]);
        boundingSphereBuffer = new DSABuffer(buffers[2]);
        drawBuffer = new DSABuffer(buffers[3]);

        compute = ComputeCompiler.INSTANCE.get(Components.Files.CULL_INSTANCES);
        draw = null;
    }

    /**
     * Allocates {@code mesh} in the shared mesh pool and records a batch pairing it
     * with {@code instancer}.
     */
    public void add(Mesh mesh, IndirectInstancer<T> instancer) {
        var pool = MeshPool.getInstance();
        var buffered = pool.alloc(mesh);

        batches.add(new Batch<>(instancer, buffered));
    }

    /**
     * Writes one indirect command per batch into temporary stack memory and uploads
     * them to the draw buffer.
     */
    public void prepare() {
        try (var stack = MemoryStack.stackPush()) {
            // NOTE(review): the thread-local MemoryStack is bounded; a very large number
            // of batches could exceed it - confirm expected batch counts.
            var size = batches.size() * INDIRECT_COMMAND_STRIDE;
            long basePtr = stack.nmalloc(size);
            long writePtr = basePtr;
            for (Batch<T> batch : batches) {
                batch.writeIndirectCommand(writePtr);
                writePtr += INDIRECT_COMMAND_STRIDE;
            }
            GL46C.nglNamedBufferData(drawBuffer.id, size, basePtr, GL46.GL_STREAM_DRAW);
        }
    }

    /**
     * Runs the culling compute pass over all instances, then issues the indirect draw.
     *
     * <p>While no draw program is available the draw pass is skipped instead of
     * failing with a {@link NullPointerException}.
     */
    public void submit() {
        compute.bind();
        GL46.glBindBuffersBase(GL46.GL_SHADER_STORAGE_BUFFER, 0, buffers);

        var groupCount = (getTotalInstanceCount() + 31) >> 5; // ceil(totalInstanceCount / 32)
        GL46.glDispatchCompute(groupCount, 1, 1);

        if (draw == null) {
            // The draw program is not created yet (see constructor); nothing can be drawn.
            return;
        }

        draw.bind();
        // The draw buffer is bound as an SSBO for the compute pass and then sourced as
        // indirect commands, so the command barrier is needed in addition to the
        // shader storage barrier for compute writes to be visible to the draw.
        GL46.glMemoryBarrier(GL46.GL_SHADER_STORAGE_BARRIER_BIT | GL46.GL_COMMAND_BARRIER_BIT);
        GL46.glBindBuffer(GL46.GL_DRAW_INDIRECT_BUFFER, drawBuffer.id);

        GL46.glMultiDrawElementsIndirect(GL46.GL_TRIANGLES, GL46.GL_UNSIGNED_INT, 0, batches.size(), 0);
    }

    /**
     * Placeholder: always reports zero instances, so the compute dispatch in
     * {@link #submit()} currently launches no work groups.
     */
    private int getTotalInstanceCount() {
        return 0;
    }

    /** Pairs an instancer with the pooled mesh it is drawn with. */
    private static final class Batch<T extends InstancedPart> {
        final IndirectInstancer<T> instancer;
        final MeshPool.BufferedMesh mesh;

        private Batch(IndirectInstancer<T> instancer, MeshPool.BufferedMesh mesh) {
            this.instancer = instancer;
            this.mesh = mesh;
        }

        /**
         * Writes this batch's {@code DrawElementsIndirectCommand} (20 bytes) at {@code ptr}.
         *
         * @param ptr native address with at least 20 writable bytes.
         */
        public void writeIndirectCommand(long ptr) {
            // typedef struct {
            //    GLuint count;
            //    GLuint instanceCount;
            //    GLuint firstIndex;
            //    GLuint baseVertex;
            //    GLuint baseInstance;
            //} DrawElementsIndirectCommand;

            // NOTE(review): `count` is the number of indices in the GL struct; the mesh's
            // vertex count is written here - confirm the two agree for pooled meshes.
            MemoryUtil.memPutInt(ptr, mesh.getVertexCount());
            // instanceCount starts at 0; presumably filled in by the culling shader.
            MemoryUtil.memPutInt(ptr + 4, 0);
            MemoryUtil.memPutInt(ptr + 8, 0);
            MemoryUtil.memPutInt(ptr + 12, mesh.getBaseVertex());
            MemoryUtil.memPutInt(ptr + 16, 0);
        }
    }
}

View file

@ -0,0 +1,51 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import java.util.List;
import java.util.Map;
import com.jozufozu.flywheel.api.RenderStage;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.core.model.Mesh;
import com.jozufozu.flywheel.core.model.Model;
/**
 * Couples a {@link Model} with the {@link IndirectInstancer} that holds its instances.
 */
public class InstancedModel<D extends InstancedPart> {

    private final Model model;
    private final StructType<D> type;
    private final IndirectInstancer<D> instancer;

    /**
     * @param type  the struct type of the instances.
     * @param model the model to be instanced.
     */
    public InstancedModel(StructType<D> type, Model model) {
        this.type = type;
        this.model = model;
        this.instancer = new IndirectInstancer<>(this, type);
    }

    /**
     * Registers this model's first material/mesh pair with {@code renderLists}.
     * Does nothing when the model has no meshes.
     *
     * @param renderLists the render lists to register with.
     */
    public void init(RenderLists renderLists) {
        var entries = this.model.getMeshes()
                .entrySet()
                .iterator();

        // TODO: support multiple meshes per model
        if (!entries.hasNext()) {
            return;
        }

        var first = entries.next();
        var material = first.getKey();
        var mesh = first.getValue();
        renderLists.add(material.getRenderStage(), type, mesh, instancer);
    }

    public IndirectInstancer<D> getInstancer() {
        return instancer;
    }

    public Model getModel() {
        return model;
    }

    /**
     * @return the model's vertex count multiplied by the instancer's last recorded
     *         instance count.
     */
    public int getVertexCount() {
        return model.getVertexCount() * instancer.maxInstanceCount;
    }

    /** Currently releases nothing. */
    public void delete() {
    }
}

View file

@ -0,0 +1,39 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ListMultimap;
import com.jozufozu.flywheel.api.RenderStage;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.core.model.Mesh;
/**
 * Groups {@link IndirectList}s first by {@link RenderStage} and then by struct type.
 */
public class RenderLists {

    public final Map<RenderStage, Map<StructType<?>, IndirectList<?>>> renderLists = new EnumMap<>(RenderStage.class);

    /**
     * @param stage the render stage to look up.
     * @return all lists registered for {@code stage}, or an empty collection if none were.
     */
    public Collection<IndirectList<?>> get(RenderStage stage) {
        var listsForStage = renderLists.get(stage);
        return listsForStage == null ? Collections.emptyList() : listsForStage.values();
    }

    /**
     * Adds a mesh/instancer pair to the list for the given stage and struct type,
     * creating the list (and the per-stage map) on first use.
     */
    @SuppressWarnings("unchecked")
    public <D extends InstancedPart> void add(RenderStage stage, StructType<D> type, Mesh mesh, IndirectInstancer<D> instancer) {
        Map<StructType<?>, IndirectList<?>> listsForStage = renderLists.computeIfAbsent(stage, ignored -> new HashMap<>());

        // Lists are only ever created from their own key, so the list stored under a
        // StructType<D> is an IndirectList<D>.
        var indirectList = (IndirectList<D>) listsForStage.computeIfAbsent(type, IndirectList::new);

        indirectList.add(mesh, instancer);
    }
}

View file

@ -0,0 +1,8 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.api.vertex.VertexType;
/**
 * Immutable combination of the three inputs that select a shader program.
 *
 * @param material the material being rendered.
 * @param vertex   the vertex type of the mesh data.
 * @param instance the struct type of the per-instance data.
 */
public record ShaderState(Material material, VertexType vertex, StructType<?> instance) {
}

View file

@ -0,0 +1,6 @@
@ParametersAreNonnullByDefault @MethodsReturnNonnullByDefault
package com.jozufozu.flywheel.backend.instancing.indirect;
import javax.annotation.ParametersAreNonnullByDefault;
import net.minecraft.MethodsReturnNonnullByDefault;

View file

@ -6,7 +6,6 @@ import java.util.Set;
import com.jozufozu.flywheel.Flywheel;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.api.struct.StructWriter;
import com.jozufozu.flywheel.backend.gl.array.GlVertexArray;
import com.jozufozu.flywheel.backend.gl.buffer.GlBuffer;
import com.jozufozu.flywheel.backend.gl.buffer.GlBufferType;
@ -89,22 +88,7 @@ public class GPUInstancer<D extends InstancedPart> extends AbstractInstancer<D>
buf.clear(clearStart, clearLength);
if (size > 0) {
final StructWriter<D> writer = structType.getWriter(buf.unwrap());
boolean sequential = true;
for (int i = 0; i < size; i++) {
final D element = data.get(i);
if (element.checkDirtyAndClear()) {
if (!sequential) {
writer.seek(i);
}
writer.write(element);
sequential = true;
} else {
sequential = false;
}
}
writeChangedUnchecked(structType.getWriter(buf.unwrap()));
}
} catch (Exception e) {
Flywheel.LOGGER.error("Error updating GPUInstancer:", e);

View file

@ -1,14 +1,16 @@
package com.jozufozu.flywheel.core.compile;
package com.jozufozu.flywheel.backend.instancing.instancing;
import java.util.ArrayList;
import com.google.common.collect.ImmutableList;
import com.jozufozu.flywheel.api.context.ContextShader;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.vertex.VertexType;
import com.jozufozu.flywheel.backend.gl.GLSLVersion;
import com.jozufozu.flywheel.backend.gl.shader.GlProgram;
import com.jozufozu.flywheel.backend.gl.shader.GlShader;
import com.jozufozu.flywheel.backend.gl.shader.ShaderType;
import com.jozufozu.flywheel.core.compile.*;
import com.jozufozu.flywheel.core.source.CompilationContext;
import com.jozufozu.flywheel.core.source.FileResolution;
import com.jozufozu.flywheel.core.source.SourceFile;

View file

@ -19,8 +19,7 @@ import com.jozufozu.flywheel.backend.instancing.Engine;
import com.jozufozu.flywheel.backend.instancing.InstanceManager;
import com.jozufozu.flywheel.backend.instancing.TaskEngine;
import com.jozufozu.flywheel.core.RenderContext;
import com.jozufozu.flywheel.core.compile.ContextShader;
import com.jozufozu.flywheel.core.compile.InstancedArraysCompiler;
import com.jozufozu.flywheel.api.context.ContextShader;
import com.jozufozu.flywheel.core.source.FileResolution;
import com.jozufozu.flywheel.core.uniform.UniformBuffer;
import com.jozufozu.flywheel.util.WeakHashSet;

View file

@ -19,8 +19,8 @@ import com.jozufozu.flywheel.backend.gl.buffer.GlBuffer;
import com.jozufozu.flywheel.backend.gl.buffer.GlBufferType;
import com.jozufozu.flywheel.backend.gl.buffer.MappedBuffer;
import com.jozufozu.flywheel.backend.gl.buffer.MappedGlBuffer;
import com.jozufozu.flywheel.core.layout.BufferLayout;
import com.jozufozu.flywheel.core.model.Mesh;
import com.jozufozu.flywheel.core.vertex.Formats;
import com.jozufozu.flywheel.event.ReloadRenderersEvent;
public class MeshPool {
@ -49,6 +49,7 @@ public class MeshPool {
private final GlBuffer vbo;
private long byteSize;
private int vertexCount;
private boolean dirty;
private boolean anyToRemove;
@ -70,8 +71,10 @@ public class MeshPool {
*/
public BufferedMesh alloc(Mesh mesh) {
return meshes.computeIfAbsent(mesh, m -> {
BufferedMesh bufferedModel = new BufferedMesh(m, byteSize);
byteSize += m.size();
// FIXME: culling experiments fixing everything to Formats.BLOCK
BufferedMesh bufferedModel = new BufferedMesh(Formats.BLOCK, m, byteSize, vertexCount);
byteSize += bufferedModel.getByteSize();
vertexCount += bufferedModel.mesh.getVertexCount();
allBuffered.add(bufferedModel);
pendingUpload.add(bufferedModel);
@ -115,17 +118,21 @@ public class MeshPool {
// re-evaluate first vertex for each model
int byteIndex = 0;
int baseVertex = 0;
for (BufferedMesh model : allBuffered) {
if (model.byteIndex != byteIndex) {
pendingUpload.add(model);
}
model.byteIndex = byteIndex;
model.baseVertex = baseVertex;
byteIndex += model.mesh.size();
byteIndex += model.getByteSize();
baseVertex += model.mesh.getVertexCount();
}
this.byteSize = byteIndex;
this.vertexCount = baseVertex;
this.anyToRemove = false;
}
@ -143,12 +150,15 @@ public class MeshPool {
ByteBuffer buffer = mapped.unwrap();
int byteIndex = 0;
int baseVertex = 0;
for (BufferedMesh model : allBuffered) {
model.byteIndex = byteIndex;
model.baseVertex = baseVertex;
model.buffer(buffer);
byteIndex += model.mesh.size();
byteIndex += model.getByteSize();
baseVertex += model.mesh.getVertexCount();
}
} catch (Exception e) {
@ -179,8 +189,9 @@ public class MeshPool {
private final ElementBuffer ebo;
private final Mesh mesh;
private final BufferLayout layout;
private final VertexType type;
private long byteIndex;
private int baseVertex;
private boolean deleted;
@ -188,12 +199,20 @@ public class MeshPool {
private final Set<GlVertexArray> boundTo = new HashSet<>();
public BufferedMesh(Mesh mesh, long byteIndex) {
public BufferedMesh(Mesh mesh, long byteIndex, int baseVertex) {
this.mesh = mesh;
this.byteIndex = byteIndex;
this.baseVertex = baseVertex;
this.ebo = mesh.createEBO();
this.layout = mesh.getVertexType()
.getLayout();
this.type = mesh.getVertexType();
}
public BufferedMesh(VertexType type, Mesh mesh, long byteIndex, int baseVertex) {
this.mesh = mesh;
this.byteIndex = byteIndex;
this.baseVertex = baseVertex;
this.ebo = mesh.createEBO();
this.type = type;
}
public void drawCall(GlVertexArray vao) {
@ -223,7 +242,7 @@ public class MeshPool {
private void setup(GlVertexArray vao) {
if (this.boundTo.add(vao)) {
vao.enableArrays(getAttributeCount());
vao.bindAttributes(MeshPool.this.vbo, 0, this.layout, this.byteIndex);
vao.bindAttributes(MeshPool.this.vbo, 0, type.getLayout(), this.byteIndex);
}
vao.bindElementArray(this.ebo.buffer);
vao.bind();
@ -240,14 +259,17 @@ public class MeshPool {
}
private void buffer(ByteBuffer buffer) {
this.mesh.writeInto(buffer, this.byteIndex);
var writer = type.createWriter(buffer);
writer.seek(this.byteIndex);
writer.writeVertexList(this.mesh.getReader());
this.boundTo.clear();
this.gpuResident = true;
}
public int getAttributeCount() {
return this.layout.getAttributeCount();
return this.type.getLayout()
.getAttributeCount();
}
public boolean isGpuResident() {
@ -255,7 +277,19 @@ public class MeshPool {
}
public VertexType getVertexType() {
return this.mesh.getVertexType();
return this.type;
}
public int getByteSize() {
return this.type.getLayout().getStride() * this.mesh.getVertexCount();
}
public int getBaseVertex() {
return baseVertex;
}
public int getVertexCount() {
return this.mesh.getVertexCount();
}
}

View file

@ -19,6 +19,11 @@ public enum BackendType {
* Use GPU instancing to render everything.
*/
INSTANCING("GL33 Instanced Arrays"),
/**
* Use Compute shaders to cull instances.
*/
INDIRECT("GL46 Compute Culling"),
;
private static final Map<String, BackendType> lookup;

View file

@ -141,6 +141,7 @@ public class FlwCommands {
case OFF -> new TextComponent("Disabled Flywheel").withStyle(ChatFormatting.RED);
case INSTANCING -> new TextComponent("Using Instancing Engine").withStyle(ChatFormatting.GREEN);
case BATCHING -> new TextComponent("Using Batching Engine").withStyle(ChatFormatting.GREEN);
case INDIRECT -> new TextComponent("Using Indirect Engine").withStyle(ChatFormatting.GREEN);
};
}

View file

@ -11,7 +11,7 @@ import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.api.uniform.UniformProvider;
import com.jozufozu.flywheel.api.vertex.VertexType;
import com.jozufozu.flywheel.core.compile.ContextShader;
import com.jozufozu.flywheel.api.context.ContextShader;
import net.minecraft.resources.ResourceLocation;

View file

@ -3,7 +3,7 @@ package com.jozufozu.flywheel.core;
import java.util.function.BiConsumer;
import com.jozufozu.flywheel.Flywheel;
import com.jozufozu.flywheel.core.compile.ContextShader;
import com.jozufozu.flywheel.api.context.ContextShader;
import com.jozufozu.flywheel.core.crumbling.CrumblingProgram;
import com.jozufozu.flywheel.core.source.FileResolution;
import com.jozufozu.flywheel.core.source.SourceChecks;
@ -48,6 +48,11 @@ public class Components {
public static final FileResolution WORLD_FRAGMENT = contextFragment(ResourceUtil.subPath(Names.WORLD, ".frag"));
public static final FileResolution CRUMBLING_VERTEX = contextVertex(ResourceUtil.subPath(Names.CRUMBLING, ".vert"));
public static final FileResolution CRUMBLING_FRAGMENT = contextFragment(ResourceUtil.subPath(Names.CRUMBLING, ".frag"));
public static final FileResolution CULL_INSTANCES = compute(Flywheel.rl("compute/cull_instances.glsl"));
/**
 * Resolves a compute shader source file.
 *
 * <p>Currently a plain delegate to {@code FileResolution.get} with no extra checks attached.
 *
 * @param rl the location of the compute shader source.
 * @return the file resolution for {@code rl}.
 */
private static FileResolution compute(ResourceLocation rl) {
return FileResolution.get(rl);
}
private static FileResolution uniform(ResourceLocation location) {
return FileResolution.get(location);

View file

@ -88,7 +88,7 @@ public class DebugRender {
try (var stack = MemoryStack.stackPush()) {
var buf = stack.malloc(3 * 8 * 4);
culler.bufferPlanes(buf);
culler.getCorners(buf);
GL46.glNamedBufferSubData(buffer, indicesSize, buf);
}

View file

@ -16,7 +16,7 @@ public class CompileUtil {
public static final Pattern vecType = Pattern.compile("^[biud]?vec([234])$");
public static final Pattern matType = Pattern.compile("^mat([234])(?:x([234]))?$");
protected static String generateHeader(GLSLVersion version, ShaderType type) {
public static String generateHeader(GLSLVersion version, ShaderType type) {
return "#version " + version + '\n'
+ "#extension GL_ARB_explicit_attrib_location : enable\n"
+ "#extension GL_ARB_conservative_depth : enable\n"

View file

@ -5,6 +5,7 @@ import java.nio.ByteBuffer;
import org.jetbrains.annotations.Nullable;
import com.dreizak.miniball.highdim.Miniball;
import com.jozufozu.flywheel.Flywheel;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.vertex.VertexList;

View file

@ -953,7 +953,7 @@ public class FrustumIntersection {
return da >= 0.0f || db >= 0.0f;
}
public void bufferPlanes(ByteBuffer buffer) {
public void getCorners(ByteBuffer buffer) {
Vector3f scratch = new Vector3f();
Vector3f result = new Vector3f();
@ -989,4 +989,60 @@ public class FrustumIntersection {
return result.div(f);
}
/**
 * Stores the six frustum planes into {@code buffer} in a component-major
 * arrangement, so that a shader can evaluate several plane equations with a
 * single vector operation.<p>
 * 24 floats (96 bytes) are written, starting at the address reported by
 * {@code MemoryUtil.memAddress(buffer)}:<p>
 * {@code vec4(nxX, pxX, nyX, pyX)}<br>
 * {@code vec4(nxY, pxY, nyY, pyY)}<br>
 * {@code vec4(nxZ, pxZ, nyZ, pyZ)}<br>
 * {@code vec4(nxW, pxW, nyW, pyW)}<br>
 * {@code vec2(nzX, pzX)}<br>
 * {@code vec2(nzY, pzY)}<br>
 * {@code vec2(nzZ, pzZ)}<br>
 * {@code vec2(nzW, pzW)}<br>
 * <p>
 * The writes go straight to native memory; this method performs no size
 * check, so the caller has to provide at least 96 writable bytes.
 *
 * @param buffer destination for the packed planes
 */
public void getJozuPackedPlanes(ByteBuffer buffer) {
    final long base = MemoryUtil.memAddress(buffer);

    // One row per output vector, in the order the floats land in memory.
    final float[] packed = {
            nxX, pxX, nyX, pyX,
            nxY, pxY, nyY, pyY,
            nxZ, pxZ, nyZ, pyZ,
            nxW, pxW, nyW, pyW,
            nzX, pzX,
            nzY, pzY,
            nzZ, pzZ,
            nzW, pzW,
    };

    for (int i = 0; i < packed.length; i++) {
        MemoryUtil.memPutFloat(base + (long) i * Float.BYTES, packed[i]);
    }
}
/**
 * Writes {@code planes[0]} through {@code planes[5]} back to back, 16 bytes
 * apart, beginning at the address reported by
 * {@code MemoryUtil.memAddress(buffer)}.
 * <p>
 * No size check is performed here; the caller has to provide room for all
 * six entries (offsets 0 through 80 are used as start addresses).
 *
 * @param buffer destination for the planes
 */
public void getPlanes(ByteBuffer buffer) {
    final long base = MemoryUtil.memAddress(buffer);
    for (int i = 0; i < 6; i++) {
        planes[i].getToAddress(base + 16L * i);
    }
}
}

View file

@ -0,0 +1,69 @@
#version 450
// Number of invocations per work group along x. main() treats gl_GlobalInvocationID.x as an
// object index, so each invocation handles one object.
// NOTE(review): despite the name, this sets the work group size, not a hardware subgroup size.
#define FLW_SUBGROUP_SIZE 32
layout(local_size_x = FLW_SUBGROUP_SIZE) in;
#use "flywheel:compute/objects.glsl"
#use "flywheel:util/quaternion.glsl"
// Per-dispatch culling inputs: the six frustum planes in component-major order, followed by the
// number of objects to test. main() ignores invocations whose index is >= drawCount.
//
// std140 is required here: "std130" is not a layout qualifier. Under std140 the four vec4s sit at
// byte offsets 0/16/32/48 and the four vec2s at 64/72/80/88, which is the arrangement documented
// on FrustumIntersection#getJozuPackedPlanes; drawCount follows at offset 96.
layout(std140, binding = 3) uniform FrameData {
    vec4 a1; // vec4(nx.x, px.x, ny.x, py.x)
    vec4 a2; // vec4(nx.y, px.y, ny.y, py.y)
    vec4 a3; // vec4(nx.z, px.z, ny.z, py.z)
    vec4 a4; // vec4(nx.w, px.w, ny.w, py.w)
    vec2 b1; // vec2(nz.x, pz.x)
    vec2 b2; // vec2(nz.y, pz.y)
    vec2 b3; // vec2(nz.z, pz.z)
    vec2 b4; // vec2(nz.w, pz.w)
    uint drawCount;
} frustum;
// Every object that may be drawn; populated by instancers. Indexed by object ID.
layout(binding = 0) readonly buffer ObjectBuffer {
    Instance objects[];
};

// Output: IDs of the objects that passed culling. main() writes each surviving ID at
// baseInstance + (its slot within the batch).
layout(binding = 1) writeonly buffer TargetBuffer {
    uint objectIDs[];
};

// One sphere per batch, indexed by batchID. isVisible() takes xyz as the centre.
layout(binding = 2) readonly buffer BoundingSpheres {
    vec4 boundingSpheres[];
};

// One draw command per batch, indexed by batchID. main() atomically bumps instanceCount.
// The element type is spelled MeshDrawCommands, as declared in compute/objects.glsl.
// NOTE(review): binding 3 is also used by the FrameData uniform block; uniform and storage
// blocks have separate binding points, so this is legal but easy to misread.
layout(binding = 3) buffer DrawCommands {
    MeshDrawCommands drawCommands[];
};
// 83 - 27 = 56 spirv instruction results
//
// Returns true when the sphere is not entirely behind any of the six frustum planes.
// The first expression evaluates the nx/px/ny/py plane equations at once (one per vec4 lane),
// the second evaluates nz/pz. A plane rejects the sphere when its value is below -radius, so the
// sphere is kept only if every lane is >= -radius (the same convention as a scalar
// "dot(plane.xyz, center) + plane.w >= -radius" test per plane).
bool testSphere(vec3 center, float radius) {
    return
        all(greaterThanEqual(fma(frustum.a1, center.xxxx, fma(frustum.a2, center.yyyy, fma(frustum.a3, center.zzzz, frustum.a4))), -radius.xxxx)) &&
        all(greaterThanEqual(fma(frustum.b1, center.xx, fma(frustum.b2, center.yy, fma(frustum.b3, center.zz, frustum.b4))), -radius.xx));
}
// Tests the bounding sphere of batch `batchID`, placed by object `objectID`, against the frustum.
// The sphere centre is rotated about the object's pivot and then translated by the object's
// position, mirroring how the vertex stage places vertices.
bool isVisible(uint objectID, uint batchID) {
    vec4 sphere = boundingSpheres[batchID];

    // Field names follow struct Instance in compute/objects.glsl (pivot, rotation, pos).
    vec3 pivot = objects[objectID].pivot;
    // NOTE(review): rotateVertexByQuat is the helper name the indirect vertex shader calls;
    // confirm it is what flywheel:util/quaternion.glsl actually exports.
    vec3 center = rotateVertexByQuat(sphere.xyz - pivot, objects[objectID].rotation) + pivot + objects[objectID].pos;

    // The radius is the fourth component. ".r" is an alias of ".x" and would read the centre's x.
    float radius = sphere.w;

    return testSphere(center, radius);
}
// One invocation per object: objects that pass the visibility test claim the next free slot of
// their batch and record their ID there.
void main() {
    uint objectID = gl_GlobalInvocationID.x;

    // The dispatch may be rounded up past the object count; only in-range invocations do work.
    if (objectID < frustum.drawCount) {
        uint batchID = objects[objectID].batchID;

        if (isVisible(objectID, batchID)) {
            // atomicAdd returns the count before the increment, i.e. this object's slot in the batch.
            uint slot = atomicAdd(drawCommands[batchID].instanceCount, 1);
            objectIDs[drawCommands[batchID].baseInstance + slot] = objectID;
        }
    }
}

View file

@ -0,0 +1,28 @@
#use "flywheel:api/vertex.glsl"
#use "flywheel:compute/objects.glsl"
#use "flywheel:pos_tex_normal.glsl"
#use "flywheel:context/world.vert"
// populated by instancers
// Per-object data; main() looks up one entry per drawn instance.
layout(binding = 0) readonly buffer ObjectBuffer {
Instance objects[];
};
// Indirection table: main() reads the object index for an instance at gl_BaseInstance + gl_InstanceID.
layout(binding = 1) readonly buffer TargetBuffer {
uint objectIDs[];
};
// Applies one instance's transform and appearance to the current vertex:
// rotate about the pivot, translate by the instance position, rotate the normal,
// then copy colour and rescale light by 1/15.
void flw_instanceVertex(Instance i) {
    vec3 aboutPivot = flw_vertexPos.xyz - i.pivot;
    vec3 placed = rotateVertexByQuat(aboutPivot, i.rotation) + i.pivot + i.pos;

    flw_vertexPos = vec4(placed, 1.0);
    flw_vertexNormal = rotateVertexByQuat(flw_vertexNormal, i.rotation);
    flw_vertexColor = i.color;
    flw_vertexLight = i.light / 15.0;
}
// Runs the layout, instance and context stages for the current vertex. The instance to apply is
// found through the objectIDs indirection table rather than taken from gl_InstanceID directly.
void main() {
    flw_layoutVertex();
    flw_instanceVertex(objects[objectIDs[gl_BaseInstance + gl_InstanceID]]);
    flw_contextVertex();
}

View file

@ -0,0 +1,17 @@
// Per-object record stored in the object buffer. Member order defines the memory layout,
// so it must stay in step with whatever fills the buffer.
struct Instance {
// Light values; the vertex stage divides these by 15.0.
ivec2 light;
// Copied to the vertex colour by the vertex stage.
vec4 color;
// Translation added after rotating about the pivot.
vec3 pos;
// Point the rotation is applied around.
vec3 pivot;
// Quaternion handed to the quaternion rotation helper.
vec4 rotation;
// Index of the batch this object belongs to.
uint batchID;
};
// One draw command record per batch.
// NOTE(review): the five uints look like an indexed indirect draw command
// (count, instanceCount, firstIndex, baseVertex, baseInstance) - confirm against the code that issues the draw.
struct MeshDrawCommands {
uint indexCount;
// Number of instances to draw for this batch.
uint instanceCount;
uint firstIndex;
uint vertexOffset;
// Offset of this batch's first slot in the object ID table.
uint baseInstance;
};

View file

@ -0,0 +1,72 @@
#version 450
// Work group width along x. main() in this file does not use any invocation index,
// so every invocation computes and stores the same three results.
#define FLW_SUBGROUP_SIZE 32
layout(local_size_x = FLW_SUBGROUP_SIZE) in;
// Built-in compute shader inputs, listed for reference:
// in uvec3 gl_NumWorkGroups;
// in uvec3 gl_WorkGroupID;
// in uvec3 gl_LocalInvocationID;
// in uvec3 gl_GlobalInvocationID;
// in uint gl_LocalInvocationIndex;
// Frustum planes in component-major order, read by testSphere1. The member order is the same as
// the packed format documented on FrustumIntersection#getJozuPackedPlanes.
layout(std430, binding = 0) buffer Frustum1 {
vec4 a1; // vec4(nx.x, px.x, ny.x, py.x)
vec4 a2; // vec4(nx.y, px.y, ny.y, py.y)
vec4 a3; // vec4(nx.z, px.z, ny.z, py.z)
vec4 a4; // vec4(nx.w, px.w, ny.w, py.w)
vec2 b1; // vec2(nz.x, pz.x)
vec2 b2; // vec2(nz.y, pz.y)
vec2 b3; // vec2(nz.z, pz.z)
vec2 b4; // vec2(nz.w, pz.w)
} frustum1;
// Frustum planes stored one vec4 per plane (xyz = normal, w = offset), read by testSphere2 and
// testSphere3.
// std430 is spelled out, as on Frustum1: without a layout qualifier the block falls back to the
// "shared" layout, whose member offsets are implementation-defined. With std430 the planes sit
// 16 bytes apart.
layout(std430, binding = 1) buffer Frustum2 {
    vec4 nx;
    vec4 px;
    vec4 ny;
    vec4 py;
    vec4 nz;
    vec4 pz;
} frustum2;
// Output of the three sphere tests, written by main().
// std430 is spelled out so the member offsets are fixed rather than left to the
// implementation-defined "shared" default.
layout(std430, binding = 2) buffer Result {
    bool res1; // testSphere1
    bool res2; // testSphere2
    bool res3; // testSphere3
} result;
// 83 - 27 = 56 spirv instruction results
//
// Vectorised sphere test against the component-major planes in frustum1 (sphere.xyz = centre,
// sphere.w = radius). Lanes hold the nx/px/ny/py values in the first expression and nz/pz in the
// second. The comparison is >= -radius on every lane, so this agrees with testSphere2 and
// testSphere3; only the operator differs from the earlier form, the expression shape is kept.
bool testSphere1(vec4 sphere) {
    return
        all(greaterThanEqual(fma(frustum1.a1, sphere.xxxx, fma(frustum1.a2, sphere.yyyy, fma(frustum1.a3, sphere.zzzz, frustum1.a4))), -sphere.wwww)) &&
        all(greaterThanEqual(fma(frustum1.b1, sphere.xx, fma(frustum1.b2, sphere.yy, fma(frustum1.b3, sphere.zz, frustum1.b4))), -sphere.ww));
}
// 236 - 92 = 144 spirv instruction results
// Scalar variant: evaluates each of the six plane equations in frustum2 with nested fma calls and
// accepts the sphere (xyz = centre, w = radius) when every value is >= -radius.
// The expression shape is the thing being measured above, so it is deliberately left unrolled.
bool testSphere2(vec4 sphere) {
return
fma(frustum2.nx.x, sphere.x, fma(frustum2.nx.y, sphere.y, fma(frustum2.nx.z, sphere.z, frustum2.nx.w))) >= -sphere.w &&
fma(frustum2.px.x, sphere.x, fma(frustum2.px.y, sphere.y, fma(frustum2.px.z, sphere.z, frustum2.px.w))) >= -sphere.w &&
fma(frustum2.ny.x, sphere.x, fma(frustum2.ny.y, sphere.y, fma(frustum2.ny.z, sphere.z, frustum2.ny.w))) >= -sphere.w &&
fma(frustum2.py.x, sphere.x, fma(frustum2.py.y, sphere.y, fma(frustum2.py.z, sphere.z, frustum2.py.w))) >= -sphere.w &&
fma(frustum2.nz.x, sphere.x, fma(frustum2.nz.y, sphere.y, fma(frustum2.nz.z, sphere.z, frustum2.nz.w))) >= -sphere.w &&
fma(frustum2.pz.x, sphere.x, fma(frustum2.pz.y, sphere.y, fma(frustum2.pz.z, sphere.z, frustum2.pz.w))) >= -sphere.w;
}
// 322 - 240 = 82 spirv instruction results
// Dot-product variant: for each plane in frustum2 computes dot(normal, centre) + offset and
// accepts the sphere (xyz = centre, w = radius) when every value is >= -radius.
// The expression shape is the thing being measured above, so it is deliberately left unrolled.
bool testSphere3(vec4 sphere) {
return
(dot(frustum2.nx.xyz, sphere.xyz) + frustum2.nx.w) >= -sphere.w &&
(dot(frustum2.px.xyz, sphere.xyz) + frustum2.px.w) >= -sphere.w &&
(dot(frustum2.ny.xyz, sphere.xyz) + frustum2.ny.w) >= -sphere.w &&
(dot(frustum2.py.xyz, sphere.xyz) + frustum2.py.w) >= -sphere.w &&
(dot(frustum2.nz.xyz, sphere.xyz) + frustum2.nz.w) >= -sphere.w &&
(dot(frustum2.pz.xyz, sphere.xyz) + frustum2.pz.w) >= -sphere.w;
}
// Runs all three sphere tests on the same fixed sphere (centre (0, 1, 0), radius 1) and stores
// each outcome in the Result buffer so the variants can be compared.
void main() {
    const vec4 probe = vec4(0., 1., 0., 1.);

    result.res1 = testSphere1(probe);
    result.res2 = testSphere2(probe);
    result.res3 = testSphere3(probe);
}