The manhattan batch

- Use atomics to determine buffer position for batches
- Pass resultant vertex count to DrawBuffer once all transform calls
  have run
- Refactor to use RunOnAllPlan within TransformCall
- Make all vertex transform logic operate per-instance instead of
  per-chunk
- Cull instances based on bounding sphere transformations
- Make BufferedMesh#mesh public to expose bounding sphere
- Roll batching #plan() arguments into FrameContext record
This commit is contained in:
Jozufozu 2023-04-16 20:47:47 -07:00
parent 1bd4c4f6cb
commit 5b84046c1d
11 changed files with 130 additions and 68 deletions

View File

@ -0,0 +1,13 @@
package com.jozufozu.flywheel.api.instance;
import org.joml.Vector4f;
/**
 * Relocates a mesh's bounding sphere so that it describes one particular instance of that mesh.
 *
 * <p>This is a single-method interface and is intended to be implemented with a lambda.
 *
 * @param <I> The type of instance this transformer understands.
 */
@FunctionalInterface
public interface InstanceBoundingSphereTransformer<I extends Instance> {
    /**
     * Transform the bounding sphere of a mesh to match the location of the instance.
     *
     * <p>The sphere is updated in place and nothing is returned, so callers that need to keep the
     * mesh's original sphere must pass in a copy.
     *
     * @param boundingSphere The bounding sphere of the mesh formatted as {@code <x, y, z, radius>}.
     *                       Overwritten with the transformed sphere.
     * @param instance The instance to transform the bounding sphere for.
     */
    void transform(Vector4f boundingSphere, I instance);
}

View File

@ -30,4 +30,6 @@ public interface InstanceType<I extends Instance> {
ResourceLocation instanceShader();
InstanceVertexTransformer<I> getVertexTransformer();
InstanceBoundingSphereTransformer<I> getBoundingSphereTransformer();
}

View File

@ -149,7 +149,7 @@ public class BatchedMeshPool {
}
public class BufferedMesh {
private final Mesh mesh;
public final Mesh mesh;
private final int byteSize;
private final int vertexCount;

View File

@ -6,6 +6,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.joml.FrustumIntersection;
import com.jozufozu.flywheel.api.event.RenderContext;
import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.instance.Instance;
@ -17,6 +19,7 @@ import com.jozufozu.flywheel.api.task.Plan;
import com.jozufozu.flywheel.api.task.TaskExecutor;
import com.jozufozu.flywheel.backend.engine.AbstractEngine;
import com.jozufozu.flywheel.backend.engine.InstancerKey;
import com.jozufozu.flywheel.lib.math.MatrixUtil;
import com.jozufozu.flywheel.lib.task.NestedPlan;
import com.jozufozu.flywheel.util.FlwUtil;
import com.mojang.blaze3d.vertex.VertexFormat;
@ -48,12 +51,22 @@ public class BatchingEngine extends AbstractEngine {
var stack = FlwUtil.copyPoseStack(context.stack());
stack.translate(renderOrigin.getX() - cameraPos.x, renderOrigin.getY() - cameraPos.y, renderOrigin.getZ() - cameraPos.z);
double cameraX = cameraPos.x;
double cameraY = cameraPos.y;
double cameraZ = cameraPos.z;
org.joml.Matrix4f proj = MatrixUtil.toJoml(context.viewProjection());
proj.translate((float) (renderOrigin.getX() - cameraX), (float) (renderOrigin.getY() - cameraY), (float) (renderOrigin.getZ() - cameraZ));
FrustumIntersection frustum = new FrustumIntersection(proj);
var ctx = new FrameContext(context.level(), stack.last(), frustum);
flush();
var plans = new ArrayList<Plan>();
for (var transformSet : stages.values()) {
plans.add(transformSet.plan(stack.last(), context.level()));
plans.add(transformSet.plan(ctx));
}
return new NestedPlan(plans);

View File

@ -4,6 +4,7 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import com.jozufozu.flywheel.api.event.RenderStage;
import com.jozufozu.flywheel.api.task.Plan;
@ -11,9 +12,7 @@ import com.jozufozu.flywheel.api.task.TaskExecutor;
import com.jozufozu.flywheel.lib.task.NestedPlan;
import com.jozufozu.flywheel.lib.task.Synchronizer;
import com.jozufozu.flywheel.lib.task.UnitPlan;
import com.mojang.blaze3d.vertex.PoseStack;
import net.minecraft.client.multiplayer.ClientLevel;
import net.minecraft.client.renderer.RenderType;
/**
@ -29,11 +28,11 @@ public class BatchingStage {
this.tracker = tracker;
}
public Plan plan(PoseStack.Pose matrices, ClientLevel level) {
public Plan plan(FrameContext ctx) {
var plans = new ArrayList<Plan>();
for (var bufferPlan : buffers.values()) {
plans.add(bufferPlan.update(matrices, level));
plans.add(bufferPlan.update(ctx));
}
return new NestedPlan(plans);
@ -51,17 +50,15 @@ public class BatchingStage {
private class BufferPlan implements Plan {
private final DrawBuffer buffer;
private final List<TransformCall<?>> transformCalls = new ArrayList<>();
private PoseStack.Pose matrices;
private ClientLevel level;
private FrameContext ctx;
private int vertexCount;
public BufferPlan(DrawBuffer drawBuffer) {
buffer = drawBuffer;
}
public Plan update(PoseStack.Pose matrices, ClientLevel level) {
this.matrices = matrices;
this.level = level;
public Plan update(FrameContext ctx) {
this.ctx = ctx;
vertexCount = setupAndCountVertices();
if (vertexCount <= 0) {
@ -81,15 +78,17 @@ public class BatchingStage {
@Override
public void execute(TaskExecutor taskExecutor, Runnable onCompletion) {
AtomicInteger vertexCounter = new AtomicInteger(0);
buffer.prepare(vertexCount);
var synchronizer = new Synchronizer(transformCalls.size(), onCompletion);
var synchronizer = new Synchronizer(transformCalls.size(), () -> {
buffer.vertexCount(vertexCounter.get());
onCompletion.run();
});
int startVertex = 0;
for (var transformCall : transformCalls) {
transformCall.plan(buffer, startVertex, matrices, level)
transformCall.plan(ctx, buffer, vertexCounter)
.execute(taskExecutor, synchronizer::decrementAndEventuallyRun);
startVertex += transformCall.getTotalVertexCount();
}
}

View File

@ -69,19 +69,28 @@ public class DrawBuffer {
prepared = true;
}
/**
 * Stores the given vertex count on this buffer, replacing whatever value was recorded before.
 *
 * @param vertexCount The vertex count to record.
 */
public void vertexCount(final int vertexCount) {
    this.vertexCount = vertexCount;
}
/**
 * Creates a vertex list that starts at the given vertex of this buffer.
 *
 * @param startVertex Index of the first vertex the returned list points at.
 * @param vertexCount Number of vertices the returned list is told it contains.
 * @return A vertex list obtained from the provider, positioned at {@code startVertex}.
 * @throws IllegalStateException If this buffer has not been prepared.
 */
public ReusableVertexList slice(int startVertex, int vertexCount) {
    if (!prepared) {
        throw new IllegalStateException("Cannot slice DrawBuffer that is not prepared!");
    }

    ReusableVertexList vertexList = provider.createVertexList();
    // The address is computed in one place only; ptrForVertex takes a long, so the
    // index is widened before it is multiplied by the stride.
    vertexList.ptr(ptrForVertex(startVertex));
    vertexList.vertexCount(vertexCount);
    return vertexList;
}
/**
 * Computes the address of a vertex within the backing memory.
 *
 * @param startVertex Index of the vertex to locate.
 * @return The base pointer of the backing memory plus {@code startVertex} strides.
 */
public long ptrForVertex(long startVertex) {
    final long byteOffset = startVertex * stride;
    return memory.ptr() + byteOffset;
}
/**
* Injects the backing buffer into the given builder and prepares it for rendering.
*
* @param bufferBuilder The buffer builder to inject into.
*/
public void inject(BufferBuilderExtension bufferBuilder) {

View File

@ -0,0 +1,7 @@
package com.jozufozu.flywheel.backend.engine.batching;
import com.mojang.blaze3d.vertex.PoseStack;
/**
 * Bundles the values that the batching plans need for a single frame.
 *
 * @param level The level passed on to the instance and material vertex transformers.
 * @param matrices The pose applied to transformed vertices.
 * @param frustum The frustum that instance bounding spheres are tested against.
 */
public record FrameContext(
        net.minecraft.client.multiplayer.ClientLevel level,
        PoseStack.Pose matrices,
        org.joml.FrustumIntersection frustum
) {
}

View File

@ -1,23 +1,23 @@
package com.jozufozu.flywheel.backend.engine.batching;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import org.joml.Vector4f;
import org.joml.Vector4fc;
import com.jozufozu.flywheel.api.instance.Instance;
import com.jozufozu.flywheel.api.instance.InstanceBoundingSphereTransformer;
import com.jozufozu.flywheel.api.instance.InstanceVertexTransformer;
import com.jozufozu.flywheel.api.material.Material;
import com.jozufozu.flywheel.api.material.MaterialVertexTransformer;
import com.jozufozu.flywheel.api.task.Plan;
import com.jozufozu.flywheel.api.vertex.MutableVertexList;
import com.jozufozu.flywheel.api.vertex.ReusableVertexList;
import com.jozufozu.flywheel.lib.math.MoreMath;
import com.jozufozu.flywheel.lib.task.SimplePlan;
import com.jozufozu.flywheel.lib.task.RunOnAllPlan;
import com.jozufozu.flywheel.lib.vertex.VertexTransformations;
import com.mojang.blaze3d.vertex.PoseStack;
import com.mojang.math.Matrix3f;
import com.mojang.math.Matrix4f;
import net.minecraft.client.multiplayer.ClientLevel;
public class TransformCall<I extends Instance> {
private final CPUInstancer<I> instancer;
private final Material material;
@ -25,14 +25,23 @@ public class TransformCall<I extends Instance> {
private final int meshVertexCount;
private final int meshByteSize;
private final InstanceVertexTransformer<I> instanceVertexTransformer;
private final MaterialVertexTransformer materialVertexTransformer;
private final InstanceBoundingSphereTransformer<I> boundingSphereTransformer;
private final Vector4fc boundingSphere;
public TransformCall(CPUInstancer<I> instancer, Material material, BatchedMeshPool.BufferedMesh mesh) {
this.instancer = instancer;
this.material = material;
this.mesh = mesh;
instanceVertexTransformer = instancer.type.getVertexTransformer();
boundingSphereTransformer = instancer.type.getBoundingSphereTransformer();
materialVertexTransformer = material.getVertexTransformer();
meshVertexCount = mesh.getVertexCount();
meshByteSize = mesh.size();
boundingSphere = mesh.mesh.getBoundingSphere();
}
public int getTotalVertexCount() {
@ -43,49 +52,28 @@ public class TransformCall<I extends Instance> {
instancer.update();
}
public Plan plan(DrawBuffer buffer, int startVertex, PoseStack.Pose matrices, ClientLevel level) {
final int totalCount = instancer.getInstanceCount();
final int chunkSize = MoreMath.ceilingDiv(totalCount, 6 * 32);
public Plan plan(FrameContext ctx, DrawBuffer buffer, final AtomicInteger vertexCount) {
return RunOnAllPlan.of(instancer::getAll, instance -> {
var boundingSphere = new Vector4f(this.boundingSphere);
final var out = new ArrayList<Runnable>();
int remaining = totalCount;
while (remaining > 0) {
int end = remaining;
remaining -= chunkSize;
int start = Math.max(remaining, 0);
boundingSphereTransformer.transform(boundingSphere, instance);
int vertexCount = meshVertexCount * (end - start);
ReusableVertexList sub = buffer.slice(startVertex, vertexCount);
startVertex += vertexCount;
if (!ctx.frustum()
.testSphere(boundingSphere.x, boundingSphere.y, boundingSphere.z, boundingSphere.w)) {
return;
}
out.add(() -> transform(sub, matrices, level, instancer.getRange(start, end)));
}
return new SimplePlan(out);
}
final int baseVertex = vertexCount.getAndAdd(meshVertexCount);
private void transform(ReusableVertexList vertexList, PoseStack.Pose matrices, ClientLevel level, List<I> instances) {
// save the total size of the slice for later.
final long anchorPtr = vertexList.ptr();
final int totalVertexCount = vertexList.vertexCount();
var sub = buffer.slice(baseVertex, meshVertexCount);
// while working on individual instances, the vertex list should expose just a single copy of the mesh.
vertexList.vertexCount(meshVertexCount);
mesh.copyTo(sub.ptr());
InstanceVertexTransformer<I> instanceVertexTransformer = instancer.type.getVertexTransformer();
instanceVertexTransformer.transform(sub, instance, ctx.level());
for (I instance : instances) {
mesh.copyTo(vertexList.ptr());
instanceVertexTransformer.transform(vertexList, instance, level);
vertexList.ptr(vertexList.ptr() + meshByteSize);
}
// restore the original size of the slice to apply per-vertex transformations.
vertexList.ptr(anchorPtr);
vertexList.vertexCount(totalVertexCount);
material.getVertexTransformer().transform(vertexList, level);
applyMatrices(vertexList, matrices);
materialVertexTransformer.transform(sub, ctx.level());
applyMatrices(sub, ctx.matrices());
});
}
private static void applyMatrices(MutableVertexList vertexList, PoseStack.Pose matrices) {

View File

@ -1,5 +1,8 @@
package com.jozufozu.flywheel.lib.instance;
import org.joml.Quaternionf;
import com.jozufozu.flywheel.api.instance.InstanceBoundingSphereTransformer;
import com.jozufozu.flywheel.api.instance.InstanceHandle;
import com.jozufozu.flywheel.api.instance.InstanceType;
import com.jozufozu.flywheel.api.instance.InstanceVertexTransformer;
@ -74,4 +77,13 @@ public class OrientedType implements InstanceType<OrientedInstance> {
}
};
}
/**
 * Builds the bounding sphere transformer for oriented instances: the sphere is shifted by the
 * negated pivot, rotated by the instance quaternion, then shifted by position plus pivot.
 */
@Override
public InstanceBoundingSphereTransformer<OrientedInstance> getBoundingSphereTransformer() {
    return (boundingSphere, instance) -> {
        // Shift by the negated pivot before rotating. The 0 keeps the fourth component out of it.
        boundingSphere.sub(instance.pivotX, instance.pivotY, instance.pivotZ, 0);

        var rotation = new Quaternionf(instance.qX, instance.qY, instance.qZ, instance.qW);
        boundingSphere.rotate(rotation);

        // Restore the pivot offset and apply the instance position in a single addition.
        var offsetX = instance.posX + instance.pivotX;
        var offsetY = instance.posY + instance.pivotY;
        var offsetZ = instance.posZ + instance.pivotZ;
        boundingSphere.add(offsetX, offsetY, offsetZ, 0);
    };
}
}

View File

@ -1,11 +1,13 @@
package com.jozufozu.flywheel.lib.instance;
import com.jozufozu.flywheel.api.instance.InstanceBoundingSphereTransformer;
import com.jozufozu.flywheel.api.instance.InstanceHandle;
import com.jozufozu.flywheel.api.instance.InstanceType;
import com.jozufozu.flywheel.api.instance.InstanceVertexTransformer;
import com.jozufozu.flywheel.api.instance.InstanceWriter;
import com.jozufozu.flywheel.api.layout.BufferLayout;
import com.jozufozu.flywheel.lib.layout.CommonItems;
import com.jozufozu.flywheel.lib.math.MatrixUtil;
import com.jozufozu.flywheel.lib.math.RenderMath;
import com.jozufozu.flywheel.lib.vertex.VertexTransformations;
@ -60,4 +62,14 @@ public class TransformedType implements InstanceType<TransformedInstance> {
}
};
}
/**
 * Builds the bounding sphere transformer for transformed instances: the sphere's center is run
 * through the instance's model matrix and its radius is multiplied by the matrix's scale.
 */
@Override
public InstanceBoundingSphereTransformer<TransformedInstance> getBoundingSphereTransformer() {
    return (boundingSphere, instance) -> {
        // The radius lives in w; work out its scaled value before w is borrowed below.
        var scaledRadius = boundingSphere.w * MatrixUtil.extractScale(instance.model);

        // Set w to 1 for the multiplication so the center is handled as a position.
        boundingSphere.w = 1;
        var model = MatrixUtil.toJoml(instance.model);
        boundingSphere.mul(model);

        boundingSphere.w = scaledRadius;
    };
}
}

View File

@ -129,18 +129,25 @@ public final class MatrixUtil {
public static void store(Matrix3f matrix, org.joml.Matrix3f jomlMatrix) {
Matrix3fAccessor m = (Matrix3fAccessor) (Object) matrix;
jomlMatrix.set(
m.flywheel$m00(), m.flywheel$m10(), m.flywheel$m20(),
m.flywheel$m01(), m.flywheel$m11(), m.flywheel$m21(),
m.flywheel$m02(), m.flywheel$m12(), m.flywheel$m22()
);
m.flywheel$m00(), m.flywheel$m10(), m.flywheel$m20(), m.flywheel$m01(), m.flywheel$m11(), m.flywheel$m21(), m.flywheel$m02(), m.flywheel$m12(), m.flywheel$m22());
}
public static org.joml.Matrix3f toJoml(Matrix3f matrix) {
Matrix3fAccessor m = (Matrix3fAccessor) (Object) matrix;
return new org.joml.Matrix3f(
m.flywheel$m00(), m.flywheel$m10(), m.flywheel$m20(),
m.flywheel$m01(), m.flywheel$m11(), m.flywheel$m21(),
m.flywheel$m02(), m.flywheel$m12(), m.flywheel$m22()
);
return new org.joml.Matrix3f(m.flywheel$m00(), m.flywheel$m10(), m.flywheel$m20(), m.flywheel$m01(), m.flywheel$m11(), m.flywheel$m21(), m.flywheel$m02(), m.flywheel$m12(), m.flywheel$m22());
}
/**
 * Extracts the greatest scale factor across all axes from the given matrix.
 *
 * <p>The scale along an axis is the length of the matching column of the upper-left 3x3
 * block, i.e. the length of the vector that the unit axis is mapped to. The accessors follow
 * the row-then-column naming that {@code store} and {@code toJoml} depend on when they reorder
 * components for JOML, so column {@code c} consists of {@code m0c}, {@code m1c} and {@code m2c}.
 * Summing along rows instead would underestimate the scale of a rotated, non-uniformly scaled
 * matrix.
 *
 * @param matrix The matrix to extract the scale from.
 * @return The greatest scale factor across all axes.
 */
public static float extractScale(Matrix4f matrix) {
    Matrix4fAccessor m = (Matrix4fAccessor) (Object) matrix;
    // Squared lengths of the three basis columns; the square root is taken once, at the end.
    float scaleSqrX = m.flywheel$m00() * m.flywheel$m00() + m.flywheel$m10() * m.flywheel$m10() + m.flywheel$m20() * m.flywheel$m20();
    float scaleSqrY = m.flywheel$m01() * m.flywheel$m01() + m.flywheel$m11() * m.flywheel$m11() + m.flywheel$m21() * m.flywheel$m21();
    float scaleSqrZ = m.flywheel$m02() * m.flywheel$m02() + m.flywheel$m12() * m.flywheel$m12() + m.flywheel$m22() * m.flywheel$m22();
    return (float) Math.sqrt(Math.max(Math.max(scaleSqrX, scaleSqrY), scaleSqrZ));
}
}