Functional frustum filter

- Manually fix alignment issue
 - Align UBO size to 16
 - Manual frustum UBO upload
This commit is contained in:
Jozufozu 2022-08-05 21:53:21 -07:00
parent 9d657aed40
commit 63dc8ee923
13 changed files with 137 additions and 65 deletions

View file

@ -9,7 +9,7 @@ public abstract class UniformProvider {
protected ByteBuffer buffer;
protected Notifier notifier;
public abstract int getSize();
public abstract int getActualByteSize();
public void updatePtr(ByteBuffer backing, Notifier notifier) {
this.buffer = backing;

View file

@ -0,0 +1,31 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import static org.lwjgl.opengl.GL46.*;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.util.joml.FrustumIntersection;
// This should be a push constant :whywheel:
/**
 * Owns the GL uniform buffer that holds the packed frustum planes, together
 * with the native staging memory the planes are written into before upload.
 *
 * <p>Both resources live outside the Java heap, so {@link #delete()} must be
 * called once the object is no longer needed.
 */
public class FrustumUBO {
	/**
	 * Size in bytes of both the GL buffer and the staging memory.
	 * NOTE(review): 96 presumably corresponds to four vec4 followed by four vec2
	 * in the std140 block written by getJozuPackedPlanes — confirm against the shader.
	 */
	public static final int BUFFER_SIZE = 96;

	/** Uniform buffer binding point used by {@link #bind()}. */
	private static final int BINDING_INDEX = 0;

	private final int ubo;
	private final long clientStorage;
	private boolean deleted;

	/**
	 * Allocates the staging memory and creates immutable GL storage of
	 * {@link #BUFFER_SIZE} bytes that may be updated through buffer sub-data calls.
	 */
	FrustumUBO() {
		// Allocate natively first: the checked variant throws instead of returning NULL,
		// and failing before the GL buffer exists means nothing is left dangling.
		clientStorage = MemoryUtil.nmemAllocChecked(BUFFER_SIZE);
		ubo = glCreateBuffers();
		glNamedBufferStorage(ubo, BUFFER_SIZE, GL_DYNAMIC_STORAGE_BIT);
	}

	/**
	 * Writes the packed planes of {@code frustum} into the staging memory and
	 * uploads all {@link #BUFFER_SIZE} bytes to the GL buffer.
	 *
	 * @param frustum the frustum whose planes are uploaded
	 */
	public void update(FrustumIntersection frustum) {
		frustum.getJozuPackedPlanes(clientStorage);
		nglNamedBufferSubData(ubo, 0, BUFFER_SIZE, clientStorage);
	}

	/** Binds the buffer to the uniform buffer binding point {@link #BINDING_INDEX}. */
	public void bind() {
		glBindBufferBase(GL_UNIFORM_BUFFER, BINDING_INDEX, ubo);
	}

	/**
	 * Releases the GL buffer and the native staging memory. Calling this more
	 * than once is harmless; the object must not be used afterwards.
	 */
	public void delete() {
		if (deleted) {
			return;
		}
		deleted = true;
		glDeleteBuffers(ubo);
		MemoryUtil.nmemFree(clientStorage);
	}
}

View file

@ -17,7 +17,6 @@ import com.jozufozu.flywheel.backend.gl.GlTextureUnit;
import com.jozufozu.flywheel.backend.instancing.Engine;
import com.jozufozu.flywheel.backend.instancing.InstanceManager;
import com.jozufozu.flywheel.backend.instancing.TaskEngine;
import com.jozufozu.flywheel.backend.instancing.instancing.MeshPool;
import com.jozufozu.flywheel.core.RenderContext;
import com.jozufozu.flywheel.api.context.ContextShader;
import com.jozufozu.flywheel.backend.instancing.instancing.InstancedArraysCompiler;
@ -46,6 +45,8 @@ public class IndirectEngine implements Engine {
protected final List<InstancedModel<?>> uninitializedModels = new ArrayList<>();
protected final RenderLists renderLists = new RenderLists();
private FrustumUBO frustumUBO;
/**
* The set of instance managers that are attached to this engine.
*/
@ -76,7 +77,7 @@ public class IndirectEngine implements Engine {
setup();
for (var group : groups) {
group.submit();
group.submit(frustumUBO);
}
}
@ -145,10 +146,26 @@ public class IndirectEngine implements Engine {
/**
 * Per-frame preparation: makes sure the frustum UBO exists, initializes every
 * model queued in {@code uninitializedModels}, and uploads the culling planes
 * for the current camera.
 *
 * @param taskEngine not used by this implementation
 * @param context    supplies the camera and the view-projection matrix
 */
@Override
public void beginFrame(TaskEngine taskEngine, RenderContext context) {
	// Created on first use rather than in the constructor.
	// NOTE(review): presumably because a GL context is needed — confirm.
	if (frustumUBO == null) {
		frustumUBO = new FrustumUBO();
	}

	for (InstancedModel<?> pending : uninitializedModels) {
		pending.init(renderLists);
	}
	uninitializedModels.clear();

	// Camera position expressed relative to the engine's origin coordinate.
	final Vec3 cameraPos = context.camera().getPosition();
	final float relX = (float) (cameraPos.x - originCoordinate.getX());
	final float relY = (float) (cameraPos.y - originCoordinate.getY());
	final float relZ = (float) (cameraPos.z - originCoordinate.getZ());

	// createCuller applies the given values as a translation, hence the negation.
	frustumUBO.update(RenderContext.createCuller(context.viewProjection(), -relX, -relY, -relZ));
}
private void shiftListeners(int cX, int cY, int cZ) {

View file

@ -2,18 +2,15 @@ package com.jozufozu.flywheel.backend.instancing.indirect;
import static org.lwjgl.opengl.GL46.*;
import java.text.Format;
import java.util.ArrayList;
import java.util.List;
import org.lwjgl.opengl.GL46C;
import org.lwjgl.system.MemoryStack;
import org.lwjgl.system.MemoryUtil;
import com.jozufozu.flywheel.api.instancer.InstancedPart;
import com.jozufozu.flywheel.api.struct.StorageBufferWriter;
import com.jozufozu.flywheel.api.struct.StructType;
import com.jozufozu.flywheel.backend.gl.GlNumericType;
import com.jozufozu.flywheel.backend.gl.shader.GlProgram;
import com.jozufozu.flywheel.core.Components;
import com.jozufozu.flywheel.core.Materials;
@ -24,7 +21,7 @@ import com.jozufozu.flywheel.core.vertex.Formats;
public class IndirectList<T extends InstancedPart> {
private static final int DRAW_COMMAND_STRIDE = 20;
private static final long DRAW_COMMAND_STRIDE = 20;
final StorageBufferWriter<T> storageBufferWriter;
final GlProgram compute;
@ -51,12 +48,13 @@ public class IndirectList<T extends InstancedPart> {
* Stores drawIndirect structs.
*/
int drawBuffer;
int debugBuffer;
final IndirectMeshPool meshPool;
int vertexArray;
final int[] shaderStorageBuffers = new int[4];
final int[] shaderStorageBuffers = new int[5];
final List<Batch<T>> batches = new ArrayList<>();
@ -73,10 +71,11 @@ public class IndirectList<T extends InstancedPart> {
targetBuffer = shaderStorageBuffers[1];
boundingSphereBuffer = shaderStorageBuffers[2];
drawBuffer = shaderStorageBuffers[3];
debugBuffer = shaderStorageBuffers[4];
meshPool = new IndirectMeshPool(Formats.BLOCK, 1024);
// FIXME: Resizable buffers
maxObjectCount = 1024L * 100L;
maxObjectCount = 1024L;
maxBatchCount = 64;
// +4 for the batch id
@ -85,6 +84,7 @@ public class IndirectList<T extends InstancedPart> {
glNamedBufferStorage(targetBuffer, 4 * maxObjectCount, GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(boundingSphereBuffer, 16 * maxBatchCount, GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(drawBuffer, DRAW_COMMAND_STRIDE * maxBatchCount, GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(debugBuffer, 4 * maxObjectCount, GL_DYNAMIC_STORAGE_BIT);
objectClientStorage = MemoryUtil.nmemAlloc(objectStride * maxObjectCount);
@ -115,17 +115,13 @@ public class IndirectList<T extends InstancedPart> {
offset += attribute
.getByteWidth();
}
glEnableVertexArrayAttrib(vertexArray, meshAttribs);
glVertexArrayVertexBuffer(vertexArray, meshAttribs, targetBuffer, 0, 4);
glVertexArrayAttribIFormat(vertexArray, meshAttribs, 1, GlNumericType.UINT.getGlEnum(), 0);
}
public void add(Mesh mesh, IndirectInstancer<T> instancer) {
batches.add(new Batch<>(instancer, meshPool.alloc(mesh)));
}
void submit() {
void submit(FrustumUBO frustumUBO) {
int instanceCountThisFrame = calculateTotalInstanceCount();
if (instanceCountThisFrame == 0) {
@ -137,14 +133,15 @@ public class IndirectList<T extends InstancedPart> {
uploadBoundingSpheres();
uploadIndirectCommands();
UniformBuffer.getInstance().sync();
frustumUBO.bind();
dispatchCompute(instanceCountThisFrame);
issueMemoryBarrier();
dispatchDraw();
}
private void dispatchDraw() {
UniformBuffer.getInstance().sync();
draw.bind();
Materials.BELL.setup();
glVertexArrayElementBuffer(vertexArray, elementBuffer);
@ -170,13 +167,17 @@ public class IndirectList<T extends InstancedPart> {
ptr += 16;
}
GL46C.nglNamedBufferSubData(boundingSphereBuffer, 0, size, basePtr);
nglNamedBufferSubData(boundingSphereBuffer, 0, size, basePtr);
}
}
private void dispatchCompute(int instanceCount) {
compute.bind();
glBindBuffersBase(GL_SHADER_STORAGE_BUFFER, 0, shaderStorageBuffers);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, objectBuffer, 0, instanceCount * objectStride);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 1, targetBuffer, 0, instanceCount * 4L);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, boundingSphereBuffer, 0, batches.size() * 16L);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 3, drawBuffer, 0, batches.size() * DRAW_COMMAND_STRIDE);
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 4, debugBuffer, 0, instanceCount * 4L);
var groupCount = (instanceCount + 31) >> 5; // ceil(totalInstanceCount / 32)
glDispatchCompute(groupCount, 1, 1);
@ -197,20 +198,20 @@ public class IndirectList<T extends InstancedPart> {
batchID++;
}
GL46C.nglNamedBufferSubData(objectBuffer, 0, ptr - objectClientStorage, objectClientStorage);
nglNamedBufferSubData(objectBuffer, 0, ptr - objectClientStorage, objectClientStorage);
}
private void uploadIndirectCommands() {
try (var stack = MemoryStack.stackPush()) {
var size = batches.size() * DRAW_COMMAND_STRIDE;
long basePtr = stack.nmalloc(size);
long size = batches.size() * DRAW_COMMAND_STRIDE;
long basePtr = stack.nmalloc((int) size);
long writePtr = basePtr;
for (Batch<T> batch : batches) {
batch.writeIndirectCommand(writePtr);
writePtr += DRAW_COMMAND_STRIDE;
}
GL46C.nglNamedBufferSubData(drawBuffer, 0, size, basePtr);
nglNamedBufferSubData(drawBuffer, 0, size, basePtr);
}
}

View file

@ -1,5 +1,6 @@
package com.jozufozu.flywheel.backend.instancing.indirect;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;

View file

@ -23,13 +23,10 @@ public record RenderContext(LevelRenderer renderer, ClientLevel level, PoseStack
return viewProjection;
}
public static FrustumIntersection createCuller(Camera camera, Matrix4f viewProjection) {
public static FrustumIntersection createCuller(Matrix4f viewProjection, float camX, float camY, float camZ) {
com.jozufozu.flywheel.util.joml.Matrix4f proj = Matrix4fExtension.clone(viewProjection);
Vec3 cam = camera
.getPosition();
proj.translate((float) -cam.x, (float) -cam.y, (float) -cam.z);
proj.translate(camX, camY, camZ);
return new FrustumIntersection(proj);
}

View file

@ -23,23 +23,23 @@ public class OrientedStorageWriter implements StorageBufferWriter<OrientedPart>
MemoryUtil.memPutFloat(ptr + 20, d.posY);
MemoryUtil.memPutFloat(ptr + 24, d.posZ);
MemoryUtil.memPutFloat(ptr + 28, d.pivotX);
MemoryUtil.memPutFloat(ptr + 32, d.pivotY);
MemoryUtil.memPutFloat(ptr + 36, d.pivotZ);
MemoryUtil.memPutFloat(ptr + 32, d.pivotX);
MemoryUtil.memPutFloat(ptr + 36, d.pivotY);
MemoryUtil.memPutFloat(ptr + 40, d.pivotZ);
MemoryUtil.memPutShort(ptr + 40, d.blockLight);
MemoryUtil.memPutShort(ptr + 42, d.skyLight);
MemoryUtil.memPutShort(ptr + 44, d.blockLight);
MemoryUtil.memPutShort(ptr + 46, d.skyLight);
MemoryUtil.memPutByte(ptr + 44, d.r);
MemoryUtil.memPutByte(ptr + 45, d.g);
MemoryUtil.memPutByte(ptr + 46, d.b);
MemoryUtil.memPutByte(ptr + 47, d.a);
MemoryUtil.memPutByte(ptr + 48, d.r);
MemoryUtil.memPutByte(ptr + 49, d.g);
MemoryUtil.memPutByte(ptr + 50, d.b);
MemoryUtil.memPutByte(ptr + 51, d.a);
MemoryUtil.memPutInt(ptr + 48, batchID);
MemoryUtil.memPutInt(ptr + 52, batchID);
}
@Override
public int getAlignment() {
return 52;
return 64;
}
}

View file

@ -12,7 +12,7 @@ public class FogProvider extends UniformProvider {
@Override
public int getSize() {
public int getActualByteSize() {
return 16 + 8 + 4;
}

View file

@ -47,11 +47,11 @@ public class UniformBuffer {
int totalBytes = 0;
int index = 0;
for (UniformProvider provider : providers) {
int size = provider.getSize();
int size = alignPo2(provider.getActualByteSize(), 16);
builder.add(new Allocated(provider, totalBytes, size, index));
totalBytes = align(totalBytes + size);
totalBytes = alignUniformBuffer(totalBytes + size);
index++;
}
@ -82,7 +82,7 @@ public class UniformBuffer {
}
// https://stackoverflow.com/questions/3407012/rounding-up-to-the-nearest-multiple-of-a-number
private static int align(int numToRound) {
private static int alignUniformBuffer(int numToRound) {
if (PO2_ALIGNMENT) {
return (numToRound + OFFSET_ALIGNMENT - 1) & -OFFSET_ALIGNMENT;
} else {
@ -90,6 +90,10 @@ public class UniformBuffer {
}
}
/**
 * Rounds {@code numToRound} up to the next multiple of {@code alignment}
 * using a bit mask.
 *
 * @param numToRound the value to round up
 * @param alignment  the alignment; the mask trick only yields a multiple when
 *                   this is a power of two
 * @return {@code numToRound} rounded up to a multiple of {@code alignment}
 */
private static int alignPo2(int numToRound, int alignment) {
	final int lowBits = alignment - 1;
	// ~(alignment - 1) is the same bit pattern as -alignment in two's complement.
	return (numToRound + lowBits) & ~lowBits;
}
private class Allocated implements UniformProvider.Notifier {
private final UniformProvider provider;
private final int offset;

View file

@ -26,7 +26,7 @@ public class ViewProvider extends UniformProvider {
}
@Override
public int getSize() {
public int getActualByteSize() {
return 4 * 16 + 16 + 4;
}

View file

@ -27,6 +27,7 @@ import net.minecraft.client.renderer.GameRenderer;
import net.minecraft.client.renderer.LevelRenderer;
import net.minecraft.client.renderer.LightTexture;
import net.minecraft.client.renderer.RenderBuffers;
import net.minecraft.world.phys.Vec3;
import net.minecraftforge.common.MinecraftForge;
@Mixin(value = LevelRenderer.class, priority = 1001) // Higher priority to go after sodium
@ -44,7 +45,8 @@ public class LevelRendererMixin {
@Inject(at = @At("HEAD"), method = "renderLevel")
private void beginRender(PoseStack pPoseStack, float pPartialTick, long pFinishNanoTime, boolean pRenderBlockOutline, Camera pCamera, GameRenderer pGameRenderer, LightTexture pLightTexture, Matrix4f pProjectionMatrix, CallbackInfo ci) {
var viewProjection = RenderContext.createViewProjection(pPoseStack, pProjectionMatrix);
var culler = RenderContext.createCuller(pCamera, viewProjection);
var cameraPos = pCamera.getPosition();
var culler = RenderContext.createCuller(viewProjection, (float) -cameraPos.x, (float) -cameraPos.y, (float) -cameraPos.z);
renderContext = new RenderContext((LevelRenderer) (Object) this, level, pPoseStack, viewProjection, pProjectionMatrix, renderBuffers, pCamera, culler);
try (var restoreState = GlStateTracker.getRestoreState()) {

View file

@ -1004,11 +1004,9 @@ public class FrustumIntersection {
* {@code vec2(nzZ, pzZ)}<br>
* {@code vec2(nzW, pzW)}<br>
*
* @param buffer The buffer to write the planes to.
* @param addr The buffer to write the planes to.
*/
public void getJozuPackedPlanes(ByteBuffer buffer) {
long addr = MemoryUtil.memAddress(buffer);
public void getJozuPackedPlanes(long addr) {
MemoryUtil.memPutFloat(addr, nxX);
MemoryUtil.memPutFloat(addr + 4, pxX);
MemoryUtil.memPutFloat(addr + 8, nyX);

View file

@ -3,7 +3,10 @@ layout(local_size_x = FLW_SUBGROUP_SIZE) in;
#use "flywheel:compute/objects.glsl"
#use "flywheel:util/quaternion.glsl"
layout(std140, binding = 3) uniform FrameData {
uint flw_objectID;
uint flw_batchID;
layout(std140, binding = 0) uniform FrameData {
vec4 a1; // vec4(nx.x, px.x, ny.x, py.x)
vec4 a2; // vec4(nx.y, px.y, ny.y, py.y)
vec4 a3; // vec4(nx.z, px.z, ny.z, py.z)
@ -31,37 +34,55 @@ layout(std430, binding = 3) buffer DrawCommands {
MeshDrawCommand drawCommands[];
};
layout(std430, binding = 4) writeonly buffer DebugVisibility {
uint objectVisibilityBits[];
};
// 83 - 27 = 56 spirv instruction results
bool testSphere(vec3 center, float radius) {
return
all(lessThanEqual(fma(frustum.a1, center.xxxx, fma(frustum.a2, center.yyyy, fma(frustum.a3, center.zzzz, frustum.a4))), -radius.xxxx)) &&
all(lessThanEqual(fma(frustum.b1, center.xx, fma(frustum.b2, center.yy, fma(frustum.b3, center.zz, frustum.b4))), -radius.xx));
bvec4 resultA = greaterThanEqual(fma(frustum.a1, center.xxxx, fma(frustum.a2, center.yyyy, fma(frustum.a3, center.zzzz, frustum.a4))), -radius.xxxx);
bvec2 resultB = greaterThanEqual(fma(frustum.b1, center.xx, fma(frustum.b2, center.yy, fma(frustum.b3, center.zz, frustum.b4))), -radius.xx);
uint debug = uint(resultA.x);
debug |= uint(resultA.y) << 1;
debug |= uint(resultA.z) << 2;
debug |= uint(resultA.w) << 3;
debug |= uint(resultB.x) << 4;
debug |= uint(resultB.y) << 5;
objectVisibilityBits[flw_objectID] = debug;
return all(resultA) && all(resultB);
}
bool isVisible(uint objectID, uint batchID) {
vec4 sphere = boundingSpheres[batchID];
// Transforms a bounding sphere by instance i: the center is rotated about the
// instance pivot by the instance rotation, then offset by the instance position.
// The radius is passed through unchanged.
void flw_transformBoundingSphere(in Instance i, inout vec3 center, inout float radius) {
    vec3 pivotRelative = center - i.pivot;
    vec3 rotated = rotateVertexByQuat(pivotRelative, i.rotation);
    center = rotated + i.pivot + i.pos;
}
vec3 pivot = objects[objectID].pivot;
vec3 center = rotateVertexByQuat(sphere.xyz - pivot, objects[objectID].rotation) + pivot + objects[objectID].pos;
bool isVisible() {
vec4 sphere = boundingSpheres[flw_batchID];
vec3 center = sphere.xyz;
float radius = sphere.r;
flw_transformBoundingSphere(objects[flw_objectID], center, radius);
return true; //testSphere(center, radius);
return testSphere(center, radius);
}
void main() {
uint objectID = gl_GlobalInvocationID.x;
flw_objectID = gl_GlobalInvocationID.x;
if (objectID >= objects.length()) {
if (flw_objectID >= objects.length()) {
return;
}
uint batchID = objects[objectID].batchID;
bool visible = isVisible(objectID, batchID);
flw_batchID = objects[objectID].batchID;
if (visible) {
uint batchIndex = atomicAdd(drawCommands[batchID].instanceCount, 1);
uint globalIndex = drawCommands[batchID].baseInstance + batchIndex;
if (isVisible()) {
uint batchIndex = atomicAdd(drawCommands[flw_batchID].instanceCount, 1);
uint globalIndex = drawCommands[flw_batchID].baseInstance + batchIndex;
objectIDs[globalIndex] = objectID;
objectIDs[globalIndex] = flw_objectID;
}
}