Joining the occult

- Implement hi-z occlusion culling
- Generate depth pyramid just before issuing cull dispatches
- Currently use raw texel fetches but this may be causing loss
- Add _flw_cullData to frame uniforms
This commit is contained in:
Jozufozu 2024-09-04 11:05:04 -05:00
parent 81cb2340e7
commit 01a7936a05
8 changed files with 251 additions and 6 deletions

View File

@ -30,6 +30,7 @@ public class IndirectPrograms extends AtomicReferenceCounted {
private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl");
private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl");
private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl");
private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl");
private static final Compile<InstanceType<?>> CULL = new Compile<>();
private static final Compile<ResourceLocation> UTIL = new Compile<>();
@ -44,12 +45,14 @@ public class IndirectPrograms extends AtomicReferenceCounted {
private final Map<InstanceType<?>, GlProgram> culling;
private final GlProgram apply;
private final GlProgram scatter;
private final GlProgram depthReduce;
private IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, GlProgram apply, GlProgram scatter) {
/**
 * Stores the already-compiled programs that make up the indirect backend.
 *
 * @param pipeline    Programs keyed by {@code PipelineProgramKey}.
 * @param culling     Cull programs keyed by instance type.
 * @param apply       The program compiled from {@code APPLY_SHADER_MAIN}.
 * @param scatter     The program compiled from {@code SCATTER_SHADER_MAIN}.
 * @param depthReduce The program compiled from {@code DEPTH_REDUCE_SHADER_MAIN}.
 */
private IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, GlProgram apply, GlProgram scatter, GlProgram depthReduce) {
this.pipeline = pipeline;
this.culling = culling;
this.apply = apply;
this.scatter = scatter;
this.depthReduce = depthReduce;
}
private static List<String> getExtensions(GlslVersion glslVersion) {
@ -94,10 +97,10 @@ public class IndirectPrograms extends AtomicReferenceCounted {
try {
var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys);
var cullingResult = cullingCompiler.compileAndReportErrors(createCullingKeys());
var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN));
var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN));
if (pipelineResult != null && cullingResult != null && utils != null) {
newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN));
newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN));
}
} catch (Throwable t) {
FlwPrograms.LOGGER.error("Failed to compile indirect programs", t);
@ -184,6 +187,10 @@ public class IndirectPrograms extends AtomicReferenceCounted {
return scatter;
}
/**
 * @return The depth reduce program this instance was constructed with.
 */
public GlProgram getDepthReduceProgram() {
return depthReduce;
}
@Override
protected void _delete() {
pipeline.values()

View File

@ -0,0 +1,106 @@
package dev.engine_room.flywheel.backend.engine.indirect;
import org.lwjgl.opengl.GL32;
import org.lwjgl.opengl.GL46;
import com.mojang.blaze3d.platform.GlStateManager;
import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
import dev.engine_room.flywheel.lib.math.MoreMath;
import net.minecraft.client.Minecraft;
/**
 * Owns an {@code R32F} mip-mapped texture and fills it by repeatedly running the
 * depth reduce compute program, starting from the main render target's depth texture.
 *
 * <p>Level 0 of the pyramid is half the resolution of the render target, and every
 * following level is half the resolution of the one before it (never smaller than 1x1).
 */
public class DepthPyramid {
    private final GlProgram depthReduceProgram;

    /** GL name of the pyramid texture. Valid until {@link #delete()} is called. */
    public final int pyramidTextureId;

    // Render target size the mip chain was last allocated for. -1 forces the first allocation.
    private int lastWidth = -1;
    private int lastHeight = -1;

    /**
     * Creates the pyramid texture and configures its sampling state.
     * Storage is allocated lazily by the first call to {@link #generate()}.
     *
     * @param depthReduceProgram The compute program used to reduce one level into the next.
     */
    public DepthPyramid(GlProgram depthReduceProgram) {
        this.depthReduceProgram = depthReduceProgram;

        pyramidTextureId = GL32.glGenTextures();

        GlStateManager._bindTexture(pyramidTextureId);
        // A mipmap-aware minification filter is required for textureLod() to select anything
        // other than the base level. With plain GL_NEAREST the explicit lod is ignored and
        // every lookup reads level 0.
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST_MIPMAP_NEAREST);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
    }

    /**
     * Rebuilds every level of the pyramid from the main render target's current depth texture.
     *
     * <p>Leaves texture unit 1 active with a source texture bound to it, image unit 0 bound to
     * the last pyramid level, and the depth reduce program bound.
     */
    public void generate() {
        var mainRenderTarget = Minecraft.getInstance()
                .getMainRenderTarget();

        int width = mainRenderTarget.width;
        int height = mainRenderTarget.height;

        int mipLevels = getImageMipLevels(width, height);

        createPyramidMips(mipLevels, width, height);

        int depthBufferId = mainRenderTarget.getDepthTextureId();

        GlStateManager._bindTexture(depthBufferId);

        GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT);

        // NOTE(review): the raw GL46 calls below bypass GlStateManager; confirm that its cached
        // active-unit/binding state cannot go stale for later vanilla rendering.
        GL46.glActiveTexture(GL32.GL_TEXTURE1);

        depthReduceProgram.bind();

        for (int i = 0; i < mipLevels; i++) {
            // Size of the level being written. This must match what createPyramidMips allocated,
            // otherwise the dispatch covers texels that do not exist in the destination.
            int mipWidth = mipSize(width, i);
            int mipHeight = mipSize(height, i);

            // The first pass reads the real depth buffer, every later pass reads the level
            // written by the pass before it.
            int srcTexture = (i == 0) ? depthBufferId : pyramidTextureId;
            GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture);

            GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F);

            depthReduceProgram.setUVec2("imageSize", mipWidth, mipHeight);
            depthReduceProgram.setInt("lod", Math.max(0, i - 1));

            GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1);

            // Make the image stores of this pass visible to the texel fetches of the next one.
            GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT);
        }
    }

    /** Deletes the pyramid texture. */
    public void delete() {
        GL32.glDeleteTextures(pyramidTextureId);
    }

    /**
     * (Re)allocates storage for every pyramid level if the render target size changed
     * since the last call. Does nothing otherwise.
     */
    private void createPyramidMips(int mipLevels, int width, int height) {
        if (lastWidth == width && lastHeight == height) {
            return;
        }

        lastWidth = width;
        lastHeight = height;

        GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId);

        for (int i = 0; i < mipLevels; i++) {
            GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipSize(width, i), mipSize(height, i), 0, GL32.GL_RED, GL32.GL_FLOAT, 0);
        }

        // Restrict the mip range to the levels that were actually allocated so the texture
        // is mipmap-complete with the mipmap minification filter set in the constructor.
        GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAX_LEVEL, mipLevels - 1);
    }

    /**
     * @param fullSize One dimension of the render target, in pixels.
     * @param level    The pyramid level.
     * @return The size of that dimension at the given pyramid level, at least 1.
     */
    private static int mipSize(int fullSize, int level) {
        return Math.max(1, fullSize >> (level + 1));
    }

    /**
     * @return The number of pyramid levels to build for a render target of the given size:
     * one, plus one for every time both dimensions can be halved while both are still above 2.
     */
    private static int getImageMipLevels(int width, int height) {
        int result = 1;

        while (width > 2 && height > 2) {
            result++;
            width /= 2;
            height /= 2;
        }

        return result;
    }
}

View File

@ -12,6 +12,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.lwjgl.opengl.GL46;
import dev.engine_room.flywheel.api.backend.Engine;
import dev.engine_room.flywheel.api.instance.Instance;
import dev.engine_room.flywheel.api.instance.InstanceType;
@ -46,6 +48,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
private final LightBuffers lightBuffers;
private final MatrixBuffer matrixBuffer;
private final DepthPyramid depthPyramid;
private boolean needsBarrier = false;
public IndirectDrawManager(IndirectPrograms programs) {
@ -58,6 +62,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
meshPool.bind(vertexArray);
lightBuffers = new LightBuffers();
matrixBuffer = new MatrixBuffer();
depthPyramid = new DepthPyramid(programs.getDepthReduceProgram());
}
@Override
@ -136,6 +142,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
stagingBuffer.flush();
depthPyramid.generate();
// We could probably save some driver calls here when there are
// actually zero instances, but that feels like a very rare case
@ -143,6 +151,9 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
matrixBuffer.bind();
GL46.glActiveTexture(GL46.GL_TEXTURE0);
GL46.glBindTexture(GL46.GL_TEXTURE_2D, depthPyramid.pyramidTextureId);
for (var group : cullingGroups.values()) {
group.dispatchCull();
}

View File

@ -17,7 +17,7 @@ import net.minecraft.world.level.Level;
import net.minecraft.world.phys.Vec3;
public final class FrameUniforms extends UniformWriter {
private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 10;
private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 16;
static final UniformBuffer BUFFER = new UniformBuffer(Uniforms.FRAME_INDEX, SIZE);
private static final Matrix4f VIEW = new Matrix4f();
@ -112,6 +112,8 @@ public final class FrameUniforms extends UniformWriter {
ptr = writeInt(ptr, debugMode);
ptr = writeCullData(ptr);
firstWrite = false;
BUFFER.markDirty();
}
@ -179,6 +181,18 @@ public final class FrameUniforms extends UniformWriter {
return writeInFluidAndBlock(ptr, level, blockPos, cameraPos);
}
/**
 * Writes the occlusion culling parameters, in order: near plane, far plane, the
 * {@code m00} and {@code m11} entries of {@code PROJECTION}, the half-resolution size
 * of the main render target, and the {@code useMin} flag.
 *
 * @param ptr The address to start writing at.
 * @return The address immediately after the last value written.
 */
private static long writeCullData(long ptr) {
    var minecraft = Minecraft.getInstance();
    var renderTarget = minecraft.getMainRenderTarget();

    float zNear = 0.05F;
    float zFar = minecraft.gameRenderer.getDepthFar();
    float pyramidWidth = renderTarget.width >> 1;
    float pyramidHeight = renderTarget.height >> 1;

    ptr = writeFloat(ptr, zNear);
    ptr = writeFloat(ptr, zFar);
    ptr = writeFloat(ptr, PROJECTION.m00()); // P00
    ptr = writeFloat(ptr, PROJECTION.m11()); // P11
    ptr = writeFloat(ptr, pyramidWidth);
    ptr = writeFloat(ptr, pyramidHeight);

    // useMin
    return writeInt(ptr, 0);
}
/**
* Writes the frustum planes of the given projection matrix to the given buffer.<p>
* Uses a different format that is friendly towards an optimized instruction-parallel

View File

@ -11,6 +11,7 @@ import static org.lwjgl.opengl.GL20.glUniform4f;
import static org.lwjgl.opengl.GL20.glUniformMatrix3fv;
import static org.lwjgl.opengl.GL20.glUniformMatrix4fv;
import static org.lwjgl.opengl.GL30.glUniform1ui;
import static org.lwjgl.opengl.GL30.glUniform2ui;
import static org.lwjgl.opengl.GL31.GL_INVALID_INDEX;
import static org.lwjgl.opengl.GL31.glGetUniformBlockIndex;
import static org.lwjgl.opengl.GL31.glUniformBlockBinding;
@ -118,6 +119,16 @@ public class GlProgram extends GlObject {
glUniform1ui(uniform, value);
}
/**
 * Uploads a two-component unsigned integer vector to the named uniform.
 * Does nothing when the uniform location lookup yields a negative value.
 *
 * @param name The name of the uniform.
 * @param x    The first component.
 * @param y    The second component.
 */
public void setUVec2(String name, int x, int y) {
    int location = getUniformLocation(name);

    if (location >= 0) {
        glUniform2ui(location, x, y);
    }
}
public void setInt(String glslName, int value) {
int uniform = getUniformLocation(glslName);

View File

@ -23,10 +23,12 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer
ModelDescriptor _flw_models[];
};
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer {
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer {
Matrices _flw_matrices[];
};
layout(binding = 0) uniform sampler2D _flw_depthPyramid;
// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
// Only uses 6 fmas and some boolean ops.
// See also:
@ -40,6 +42,28 @@ bool _flw_testSphere(vec3 center, float radius) {
return all(xyInside) && all(zInside);
}
// Projects a view space sphere to a screen space bounding box in uv space.
//
// View space is expected to look down -Z, matching the all-negative scale applied below.
//
// c:      Sphere center in view space.
// r:      Sphere radius.
// znear:  Distance to the near plane (positive).
// P00:    Element (0, 0) of the projection matrix.
// P11:    Element (1, 1) of the projection matrix.
// aabb:   On success, (minU, minV, maxU, maxV) of the projected bounds.
//
// Returns false, leaving aabb unwritten, when the sphere reaches the near plane and so has
// no finite projection. Callers must treat that case as "cannot be occlusion culled".
bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) {
    // The point of the sphere closest to the camera sits at c.z + r. If that is not strictly
    // beyond the near plane at -znear, the math below would take the sqrt of a negative number.
    if (c.z + r > -znear) {
        return false;
    }

    vec3 cr = c * r;
    float czr2 = c.z * c.z - r * r;

    float vx = sqrt(c.x * c.x + czr2);
    float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x);
    float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x);

    float vy = sqrt(c.y * c.y + czr2);
    float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y);
    float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y);

    aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11);
    // clip space -> uv space
    // Scaling by a negative factor reverses ordering on both axes, so the min and max of
    // both x and y have to trade places to keep aabb.xy <= aabb.zw.
    aabb = aabb.zwxy * vec4(-0.5f) + vec4(0.5f);

    return true;
}
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
uint matrixIndex = _flw_models[modelIndex].matrixIndex;
BoundingSphere sphere = _flw_models[modelIndex].boundingSphere;
@ -56,7 +80,38 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
}
return _flw_testSphere(center, radius);
bool isVisible = _flw_testSphere(center, radius);
if (isVisible) {
transformBoundingSphere(flw_view, center, radius);
vec4 aabb;
if (projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb))
{
float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth;
float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight;
float level = floor(log2(max(width, height)));
float depth01 = textureLod(_flw_depthPyramid, aabb.xw, level).r;
float depth11 = textureLod(_flw_depthPyramid, aabb.zw, level).r;
float depth10 = textureLod(_flw_depthPyramid, aabb.zy, level).r;
float depth00 = textureLod(_flw_depthPyramid, aabb.xy, level).r;
float depth;
if (_flw_cullData.useMin == 0) {
depth = max(max(depth00, depth01), max(depth10, depth11));
} else {
depth = min(min(depth00, depth01), min(depth10, depth11));
}
float depthSphere = 1. + _flw_cullData.znear / (center.z + radius);
isVisible = isVisible && depthSphere <= depth;
}
}
return isVisible;
}
void main() {

View File

@ -0,0 +1,29 @@
layout(local_size_x = 8, local_size_y = 8) in;

// Destination level being written.
layout(binding = 0, r32f) uniform writeonly image2D outImage;
// Source texture, read at mip level `lod`.
layout(binding = 1) uniform sampler2D inImage;

// Extent of the region to process, supplied by the host. Invocations outside of it do nothing.
uniform uvec2 imageSize;
// Mip level of inImage to read from.
uniform int lod;
// 0 reduces each 2x2 footprint with max, anything else reduces with min.
uniform int useMin = 0;

void main() {
    uvec2 pos = gl_GlobalInvocationID.xy;

    // The dispatch is rounded up to whole 8x8 work groups, so some invocations land outside the
    // requested region. Skip them rather than fetching texels that may not exist.
    if (any(greaterThanEqual(pos, imageSize))) {
        return;
    }

    // Each output texel covers a 2x2 block of source texels.
    ivec2 samplePos = ivec2(pos) * 2;

    float depth01 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 1)).r;
    float depth11 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 1)).r;
    float depth10 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 0)).r;
    float depth00 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 0)).r;

    float depth;
    if (useMin == 0) {
        depth = max(max(depth00, depth01), max(depth10, depth11));
    } else {
        depth = min(min(depth00, depth01), min(depth10, depth11));
    }

    imageStore(outImage, ivec2(pos), vec4(depth));
}

View File

@ -9,6 +9,16 @@ struct FrustumPlanes {
vec2 zW; // <nz.w, pz.w>
};
// Parameters for hi-z occlusion culling, written once per frame by the host.
// The host writes these fields back to back in exactly this order; keep both sides in sync.
struct _FlwCullData {
// Distance to the near plane.
float znear;
// Distance to the far plane.
float zfar;
// Element (0, 0) of the projection matrix.
float P00;
// Element (1, 1) of the projection matrix.
float P11;
// Width of the main render target divided by two.
float pyramidWidth;
// Height of the main render target divided by two.
float pyramidHeight;
// 0 combines depth samples with max, anything else combines them with min.
uint useMin;
};
layout(std140) uniform _FlwFrameUniforms {
FrustumPlanes flw_frustumPlanes;
@ -47,6 +57,8 @@ layout(std140) uniform _FlwFrameUniforms {
uint flw_cameraInBlock;
uint _flw_debugMode;
_FlwCullData _flw_cullData;
};
#define flw_renderOrigin (_flw_renderOrigin.xyz)