- Actually compile and run visibility read shader
- Clear the visbuffer and readbuffer each frame
- Track culling group page counts between frames
- Fix texture binding issues between visbuffer and depth pyramid
- Add early and late cull shaders
- Compile early and late shaders separately
- Move util shader list to a static field
This commit is contained in:
Jozufozu 2024-09-08 09:57:11 -05:00
parent 77d64aa5a2
commit 9009bfe730
9 changed files with 360 additions and 23 deletions

View file

@ -27,10 +27,13 @@ import net.minecraft.resources.ResourceLocation;
public class IndirectPrograms extends AtomicReferenceCounted { public class IndirectPrograms extends AtomicReferenceCounted {
private static final ResourceLocation CULL_SHADER_API_IMPL = Flywheel.rl("internal/indirect/cull_api_impl.glsl"); private static final ResourceLocation CULL_SHADER_API_IMPL = Flywheel.rl("internal/indirect/cull_api_impl.glsl");
private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl"); private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/early_cull.glsl");
private static final ResourceLocation PASS2_SHADER_MAIN = Flywheel.rl("internal/indirect/late_cull.glsl");
private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl"); private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl");
private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl"); private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl");
private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl"); private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl");
private static final ResourceLocation READ_VISIBILITY_SHADER_MAIN = Flywheel.rl("internal/indirect/read_visibility.glsl");
public static final List<ResourceLocation> UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN);
private static final Compile<InstanceType<?>> CULL = new Compile<>(); private static final Compile<InstanceType<?>> CULL = new Compile<>();
private static final Compile<ResourceLocation> UTIL = new Compile<>(); private static final Compile<ResourceLocation> UTIL = new Compile<>();
@ -43,16 +46,20 @@ public class IndirectPrograms extends AtomicReferenceCounted {
private final Map<PipelineProgramKey, GlProgram> pipeline; private final Map<PipelineProgramKey, GlProgram> pipeline;
private final Map<InstanceType<?>, GlProgram> culling; private final Map<InstanceType<?>, GlProgram> culling;
private final Map<InstanceType<?>, GlProgram> cullPassTwo;
private final GlProgram apply; private final GlProgram apply;
private final GlProgram scatter; private final GlProgram scatter;
private final GlProgram depthReduce; private final GlProgram depthReduce;
private final GlProgram readVisibility;
private IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, GlProgram apply, GlProgram scatter, GlProgram depthReduce) { private IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, Map<InstanceType<?>, GlProgram> cullPassTwo, GlProgram apply, GlProgram scatter, GlProgram depthReduce, GlProgram readVisibility) {
this.pipeline = pipeline; this.pipeline = pipeline;
this.culling = culling; this.culling = culling;
this.cullPassTwo = cullPassTwo;
this.apply = apply; this.apply = apply;
this.scatter = scatter; this.scatter = scatter;
this.depthReduce = depthReduce; this.depthReduce = depthReduce;
this.readVisibility = readVisibility;
} }
private static List<String> getExtensions(GlslVersion glslVersion) { private static List<String> getExtensions(GlslVersion glslVersion) {
@ -91,23 +98,27 @@ public class IndirectPrograms extends AtomicReferenceCounted {
IndirectPrograms newInstance = null; IndirectPrograms newInstance = null;
var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS); var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS);
var cullingCompiler = createCullingCompiler(sources); var pass1Compiler = createCullingCompiler(sources, CULL_SHADER_MAIN, "early_cull");
var pass2Compiler = createCullingCompiler(sources, PASS2_SHADER_MAIN, "late_cull");
var utilCompiler = createUtilCompiler(sources); var utilCompiler = createUtilCompiler(sources);
var cullingKeys = createCullingKeys();
try { try {
var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys); var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys);
var cullingResult = cullingCompiler.compileAndReportErrors(createCullingKeys()); var pass1Result = pass1Compiler.compileAndReportErrors(cullingKeys);
var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN)); var pass2Result = pass2Compiler.compileAndReportErrors(cullingKeys);
var utils = utilCompiler.compileAndReportErrors(UTIL_SHADERS);
if (pipelineResult != null && cullingResult != null && utils != null) { if (pipelineResult != null && pass1Result != null && pass2Result != null && utils != null) {
newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN)); newInstance = new IndirectPrograms(pipelineResult, pass1Result, pass2Result, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN), utils.get(READ_VISIBILITY_SHADER_MAIN));
} }
} catch (Throwable t) { } catch (Throwable t) {
FlwPrograms.LOGGER.error("Failed to compile indirect programs", t); FlwPrograms.LOGGER.error("Failed to compile indirect programs", t);
} }
pipelineCompiler.delete(); pipelineCompiler.delete();
cullingCompiler.delete(); pass1Compiler.delete();
pass2Compiler.delete();
utilCompiler.delete(); utilCompiler.delete();
setInstance(newInstance); setInstance(newInstance);
@ -116,19 +127,19 @@ public class IndirectPrograms extends AtomicReferenceCounted {
/** /**
* A compiler for cull shaders, parameterized by the instance type. * A compiler for cull shaders, parameterized by the instance type.
*/ */
private static CompilationHarness<InstanceType<?>> createCullingCompiler(ShaderSources sources) { private static CompilationHarness<InstanceType<?>> createCullingCompiler(ShaderSources sources, ResourceLocation main, String name) {
return CULL.program() return CULL.program()
.link(CULL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.COMPUTE) .link(CULL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.COMPUTE)
.nameMapper(instanceType -> "culling/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader())) .nameMapper(instanceType -> name + "/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader()))
.requireExtensions(COMPUTE_EXTENSIONS) .requireExtensions(COMPUTE_EXTENSIONS)
.define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE) .define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE)
.withResource(CULL_SHADER_API_IMPL) .withResource(CULL_SHADER_API_IMPL)
.withComponent(InstanceStructComponent::new) .withComponent(InstanceStructComponent::new)
.withResource(InstanceType::cullShader) .withResource(InstanceType::cullShader)
.withComponent(SsboInstanceComponent::new) .withComponent(SsboInstanceComponent::new)
.withResource(CULL_SHADER_MAIN)) .withResource(main))
.postLink((key, program) -> Uniforms.setUniformBlockBindings(program)) .postLink((key, program) -> Uniforms.setUniformBlockBindings(program))
.harness("culling", sources); .harness(name, sources);
} }
/** /**
@ -179,6 +190,10 @@ public class IndirectPrograms extends AtomicReferenceCounted {
return culling.get(instanceType); return culling.get(instanceType);
} }
/**
 * Looks up the second-pass cull program for an instance type.
 *
 * @param instanceType the key into the pass-two program map.
 * @return the program mapped to {@code instanceType}, or {@code null} if the map holds no entry for it.
 */
public GlProgram getCullPassTwoProgram(InstanceType<?> instanceType) {
    final GlProgram passTwoProgram = this.cullPassTwo.get(instanceType);
    return passTwoProgram;
}
public GlProgram getApplyProgram() { public GlProgram getApplyProgram() {
return apply; return apply;
} }
@ -191,6 +206,10 @@ public class IndirectPrograms extends AtomicReferenceCounted {
return depthReduce; return depthReduce;
} }
/**
 * @return the program that was handed to the constructor as {@code readVisibility}.
 */
public GlProgram getReadVisibilityProgram() {
    return this.readVisibility;
}
@Override @Override
protected void _delete() { protected void _delete() {
pipeline.values() pipeline.values()

View file

@ -5,6 +5,7 @@ import org.lwjgl.opengl.GL46;
import com.mojang.blaze3d.platform.GlStateManager; import com.mojang.blaze3d.platform.GlStateManager;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import dev.engine_room.flywheel.backend.gl.shader.GlProgram; import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
import dev.engine_room.flywheel.lib.math.MoreMath; import dev.engine_room.flywheel.lib.math.MoreMath;
import net.minecraft.client.Minecraft; import net.minecraft.client.Minecraft;
@ -44,6 +45,7 @@ public class DepthPyramid {
int depthBufferId = mainRenderTarget.getDepthTextureId(); int depthBufferId = mainRenderTarget.getDepthTextureId();
GlTextureUnit.T1.makeActive();
GlStateManager._bindTexture(depthBufferId); GlStateManager._bindTexture(depthBufferId);
GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT);
@ -57,7 +59,7 @@ public class DepthPyramid {
int mipHeight = mipSize(height, i); int mipHeight = mipSize(height, i);
int srcTexture = (i == 0) ? depthBufferId : pyramidTextureId; int srcTexture = (i == 0) ? depthBufferId : pyramidTextureId;
GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture); GlStateManager._bindTexture(srcTexture);
GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F);

View file

@ -48,7 +48,13 @@ public class IndirectCullingGroup<I extends Instance> {
private boolean needsDrawBarrier; private boolean needsDrawBarrier;
private boolean needsDrawSort; private boolean needsDrawSort;
private int instanceCountThisFrame; public int instanceCountThisFrame;
private int pagesLastFrame = 0;
private int pagesThisFrame = 0;
private int visibilityWriteOffsetPages = 0;
private int visibilityReadOffsetPages = 0;
IndirectCullingGroup(InstanceType<I> instanceType, IndirectPrograms programs) { IndirectCullingGroup(InstanceType<I> instanceType, IndirectPrograms programs) {
this.instanceType = instanceType; this.instanceType = instanceType;
@ -85,6 +91,17 @@ public class IndirectCullingGroup<I extends Instance> {
} }
} }
/**
 * Rolls this group's visibility bookkeeping over to a new frame.
 *
 * <p>The write offset used so far becomes the read offset, the supplied value becomes the new
 * write offset, and the page counters are shifted so that {@code pagesLastFrame} holds the
 * previous value of {@code pagesThisFrame}.
 *
 * @param visibilityWriteOffsetPages the write offset, in pages, this group should use from now on.
 * @return the current capacity of this group's object storage, which is also stored as
 *         {@code pagesThisFrame}.
 */
public int flipVisibilityOffsets(int visibilityWriteOffsetPages) {
    // Shift the page counters first; they do not depend on the offsets.
    final int currentPages = buffers.objectStorage.capacity();
    pagesLastFrame = pagesThisFrame;
    pagesThisFrame = currentPages;

    // What was written at the old write offset is what gets read next.
    final int previousWriteOffset = this.visibilityWriteOffsetPages;
    this.visibilityWriteOffsetPages = visibilityWriteOffsetPages;
    this.visibilityReadOffsetPages = previousWriteOffset;

    return currentPages;
}
public void upload(StagingBuffer stagingBuffer) { public void upload(StagingBuffer stagingBuffer) {
if (nothingToDo()) { if (nothingToDo()) {
return; return;
@ -118,6 +135,8 @@ public class IndirectCullingGroup<I extends Instance> {
Uniforms.bindAll(); Uniforms.bindAll();
cullProgram.bind(); cullProgram.bind();
cullProgram.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages);
buffers.bindForCompute(); buffers.bindForCompute();
glDispatchCompute(buffers.objectStorage.capacity(), 1, 1); glDispatchCompute(buffers.objectStorage.capacity(), 1, 1);
} }
@ -211,6 +230,8 @@ public class IndirectCullingGroup<I extends Instance> {
// Don't need to do this unless the program changes. // Don't need to do this unless the program changes.
drawProgram.bind(); drawProgram.bind();
baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw"); baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw");
drawProgram.setUInt("_flw_visibilityWriteOffsetInstances", visibilityWriteOffsetPages << ObjectStorage.LOG_2_PAGE_SIZE);
} }
glUniform1ui(baseDrawUniformLoc, multiDraw.start); glUniform1ui(baseDrawUniformLoc, multiDraw.start);

View file

@ -12,7 +12,7 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import org.lwjgl.opengl.GL46; import com.mojang.blaze3d.platform.GlStateManager;
import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.backend.Engine;
import dev.engine_room.flywheel.api.instance.Instance; import dev.engine_room.flywheel.api.instance.Instance;
@ -31,6 +31,7 @@ import dev.engine_room.flywheel.backend.engine.TextureBinder;
import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage;
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
import dev.engine_room.flywheel.backend.gl.GlStateTracker; import dev.engine_room.flywheel.backend.gl.GlStateTracker;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import dev.engine_room.flywheel.backend.gl.array.GlVertexArray; import dev.engine_room.flywheel.backend.gl.array.GlVertexArray;
import dev.engine_room.flywheel.backend.gl.buffer.GlBuffer; import dev.engine_room.flywheel.backend.gl.buffer.GlBuffer;
import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType; import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType;
@ -51,6 +52,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
private final DepthPyramid depthPyramid; private final DepthPyramid depthPyramid;
private final VisibilityBuffer visibilityBuffer; private final VisibilityBuffer visibilityBuffer;
private int totalPagesLastFrame = 0;
private boolean needsBarrier = false; private boolean needsBarrier = false;
public IndirectDrawManager(IndirectPrograms programs) { public IndirectDrawManager(IndirectPrograms programs) {
@ -65,7 +68,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
matrixBuffer = new MatrixBuffer(); matrixBuffer = new MatrixBuffer();
depthPyramid = new DepthPyramid(programs.getDepthReduceProgram()); depthPyramid = new DepthPyramid(programs.getDepthReduceProgram());
visibilityBuffer = new VisibilityBuffer(); visibilityBuffer = new VisibilityBuffer(programs.getReadVisibilityProgram());
} }
@Override @Override
@ -128,12 +131,20 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
group.flushInstancers(); group.flushInstancers();
} }
visibilityBuffer.read(totalPagesLastFrame);
visibilityBuffer.clear();
cullingGroups.values() cullingGroups.values()
.removeIf(IndirectCullingGroup::checkEmptyAndDelete); .removeIf(IndirectCullingGroup::checkEmptyAndDelete);
instancers.values() instancers.values()
.removeIf(instancer -> instancer.instanceCount() == 0); .removeIf(instancer -> instancer.instanceCount() == 0);
int totalPagesThisFrame = 0;
for (var group : cullingGroups.values()) {
totalPagesThisFrame += group.flipVisibilityOffsets(totalPagesThisFrame);
}
meshPool.flush(); meshPool.flush();
stagingBuffer.reclaim(); stagingBuffer.reclaim();
@ -157,8 +168,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
matrixBuffer.bind(); matrixBuffer.bind();
GL46.glActiveTexture(GL46.GL_TEXTURE0); GlTextureUnit.T0.makeActive();
GL46.glBindTexture(GL46.GL_TEXTURE_2D, depthPyramid.pyramidTextureId); GlStateManager._bindTexture(depthPyramid.pyramidTextureId);
for (var group : cullingGroups.values()) { for (var group : cullingGroups.values()) {
group.dispatchCull(); group.dispatchCull();
@ -174,6 +185,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
} }
needsBarrier = true; needsBarrier = true;
totalPagesLastFrame = totalPagesThisFrame;
} }
@Override @Override

View file

@ -3,18 +3,24 @@ package dev.engine_room.flywheel.backend.engine.indirect;
import org.lwjgl.opengl.GL30; import org.lwjgl.opengl.GL30;
import org.lwjgl.opengl.GL32; import org.lwjgl.opengl.GL32;
import org.lwjgl.opengl.GL46; import org.lwjgl.opengl.GL46;
import org.lwjgl.opengl.GL46C;
import com.mojang.blaze3d.platform.GlStateManager; import com.mojang.blaze3d.platform.GlStateManager;
import dev.engine_room.flywheel.backend.FlwBackend; import dev.engine_room.flywheel.backend.FlwBackend;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
import dev.engine_room.flywheel.lib.math.MoreMath;
import it.unimi.dsi.fastutil.ints.IntArraySet; import it.unimi.dsi.fastutil.ints.IntArraySet;
import it.unimi.dsi.fastutil.ints.IntSet; import it.unimi.dsi.fastutil.ints.IntSet;
import net.minecraft.client.Minecraft; import net.minecraft.client.Minecraft;
public class VisibilityBuffer { public class VisibilityBuffer {
private static final int READ_GROUP_SIZE = 16;
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
private final GlProgram readVisibilityProgram;
private final ResizableStorageBuffer visibilityBitset;
private final int textureId; private final int textureId;
private int lastWidth = -1; private int lastWidth = -1;
@ -22,7 +28,9 @@ public class VisibilityBuffer {
private final IntSet attached = new IntArraySet(); private final IntSet attached = new IntArraySet();
public VisibilityBuffer() { public VisibilityBuffer(GlProgram readVisibilityProgram) {
this.readVisibilityProgram = readVisibilityProgram;
visibilityBitset = new ResizableStorageBuffer();
textureId = GL32.glGenTextures(); textureId = GL32.glGenTextures();
GlStateManager._bindTexture(textureId); GlStateManager._bindTexture(textureId);
@ -32,9 +40,29 @@ public class VisibilityBuffer {
GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
} }
public void attach() { public void read(int pageCount) {
// TODO: clear the vis buffer. maybe do this when we read it? if (pageCount == 0) {
return;
}
visibilityBitset.ensureCapacity((long) pageCount << 2);
GL46.nglClearNamedBufferData(visibilityBitset.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0);
if (lastWidth == -1 || lastHeight == -1) {
return;
}
readVisibilityProgram.bind();
GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, 0, visibilityBitset.handle());
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(textureId);
GL46.glDispatchCompute(MoreMath.ceilingDiv(lastWidth, READ_GROUP_SIZE), MoreMath.ceilingDiv(lastHeight, READ_GROUP_SIZE), 1);
}
public void attach() {
var mainRenderTarget = Minecraft.getInstance() var mainRenderTarget = Minecraft.getInstance()
.getMainRenderTarget(); .getMainRenderTarget();
@ -66,6 +94,10 @@ public class VisibilityBuffer {
GL32.glDeleteTextures(textureId); GL32.glDeleteTextures(textureId);
} }
/**
 * Clears mip level 0 of the visibility texture.
 *
 * <p>A null data pointer is passed, which OpenGL treats as "fill with zeros".
 */
public void clear() {
    final int mipLevel = 0;
    final long nullData = 0L;
    GL46C.nglClearTexImage(textureId, mipLevel, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, nullData);
}
private void setupTexture(int width, int height) { private void setupTexture(int width, int height) {
if (lastWidth == width && lastHeight == height) { if (lastWidth == width && lastHeight == height) {
return; return;

View file

@ -6,3 +6,6 @@
#define _FLW_LIGHT_LUT_BUFFER_BINDING 5 #define _FLW_LIGHT_LUT_BUFFER_BINDING 5
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6 #define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6
#define _FLW_MATRIX_BUFFER_BINDING 7 #define _FLW_MATRIX_BUFFER_BINDING 7
#define _FLW_PASS_TWO_BUFFER_BINDING 8
#define _FLW_LATE_CULL_BUFFER_BINDING 9
#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 10

View file

@ -0,0 +1,123 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/uniforms/uniforms.glsl"
#include "flywheel:util/matrix.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
layout(local_size_x = 32) in;
uniform uint _flw_visibilityReadOffsetPages;
layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};
layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndicies[];
};
// High 6 bits for the number of instances in the page.
const uint _FLW_PAGE_COUNT_OFFSET = 26u;
// Bottom 26 bits for the model index.
const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF;
layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer {
uint _flw_pageFrameDescriptors[];
};
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict readonly buffer LastFrameVisibilityBuffer {
uint _flw_visibleFlag[];
};
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
ModelDescriptor _flw_models[];
};
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer {
Matrices _flw_matrices[];
};
// Counters accumulated by this shader's main() for instances it defers to the late cull pass.
// x is incremented once each time a deferred instance lands on a slot that is a multiple of 32.
// y and z are not written by this shader.
// threadCount is the running number of deferred instances; its pre-increment value is used as
// the slot written in the pass-two index buffer.
// NOTE(review): the x/y/z layout looks like an indirect compute dispatch command - confirm
// against the code that consumes this buffer.
struct _FlwLateCullDispatch {
uint x;
uint y;
uint z;
uint threadCount;
};
layout(std430, binding = _FLW_LATE_CULL_BUFFER_BINDING) restrict buffer LateCullBuffer {
_FlwLateCullDispatch _flw_lateCullDispatch;
};
// Sphere vs. frustum test against the pre-packed planes in flw_frustumPlanes.
// The x/y planes are evaluated together as one vec4 and the z planes as one vec2,
// so the whole test costs 6 fmas plus a few boolean reductions.
// See also:
// flywheel:uniform/flywheel.glsl
// dev.engine_room.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes
// org.joml.FrustumIntersection.testSphere
bool _flw_testSphere(vec3 center, float radius) {
    // Plane expressions evaluated at the sphere center, one lane per plane.
    vec4 xyPlaneValues = fma(flw_frustumPlanes.xyX, center.xxxx, fma(flw_frustumPlanes.xyY, center.yyyy, fma(flw_frustumPlanes.xyZ, center.zzzz, flw_frustumPlanes.xyW)));
    vec2 zPlaneValues = fma(flw_frustumPlanes.zX, center.xx, fma(flw_frustumPlanes.zY, center.yy, fma(flw_frustumPlanes.zZ, center.zz, flw_frustumPlanes.zW)));

    // A lane passes when its plane value is no less than -radius.
    bvec4 xyInside = greaterThanEqual(xyPlaneValues, vec4(-radius));
    bvec2 zInside = greaterThanEqual(zPlaneValues, vec2(-radius));

    if (!all(xyInside)) {
        return false;
    }
    return all(zInside);
}
// Frustum test for a single instance.
// Unpacks the model's bounding sphere, lets the instance type transform it, applies the
// model's pose matrix when the model has a non-zero matrix index, and then tests the
// resulting sphere with _flw_testSphere.
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
    BoundingSphere packedSphere = _flw_models[modelIndex].boundingSphere;
    uint poseIndex = _flw_models[modelIndex].matrixIndex;

    vec3 sphereCenter;
    float sphereRadius;
    _flw_unpackBoundingSphere(packedSphere, sphereCenter, sphereRadius);

    FlwInstance unpacked = _flw_unpackInstance(instanceIndex);
    flw_transformBoundingSphere(unpacked, sphereCenter, sphereRadius);

    // Matrix index 0 means no extra pose is applied.
    if (poseIndex > 0) {
        transformBoundingSphere(_flw_matrices[poseIndex].pose, sphereCenter, sphereRadius);
    }

    return _flw_testSphere(sphereCenter, sphereRadius);
}
// Early cull entry point.
// Each workgroup handles the page at gl_WorkGroupID.x and each invocation handles one slot of
// that page. Instances that pass the frustum test are split in two:
//  - flagged visible in last frame's visibility bits: appended to the draw index buffer now;
//  - otherwise: appended to the pass-two index buffer so the late cull pass can re-test them.
void main() {
    uint pageIndex = gl_WorkGroupID.x;

    if (pageIndex >= _flw_pageFrameDescriptors.length()) {
        return;
    }

    uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex];

    // The high bits of the descriptor hold how many slots of this page are in use.
    uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET;

    if (gl_LocalInvocationID.x >= pageInstanceCount) {
        return;
    }

    uint instanceIndex = gl_GlobalInvocationID.x;
    uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK;

    if (!_flw_isVisible(instanceIndex, modelIndex)) {
        return;
    }

    // One visibility word per page, one bit per slot within the page.
    uint pageVisibility = _flw_visibleFlag[_flw_visibilityReadOffsetPages + pageIndex];

    if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) {
        // This instance was visible last frame, it should be rendered early.
        uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
        uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex;
        _flw_drawIndices[targetIndex] = instanceIndex;
    } else {
        // Try again later to see if it's been disoccluded.
        uint targetIndex = atomicAdd(_flw_lateCullDispatch.threadCount, 1);
        // Must use the member name exactly as declared in PassTwoIndexBuffer
        // (spelled "_flw_passTwoIndicies"); the previous spelling here was undeclared.
        _flw_passTwoIndicies[targetIndex] = instanceIndex;

        if (targetIndex % 32u == 0u) {
            // This thread wrote an index that will be at the start of a new workgroup later
            atomicAdd(_flw_lateCullDispatch.x, 1);
        }
    }
}

View file

@ -0,0 +1,124 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/uniforms/uniforms.glsl"
#include "flywheel:util/matrix.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
layout(local_size_x = 32) in;
layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};
layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndicies[];
};
layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer {
uint _flw_pageFrameDescriptors[];
};
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
ModelDescriptor _flw_models[];
};
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer {
Matrices _flw_matrices[];
};
layout(binding = 0) uniform sampler2D _flw_depthPyramid;
// Projects a bounding sphere to a screen-space rectangle.
//   c, r     sphere center and radius. The caller in this file transforms them by flw_view first;
//            the early-out below implies the visible range is z < -znear.
//   znear    near plane distance.
//   P00, P11 scale factors applied to the x and y extents
//            (presumably projection matrix entries [0][0] and [1][1] - TODO confirm against the _flw_cullData upload).
//   aabb     written only when true is returned; holds the rectangle in uv space.
// Returns false, leaving aabb unwritten, when the sphere reaches past the near plane towards the camera.
bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) {
// Closest point on the sphere is between the camera and the near plane, don't even attempt to cull.
if (c.z + r > -znear) {
return false;
}
vec3 cr = c * r;
float czr2 = c.z * c.z - r * r;
// Extents along x.
float vx = sqrt(c.x * c.x + czr2);
float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x);
float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x);
// Extents along y, same construction as for x.
float vy = sqrt(c.y * c.y + czr2);
float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y);
float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y);
aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11);
// The swizzle exchanges the two y components before everything is scaled by -0.5 and biased by 0.5.
aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space
return true;
}
// Occlusion test for a single instance against the depth pyramid.
// The model's bounding sphere is unpacked, transformed by the instance, by the model's pose
// matrix (when its matrix index is non-zero) and by flw_view, then projected to a uv rectangle.
// If the projection is rejected the instance is reported visible without sampling the pyramid.
// Otherwise the four corner texels of the rectangle are fetched at a mip level chosen from the
// rectangle's size, and the sphere's depth is compared against their max or min,
// as selected by _flw_cullData.useMin.
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
    BoundingSphere packedSphere = _flw_models[modelIndex].boundingSphere;
    uint poseIndex = _flw_models[modelIndex].matrixIndex;

    vec3 center;
    float radius;
    _flw_unpackBoundingSphere(packedSphere, center, radius);

    FlwInstance unpacked = _flw_unpackInstance(instanceIndex);
    flw_transformBoundingSphere(unpacked, center, radius);

    if (poseIndex > 0) {
        transformBoundingSphere(_flw_matrices[poseIndex].pose, center, radius);
    }

    transformBoundingSphere(flw_view, center, radius);

    vec4 uvRect;
    bool projected = projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, uvRect);
    if (!projected) {
        // No usable rectangle, so don't attempt to cull.
        return true;
    }

    // Rectangle size measured in pyramid texels; it selects the mip level to sample.
    float rectWidth = (uvRect.z - uvRect.x) * _flw_cullData.pyramidWidth;
    float rectHeight = (uvRect.w - uvRect.y) * _flw_cullData.pyramidHeight;
    int level = clamp(int(ceil(log2(max(rectWidth, rectHeight)))), 0, _flw_cullData.pyramidLevels);

    ivec2 levelSize = textureSize(_flw_depthPyramid, level);
    ivec4 texelRect = ivec4(uvRect * vec4(levelSize.xyxy));

    // Fetch the four corners of the rectangle.
    float depth00 = texelFetch(_flw_depthPyramid, texelRect.xy, level).r;
    float depth10 = texelFetch(_flw_depthPyramid, texelRect.zy, level).r;
    float depth01 = texelFetch(_flw_depthPyramid, texelRect.xw, level).r;
    float depth11 = texelFetch(_flw_depthPyramid, texelRect.zw, level).r;

    float pyramidDepth = (_flw_cullData.useMin == 0)
        ? max(max(depth00, depth01), max(depth10, depth11))
        : min(min(depth00, depth01), min(depth10, depth11));

    float sphereDepth = 1. + _flw_cullData.znear / (center.z + radius);

    return sphereDepth <= pyramidDepth;
}
// Late cull entry point.
// Each invocation takes one entry of the pass-two index buffer, re-tests that instance with
// _flw_isVisible and, if it passes, appends it to its model's range of the draw index buffer.
void main() {
    // Bottom 26 bits of a page frame descriptor hold the model index.
    // Kept local because this file does not define _FLW_MODEL_INDEX_MASK itself.
    const uint modelIndexMask = 0x3FFFFFFu;

    // NOTE(review): this bounds check uses the buffer's length, not the number of entries
    // written this frame - confirm that trailing invocations cannot read stale indices.
    if (gl_GlobalInvocationID.x >= _flw_passTwoIndicies.length()) {
        return;
    }

    // The member name must match the PassTwoIndexBuffer declaration (spelled "_flw_passTwoIndicies").
    uint instanceIndex = _flw_passTwoIndicies[gl_GlobalInvocationID.x];

    // An instance's page is its index with the low 5 bits dropped.
    uint pageIndex = instanceIndex >> 5;

    uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex];

    uint modelIndex = packedModelIndexAndCount & modelIndexMask;

    if (_flw_isVisible(instanceIndex, modelIndex)) {
        uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
        uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex;
        // DrawIndexBuffer is the only writable index buffer declared in this file.
        _flw_drawIndices[targetIndex] = instanceIndex;
    }
}

View file

@ -23,7 +23,7 @@ uniform uint _flw_baseDraw;
// We read the visibility buffer for all culling groups into a single shared buffer. // We read the visibility buffer for all culling groups into a single shared buffer.
// This offset is used to know where each culling group starts. // This offset is used to know where each culling group starts.
uniform uint _flw_globalInstanceIdOffset = 0; uniform uint _flw_visibilityWriteOffsetInstances = 0;
flat out uvec3 _flw_packedMaterial; flat out uvec3 _flw_packedMaterial;
@ -58,5 +58,5 @@ void main() {
_flw_main(instance); _flw_main(instance);
// Add 1 because a 0 instance id means null. // Add 1 because a 0 instance id means null.
_flw_instanceID = _flw_globalInstanceIdOffset + instanceIndex + 1; _flw_instanceID = _flw_visibilityWriteOffsetInstances + instanceIndex + 1;
} }