diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index df1696657..bb140cf56 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -27,10 +27,13 @@ import net.minecraft.resources.ResourceLocation; public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation CULL_SHADER_API_IMPL = Flywheel.rl("internal/indirect/cull_api_impl.glsl"); - private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl"); + private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/early_cull.glsl"); + private static final ResourceLocation PASS2_SHADER_MAIN = Flywheel.rl("internal/indirect/late_cull.glsl"); private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl"); private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl"); private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl"); + private static final ResourceLocation READ_VISIBILITY_SHADER_MAIN = Flywheel.rl("internal/indirect/read_visibility.glsl"); + public static final List UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN); private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); @@ -43,16 +46,20 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final Map pipeline; private final Map, GlProgram> culling; + private final Map, GlProgram> cullPassTwo; private final GlProgram apply; private final GlProgram scatter; private final GlProgram depthReduce; + private final GlProgram readVisibility; - private IndirectPrograms(Map pipeline, Map, GlProgram> culling, GlProgram apply, GlProgram scatter, GlProgram depthReduce) { + private IndirectPrograms(Map pipeline, Map, GlProgram> culling, Map, GlProgram> cullPassTwo, GlProgram apply, GlProgram scatter, GlProgram depthReduce, GlProgram readVisibility) { this.pipeline = pipeline; this.culling = culling; + this.cullPassTwo = cullPassTwo; this.apply = apply; this.scatter = scatter; this.depthReduce = depthReduce; + this.readVisibility = readVisibility; } private static List getExtensions(GlslVersion glslVersion) { @@ -91,23 +98,27 @@ public class IndirectPrograms extends AtomicReferenceCounted { IndirectPrograms newInstance = null; var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS); - var cullingCompiler = createCullingCompiler(sources); + var pass1Compiler = createCullingCompiler(sources, CULL_SHADER_MAIN, "early_cull"); + var pass2Compiler = createCullingCompiler(sources, PASS2_SHADER_MAIN, "late_cull"); var utilCompiler = createUtilCompiler(sources); + var cullingKeys = createCullingKeys(); try { var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys); - var cullingResult = cullingCompiler.compileAndReportErrors(createCullingKeys()); - var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN)); + var pass1Result = pass1Compiler.compileAndReportErrors(cullingKeys); + var pass2Result = pass2Compiler.compileAndReportErrors(cullingKeys); + var utils = utilCompiler.compileAndReportErrors(UTIL_SHADERS); - if (pipelineResult != null && cullingResult != null && utils != null) { - newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN)); + if (pipelineResult != null && pass1Result != null && pass2Result != null && utils != null) { + newInstance = new IndirectPrograms(pipelineResult, pass1Result, pass2Result, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN), utils.get(READ_VISIBILITY_SHADER_MAIN)); } } catch (Throwable t) { FlwPrograms.LOGGER.error("Failed to compile indirect programs", t); } pipelineCompiler.delete(); - cullingCompiler.delete(); + pass1Compiler.delete(); + pass2Compiler.delete(); utilCompiler.delete(); setInstance(newInstance); @@ -116,19 +127,19 @@ public class IndirectPrograms extends AtomicReferenceCounted { /** * A compiler for cull shaders, parameterized by the instance type. */ - private static CompilationHarness> createCullingCompiler(ShaderSources sources) { + private static CompilationHarness> createCullingCompiler(ShaderSources sources, ResourceLocation main, String name) { return CULL.program() .link(CULL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.COMPUTE) - .nameMapper(instanceType -> "culling/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader())) + .nameMapper(instanceType -> name + "/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader())) .requireExtensions(COMPUTE_EXTENSIONS) .define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE) .withResource(CULL_SHADER_API_IMPL) .withComponent(InstanceStructComponent::new) .withResource(InstanceType::cullShader) .withComponent(SsboInstanceComponent::new) - .withResource(CULL_SHADER_MAIN)) + .withResource(main)) .postLink((key, program) -> Uniforms.setUniformBlockBindings(program)) - .harness("culling", sources); + .harness(name, sources); } /** @@ -179,6 +190,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { return culling.get(instanceType); } + public GlProgram getCullPassTwoProgram(InstanceType instanceType) { + return cullPassTwo.get(instanceType); + } + public GlProgram getApplyProgram() { return apply; } @@ -191,6 +206,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { return depthReduce; } + public GlProgram getReadVisibilityProgram() { + return readVisibility; + } + @Override protected void _delete() { pipeline.values() diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java index cb17f5276..49afe0776 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java @@ -5,6 +5,7 @@ import org.lwjgl.opengl.GL46; import com.mojang.blaze3d.platform.GlStateManager; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; import dev.engine_room.flywheel.lib.math.MoreMath; import net.minecraft.client.Minecraft; @@ -44,6 +45,7 @@ public class DepthPyramid { int depthBufferId = mainRenderTarget.getDepthTextureId(); + GlTextureUnit.T1.makeActive(); GlStateManager._bindTexture(depthBufferId); GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT); @@ -57,7 +59,7 @@ public class DepthPyramid { int mipHeight = mipSize(height, i); int srcTexture = (i == 0) ? depthBufferId : pyramidTextureId; - GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture); + GlStateManager._bindTexture(srcTexture); GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 961d5b2ce..45b764404 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -48,7 +48,13 @@ public class IndirectCullingGroup { private boolean needsDrawBarrier; private boolean needsDrawSort; - private int instanceCountThisFrame; + public int instanceCountThisFrame; + + private int pagesLastFrame = 0; + private int pagesThisFrame = 0; + + private int visibilityWriteOffsetPages = 0; + private int visibilityReadOffsetPages = 0; IndirectCullingGroup(InstanceType instanceType, IndirectPrograms programs) { this.instanceType = instanceType; @@ -85,6 +91,17 @@ public class IndirectCullingGroup { } } + public int flipVisibilityOffsets(int visibilityWriteOffsetPages) { + this.visibilityReadOffsetPages = this.visibilityWriteOffsetPages; + this.visibilityWriteOffsetPages = visibilityWriteOffsetPages; + + pagesLastFrame = pagesThisFrame; + + pagesThisFrame = buffers.objectStorage.capacity(); + + return pagesThisFrame; + } + public void upload(StagingBuffer stagingBuffer) { if (nothingToDo()) { return; @@ -118,6 +135,8 @@ public class IndirectCullingGroup { Uniforms.bindAll(); cullProgram.bind(); + cullProgram.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages); + buffers.bindForCompute(); glDispatchCompute(buffers.objectStorage.capacity(), 1, 1); } @@ -211,6 +230,8 @@ public class IndirectCullingGroup { // Don't need to do this unless the program changes. drawProgram.bind(); baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw"); + + drawProgram.setUInt("_flw_visibilityWriteOffsetInstances", visibilityWriteOffsetPages << ObjectStorage.LOG_2_PAGE_SIZE); } glUniform1ui(baseDrawUniformLoc, multiDraw.start); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 7f71d72d0..b0083f241 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -12,7 +12,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.lwjgl.opengl.GL46; +import com.mojang.blaze3d.platform.GlStateManager; import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.instance.Instance; @@ -31,6 +31,7 @@ import dev.engine_room.flywheel.backend.engine.TextureBinder; import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlStateTracker; +import dev.engine_room.flywheel.backend.gl.GlTextureUnit; import dev.engine_room.flywheel.backend.gl.array.GlVertexArray; import dev.engine_room.flywheel.backend.gl.buffer.GlBuffer; import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType; @@ -51,6 +52,8 @@ public class IndirectDrawManager extends DrawManager> { private final DepthPyramid depthPyramid; private final VisibilityBuffer visibilityBuffer; + private int totalPagesLastFrame = 0; + private boolean needsBarrier = false; public IndirectDrawManager(IndirectPrograms programs) { @@ -65,7 +68,7 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer = new MatrixBuffer(); depthPyramid = new DepthPyramid(programs.getDepthReduceProgram()); - visibilityBuffer = new VisibilityBuffer(); + visibilityBuffer = new VisibilityBuffer(programs.getReadVisibilityProgram()); } @Override @@ -128,12 +131,20 @@ public class IndirectDrawManager extends DrawManager> { group.flushInstancers(); } + visibilityBuffer.read(totalPagesLastFrame); + visibilityBuffer.clear(); + cullingGroups.values() .removeIf(IndirectCullingGroup::checkEmptyAndDelete); instancers.values() .removeIf(instancer -> instancer.instanceCount() == 0); + int totalPagesThisFrame = 0; + for (var group : cullingGroups.values()) { + totalPagesThisFrame += group.flipVisibilityOffsets(totalPagesThisFrame); + } + meshPool.flush(); stagingBuffer.reclaim(); @@ -157,8 +168,8 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); - GL46.glActiveTexture(GL46.GL_TEXTURE0); - GL46.glBindTexture(GL46.GL_TEXTURE_2D, depthPyramid.pyramidTextureId); + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(depthPyramid.pyramidTextureId); for (var group : cullingGroups.values()) { group.dispatchCull(); @@ -174,6 +185,8 @@ public class IndirectDrawManager extends DrawManager> { } needsBarrier = true; + + totalPagesLastFrame = totalPagesThisFrame; } @Override diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java index 025d7bf1f..b0a600792 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/VisibilityBuffer.java @@ -3,18 +3,24 @@ package dev.engine_room.flywheel.backend.engine.indirect; import org.lwjgl.opengl.GL30; import org.lwjgl.opengl.GL32; import org.lwjgl.opengl.GL46; +import org.lwjgl.opengl.GL46C; import com.mojang.blaze3d.platform.GlStateManager; import dev.engine_room.flywheel.backend.FlwBackend; import dev.engine_room.flywheel.backend.gl.GlTextureUnit; +import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.math.MoreMath; import it.unimi.dsi.fastutil.ints.IntArraySet; import it.unimi.dsi.fastutil.ints.IntSet; import net.minecraft.client.Minecraft; public class VisibilityBuffer { + private static final int READ_GROUP_SIZE = 16; private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; + private final GlProgram readVisibilityProgram; + private final ResizableStorageBuffer visibilityBitset; private final int textureId; private int lastWidth = -1; @@ -22,7 +28,9 @@ public class VisibilityBuffer { private final IntSet attached = new IntArraySet(); - public VisibilityBuffer() { + public VisibilityBuffer(GlProgram readVisibilityProgram) { + this.readVisibilityProgram = readVisibilityProgram; + visibilityBitset = new ResizableStorageBuffer(); textureId = GL32.glGenTextures(); GlStateManager._bindTexture(textureId); @@ -32,9 +40,29 @@ public class VisibilityBuffer { GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE); } - public void attach() { - // TODO: clear the vis buffer. maybe do this when we read it? + public void read(int pageCount) { + if (pageCount == 0) { + return; + } + visibilityBitset.ensureCapacity((long) pageCount << 2); + + GL46.nglClearNamedBufferData(visibilityBitset.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0); + + if (lastWidth == -1 || lastHeight == -1) { + return; + } + + readVisibilityProgram.bind(); + GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, 0, visibilityBitset.handle()); + + GlTextureUnit.T0.makeActive(); + GlStateManager._bindTexture(textureId); + + GL46.glDispatchCompute(MoreMath.ceilingDiv(lastWidth, READ_GROUP_SIZE), MoreMath.ceilingDiv(lastHeight, READ_GROUP_SIZE), 1); + } + + public void attach() { var mainRenderTarget = Minecraft.getInstance() .getMainRenderTarget(); @@ -66,6 +94,10 @@ public class VisibilityBuffer { GL32.glDeleteTextures(textureId); } + public void clear() { + GL46C.nglClearTexImage(textureId, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0); + } + private void setupTexture(int width, int height) { if (lastWidth == width && lastHeight == height) { return; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl index 449836630..7e818f13c 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl @@ -6,3 +6,6 @@ #define _FLW_LIGHT_LUT_BUFFER_BINDING 5 #define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6 #define _FLW_MATRIX_BUFFER_BINDING 7 +#define _FLW_PASS_TWO_BUFFER_BINDING 8 +#define _FLW_LATE_CULL_BUFFER_BINDING 9 +#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 10 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl new file mode 100644 index 000000000..404c29ca9 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -0,0 +1,123 @@ +#include "flywheel:internal/indirect/buffer_bindings.glsl" +#include "flywheel:internal/indirect/model_descriptor.glsl" +#include "flywheel:internal/uniforms/uniforms.glsl" +#include "flywheel:util/matrix.glsl" +#include "flywheel:internal/indirect/matrices.glsl" + +layout(local_size_x = 32) in; + +uniform uint _flw_visibilityReadOffsetPages; + +layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { + uint _flw_drawIndices[]; +}; + +layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer { + uint _flw_passTwoIndicies[]; +}; + +// High 6 bits for the number of instances in the page. +const uint _FLW_PAGE_COUNT_OFFSET = 26u; +// Bottom 26 bits for the model index. +const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; + +layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { + uint _flw_pageFrameDescriptors[]; +}; + +layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict readonly buffer LastFrameVisibilityBuffer { + uint _flw_visibleFlag[]; +}; + +layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { + ModelDescriptor _flw_models[]; +}; + +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; + +struct _FlwLateCullDispatch { + uint x; + uint y; + uint z; + uint threadCount; +}; + +layout(std430, binding = _FLW_LATE_CULL_BUFFER_BINDING) restrict buffer LateCullBuffer { + _FlwLateCullDispatch _flw_lateCullDispatch; +}; + +// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing. +// Only uses 6 fmas and some boolean ops. +// See also: +// flywheel:uniform/flywheel.glsl +// dev.engine_room.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes +// org.joml.FrustumIntersection.testSphere +bool _flw_testSphere(vec3 center, float radius) { + bvec4 xyInside = greaterThanEqual(fma(flw_frustumPlanes.xyX, center.xxxx, fma(flw_frustumPlanes.xyY, center.yyyy, fma(flw_frustumPlanes.xyZ, center.zzzz, flw_frustumPlanes.xyW))), -radius.xxxx); + bvec2 zInside = greaterThanEqual(fma(flw_frustumPlanes.zX, center.xx, fma(flw_frustumPlanes.zY, center.yy, fma(flw_frustumPlanes.zZ, center.zz, flw_frustumPlanes.zW))), -radius.xx); + + return all(xyInside) && all(zInside); +} + +bool _flw_isVisible(uint instanceIndex, uint modelIndex) { + uint matrixIndex = _flw_models[modelIndex].matrixIndex; + BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; + + vec3 center; + float radius; + _flw_unpackBoundingSphere(sphere, center, radius); + + FlwInstance instance = _flw_unpackInstance(instanceIndex); + + flw_transformBoundingSphere(instance, center, radius); + + if (matrixIndex > 0) { + transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); + } + + return _flw_testSphere(center, radius); +} + +void main() { + uint pageIndex = gl_WorkGroupID.x; + + if (pageIndex >= _flw_pageFrameDescriptors.length()) { + return; + } + + uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex]; + + uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET; + + if (gl_LocalInvocationID.x >= pageInstanceCount) { + return; + } + + uint instanceIndex = gl_GlobalInvocationID.x; + + uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK; + + if (!_flw_isVisible(instanceIndex, modelIndex)) { + return; + } + + uint pageVisibility = _flw_visibleFlag[_flw_visibilityReadOffsetPages + pageIndex]; + + if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) { + // This instance was visibile last frame, it should be rendered early. + uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); + uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex; + _flw_drawIndices[targetIndex] = instanceIndex; + } else { + // Try again later to see if it's been disoccluded. + uint targetIndex = atomicAdd(_flw_lateCullDispatch.threadCount, 1); + _flw_passTwoIndices[targetIndex] = instanceIndex; + + if (targetIndex % 32u == 0u) { + // This thread wrote an index that will be at the start of a new workgroup later + atomicAdd(_flw_lateCullDispatch.x, 1); + } + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl new file mode 100644 index 000000000..4d32b81d1 --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl @@ -0,0 +1,124 @@ +#include "flywheel:internal/indirect/buffer_bindings.glsl" +#include "flywheel:internal/indirect/model_descriptor.glsl" +#include "flywheel:internal/uniforms/uniforms.glsl" +#include "flywheel:util/matrix.glsl" +#include "flywheel:internal/indirect/matrices.glsl" + +layout(local_size_x = 32) in; + +layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { + uint _flw_drawIndices[]; +}; + +layout(std430, binding = _FLW_PASS_TWO_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer { + uint _flw_passTwoIndicies[]; +}; + +layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { + uint _flw_pageFrameDescriptors[]; +}; + +layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer { + ModelDescriptor _flw_models[]; +}; + +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; + +layout(binding = 0) uniform sampler2D _flw_depthPyramid; + +bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) { + // Closest point on the sphere is between the camera and the near plane, don't even attempt to cull. + if (c.z + r > -znear) { + return false; + } + + vec3 cr = c * r; + float czr2 = c.z * c.z - r * r; + + float vx = sqrt(c.x * c.x + czr2); + float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x); + float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x); + + float vy = sqrt(c.y * c.y + czr2); + float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y); + float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y); + + aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11); + aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space + + return true; +} + +bool _flw_isVisible(uint instanceIndex, uint modelIndex) { + uint matrixIndex = _flw_models[modelIndex].matrixIndex; + BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; + + vec3 center; + float radius; + _flw_unpackBoundingSphere(sphere, center, radius); + + FlwInstance instance = _flw_unpackInstance(instanceIndex); + + flw_transformBoundingSphere(instance, center, radius); + + if (matrixIndex > 0) { + transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); + } + + transformBoundingSphere(flw_view, center, radius); + + vec4 aabb; + if (projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb)) + { + float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth; + float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight; + + int level = clamp(int(ceil(log2(max(width, height)))), 0, _flw_cullData.pyramidLevels); + + ivec2 levelSize = textureSize(_flw_depthPyramid, level); + + ivec4 levelSizePair = ivec4(levelSize, levelSize); + + ivec4 bounds = ivec4(aabb * vec4(levelSizePair)); + + float depth01 = texelFetch(_flw_depthPyramid, bounds.xw, level).r; + float depth11 = texelFetch(_flw_depthPyramid, bounds.zw, level).r; + float depth10 = texelFetch(_flw_depthPyramid, bounds.zy, level).r; + float depth00 = texelFetch(_flw_depthPyramid, bounds.xy, level).r; + + float depth; + if (_flw_cullData.useMin == 0) { + depth = max(max(depth00, depth01), max(depth10, depth11)); + } else { + depth = min(min(depth00, depth01), min(depth10, depth11)); + } + + float depthSphere = 1. + _flw_cullData.znear / (center.z + radius); + + return depthSphere <= depth; + } + + return true; +} + +void main() { + if (gl_GlobalInvocationID.x >= _flw_passTwoIndicies.length()) { + return; + } + + uint instanceIndex = _flw_passTwoIndices[gl_GlobalInvocationID.x]; + + uint pageIndex = instanceIndex >> 5; + + uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex]; + + uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK; + + if (_flw_isVisible(instanceIndex, modelIndex)) { + uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); + uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex; + _flw_instanceIndices[targetIndex] = instanceIndex; + } +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert index 9cc21ce97..151e7d64a 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert @@ -23,7 +23,7 @@ uniform uint _flw_baseDraw; // We read the visibility buffer for all culling groups into a single shared buffer. // This offset is used to know where each culling group starts. -uniform uint _flw_globalInstanceIdOffset = 0; +uniform uint _flw_visibilityWriteOffsetInstances = 0; flat out uvec3 _flw_packedMaterial; @@ -58,5 +58,5 @@ void main() { _flw_main(instance); // Add 1 because a 0 instance id means null. - _flw_instanceID = _flw_globalInstanceIdOffset + instanceIndex + 1; + _flw_instanceID = _flw_visibilityWriteOffsetInstances + instanceIndex + 1; }