mirror of
https://github.com/Jozufozu/Flywheel.git
synced 2025-01-28 22:04:57 +01:00
Directly visible
- Don't actually need a framebuffer attachment for visibility
- Instead, process everything in pass 2 and write out the visibility bitset directly
- Persist visibility bits between frames for use in pass 1
- No need for indirect dispatch!
- Also saves some SSBO bindings
- Do frustum culling in both passes
This commit is contained in:
parent
afdab92010
commit
b90c43ba7e
13 changed files with 109 additions and 513 deletions
|
@ -109,6 +109,8 @@ public class IndirectPrograms extends AtomicReferenceCounted {
|
|||
.nameMapper(instanceType -> name + "/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader()))
|
||||
.requireExtensions(COMPUTE_EXTENSIONS)
|
||||
.define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE)
|
||||
.enableExtension("GL_KHR_shader_subgroup_basic")
|
||||
.enableExtension("GL_KHR_shader_subgroup_ballot")
|
||||
.withResource(CULL_SHADER_API_IMPL)
|
||||
.withComponent(InstanceStructComponent::new)
|
||||
.withResource(InstanceType::cullShader)
|
||||
|
|
|
@ -1,18 +1,16 @@
|
|||
package dev.engine_room.flywheel.backend.engine.indirect;
|
||||
|
||||
public final class BufferBindings {
|
||||
public static final int PASS_TWO_DISPATCH = 0;
|
||||
public static final int PASS_TWO_INSTANCE_INDEX = 1;
|
||||
public static final int PAGE_FRAME_DESCRIPTOR = 2;
|
||||
public static final int INSTANCE = 3;
|
||||
public static final int DRAW_INSTANCE_INDEX = 4;
|
||||
public static final int MODEL = 5;
|
||||
public static final int DRAW = 6;
|
||||
public static final int LAST_FRAME_VISIBILITY = 0;
|
||||
public static final int PAGE_FRAME_DESCRIPTOR = 1;
|
||||
public static final int INSTANCE = 2;
|
||||
public static final int DRAW_INSTANCE_INDEX = 3;
|
||||
public static final int MODEL = 4;
|
||||
public static final int DRAW = 5;
|
||||
|
||||
public static final int LIGHT_LUT = 7;
|
||||
public static final int LIGHT_SECTION = 8;
|
||||
public static final int MATRICES = 9;
|
||||
public static final int LAST_FRAME_VISIBILITY = 10;
|
||||
public static final int LIGHT_LUT = 6;
|
||||
public static final int LIGHT_SECTION = 7;
|
||||
public static final int MATRICES = 8;
|
||||
|
||||
private BufferBindings() {
|
||||
}
|
||||
|
|
|
@ -7,11 +7,12 @@ import org.lwjgl.system.MemoryUtil;
|
|||
import org.lwjgl.system.Pointer;
|
||||
|
||||
import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType;
|
||||
import dev.engine_room.flywheel.lib.math.MoreMath;
|
||||
import dev.engine_room.flywheel.lib.memory.MemoryBlock;
|
||||
|
||||
public class IndirectBuffers {
|
||||
// Number of vbos created.
|
||||
public static final int BUFFER_COUNT = 7;
|
||||
public static final int BUFFER_COUNT = 6;
|
||||
|
||||
public static final long INT_SIZE = Integer.BYTES;
|
||||
public static final long PTR_SIZE = Pointer.POINTER_SIZE;
|
||||
|
@ -30,8 +31,7 @@ public class IndirectBuffers {
|
|||
private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE;
|
||||
|
||||
// Offsets to the vbos
|
||||
private static final long PASS_TWO_DISPATCH_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_DISPATCH * INT_SIZE;
|
||||
private static final long PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * INT_SIZE;
|
||||
private static final long LAST_FRAME_VISIBILITY_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.LAST_FRAME_VISIBILITY * INT_SIZE;
|
||||
private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * INT_SIZE;
|
||||
private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.INSTANCE * INT_SIZE;
|
||||
private static final long DRAW_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * INT_SIZE;
|
||||
|
@ -39,8 +39,7 @@ public class IndirectBuffers {
|
|||
private static final long DRAW_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW * INT_SIZE;
|
||||
|
||||
// Offsets to the sizes
|
||||
private static final long PASS_TWO_DISPATCH_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PASS_TWO_DISPATCH * PTR_SIZE;
|
||||
private static final long PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * PTR_SIZE;
|
||||
private static final long LAST_FRAME_VISIBILITY_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.LAST_FRAME_VISIBILITY * PTR_SIZE;
|
||||
private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * PTR_SIZE;
|
||||
private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.INSTANCE * PTR_SIZE;
|
||||
private static final long DRAW_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * PTR_SIZE;
|
||||
|
@ -66,8 +65,7 @@ public class IndirectBuffers {
|
|||
*/
|
||||
private final MemoryBlock multiBindBlock;
|
||||
|
||||
public final ResizableStorageBuffer passTwoDispatch;
|
||||
public final ResizableStorageArray passTwoInstanceIndex;
|
||||
public final ResizableStorageArray lastFrameVisibility;
|
||||
public final ObjectStorage objectStorage;
|
||||
public final ResizableStorageArray drawInstanceIndex;
|
||||
public final ResizableStorageArray model;
|
||||
|
@ -76,34 +74,29 @@ public class IndirectBuffers {
|
|||
IndirectBuffers(long instanceStride) {
|
||||
this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1);
|
||||
|
||||
passTwoDispatch = new ResizableStorageBuffer();
|
||||
passTwoInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
|
||||
lastFrameVisibility = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
|
||||
objectStorage = new ObjectStorage(instanceStride);
|
||||
drawInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
|
||||
model = new ResizableStorageArray(MODEL_STRIDE, MODEL_GROWTH_FACTOR);
|
||||
draw = new ResizableStorageArray(DRAW_COMMAND_STRIDE, DRAW_GROWTH_FACTOR);
|
||||
|
||||
passTwoDispatch.ensureCapacity(INT_SIZE * 4);
|
||||
}
|
||||
|
||||
void updateCounts(int instanceCount, int modelCount, int drawCount) {
|
||||
drawInstanceIndex.ensureCapacity(instanceCount);
|
||||
passTwoInstanceIndex.ensureCapacity(instanceCount);
|
||||
lastFrameVisibility.ensureCapacity(MoreMath.ceilingDiv(instanceCount, 32));
|
||||
model.ensureCapacity(modelCount);
|
||||
draw.ensureCapacity(drawCount);
|
||||
|
||||
final long ptr = multiBindBlock.ptr();
|
||||
|
||||
MemoryUtil.memPutInt(ptr + PASS_TWO_DISPATCH_HANDLE_OFFSET, passTwoDispatch.handle());
|
||||
MemoryUtil.memPutInt(ptr + PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET, passTwoInstanceIndex.handle());
|
||||
MemoryUtil.memPutInt(ptr + LAST_FRAME_VISIBILITY_HANDLE_OFFSET, lastFrameVisibility.handle());
|
||||
MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle());
|
||||
MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle());
|
||||
MemoryUtil.memPutInt(ptr + DRAW_INSTANCE_INDEX_HANDLE_OFFSET, drawInstanceIndex.handle());
|
||||
MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle());
|
||||
MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle());
|
||||
|
||||
MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity());
|
||||
MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
|
||||
MemoryUtil.memPutAddress(ptr + LAST_FRAME_VISIBILITY_SIZE_OFFSET, INT_SIZE * MoreMath.ceilingDiv(instanceCount, 32));
|
||||
MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity());
|
||||
MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity());
|
||||
MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
|
||||
|
@ -112,24 +105,23 @@ public class IndirectBuffers {
|
|||
}
|
||||
|
||||
public void bindForCullPassOne() {
|
||||
multiBind(0, 6);
|
||||
multiBind(0, 5);
|
||||
}
|
||||
|
||||
public void bindForCullPassTwo() {
|
||||
multiBind(0, 6);
|
||||
GlBufferType.DISPATCH_INDIRECT_BUFFER.bind(passTwoDispatch.handle());
|
||||
multiBind(0, 5);
|
||||
}
|
||||
|
||||
public void bindForApply() {
|
||||
multiBind(5, 2);
|
||||
multiBind(4, 2);
|
||||
}
|
||||
|
||||
public void bindForModelReset() {
|
||||
multiBind(5, 1);
|
||||
multiBind(4, 1);
|
||||
}
|
||||
|
||||
public void bindForDraw() {
|
||||
multiBind(3, 4);
|
||||
multiBind(2, 4);
|
||||
GlBufferType.DRAW_INDIRECT_BUFFER.bind(draw.handle());
|
||||
}
|
||||
|
||||
|
@ -155,7 +147,6 @@ public class IndirectBuffers {
|
|||
drawInstanceIndex.delete();
|
||||
model.delete();
|
||||
draw.delete();
|
||||
passTwoDispatch.delete();
|
||||
passTwoInstanceIndex.delete();
|
||||
lastFrameVisibility.delete();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,7 +6,6 @@ import static org.lwjgl.opengl.GL30.glUniform1ui;
|
|||
import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT;
|
||||
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
|
||||
import static org.lwjgl.opengl.GL43.glDispatchCompute;
|
||||
import static org.lwjgl.opengl.GL43.glDispatchComputeIndirect;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
|
@ -14,8 +13,6 @@ import java.util.EnumMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.lwjgl.opengl.GL46;
|
||||
|
||||
import dev.engine_room.flywheel.api.instance.Instance;
|
||||
import dev.engine_room.flywheel.api.instance.InstanceType;
|
||||
import dev.engine_room.flywheel.api.material.Material;
|
||||
|
@ -29,7 +26,6 @@ import dev.engine_room.flywheel.backend.engine.MeshPool;
|
|||
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
|
||||
import dev.engine_room.flywheel.backend.gl.GlCompat;
|
||||
import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
|
||||
import dev.engine_room.flywheel.lib.material.LightShaders;
|
||||
import dev.engine_room.flywheel.lib.math.MoreMath;
|
||||
|
||||
public class IndirectCullingGroup<I extends Instance> {
|
||||
|
@ -54,12 +50,6 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
private boolean needsDrawSort;
|
||||
public int instanceCountThisFrame;
|
||||
|
||||
private int pagesLastFrame = 0;
|
||||
private int pagesThisFrame = 0;
|
||||
|
||||
private int visibilityWriteOffsetPages = 0;
|
||||
private int visibilityReadOffsetPages = 0;
|
||||
|
||||
IndirectCullingGroup(InstanceType<I> instanceType, IndirectPrograms programs) {
|
||||
this.instanceType = instanceType;
|
||||
instanceStride = MoreMath.align4(instanceType.layout()
|
||||
|
@ -95,17 +85,6 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
}
|
||||
}
|
||||
|
||||
public int flipVisibilityOffsets(int visibilityWriteOffsetPages) {
|
||||
this.visibilityReadOffsetPages = this.visibilityWriteOffsetPages;
|
||||
this.visibilityWriteOffsetPages = visibilityWriteOffsetPages;
|
||||
|
||||
pagesLastFrame = pagesThisFrame;
|
||||
|
||||
pagesThisFrame = buffers.objectStorage.capacity();
|
||||
|
||||
return pagesThisFrame;
|
||||
}
|
||||
|
||||
public void upload(StagingBuffer stagingBuffer) {
|
||||
if (nothingToDo()) {
|
||||
return;
|
||||
|
@ -127,8 +106,6 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
}
|
||||
|
||||
uploadDraws(stagingBuffer);
|
||||
|
||||
GL46.nglClearNamedBufferData(buffers.passTwoDispatch.handle(), GL46.GL_R32UI, GL46.GL_RED, GL46.GL_UNSIGNED_INT, 0);
|
||||
}
|
||||
|
||||
public void dispatchCull() {
|
||||
|
@ -139,8 +116,6 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
Uniforms.bindAll();
|
||||
earlyCull.bind();
|
||||
|
||||
earlyCull.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages);
|
||||
|
||||
buffers.bindForCullPassOne();
|
||||
glDispatchCompute(buffers.objectStorage.capacity(), 1, 1);
|
||||
}
|
||||
|
@ -154,7 +129,7 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
lateCull.bind();
|
||||
|
||||
buffers.bindForCullPassTwo();
|
||||
glDispatchComputeIndirect(0);
|
||||
glDispatchCompute(buffers.objectStorage.capacity(), 1, 1);
|
||||
}
|
||||
|
||||
public void dispatchApply() {
|
||||
|
@ -257,8 +232,6 @@ public class IndirectCullingGroup<I extends Instance> {
|
|||
// Don't need to do this unless the program changes.
|
||||
drawProgram.bind();
|
||||
baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw");
|
||||
|
||||
drawProgram.setUInt("_flw_visibilityWriteOffsetInstances", visibilityWriteOffsetPages << ObjectStorage.LOG_2_PAGE_SIZE);
|
||||
}
|
||||
|
||||
glUniform1ui(baseDrawUniformLoc, multiDraw.start);
|
||||
|
|
|
@ -53,11 +53,6 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
private final MatrixBuffer matrixBuffer;
|
||||
|
||||
private final DepthPyramid depthPyramid;
|
||||
private final VisibilityBuffer visibilityBuffer;
|
||||
|
||||
private int totalPagesLastFrame = 0;
|
||||
|
||||
private boolean needsBarrier = false;
|
||||
|
||||
public IndirectDrawManager(IndirectPrograms programs) {
|
||||
this.programs = programs;
|
||||
|
@ -73,7 +68,6 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
matrixBuffer = new MatrixBuffer();
|
||||
|
||||
depthPyramid = new DepthPyramid(programs);
|
||||
visibilityBuffer = new VisibilityBuffer(programs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -112,8 +106,6 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
|
||||
visibilityBuffer.bind();
|
||||
|
||||
for (var group1 : cullingGroups.values()) {
|
||||
group1.dispatchCull();
|
||||
}
|
||||
|
@ -124,8 +116,6 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
|
||||
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||
|
||||
visibilityBuffer.attach();
|
||||
|
||||
submitDraws();
|
||||
|
||||
depthPyramid.generate();
|
||||
|
@ -156,8 +146,6 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
|
||||
MaterialRenderState.reset();
|
||||
TextureBinder.resetLightAndOverlay();
|
||||
|
||||
visibilityBuffer.detach();
|
||||
}
|
||||
|
||||
private void dispatchApply() {
|
||||
|
@ -185,20 +173,12 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
group.flushInstancers();
|
||||
}
|
||||
|
||||
visibilityBuffer.read(totalPagesLastFrame);
|
||||
visibilityBuffer.clear();
|
||||
|
||||
cullingGroups.values()
|
||||
.removeIf(IndirectCullingGroup::checkEmptyAndDelete);
|
||||
|
||||
instancers.values()
|
||||
.removeIf(instancer -> instancer.instanceCount() == 0);
|
||||
|
||||
int totalPagesThisFrame = 0;
|
||||
for (var group : cullingGroups.values()) {
|
||||
totalPagesThisFrame += group.flipVisibilityOffsets(totalPagesThisFrame);
|
||||
}
|
||||
|
||||
meshPool.flush();
|
||||
|
||||
stagingBuffer.reclaim();
|
||||
|
@ -215,10 +195,6 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
|
||||
// We could probably save some driver calls here when there are
|
||||
// actually zero instances, but that feels like a very rare case
|
||||
|
||||
needsBarrier = true;
|
||||
|
||||
totalPagesLastFrame = totalPagesThisFrame;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -238,8 +214,6 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
|||
programs.release();
|
||||
|
||||
depthPyramid.delete();
|
||||
|
||||
visibilityBuffer.delete();
|
||||
}
|
||||
|
||||
public void renderCrumbling(List<Engine.CrumblingBlock> crumblingBlocks) {
|
||||
|
|
|
@ -1,132 +0,0 @@
|
|||
package dev.engine_room.flywheel.backend.engine.indirect;
|
||||
|
||||
import org.lwjgl.opengl.GL30;
|
||||
import org.lwjgl.opengl.GL32;
|
||||
import org.lwjgl.opengl.GL46;
|
||||
import org.lwjgl.opengl.GL46C;
|
||||
|
||||
import com.mojang.blaze3d.platform.GlStateManager;
|
||||
|
||||
import dev.engine_room.flywheel.backend.FlwBackend;
|
||||
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
|
||||
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
|
||||
import dev.engine_room.flywheel.lib.math.MoreMath;
|
||||
import it.unimi.dsi.fastutil.ints.IntArraySet;
|
||||
import it.unimi.dsi.fastutil.ints.IntSet;
|
||||
import net.minecraft.client.Minecraft;
|
||||
|
||||
public class VisibilityBuffer {
|
||||
private static final int READ_GROUP_SIZE = 32;
|
||||
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
|
||||
|
||||
private final IndirectPrograms programs;
|
||||
private final ResizableStorageArray lastFrameVisibility;
|
||||
private int textureId = -1;
|
||||
|
||||
private int lastWidth = -1;
|
||||
private int lastHeight = -1;
|
||||
|
||||
private final IntSet attached = new IntArraySet();
|
||||
|
||||
public VisibilityBuffer(IndirectPrograms programs) {
|
||||
this.programs = programs;
|
||||
lastFrameVisibility = new ResizableStorageArray(Integer.BYTES, 1.25f);
|
||||
}
|
||||
|
||||
public void read(int pageCount) {
|
||||
if (pageCount == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
lastFrameVisibility.ensureCapacity(pageCount);
|
||||
|
||||
GL46.nglClearNamedBufferData(lastFrameVisibility.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0);
|
||||
|
||||
if (lastWidth == -1 || lastHeight == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
programs.getReadVisibilityProgram()
|
||||
.bind();
|
||||
bind();
|
||||
|
||||
GlTextureUnit.T0.makeActive();
|
||||
GlStateManager._bindTexture(textureId);
|
||||
|
||||
GL46.glDispatchCompute(MoreMath.ceilingDiv(lastWidth, READ_GROUP_SIZE), MoreMath.ceilingDiv(lastHeight, READ_GROUP_SIZE), 1);
|
||||
}
|
||||
|
||||
public void bind() {
|
||||
GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.LAST_FRAME_VISIBILITY, lastFrameVisibility.handle());
|
||||
}
|
||||
|
||||
public void attach() {
|
||||
var mainRenderTarget = Minecraft.getInstance()
|
||||
.getMainRenderTarget();
|
||||
|
||||
setupTexture(mainRenderTarget.width, mainRenderTarget.height);
|
||||
|
||||
if (attached.add(mainRenderTarget.frameBufferId)) {
|
||||
GL46.glNamedFramebufferTexture(mainRenderTarget.frameBufferId, ATTACHMENT, textureId, 0);
|
||||
|
||||
try {
|
||||
mainRenderTarget.checkStatus();
|
||||
} catch (Exception e) {
|
||||
FlwBackend.LOGGER.error("Error attaching visbuffer", e);
|
||||
}
|
||||
}
|
||||
|
||||
// Enable writes
|
||||
GL46.glNamedFramebufferDrawBuffers(mainRenderTarget.frameBufferId, new int[] { GL30.GL_COLOR_ATTACHMENT0, ATTACHMENT });
|
||||
}
|
||||
|
||||
public void detach() {
|
||||
var mainRenderTarget = Minecraft.getInstance()
|
||||
.getMainRenderTarget();
|
||||
|
||||
// Disable writes
|
||||
GL46.glNamedFramebufferDrawBuffers(mainRenderTarget.frameBufferId, new int[] { GL30.GL_COLOR_ATTACHMENT0 });
|
||||
}
|
||||
|
||||
public void delete() {
|
||||
deleteTexture();
|
||||
lastFrameVisibility.delete();
|
||||
}
|
||||
|
||||
private void deleteTexture() {
|
||||
if (textureId != -1) {
|
||||
GL32.glDeleteTextures(textureId);
|
||||
textureId = -1;
|
||||
}
|
||||
}
|
||||
|
||||
public void clear() {
|
||||
if (lastWidth == -1 || lastHeight == -1) {
|
||||
return;
|
||||
}
|
||||
|
||||
GL46C.nglClearTexImage(textureId, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0);
|
||||
}
|
||||
|
||||
private void setupTexture(int width, int height) {
|
||||
if (lastWidth == width && lastHeight == height) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Need to rebind to all fbos because an attachment becomes incomplete when it's resized
|
||||
attached.clear();
|
||||
|
||||
lastWidth = width;
|
||||
lastHeight = height;
|
||||
|
||||
deleteTexture();
|
||||
|
||||
textureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
|
||||
GL46.glTextureStorage2D(textureId, 1, GL32.GL_R32UI, width, height);
|
||||
|
||||
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
|
||||
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
|
||||
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
|
||||
GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
|
||||
}
|
||||
}
|
|
@ -1,17 +1,17 @@
|
|||
// FIXME: OpenGL only guarantees a minimum of 8 SSBO binding points, but we use 9.
|
||||
// A few of these could be combined.
|
||||
|
||||
// Per culling group
|
||||
#define _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING 0 // cull1
|
||||
#define _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING 1 // cull1, cull2
|
||||
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2 // cull1, cull2
|
||||
#define _FLW_INSTANCE_BUFFER_BINDING 3 // cull1, cull2, draw
|
||||
#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 4 // cull1, cull2, draw
|
||||
#define _FLW_MODEL_BUFFER_BINDING 5 // cull1, cull2, apply
|
||||
#define _FLW_DRAW_BUFFER_BINDING 6 // apply, draw
|
||||
#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 0// cull1, cull2
|
||||
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 1// cull1, cull2
|
||||
#define _FLW_INSTANCE_BUFFER_BINDING 2// cull1, cull2, draw
|
||||
#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 3// cull1, cull2, draw
|
||||
#define _FLW_MODEL_BUFFER_BINDING 4// cull1, cull2, apply
|
||||
#define _FLW_DRAW_BUFFER_BINDING 5// apply, draw
|
||||
|
||||
|
||||
// Global to the engine
|
||||
#define _FLW_LIGHT_LUT_BUFFER_BINDING 7
|
||||
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 8
|
||||
#define _FLW_LIGHT_LUT_BUFFER_BINDING 6
|
||||
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 7
|
||||
|
||||
#define _FLW_MATRIX_BUFFER_BINDING 9
|
||||
|
||||
#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 10
|
||||
#define _FLW_MATRIX_BUFFER_BINDING 8
|
||||
|
|
|
@ -1,152 +0,0 @@
|
|||
#include "flywheel:internal/indirect/buffer_bindings.glsl"
|
||||
#include "flywheel:internal/indirect/model_descriptor.glsl"
|
||||
#include "flywheel:internal/uniforms/uniforms.glsl"
|
||||
#include "flywheel:util/matrix.glsl"
|
||||
#include "flywheel:internal/indirect/matrices.glsl"
|
||||
|
||||
layout(local_size_x = 32) in;
|
||||
|
||||
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer TargetBuffer {
|
||||
uint _flw_instanceIndices[];
|
||||
};
|
||||
|
||||
// High 6 bits for the number of instances in the page.
|
||||
const uint _FLW_PAGE_COUNT_OFFSET = 26u;
|
||||
// Bottom 26 bits for the model index.
|
||||
const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF;
|
||||
|
||||
layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer {
|
||||
uint _flw_pageFrameDescriptors[];
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
|
||||
ModelDescriptor _flw_models[];
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer {
|
||||
Matrices _flw_matrices[];
|
||||
};
|
||||
|
||||
layout(binding = 0) uniform sampler2D _flw_depthPyramid;
|
||||
|
||||
// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
|
||||
// Only uses 6 fmas and some boolean ops.
|
||||
// See also:
|
||||
// flywheel:uniform/flywheel.glsl
|
||||
// dev.engine_room.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes
|
||||
// org.joml.FrustumIntersection.testSphere
|
||||
bool _flw_testSphere(vec3 center, float radius) {
|
||||
bvec4 xyInside = greaterThanEqual(fma(flw_frustumPlanes.xyX, center.xxxx, fma(flw_frustumPlanes.xyY, center.yyyy, fma(flw_frustumPlanes.xyZ, center.zzzz, flw_frustumPlanes.xyW))), -radius.xxxx);
|
||||
bvec2 zInside = greaterThanEqual(fma(flw_frustumPlanes.zX, center.xx, fma(flw_frustumPlanes.zY, center.yy, fma(flw_frustumPlanes.zZ, center.zz, flw_frustumPlanes.zW))), -radius.xx);
|
||||
|
||||
return all(xyInside) && all(zInside);
|
||||
}
|
||||
|
||||
bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) {
|
||||
// Closest point on the sphere is between the camera and the near plane, don't even attempt to cull.
|
||||
if (c.z + r > -znear) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vec3 cr = c * r;
|
||||
float czr2 = c.z * c.z - r * r;
|
||||
|
||||
float vx = sqrt(c.x * c.x + czr2);
|
||||
float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x);
|
||||
float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x);
|
||||
|
||||
float vy = sqrt(c.y * c.y + czr2);
|
||||
float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y);
|
||||
float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y);
|
||||
|
||||
aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11);
|
||||
aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
|
||||
uint matrixIndex = _flw_models[modelIndex].matrixIndex;
|
||||
BoundingSphere sphere = _flw_models[modelIndex].boundingSphere;
|
||||
|
||||
vec3 center;
|
||||
float radius;
|
||||
_flw_unpackBoundingSphere(sphere, center, radius);
|
||||
|
||||
FlwInstance instance = _flw_unpackInstance(instanceIndex);
|
||||
|
||||
flw_transformBoundingSphere(instance, center, radius);
|
||||
|
||||
if (matrixIndex > 0) {
|
||||
transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
|
||||
}
|
||||
|
||||
bool isVisible = _flw_testSphere(center, radius);
|
||||
|
||||
if (isVisible) {
|
||||
transformBoundingSphere(flw_view, center, radius);
|
||||
|
||||
vec4 aabb;
|
||||
if (projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb))
|
||||
{
|
||||
float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth;
|
||||
float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight;
|
||||
|
||||
int level = clamp(int(ceil(log2(max(width, height)))), 0, _flw_cullData.pyramidLevels);
|
||||
|
||||
ivec2 levelSize = textureSize(_flw_depthPyramid, level);
|
||||
|
||||
ivec4 levelSizePair = ivec4(levelSize, levelSize);
|
||||
|
||||
ivec4 bounds = ivec4(aabb * vec4(levelSizePair));
|
||||
|
||||
// Clamp to the texture bounds.
|
||||
// Since we're not going through a sampler, out-of-bounds texel fetches will return 0.
|
||||
bounds = clamp(bounds, ivec4(0), levelSizePair);
|
||||
|
||||
float depth01 = texelFetch(_flw_depthPyramid, bounds.xw, level).r;
|
||||
float depth11 = texelFetch(_flw_depthPyramid, bounds.zw, level).r;
|
||||
float depth10 = texelFetch(_flw_depthPyramid, bounds.zy, level).r;
|
||||
float depth00 = texelFetch(_flw_depthPyramid, bounds.xy, level).r;
|
||||
|
||||
float depth;
|
||||
if (_flw_cullData.useMin == 0) {
|
||||
depth = max(max(depth00, depth01), max(depth10, depth11));
|
||||
} else {
|
||||
depth = min(min(depth00, depth01), min(depth10, depth11));
|
||||
}
|
||||
|
||||
float depthSphere = 1. + _flw_cullData.znear / (center.z + radius);
|
||||
|
||||
isVisible = isVisible && depthSphere <= depth;
|
||||
}
|
||||
}
|
||||
|
||||
return isVisible;
|
||||
}
|
||||
|
||||
void main() {
|
||||
uint pageIndex = gl_WorkGroupID.x;
|
||||
|
||||
if (pageIndex >= _flw_pageFrameDescriptors.length()) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex];
|
||||
|
||||
uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET;
|
||||
|
||||
if (gl_LocalInvocationID.x >= pageInstanceCount) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint instanceIndex = gl_GlobalInvocationID.x;
|
||||
|
||||
uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK;
|
||||
|
||||
if (_flw_isVisible(instanceIndex, modelIndex)) {
|
||||
uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
|
||||
uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex;
|
||||
_flw_instanceIndices[targetIndex] = instanceIndex;
|
||||
}
|
||||
}
|
|
@ -7,16 +7,6 @@
|
|||
|
||||
layout(local_size_x = 32) in;
|
||||
|
||||
uniform uint _flw_visibilityReadOffsetPages;
|
||||
|
||||
layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer {
|
||||
_FlwLateCullDispatch _flw_lateCullDispatch;
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer {
|
||||
uint _flw_passTwoIndices[];
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
|
||||
uint _flw_drawIndices[];
|
||||
};
|
||||
|
@ -31,7 +21,7 @@ layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict rea
|
|||
};
|
||||
|
||||
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict readonly buffer LastFrameVisibilityBuffer {
|
||||
uint _flw_lastFrameVisibility[];
|
||||
uint _flw_visibility[];
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
|
||||
|
@ -74,6 +64,10 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
|
|||
return _flw_testSphere(center, radius);
|
||||
}
|
||||
|
||||
// TODO: There's an opportunity here to write out the transformed bounding spheres to a buffer and use them in pass 2,
|
||||
// instead of pulling the entire instance again. It would save a lot of memory bandwidth and matrix multiplications in
|
||||
// pass 2, but it would also be a good bit of writes in pass 1. It's worth investigating, but it would be nice to have
|
||||
// nsight trace working to be more sure.
|
||||
void main() {
|
||||
uint pageIndex = gl_WorkGroupID.x;
|
||||
|
||||
|
@ -97,26 +91,12 @@ void main() {
|
|||
return;
|
||||
}
|
||||
|
||||
uint pageVisibility = _flw_lastFrameVisibility[_flw_visibilityReadOffsetPages + pageIndex];
|
||||
uint pageVisibility = _flw_visibility[pageIndex];
|
||||
|
||||
if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) {
|
||||
// This instance was visible last frame, so it should be rendered early.
|
||||
uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
|
||||
uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex;
|
||||
_flw_drawIndices[targetIndex] = instanceIndex;
|
||||
} else {
|
||||
// Try again later to see if it's been disoccluded.
|
||||
uint targetIndex = atomicAdd(_flw_lateCullDispatch.threadCount, 1);
|
||||
_flw_passTwoIndices[targetIndex] = instanceIndex;
|
||||
|
||||
if (targetIndex % 32u == 0u) {
|
||||
// This thread wrote an index that will be at the start of a new workgroup later
|
||||
atomicAdd(_flw_lateCullDispatch.x, 1);
|
||||
|
||||
if (targetIndex == 0) {
|
||||
_flw_lateCullDispatch.y = 1;
|
||||
_flw_lateCullDispatch.z = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -7,15 +7,6 @@
|
|||
|
||||
layout(local_size_x = 32) in;
|
||||
|
||||
|
||||
layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer {
|
||||
_FlwLateCullDispatch _flw_lateCullDispatch;
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer {
|
||||
uint _flw_passTwoIndices[];
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
|
||||
uint _flw_drawIndices[];
|
||||
};
|
||||
|
@ -31,6 +22,10 @@ layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict rea
|
|||
uint _flw_pageFrameDescriptors[];
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buffer LastFrameVisibilityBuffer {
|
||||
uint _flw_visibility[];
|
||||
};
|
||||
|
||||
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
|
||||
ModelDescriptor _flw_models[];
|
||||
};
|
||||
|
@ -64,22 +59,20 @@ bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4
|
|||
return true;
|
||||
}
|
||||
|
||||
// NOTE(review): rendered diff hunk without +/- markers. The header and first two
// statements of _flw_isVisible appear here directly before the header of
// _flw_testSphere, and the `vec3 center;` / `float radius;` declarations further down
// re-declare names that are already parameters of _flw_testSphere, so removed and
// added lines are interleaved. Treat this span as a record of the change, not as
// compilable GLSL.
//
// _flw_testSphere(center, radius): evaluates the packed plane equations in
// flw_frustumPlanes against the sphere center and returns true only when every
// component is >= -radius.
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
|
||||
uint matrixIndex = _flw_models[modelIndex].matrixIndex;
|
||||
BoundingSphere sphere = _flw_models[modelIndex].boundingSphere;
|
||||
// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
|
||||
// Only uses 6 fmas and some boolean ops.
|
||||
// See also:
|
||||
// flywheel:uniform/flywheel.glsl
|
||||
// dev.engine_room.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes
|
||||
// org.joml.FrustumIntersection.testSphere
|
||||
bool _flw_testSphere(vec3 center, float radius) {
|
||||
bvec4 xyInside = greaterThanEqual(fma(flw_frustumPlanes.xyX, center.xxxx, fma(flw_frustumPlanes.xyY, center.yyyy, fma(flw_frustumPlanes.xyZ, center.zzzz, flw_frustumPlanes.xyW))), -radius.xxxx);
|
||||
bvec2 zInside = greaterThanEqual(fma(flw_frustumPlanes.zX, center.xx, fma(flw_frustumPlanes.zY, center.yy, fma(flw_frustumPlanes.zZ, center.zz, flw_frustumPlanes.zW))), -radius.xx);
|
||||
|
|
||||
vec3 center;
|
||||
float radius;
|
||||
_flw_unpackBoundingSphere(sphere, center, radius);
|
||||
|
|
||||
FlwInstance instance = _flw_unpackInstance(instanceIndex);
|
||||
|
|
||||
flw_transformBoundingSphere(instance, center, radius);
|
||||
|
|
||||
if (matrixIndex > 0) {
|
||||
transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
|
||||
}
|
||||
return all(xyInside) && all(zInside);
|
||||
}
|
||||
|
||||
bool _flw_hizTest(vec3 center, float radius) {
|
||||
transformBoundingSphere(flw_view, center, radius);
|
||||
|
||||
vec4 aabb;
|
||||
|
@ -116,22 +109,63 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Decides whether a single instance passes the culling tests.
//
// Unpacks the model's bounding sphere, passes it through the instance type's
// flw_transformBoundingSphere hook, optionally applies the model's matrix, and then
// runs _flw_testSphere followed (only on success) by _flw_hizTest.
//
// instanceIndex: index handed to _flw_unpackInstance.
// modelIndex:    index into _flw_models.
// Returns true only when both _flw_testSphere and _flw_hizTest accept the sphere.
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
|
||||
uint matrixIndex = _flw_models[modelIndex].matrixIndex;
|
||||
BoundingSphere sphere = _flw_models[modelIndex].boundingSphere;
|
||||
|
|
||||
vec3 center;
|
||||
float radius;
|
||||
_flw_unpackBoundingSphere(sphere, center, radius);
|
||||
|
|
||||
FlwInstance instance = _flw_unpackInstance(instanceIndex);
|
||||
|
|
||||
flw_transformBoundingSphere(instance, center, radius);
|
||||
|
|
||||
// Index 0 skips the extra transform - presumably 0 means "no matrix"; TODO confirm.
if (matrixIndex > 0) {
|
||||
transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
|
||||
}
|
||||
|
|
||||
bool visible = _flw_testSphere(center, radius);
|
||||
|
|
||||
// _flw_hizTest is only evaluated for spheres that already passed _flw_testSphere.
if (visible) {
|
||||
// `visible` is true here, so this stores the result of _flw_hizTest.
visible = visible && _flw_hizTest(center, radius);
|
||||
}
|
||||
|
|
||||
return visible;
|
||||
}
|
||||
|
||||
// Entry point of this culling compute shader.
//
// NOTE(review): this span is a rendered diff hunk with no +/- markers. `pageIndex` and
// `instanceIndex` are each declared twice below and two different guards open the
// function, so pre- and post-commit statements are interleaved. Read it as a record of
// the change, not as compilable GLSL.
void main() {
|
||||
if (gl_GlobalInvocationID.x >= _flw_lateCullDispatch.threadCount) {
|
||||
uint pageIndex = gl_WorkGroupID.x;
|
||||
|
|
||||
if (pageIndex >= _flw_pageFrameDescriptors.length()) {
|
||||
return;
|
||||
}
|
||||
|
|
||||
uint instanceIndex = _flw_passTwoIndices[gl_GlobalInvocationID.x];
|
||||
|
|
||||
uint pageIndex = instanceIndex >> 5;
|
||||
|
|
||||
// One packed word per page: the low bits (under _FLW_MODEL_INDEX_MASK) are the model
// index, the bits from _FLW_PAGE_COUNT_OFFSET upwards are the page's instance count.
uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex];
|
||||
|
|
||||
uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET;
|
||||
|
|
||||
// Invocations beyond the page's instance count have nothing to process.
if (gl_LocalInvocationID.x >= pageInstanceCount) {
|
||||
return;
|
||||
}
|
||||
|
|
||||
uint instanceIndex = gl_GlobalInvocationID.x;
|
||||
|
|
||||
uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK;
|
||||
|
|
||||
if (_flw_isVisible(instanceIndex, modelIndex)) {
|
||||
bool visible = _flw_isVisible(instanceIndex, modelIndex);
|
||||
// Tests bit gl_LocalInvocationID.x of this page's word in _flw_visibility.
bool visibleLastFrame = (_flw_visibility[pageIndex] & (1u << gl_LocalInvocationID.x)) != 0u;
|
||||
|
|
||||
// Append to the model's draw list only when the instance passes the tests now and
// its stored bit was not already set.
if (visible && !visibleLastFrame) {
|
||||
uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
|
||||
uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex;
|
||||
_flw_drawIndices[targetIndex] = instanceIndex;
|
||||
}
|
||||
|
|
||||
// FIXME: need a non-subgroup path
|
||||
// NOTE(review): only visibility.x is stored, and the store is done by each subgroup's
// elected invocation. This presumably relies on one subgroup covering the whole
// 32-wide workgroup; with smaller subgroups several elected invocations would write
// the same word - confirm. Invocations that returned early above do not take part
// in the ballot and presumably contribute 0 bits.
uvec4 visibility = subgroupBallot(visible);
|
||||
|
|
||||
if (subgroupElect()) {
|
||||
_flw_visibility[pageIndex] = visibility.x;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -6,13 +6,9 @@ flat in uvec2 _flw_packedMaterial;
|
|||
|
||||
flat in uint _flw_instanceID;
|
||||
|
||||
layout(location = 1) out uint _flw_out_instanceID;
|
||||
|
||||
// Shader entry point: unpacks the two words of _flw_packedMaterial, runs _flw_main for
// the interpolated instance id, then writes that id to _flw_out_instanceID.
//
// NOTE(review): this span comes from a rendered diff hunk without +/- markers; some of
// these statements may be lines the commit removes - check against the actual file.
void main() {
|
||||
// .x carries two 16-bit values that are unpacked into the fog and cutout indices.
_flw_unpackUint2x16(_flw_packedMaterial.x, _flw_uberFogIndex, _flw_uberCutoutIndex);
|
||||
// .y is unpacked into flw_material.
_flw_unpackMaterialProperties(_flw_packedMaterial.y, flw_material);
|
||||
|
|
||||
_flw_main(_flw_instanceID);
|
||||
|
|
||||
_flw_out_instanceID = _flw_instanceID;
|
||||
}
|
||||
|
|
|
@ -21,10 +21,6 @@ layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffe
|
|||
|
||||
uniform uint _flw_baseDraw;
|
||||
|
||||
// We read the visibility buffer for all culling groups into a single shared buffer.
|
||||
// This offset is used to know where each culling group starts.
|
||||
uniform uint _flw_visibilityWriteOffsetInstances = 0;
|
||||
|
||||
flat out uvec2 _flw_packedMaterial;
|
||||
|
||||
flat out uint _flw_instanceID;
|
||||
|
@ -60,5 +56,5 @@ void main() {
|
|||
_flw_main(instance);
|
||||
|
||||
// Add 1 because a 0 instance id means null.
|
||||
_flw_instanceID = _flw_visibilityWriteOffsetInstances + instanceIndex + 1;
|
||||
_flw_instanceID = instanceIndex + 1;
|
||||
}
|
||||
|
|
|
@ -1,64 +0,0 @@
|
|||
#include "flywheel:internal/indirect/buffer_bindings.glsl"
|
||||
|
||||
layout(local_size_x = 256) in;
|
||||
|
||||
layout(binding = 0) uniform usampler2D visBuffer;
|
||||
|
||||
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buffer LastFrameVisibilityBuffer {
|
||||
uint _flw_lastFrameVisibility[];
|
||||
};
|
||||
|
||||
// Returns `count` bits of `e` starting at bit `offset`, shifted down to bit 0.
// NOTE(review): the mask is built with `1u << count`, so `count` has to stay below 32;
// the calls visible in this file pass 2 or 3.
uint extractBits(uint e, uint offset, uint count) {
|
||||
return (e >> offset) & ((1u << count) - 1u);
|
||||
}
|
||||
|
||||
// Returns `e` with the `count` bits starting at bit `offset` replaced by the low
// `count` bits of `newbits`; all other bits of `e` are kept.
// NOTE(review): the mask is built with `1u << count`, so `count` has to stay below 32;
// the calls visible in this file pass 1 or 2.
uint insertBits(uint e, uint newbits, uint offset, uint count) {
|
||||
uint countMask = ((1u << count) - 1u);
|
||||
// zero out the bits we're going to replace first
|
||||
return (e & ~(countMask << offset)) | ((newbits & countMask) << offset);
|
||||
}
|
||||
|
||||
// Rearranges the low six bits of `a` into a pair of 3-bit coordinates.
// With b0..b5 the bits of `a`:
//   .x = (b4, b3, b0)  - bits 2..4 of `a` with the lowest bit replaced by b0
//   .y = (b5, b2, b1)  - bits 3..5 of `a` with the two lowest bits replaced by b2, b1
// so both components are in 0..7 when `a` is below 64.
uvec2 remap_for_wave_reduction(uint a) {
|
||||
return uvec2(
|
||||
insertBits(extractBits(a, 2u, 3u), a, 0u, 1u),
|
||||
insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u)
|
||||
);
|
||||
}
|
||||
|
||||
// Marks one instance in the _flw_lastFrameVisibility bitset.
// instanceID is 1-based: 0 is ignored, any other value sets bit (instanceID - 1),
// with 32 instances per uint word.
void emit(uint instanceID) {
|
||||
// Null instance id.
|
||||
if (instanceID == 0) {
|
||||
return;
|
||||
}
|
||||
|
|
||||
// Adjust for null to find the actual index.
|
||||
instanceID = instanceID - 1;
|
||||
|
|
||||
// Word that holds this instance's bit (32 bits per word).
uint index = instanceID >> 5;
|
||||
|
|
||||
// Bit position inside that word.
uint mask = 1u << (instanceID & 31u);
|
||||
|
|
||||
// Atomic because several invocations can set bits in the same word.
atomicOr(_flw_lastFrameVisibility[index], mask);
|
||||
}
|
||||
|
||||
// Reads instance ids back from visBuffer and records them through emit().
// Each invocation handles a 2x2 texel quad; x and y below are each in 0..15 for the
// 256-wide workgroup declared in this file, so one workgroup covers a 32x32 texel tile.
void main() {
|
||||
// Low six bits of the invocation index pick a position inside an 8x8 block ...
uvec2 sub_xy = remap_for_wave_reduction(gl_LocalInvocationIndex % 64u);
|
||||
// ... bit 6 selects the block column and bits 7 and up select the block row.
uint x = sub_xy.x + 8u * ((gl_LocalInvocationIndex >> 6u) % 2u);
|
||||
uint y = sub_xy.y + 8u * (gl_LocalInvocationIndex >> 7u);
|
||||
|
|
||||
// Top-left texel of this invocation's quad.
ivec2 tex = ivec2(gl_WorkGroupID.xy) * 32 + ivec2(x, y) * 2;
|
||||
|
|
||||
uint instanceID01 = texelFetchOffset(visBuffer, tex, 0, ivec2(0, 1)).r;
|
||||
uint instanceID11 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 1)).r;
|
||||
uint instanceID10 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 0)).r;
|
||||
uint instanceID00 = texelFetch(visBuffer, tex, 0).r;
|
||||
|
|
||||
// When all four texels hold the same id a single emit() call is enough.
if (instanceID00 == instanceID01 && instanceID01 == instanceID10 && instanceID10 == instanceID11) {
|
||||
emit(instanceID00);
|
||||
} else {
|
||||
emit(instanceID00);
|
||||
emit(instanceID01);
|
||||
emit(instanceID10);
|
||||
emit(instanceID11);
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue