Merge branch '1.20/last-frame-visibility' into 1.20/dev

# Conflicts:
#	common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/DepthPyramid.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/ObjectStorage.java
#	common/src/backend/java/dev/engine_room/flywheel/backend/engine/uniform/FrameUniforms.java
#	common/src/backend/resources/assets/flywheel/flywheel/internal/common.frag
#	common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert
#	common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl
#	common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl
#	common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample.glsl
#	common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_first.glsl
#	common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/downsample_second.glsl
#	common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert
This commit is contained in:
Jozufozu 2024-10-18 12:02:04 -07:00
commit afdab92010
21 changed files with 789 additions and 89 deletions

View file

@ -26,12 +26,15 @@ import net.minecraft.resources.ResourceLocation;
public class IndirectPrograms extends AtomicReferenceCounted {
private static final ResourceLocation CULL_SHADER_API_IMPL = Flywheel.rl("internal/indirect/cull_api_impl.glsl");
private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl");
private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/early_cull.glsl");
private static final ResourceLocation PASS2_SHADER_MAIN = Flywheel.rl("internal/indirect/late_cull.glsl");
private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl");
private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl");
private static final ResourceLocation READ_VISIBILITY_SHADER_MAIN = Flywheel.rl("internal/indirect/read_visibility.glsl");
private static final ResourceLocation ZERO_MODELS_SHADER_MAIN = Flywheel.rl("internal/indirect/zero_models.glsl");
private static final ResourceLocation DOWNSAMPLE_FIRST = Flywheel.rl("internal/indirect/downsample_first.glsl");
private static final ResourceLocation DOWNSAMPLE_SECOND = Flywheel.rl("internal/indirect/downsample_second.glsl");
public static final List<ResourceLocation> UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DOWNSAMPLE_FIRST, DOWNSAMPLE_SECOND);
public static final List<ResourceLocation> UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN, ZERO_MODELS_SHADER_MAIN, DOWNSAMPLE_FIRST, DOWNSAMPLE_SECOND);
private static final Compile<InstanceType<?>> CULL = new Compile<>();
private static final Compile<ResourceLocation> UTIL = new Compile<>();
@ -44,11 +47,13 @@ public class IndirectPrograms extends AtomicReferenceCounted {
private final PipelineCompiler pipeline;
private final CompilationHarness<InstanceType<?>> culling;
private final CompilationHarness<InstanceType<?>> cullPassTwo;
private final CompilationHarness<ResourceLocation> utils;
private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness<InstanceType<?>> culling, CompilationHarness<ResourceLocation> utils) {
private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness<InstanceType<?>> culling, CompilationHarness<InstanceType<?>> cullPassTwo, CompilationHarness<ResourceLocation> utils) {
this.pipeline = pipeline;
this.culling = culling;
this.cullPassTwo = cullPassTwo;
this.utils = utils;
}
@ -86,10 +91,11 @@ public class IndirectPrograms extends AtomicReferenceCounted {
}
var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS);
var cullingCompiler = createCullingCompiler(sources);
var pass1Compiler = createCullingCompiler(sources, CULL_SHADER_MAIN, "early_cull");
var pass2Compiler = createCullingCompiler(sources, PASS2_SHADER_MAIN, "late_cull");
var utilCompiler = createUtilCompiler(sources);
IndirectPrograms newInstance = new IndirectPrograms(pipelineCompiler, cullingCompiler, utilCompiler);
IndirectPrograms newInstance = new IndirectPrograms(pipelineCompiler, pass1Compiler, pass2Compiler, utilCompiler);
setInstance(newInstance);
}
@ -97,19 +103,19 @@ public class IndirectPrograms extends AtomicReferenceCounted {
/**
* A compiler for cull shaders, parameterized by the instance type.
*/
private static CompilationHarness<InstanceType<?>> createCullingCompiler(ShaderSources sources) {
private static CompilationHarness<InstanceType<?>> createCullingCompiler(ShaderSources sources, ResourceLocation main, String name) {
return CULL.program()
.link(CULL.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.COMPUTE)
.nameMapper(instanceType -> "culling/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader()))
.nameMapper(instanceType -> name + "/" + ResourceUtil.toDebugFileNameNoExtension(instanceType.cullShader()))
.requireExtensions(COMPUTE_EXTENSIONS)
.define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE)
.withResource(CULL_SHADER_API_IMPL)
.withComponent(InstanceStructComponent::new)
.withResource(InstanceType::cullShader)
.withComponent(SsboInstanceComponent::new)
.withResource(CULL_SHADER_MAIN))
.withResource(main))
.postLink((key, program) -> Uniforms.setUniformBlockBindings(program))
.harness("culling", sources);
.harness(name, sources);
}
/**
@ -156,10 +162,18 @@ public class IndirectPrograms extends AtomicReferenceCounted {
return culling.get(instanceType);
}
/**
 * Looks up the second-pass culling program for an instance type.
 *
 * @param instanceType The instance type whose cull shader the program is built around.
 * @return The program obtained from the {@code cullPassTwo} compilation harness.
 */
public GlProgram getCullPassTwoProgram(InstanceType<?> instanceType) {
return cullPassTwo.get(instanceType);
}
/**
 * @return The utility program keyed by {@code APPLY_SHADER_MAIN} ({@code internal/indirect/apply.glsl}).
 */
public GlProgram getApplyProgram() {
return utils.get(APPLY_SHADER_MAIN);
}
/**
 * @return The utility program keyed by {@code ZERO_MODELS_SHADER_MAIN} ({@code internal/indirect/zero_models.glsl}).
 */
public GlProgram getZeroModelProgram() {
return utils.get(ZERO_MODELS_SHADER_MAIN);
}
/**
 * @return The utility program keyed by {@code SCATTER_SHADER_MAIN} ({@code internal/indirect/scatter.glsl}).
 */
public GlProgram getScatterProgram() {
return utils.get(SCATTER_SHADER_MAIN);
}
@ -172,10 +186,15 @@ public class IndirectPrograms extends AtomicReferenceCounted {
return utils.get(DOWNSAMPLE_SECOND);
}
/**
 * @return The utility program keyed by {@code READ_VISIBILITY_SHADER_MAIN} ({@code internal/indirect/read_visibility.glsl}).
 */
public GlProgram getReadVisibilityProgram() {
return utils.get(READ_VISIBILITY_SHADER_MAIN);
}
/**
 * Releases every compiler/harness owned by this instance: the draw pipeline compiler,
 * both culling harnesses, and the utility harness.
 */
@Override
protected void _delete() {
pipeline.delete();
culling.delete();
cullPassTwo.delete();
utils.delete();
}
}

View file

@ -1,15 +1,18 @@
package dev.engine_room.flywheel.backend.engine.indirect;
public final class BufferBindings {
public static final int PAGE_FRAME_DESCRIPTOR = 0;
public static final int INSTANCE = 1;
public static final int DRAW_INSTANCE_INDEX = 2;
public static final int MODEL = 3;
public static final int DRAW = 4;
public static final int PASS_TWO_DISPATCH = 0;
public static final int PASS_TWO_INSTANCE_INDEX = 1;
public static final int PAGE_FRAME_DESCRIPTOR = 2;
public static final int INSTANCE = 3;
public static final int DRAW_INSTANCE_INDEX = 4;
public static final int MODEL = 5;
public static final int DRAW = 6;
public static final int LIGHT_LUT = 5;
public static final int LIGHT_SECTION = 6;
public static final int MATRICES = 7;
public static final int LIGHT_LUT = 7;
public static final int LIGHT_SECTION = 8;
public static final int MATRICES = 9;
public static final int LAST_FRAME_VISIBILITY = 10;
private BufferBindings() {
}

View file

@ -11,7 +11,7 @@ import dev.engine_room.flywheel.lib.memory.MemoryBlock;
public class IndirectBuffers {
// Number of vbos created.
public static final int BUFFER_COUNT = 5;
public static final int BUFFER_COUNT = 7;
public static final long INT_SIZE = Integer.BYTES;
public static final long PTR_SIZE = Pointer.POINTER_SIZE;
@ -30,6 +30,8 @@ public class IndirectBuffers {
private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE;
// Offsets to the vbos
private static final long PASS_TWO_DISPATCH_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_DISPATCH * INT_SIZE;
private static final long PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * INT_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * INT_SIZE;
private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.INSTANCE * INT_SIZE;
private static final long DRAW_INSTANCE_INDEX_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * INT_SIZE;
@ -37,6 +39,8 @@ public class IndirectBuffers {
private static final long DRAW_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW * INT_SIZE;
// Offsets to the sizes
private static final long PASS_TWO_DISPATCH_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PASS_TWO_DISPATCH * PTR_SIZE;
private static final long PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PASS_TWO_INSTANCE_INDEX * PTR_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * PTR_SIZE;
private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.INSTANCE * PTR_SIZE;
private static final long DRAW_INSTANCE_INDEX_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.DRAW_INSTANCE_INDEX * PTR_SIZE;
@ -62,6 +66,8 @@ public class IndirectBuffers {
*/
private final MemoryBlock multiBindBlock;
public final ResizableStorageBuffer passTwoDispatch;
public final ResizableStorageArray passTwoInstanceIndex;
public final ObjectStorage objectStorage;
public final ResizableStorageArray drawInstanceIndex;
public final ResizableStorageArray model;
@ -70,25 +76,34 @@ public class IndirectBuffers {
IndirectBuffers(long instanceStride) {
this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1);
passTwoDispatch = new ResizableStorageBuffer();
passTwoInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
objectStorage = new ObjectStorage(instanceStride);
drawInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
model = new ResizableStorageArray(MODEL_STRIDE, MODEL_GROWTH_FACTOR);
draw = new ResizableStorageArray(DRAW_COMMAND_STRIDE, DRAW_GROWTH_FACTOR);
passTwoDispatch.ensureCapacity(INT_SIZE * 4);
}
void updateCounts(int instanceCount, int modelCount, int drawCount) {
drawInstanceIndex.ensureCapacity(instanceCount);
passTwoInstanceIndex.ensureCapacity(instanceCount);
model.ensureCapacity(modelCount);
draw.ensureCapacity(drawCount);
final long ptr = multiBindBlock.ptr();
MemoryUtil.memPutInt(ptr + PASS_TWO_DISPATCH_HANDLE_OFFSET, passTwoDispatch.handle());
MemoryUtil.memPutInt(ptr + PASS_TWO_INSTANCE_INDEX_HANDLE_OFFSET, passTwoInstanceIndex.handle());
MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle());
MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle());
MemoryUtil.memPutInt(ptr + DRAW_INSTANCE_INDEX_HANDLE_OFFSET, drawInstanceIndex.handle());
MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle());
MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle());
MemoryUtil.memPutAddress(ptr + PASS_TWO_DISPATCH_SIZE_OFFSET, passTwoDispatch.capacity());
MemoryUtil.memPutAddress(ptr + PASS_TWO_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity());
MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity());
MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
@ -96,16 +111,25 @@ public class IndirectBuffers {
MemoryUtil.memPutAddress(ptr + DRAW_SIZE_OFFSET, DRAW_COMMAND_STRIDE * drawCount);
}
public void bindForCull() {
multiBind(0, 4);
public void bindForCullPassOne() {
multiBind(0, 6);
}
/**
 * Binds the buffers needed by the second culling pass.
 *
 * <p>Binds six consecutive shader storage bindings starting at binding 0, then binds the
 * pass-two dispatch buffer as the indirect dispatch source.
 */
public void bindForCullPassTwo() {
multiBind(0, 6);
// The same buffer that occupies a storage binding also supplies the indirect dispatch arguments.
GlBufferType.DISPATCH_INDIRECT_BUFFER.bind(passTwoDispatch.handle());
}
public void bindForApply() {
multiBind(3, 2);
multiBind(5, 2);
}
/**
 * Binds a single shader storage binding, index 5, for the model reset dispatch.
 * NOTE(review): index 5 matches {@code BufferBindings.MODEL} after this change — keep the two in sync.
 */
public void bindForModelReset() {
multiBind(5, 1);
}
public void bindForDraw() {
multiBind(1, 4);
multiBind(3, 4);
GlBufferType.DRAW_INDIRECT_BUFFER.bind(draw.handle());
}
@ -113,12 +137,15 @@ public class IndirectBuffers {
* Bind all buffers except the draw command buffer.
*/
public void bindForCrumbling() {
multiBind(1, 4);
multiBind(3, 3);
}
private void multiBind(int base, int count) {
final long ptr = multiBindBlock.ptr();
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, base, count, ptr + base * INT_SIZE, ptr + OFFSET_OFFSET + base * PTR_SIZE, ptr + SIZE_OFFSET + base * PTR_SIZE);
long handlePtr = ptr + HANDLE_OFFSET + base * INT_SIZE;
long offsetPtr = ptr + OFFSET_OFFSET + base * PTR_SIZE;
long sizePtr = ptr + SIZE_OFFSET + base * PTR_SIZE;
nglBindBuffersRange(GL_SHADER_STORAGE_BUFFER, base, count, handlePtr, offsetPtr, sizePtr);
}
public void delete() {
@ -128,5 +155,7 @@ public class IndirectBuffers {
drawInstanceIndex.delete();
model.delete();
draw.delete();
passTwoDispatch.delete();
passTwoInstanceIndex.delete();
}
}

View file

@ -6,6 +6,7 @@ import static org.lwjgl.opengl.GL30.glUniform1ui;
import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT;
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
import static org.lwjgl.opengl.GL43.glDispatchCompute;
import static org.lwjgl.opengl.GL43.glDispatchComputeIndirect;
import java.util.ArrayList;
import java.util.Comparator;
@ -13,6 +14,8 @@ import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import org.lwjgl.opengl.GL46;
import dev.engine_room.flywheel.api.instance.Instance;
import dev.engine_room.flywheel.api.instance.InstanceType;
import dev.engine_room.flywheel.api.material.Material;
@ -26,6 +29,7 @@ import dev.engine_room.flywheel.backend.engine.MeshPool;
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
import dev.engine_room.flywheel.backend.gl.GlCompat;
import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
import dev.engine_room.flywheel.lib.material.LightShaders;
import dev.engine_room.flywheel.lib.math.MoreMath;
public class IndirectCullingGroup<I extends Instance> {
@ -43,11 +47,18 @@ public class IndirectCullingGroup<I extends Instance> {
private final Map<VisualType, List<MultiDraw>> multiDraws = new EnumMap<>(VisualType.class);
private final IndirectPrograms programs;
private final GlProgram cullProgram;
private final GlProgram earlyCull;
private final GlProgram lateCull;
private boolean needsDrawBarrier;
private boolean needsDrawSort;
private int instanceCountThisFrame;
public int instanceCountThisFrame;
private int pagesLastFrame = 0;
private int pagesThisFrame = 0;
private int visibilityWriteOffsetPages = 0;
private int visibilityReadOffsetPages = 0;
IndirectCullingGroup(InstanceType<I> instanceType, IndirectPrograms programs) {
this.instanceType = instanceType;
@ -56,7 +67,8 @@ public class IndirectCullingGroup<I extends Instance> {
buffers = new IndirectBuffers(instanceStride);
this.programs = programs;
cullProgram = programs.getCullingProgram(instanceType);
earlyCull = programs.getCullingProgram(instanceType);
lateCull = programs.getCullPassTwoProgram(instanceType);
}
public void flushInstancers() {
@ -83,6 +95,17 @@ public class IndirectCullingGroup<I extends Instance> {
}
}
/**
 * Rolls this group's visibility bookkeeping over to a new frame.
 *
 * <p>The offset written last frame becomes the offset to read from, the supplied offset
 * becomes the new write offset, and the page counts are shifted the same way using the
 * current capacity of the object storage.
 *
 * @param visibilityWriteOffsetPages The write offset, in pages, assigned to this group for the new frame.
 * @return The number of pages this group occupies this frame.
 */
public int flipVisibilityOffsets(int visibilityWriteOffsetPages) {
	final int currentPages = buffers.objectStorage.capacity();

	// Shift page counts: this frame -> last frame, then record the fresh count.
	pagesLastFrame = pagesThisFrame;
	pagesThisFrame = currentPages;

	// Shift offsets: what was written last frame is what gets read this frame.
	visibilityReadOffsetPages = this.visibilityWriteOffsetPages;
	this.visibilityWriteOffsetPages = visibilityWriteOffsetPages;

	return currentPages;
}
public void upload(StagingBuffer stagingBuffer) {
if (nothingToDo()) {
return;
@ -105,7 +128,7 @@ public class IndirectCullingGroup<I extends Instance> {
uploadDraws(stagingBuffer);
needsDrawBarrier = true;
GL46.nglClearNamedBufferData(buffers.passTwoDispatch.handle(), GL46.GL_R32UI, GL46.GL_RED, GL46.GL_UNSIGNED_INT, 0);
}
public void dispatchCull() {
@ -114,12 +137,26 @@ public class IndirectCullingGroup<I extends Instance> {
}
Uniforms.bindAll();
cullProgram.bind();
earlyCull.bind();
buffers.bindForCull();
earlyCull.setUInt("_flw_visibilityReadOffsetPages", visibilityReadOffsetPages);
buffers.bindForCullPassOne();
glDispatchCompute(buffers.objectStorage.capacity(), 1, 1);
}
/**
 * Runs the second culling pass for this group, unless there is nothing to do.
 *
 * <p>Binds the uniforms, the late cull program, and the pass-two buffers, then issues an
 * indirect compute dispatch.
 */
public void dispatchCullPassTwo() {
if (nothingToDo()) {
return;
}
Uniforms.bindAll();
lateCull.bind();
buffers.bindForCullPassTwo();
// Dispatch arguments are read from byte offset 0 of the bound indirect dispatch buffer.
glDispatchComputeIndirect(0);
}
public void dispatchApply() {
if (nothingToDo()) {
return;
@ -127,6 +164,17 @@ public class IndirectCullingGroup<I extends Instance> {
buffers.bindForApply();
glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1);
needsDrawBarrier = true;
}
/**
 * Dispatches the model reset compute work for this group, unless there is nothing to do.
 *
 * <p>This method binds only the buffer; it does not bind a program, so the caller must have
 * the intended compute program bound already. The dispatch is sized from the number of instancers.
 */
public void dispatchModelReset() {
if (nothingToDo()) {
return;
}
buffers.bindForModelReset();
glDispatchCompute(GlCompat.getComputeGroupCount(instancers.size()), 1, 1);
}
private boolean nothingToDo() {
@ -209,6 +257,8 @@ public class IndirectCullingGroup<I extends Instance> {
// Don't need to do this unless the program changes.
drawProgram.bind();
baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw");
drawProgram.setUInt("_flw_visibilityWriteOffsetInstances", visibilityWriteOffsetPages << ObjectStorage.LOG_2_PAGE_SIZE);
}
glUniform1ui(baseDrawUniformLoc, multiDraw.start);

View file

@ -4,6 +4,7 @@ import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
import static org.lwjgl.opengl.GL30.glBindBufferRange;
import static org.lwjgl.opengl.GL40.glDrawElementsIndirect;
import static org.lwjgl.opengl.GL42.GL_BUFFER_UPDATE_BARRIER_BIT;
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER;
@ -12,6 +13,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.mojang.blaze3d.platform.GlStateManager;
import dev.engine_room.flywheel.api.backend.Engine;
import dev.engine_room.flywheel.api.instance.Instance;
import dev.engine_room.flywheel.api.instance.InstanceType;
@ -30,6 +33,7 @@ import dev.engine_room.flywheel.backend.engine.MeshPool;
import dev.engine_room.flywheel.backend.engine.TextureBinder;
import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage;
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import dev.engine_room.flywheel.backend.gl.array.GlVertexArray;
import dev.engine_room.flywheel.backend.gl.buffer.GlBuffer;
import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType;
@ -49,6 +53,9 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
private final MatrixBuffer matrixBuffer;
private final DepthPyramid depthPyramid;
private final VisibilityBuffer visibilityBuffer;
private int totalPagesLastFrame = 0;
private boolean needsBarrier = false;
@ -66,6 +73,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
matrixBuffer = new MatrixBuffer();
depthPyramid = new DepthPyramid(programs);
visibilityBuffer = new VisibilityBuffer(programs);
}
@Override
@ -90,7 +98,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
}
public void render(VisualType visualType) {
if (!hasVisualType(visualType)) {
// FIXME: Two pass occlusion prefers to render everything at once
if (visualType != VisualType.BLOCK_ENTITY) {
return;
}
@ -101,17 +110,71 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
matrixBuffer.bind();
Uniforms.bindAll();
if (needsBarrier) {
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
needsBarrier = false;
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT);
visibilityBuffer.bind();
for (var group1 : cullingGroups.values()) {
group1.dispatchCull();
}
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
dispatchApply();
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
visibilityBuffer.attach();
submitDraws();
depthPyramid.generate();
programs.getZeroModelProgram()
.bind();
for (var group : cullingGroups.values()) {
group.submit(visualType);
group.dispatchModelReset();
}
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(depthPyramid.pyramidTextureId);
for (var group1 : cullingGroups.values()) {
group1.dispatchCullPassTwo();
}
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
dispatchApply();
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
submitDraws();
MaterialRenderState.reset();
TextureBinder.resetLightAndOverlay();
visibilityBuffer.detach();
}
/**
 * Binds the apply program once, then lets every culling group issue its apply dispatch.
 */
private void dispatchApply() {
	var applyProgram = programs.getApplyProgram();
	applyProgram.bind();

	cullingGroups.values()
			.forEach(IndirectCullingGroup::dispatchApply);
}
/**
 * Submits the draws of every culling group, covering block entities, entities, and
 * effects (in that order) for one group before moving on to the next.
 */
private void submitDraws() {
	cullingGroups.values()
			.forEach(cullingGroup -> {
				cullingGroup.submit(VisualType.BLOCK_ENTITY);
				cullingGroup.submit(VisualType.ENTITY);
				cullingGroup.submit(VisualType.EFFECT);
			});
}
@Override
@ -122,12 +185,20 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
group.flushInstancers();
}
visibilityBuffer.read(totalPagesLastFrame);
visibilityBuffer.clear();
cullingGroups.values()
.removeIf(IndirectCullingGroup::checkEmptyAndDelete);
instancers.values()
.removeIf(instancer -> instancer.instanceCount() == 0);
int totalPagesThisFrame = 0;
for (var group : cullingGroups.values()) {
totalPagesThisFrame += group.flipVisibilityOffsets(totalPagesThisFrame);
}
meshPool.flush();
stagingBuffer.reclaim();
@ -142,31 +213,12 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
stagingBuffer.flush();
depthPyramid.generate();
// We could probably save some driver calls here when there are
// actually zero instances, but that feels like a very rare case
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
matrixBuffer.bind();
depthPyramid.bindForCull();
for (var group : cullingGroups.values()) {
group.dispatchCull();
}
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
programs.getApplyProgram()
.bind();
for (var group : cullingGroups.values()) {
group.dispatchApply();
}
needsBarrier = true;
totalPagesLastFrame = totalPagesThisFrame;
}
@Override
@ -186,6 +238,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
programs.release();
depthPyramid.delete();
visibilityBuffer.delete();
}
public void renderCrumbling(List<Engine.CrumblingBlock> crumblingBlocks) {

View file

@ -22,6 +22,8 @@ public class StagingBuffer {
private static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT;
private static final int MAP_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_MAP_FLUSH_EXPLICIT_BIT | GL45C.GL_MAP_INVALIDATE_BUFFER_BIT;
private static final int SSBO_ALIGNMENT = GL45.glGetInteger(GL45.GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
private final int vbo;
private final long map;
private final long capacity;
@ -252,7 +254,6 @@ public class StagingBuffer {
.bind();
// These bindings don't change between dstVbos.
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle());
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 1, vbo);
int dstVbo;
@ -274,7 +275,24 @@ public class StagingBuffer {
}
private void dispatchScatter(int dstVbo) {
scatterBuffer.upload(scatterList.ptr(), scatterList.usedBytes());
var scatterSize = scatterList.usedBytes();
long alignedPos = pos + SSBO_ALIGNMENT - 1 - (pos + SSBO_ALIGNMENT - 1) % SSBO_ALIGNMENT;
long remaining = capacity - alignedPos;
if (scatterSize <= remaining && scatterSize <= totalAvailable) {
MemoryUtil.memCopy(scatterList.ptr(), map + alignedPos, scatterSize);
GL45.glBindBufferRange(GL45C.GL_SHADER_STORAGE_BUFFER, 0, vbo, alignedPos, scatterSize);
long alignmentCost = alignedPos - pos;
usedCapacity += scatterSize + alignmentCost;
totalAvailable -= scatterSize + alignmentCost;
pos += scatterSize + alignmentCost;
} else {
scatterBuffer.upload(scatterList.ptr(), scatterSize);
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle());
}
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 2, dstVbo);

View file

@ -0,0 +1,132 @@
package dev.engine_room.flywheel.backend.engine.indirect;
import org.lwjgl.opengl.GL30;
import org.lwjgl.opengl.GL32;
import org.lwjgl.opengl.GL46;
import org.lwjgl.opengl.GL46C;
import com.mojang.blaze3d.platform.GlStateManager;
import dev.engine_room.flywheel.backend.FlwBackend;
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import dev.engine_room.flywheel.lib.math.MoreMath;
import it.unimi.dsi.fastutil.ints.IntArraySet;
import it.unimi.dsi.fastutil.ints.IntSet;
import net.minecraft.client.Minecraft;
/**
 * Owns the GPU resources used to carry visibility information from one frame to the next:
 * an {@code R32UI} texture that is attached to the main render target as an extra color
 * attachment, and a storage buffer bound at {@link BufferBindings#LAST_FRAME_VISIBILITY}.
 *
 * <p>NOTE(review): the meaning of the texel values and of the bits in the storage buffer is
 * defined by shaders that are not part of this class — confirm against read_visibility.glsl.
 */
public class VisibilityBuffer {
	/** Divisor used to turn the texture dimensions into work group counts in {@link #read(int)}. */
	private static final int READ_GROUP_SIZE = 32;
	/** Color attachment slot on the main render target that the visibility texture occupies. */
	private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;

	private final IndirectPrograms programs;
	private final ResizableStorageArray lastFrameVisibility;
	/** GL name of the visibility texture, or -1 when no texture currently exists. */
	private int textureId = -1;
	private int lastWidth = -1;
	private int lastHeight = -1;
	/** Framebuffer ids the current texture has already been attached to. */
	private final IntSet attached = new IntArraySet();

	public VisibilityBuffer(IndirectPrograms programs) {
		this.programs = programs;
		lastFrameVisibility = new ResizableStorageArray(Integer.BYTES, 1.25f);
	}

	/**
	 * Zeroes the visibility storage buffer and, if a visibility texture exists, dispatches the
	 * read-visibility program over it.
	 *
	 * @param pageCount Capacity to ensure on the storage buffer; nothing happens when this is 0.
	 */
	public void read(int pageCount) {
		if (pageCount == 0) {
			return;
		}

		lastFrameVisibility.ensureCapacity(pageCount);

		GL46.nglClearNamedBufferData(lastFrameVisibility.handle(), GL46.GL_R32UI, GL46.GL_RED_INTEGER, GL46.GL_UNSIGNED_INT, 0);

		// Nothing has been rendered into a texture yet (or it was deleted), so there is nothing to read back.
		if (textureId == -1) {
			return;
		}

		programs.getReadVisibilityProgram()
				.bind();

		bind();

		GlTextureUnit.T0.makeActive();
		GlStateManager._bindTexture(textureId);

		GL46.glDispatchCompute(MoreMath.ceilingDiv(lastWidth, READ_GROUP_SIZE), MoreMath.ceilingDiv(lastHeight, READ_GROUP_SIZE), 1);
	}

	/**
	 * Binds the visibility storage buffer to its shader storage binding point.
	 */
	public void bind() {
		GL46.glBindBufferBase(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.LAST_FRAME_VISIBILITY, lastFrameVisibility.handle());
	}

	/**
	 * Makes sure the visibility texture matches the main render target's size, attaches it to
	 * that framebuffer if it is not attached yet, and enables writes to it.
	 */
	public void attach() {
		var mainRenderTarget = Minecraft.getInstance()
				.getMainRenderTarget();

		setupTexture(mainRenderTarget.width, mainRenderTarget.height);

		if (attached.add(mainRenderTarget.frameBufferId)) {
			GL46.glNamedFramebufferTexture(mainRenderTarget.frameBufferId, ATTACHMENT, textureId, 0);

			try {
				mainRenderTarget.checkStatus();
			} catch (Exception e) {
				// Best effort: log and keep rendering rather than crash on an incomplete framebuffer.
				FlwBackend.LOGGER.error("Error attaching visbuffer", e);
			}
		}

		// Enable writes
		GL46.glNamedFramebufferDrawBuffers(mainRenderTarget.frameBufferId, new int[] { GL30.GL_COLOR_ATTACHMENT0, ATTACHMENT });
	}

	/**
	 * Restricts the main render target's draw buffers to color attachment 0 again. The texture
	 * itself stays attached.
	 */
	public void detach() {
		var mainRenderTarget = Minecraft.getInstance()
				.getMainRenderTarget();

		// Disable writes
		GL46.glNamedFramebufferDrawBuffers(mainRenderTarget.frameBufferId, new int[] { GL30.GL_COLOR_ATTACHMENT0 });
	}

	/**
	 * Deletes the visibility texture and the storage buffer.
	 */
	public void delete() {
		deleteTexture();
		lastFrameVisibility.delete();
	}

	private void deleteTexture() {
		if (textureId != -1) {
			GL32.glDeleteTextures(textureId);
			textureId = -1;
		}
	}

	/**
	 * Clears the visibility texture to zero. Does nothing when no texture exists.
	 */
	public void clear() {
		if (textureId == -1) {
			return;
		}

		GL46C.nglClearTexImage(textureId, 0, GL32.GL_RED_INTEGER, GL32.GL_UNSIGNED_INT, 0);
	}

	/**
	 * (Re)creates the visibility texture when none exists or the requested size differs from
	 * the current one. A freshly created texture is cleared to zero.
	 */
	private void setupTexture(int width, int height) {
		if (textureId != -1 && lastWidth == width && lastHeight == height) {
			return;
		}

		// Need to rebind to all fbos because an attachment becomes incomplete when it's resized
		attached.clear();

		lastWidth = width;
		lastHeight = height;

		deleteTexture();

		textureId = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
		GL46.glTextureStorage2D(textureId, 1, GL32.GL_R32UI, width, height);

		GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST);
		GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
		GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
		GL46.glTextureParameteri(textureId, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);

		// Newly allocated storage has undefined contents. Zero it so texels that are not drawn to
		// before the next read() do not hold garbage.
		clear();
	}
}

View file

@ -10,6 +10,7 @@ import org.lwjgl.opengl.GL20C;
import org.lwjgl.opengl.GL31C;
import org.lwjgl.opengl.GL40;
import org.lwjgl.opengl.GL43;
import org.lwjgl.opengl.GL46;
import org.lwjgl.opengl.GLCapabilities;
import org.lwjgl.opengl.KHRShaderSubgroup;
import org.lwjgl.system.MemoryStack;
@ -42,6 +43,8 @@ public final class GlCompat {
public static final boolean SUPPORTS_INSTANCING = isInstancingSupported();
public static final boolean SUPPORTS_INDIRECT = isIndirectSupported();
public static final int MAX_SHADER_STORAGE_BUFFER_BINDINGS = GL46.glGetInteger(GL46.GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
private GlCompat() {
}

View file

@ -13,11 +13,7 @@ uniform sampler2D _flw_crumblingTex;
in vec2 _flw_crumblingTexCoord;
#endif
#ifdef _FLW_DEBUG
flat in uint _flw_instanceID;
#endif
out vec4 _flw_outputColor;
layout(location = 0) out vec4 _flw_outputColor;
float _flw_diffuseFactor() {
if (flw_material.diffuse) {
@ -35,7 +31,7 @@ float _flw_diffuseFactor() {
}
}
void _flw_main() {
void _flw_main(uint instanceID) {
flw_sampleColor = texture(flw_diffuseTex, flw_vertexTexCoord);
flw_fragColor = flw_vertexColor * flw_sampleColor;
flw_fragOverlay = flw_vertexOverlay;
@ -81,7 +77,7 @@ void _flw_main() {
color = vec4(flw_vertexNormal * .5 + .5, 1.);
break;
case 2u:
color = _flw_id2Color(_flw_instanceID);
color = _flw_id2Color(instanceID);
break;
case 3u:
color = vec4(vec2((flw_fragLight * 15.0 + 0.5) / 16.), 0., 1.);

View file

@ -71,11 +71,7 @@ mat4 _flw_modelMatrix;
mat3 _flw_normalMatrix;
#endif
#ifdef _FLW_DEBUG
flat out uint _flw_instanceID;
#endif
void _flw_main(in FlwInstance instance, in uint stableInstanceID) {
void _flw_main(in FlwInstance instance) {
_flw_layoutVertex();
flw_instanceVertex(instance);
flw_materialVertex();
@ -94,8 +90,4 @@ void _flw_main(in FlwInstance instance, in uint stableInstanceID) {
flw_distance = fogDistance(flw_vertexPos.xyz, flw_cameraPos, flw_fogShape);
gl_Position = flw_viewProjection * flw_vertexPos;
#ifdef _FLW_DEBUG
_flw_instanceID = stableInstanceID;
#endif
}

View file

@ -1,12 +1,17 @@
// Per culling group
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 0// cull
#define _FLW_INSTANCE_BUFFER_BINDING 1// cull, draw
#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 2// cull, draw
#define _FLW_MODEL_BUFFER_BINDING 3// cull, apply
#define _FLW_DRAW_BUFFER_BINDING 4// apply, draw
#define _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING 0 // cull1
#define _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING 1 // cull1, cull2
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2 // cull1, cull2
#define _FLW_INSTANCE_BUFFER_BINDING 3 // cull1, cull2, draw
#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 4 // cull1, cull2, draw
#define _FLW_MODEL_BUFFER_BINDING 5 // cull1, cull2, apply
#define _FLW_DRAW_BUFFER_BINDING 6 // apply, draw
// Global to the engine
#define _FLW_LIGHT_LUT_BUFFER_BINDING 5
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6
#define _FLW_LIGHT_LUT_BUFFER_BINDING 7
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 8
#define _FLW_MATRIX_BUFFER_BINDING 7
#define _FLW_MATRIX_BUFFER_BINDING 9
#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 10

View file

@ -0,0 +1,31 @@
// Each work group covers a 16x16 tile of invocations; main() handles one output texel per invocation.
layout(local_size_x = 16, local_size_y = 16) in;
// Destination image that receives the reduced depth value.
layout(binding = 0, r32f) uniform writeonly image2D outImage;
// Source texture; texels are fetched from mip level `lod`.
layout(binding = 1) uniform sampler2D inImage;
// Scale applied to the invocation position when mapping it onto the source mip.
// NOTE(review): presumably 1 / (output image size) — confirm against the Java caller.
uniform vec2 oneOverImageSize;
// Mip level of inImage to read from.
uniform int lod;
// 0 keeps the maximum of the 2x2 footprint; any other value keeps the minimum.
uniform int useMin = 0;
// Reduces a 2x2 footprint of the source mip to a single depth value (max or min, selected by
// useMin) and stores it at this invocation's position in outImage.
void main() {
    uvec2 pos = gl_GlobalInvocationID.xy;

    ivec2 sourceSize = textureSize(inImage, lod);

    // Map the output texel to an input texel. Properly do the division because generating mip0 maps from the actual
    // full resolution depth buffer and the aspect ratio may be different from our Po2 pyramid.
    ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(sourceSize) * oneOverImageSize));

    // Clamp the footprint to the source mip. texelFetch results are undefined for out-of-range
    // coordinates, which the +1 taps would otherwise hit on the right and bottom edges.
    ivec2 lastTexel = sourceSize - ivec2(1);
    ivec2 lo = min(samplePos, lastTexel);
    ivec2 hi = min(samplePos + ivec2(1), lastTexel);

    float depth01 = texelFetch(inImage, ivec2(lo.x, hi.y), lod).r;
    float depth11 = texelFetch(inImage, hi, lod).r;
    float depth10 = texelFetch(inImage, ivec2(hi.x, lo.y), lod).r;
    float depth00 = texelFetch(inImage, lo, lod).r;

    float depth;
    if (useMin == 0) {
        depth = max(max(depth00, depth01), max(depth10, depth11));
    } else {
        depth = min(min(depth00, depth01), min(depth10, depth11));
    }

    imageStore(outImage, ivec2(pos), vec4(depth));
}

View file

@ -0,0 +1,6 @@
// Record shared between the two culling passes: the first pass fills it in while queueing
// instances for re-testing, the second pass reads threadCount to know how many were queued.
// NOTE(review): x/y/z look like the work group counts of an indirect compute dispatch -- confirm
// against the host code that consumes this buffer.
struct _FlwLateCullDispatch {
// Incremented by the first pass each time a queued index lands on a multiple of 32.
uint x;
// Set to 1 by the first-pass thread that queues index 0.
uint y;
// Set to 1 by the first-pass thread that queues index 0.
uint z;
// Number of instance indices queued for the second pass; second-pass invocations at or beyond
// this count return immediately.
uint threadCount;
};

View file

@ -0,0 +1,122 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/uniforms/uniforms.glsl"
#include "flywheel:util/matrix.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
#include "flywheel:internal/indirect/dispatch.glsl"
// 32 invocations per work group: main() treats each work group as one page and each local
// invocation as one instance slot within that page.
layout(local_size_x = 32) in;
// Offset, in pages, added to the page index when reading _flw_lastFrameVisibility.
uniform uint _flw_visibilityReadOffsetPages;
// Counters describing the work queued for the second culling pass; updated atomically here.
layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer {
_FlwLateCullDispatch _flw_lateCullDispatch;
};
// Instance indices that passed the frustum test but were not flagged visible last frame.
layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndices[];
};
// Instance indices accepted for drawing, written at each model's baseInstance plus a running count.
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};
// High 6 bits for the number of instances in the page.
const uint _FLW_PAGE_COUNT_OFFSET = 26u;
// Bottom 26 bits for the model index.
const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF;
// One packed word per page: instance count in the high bits, model index in the low bits.
layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer {
uint _flw_pageFrameDescriptors[];
};
// One word per page; bit i is tested for the instance handled by local invocation i.
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict readonly buffer LastFrameVisibilityBuffer {
uint _flw_lastFrameVisibility[];
};
// Per-model data: bounding sphere, matrix index and base instance are read, instanceCount is
// incremented atomically for every instance sent to the draw list.
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
ModelDescriptor _flw_models[];
};
// Matrices looked up through a model's matrixIndex; only the pose member is used in this shader.
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer {
Matrices _flw_matrices[];
};
// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
// Only uses 6 fmas and some boolean ops.
// See also:
// flywheel:uniform/flywheel.glsl
// dev.engine_room.flywheel.lib.math.MatrixMath.writePackedFrustumPlanes
// org.joml.FrustumIntersection.testSphere
// Tests a sphere against the packed frustum planes.
// Returns true when the plane equation evaluated at `center` is at least -radius for every plane.
bool _flw_testSphere(vec3 center, float radius) {
    // Four planes at once: accumulate x*X + (y*Y + (z*Z + W)), innermost term first.
    vec4 xyPlanes = fma(flw_frustumPlanes.xyZ, center.zzzz, flw_frustumPlanes.xyW);
    xyPlanes = fma(flw_frustumPlanes.xyY, center.yyyy, xyPlanes);
    xyPlanes = fma(flw_frustumPlanes.xyX, center.xxxx, xyPlanes);

    // The remaining two planes, same accumulation order.
    vec2 zPlanes = fma(flw_frustumPlanes.zZ, center.zz, flw_frustumPlanes.zW);
    zPlanes = fma(flw_frustumPlanes.zY, center.yy, zPlanes);
    zPlanes = fma(flw_frustumPlanes.zX, center.xx, zPlanes);

    bvec4 xyInside = greaterThanEqual(xyPlanes, vec4(-radius));
    bvec2 zInside = greaterThanEqual(zPlanes, vec2(-radius));

    return all(xyInside) && all(zInside);
}
// Frustum test for one instance.
// instanceIndex: index passed to _flw_unpackInstance.
// modelIndex: index into _flw_models supplying the bounding sphere and matrix index.
// Returns the result of _flw_testSphere on the transformed bounding sphere.
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
    // Start from the bounding sphere stored with the model.
    BoundingSphere packedSphere = _flw_models[modelIndex].boundingSphere;
    vec3 sphereCenter;
    float sphereRadius;
    _flw_unpackBoundingSphere(packedSphere, sphereCenter, sphereRadius);

    // Apply the instance's own transform.
    FlwInstance unpacked = _flw_unpackInstance(instanceIndex);
    flw_transformBoundingSphere(unpacked, sphereCenter, sphereRadius);

    // A matrix index of 0 skips the extra pose transform.
    uint poseIndex = _flw_models[modelIndex].matrixIndex;
    if (poseIndex > 0) {
        transformBoundingSphere(_flw_matrices[poseIndex].pose, sphereCenter, sphereRadius);
    }

    return _flw_testSphere(sphereCenter, sphereRadius);
}
// First culling pass. Each work group handles one page and each invocation one instance slot.
// Instances that pass the frustum test are either appended to their model's draw list (if they
// were flagged visible last frame) or queued for the second pass.
void main() {
    uint page = gl_WorkGroupID.x;
    if (page >= _flw_pageFrameDescriptors.length()) {
        return;
    }

    uint descriptor = _flw_pageFrameDescriptors[page];
    uint slot = gl_LocalInvocationID.x;

    // Slots past the page's instance count hold nothing.
    if (slot >= (descriptor >> _FLW_PAGE_COUNT_OFFSET)) {
        return;
    }

    uint instanceIndex = gl_GlobalInvocationID.x;
    uint modelIndex = descriptor & _FLW_MODEL_INDEX_MASK;

    if (!_flw_isVisible(instanceIndex, modelIndex)) {
        return;
    }

    uint lastFrameBit = _flw_lastFrameVisibility[_flw_visibilityReadOffsetPages + page] & (1u << slot);

    if (lastFrameBit != 0u) {
        // This instance was visible last frame, so it should be rendered early.
        uint countBefore = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
        uint drawSlot = _flw_models[modelIndex].baseInstance + countBefore;
        _flw_drawIndices[drawSlot] = instanceIndex;
        return;
    }

    // Not seen last frame: queue it so the second pass can check whether it has been disoccluded.
    uint queueSlot = atomicAdd(_flw_lateCullDispatch.threadCount, 1);
    _flw_passTwoIndices[queueSlot] = instanceIndex;

    if (queueSlot % 32u != 0u) {
        return;
    }

    // This thread wrote the index that will sit at the start of a new work group later on.
    atomicAdd(_flw_lateCullDispatch.x, 1);

    if (queueSlot == 0) {
        // The very first queued index also initializes the other two dispatch dimensions.
        _flw_lateCullDispatch.y = 1;
        _flw_lateCullDispatch.z = 1;
    }
}

View file

@ -0,0 +1,137 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/uniforms/uniforms.glsl"
#include "flywheel:util/matrix.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
#include "flywheel:internal/indirect/dispatch.glsl"
// 32 invocations per work group; main() handles one queued instance index per invocation.
layout(local_size_x = 32) in;
// Counters written by the first culling pass; threadCount bounds the queue read in main().
layout(std430, binding = _FLW_PASS_TWO_DISPATCH_BUFFER_BINDING) restrict buffer PassTwoDispatchBuffer {
_FlwLateCullDispatch _flw_lateCullDispatch;
};
// Queue of instance indices to test in this pass.
layout(std430, binding = _FLW_PASS_TWO_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer PassTwoIndexBuffer {
uint _flw_passTwoIndices[];
};
// Instance indices accepted for drawing, written at each model's baseInstance plus a running count.
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};
// High 6 bits for the number of instances in the page.
const uint _FLW_PAGE_COUNT_OFFSET = 26u;
// Bottom 26 bits for the model index.
const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF;
// One packed word per page; main() finds the page by shifting the instance index right by 5.
layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer {
uint _flw_pageFrameDescriptors[];
};
// Per-model data: bounding sphere, matrix index and base instance are read, instanceCount is
// incremented atomically for every instance sent to the draw list.
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer {
ModelDescriptor _flw_models[];
};
// Matrices looked up through a model's matrixIndex; only the pose member is used in this shader.
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer {
Matrices _flw_matrices[];
};
// Depth texture whose mip levels are sampled by the occlusion test.
layout(binding = 0) uniform sampler2D _flw_depthPyramid;
// Computes a screen-space bounding box for a sphere.
// c, r: sphere center and radius; the caller in this shader transforms them by flw_view first.
// znear: near plane distance; the comparison below is against -znear.
// P00, P11: scale factors applied to the x and y extents.
//   NOTE(review): presumably the [0][0] and [1][1] entries of the projection matrix -- confirm.
// aabb: receives the box in uv space; only written when the function returns true.
// Returns false when the box was not computed (see the early out), true otherwise.
bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) {
// Closest point on the sphere is between the camera and the near plane, don't even attempt to cull.
if (c.z + r > -znear) {
return false;
}
// Terms shared by the x and y extent computations below.
vec3 cr = c * r;
float czr2 = c.z * c.z - r * r;
// Extents along x.
float vx = sqrt(c.x * c.x + czr2);
float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x);
float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x);
// Extents along y, same form as x.
float vy = sqrt(c.y * c.y + czr2);
float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y);
float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y);
aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11);
// The swizzle swaps the two y components before every component is scaled by -0.5 and offset by 0.5.
aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space
return true;
}
// Occlusion test for one instance against the depth pyramid.
// instanceIndex: index passed to _flw_unpackInstance.
// modelIndex: index into _flw_models supplying the bounding sphere and matrix index.
// Returns true when the instance should be drawn: either its bounding sphere could not be
// projected (no culling attempted), or the sphere's depth passes the comparison against the
// reduced pyramid depth.
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
    uint matrixIndex = _flw_models[modelIndex].matrixIndex;
    BoundingSphere sphere = _flw_models[modelIndex].boundingSphere;

    vec3 center;
    float radius;
    _flw_unpackBoundingSphere(sphere, center, radius);

    FlwInstance instance = _flw_unpackInstance(instanceIndex);
    flw_transformBoundingSphere(instance, center, radius);

    // A matrix index of 0 skips the extra pose transform.
    if (matrixIndex > 0) {
        transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
    }

    // projectSphere is fed the sphere after the view transform.
    transformBoundingSphere(flw_view, center, radius);

    vec4 aabb;
    if (!projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb)) {
        // No screen-space box available, so the instance is kept.
        return true;
    }

    float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth;
    float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight;

    // log2 is undefined for arguments <= 0, which a degenerate box would produce. Footprints
    // below one texel already clamp to level 0, so flooring the argument at 1.0 selects the
    // same level for every defined input.
    float footprint = max(max(width, height), 1.0);
    int level = clamp(int(ceil(log2(footprint))), 0, _flw_cullData.pyramidLevels);

    ivec2 levelSize = textureSize(_flw_depthPyramid, level);
    ivec4 levelSizePair = ivec4(levelSize, levelSize);

    // The box of a sphere straddling the screen edge extends past [0, 1]. texelFetch results are
    // undefined outside the level, so pin the corners to the nearest valid texel.
    ivec4 bounds = clamp(ivec4(aabb * vec4(levelSizePair)), ivec4(0), levelSizePair - ivec4(1));

    float depth01 = texelFetch(_flw_depthPyramid, bounds.xw, level).r;
    float depth11 = texelFetch(_flw_depthPyramid, bounds.zw, level).r;
    float depth10 = texelFetch(_flw_depthPyramid, bounds.zy, level).r;
    float depth00 = texelFetch(_flw_depthPyramid, bounds.xy, level).r;

    // useMin picks which reduction of the four corner samples is compared against.
    float depth;
    if (_flw_cullData.useMin == 0) {
        depth = max(max(depth00, depth01), max(depth10, depth11));
    } else {
        depth = min(min(depth00, depth01), min(depth10, depth11));
    }

    float depthSphere = 1. + _flw_cullData.znear / (center.z + radius);

    return depthSphere <= depth;
}
// Second culling pass. Each invocation takes one instance index queued by the first pass and,
// if it passes the occlusion test, appends it to its model's draw list.
void main() {
    uint queueSlot = gl_GlobalInvocationID.x;
    if (queueSlot >= _flw_lateCullDispatch.threadCount) {
        return;
    }

    uint instanceIndex = _flw_passTwoIndices[queueSlot];

    // The page index is the instance index with its low 5 bits dropped.
    uint descriptor = _flw_pageFrameDescriptors[instanceIndex >> 5];
    uint modelIndex = descriptor & _FLW_MODEL_INDEX_MASK;

    if (!_flw_isVisible(instanceIndex, modelIndex)) {
        return;
    }

    uint countBefore = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
    uint drawSlot = _flw_models[modelIndex].baseInstance + countBefore;
    _flw_drawIndices[drawSlot] = instanceIndex;
}

View file

@ -4,9 +4,15 @@
flat in uvec2 _flw_packedMaterial;
flat in uint _flw_instanceID;
layout(location = 1) out uint _flw_out_instanceID;
void main() {
_flw_unpackUint2x16(_flw_packedMaterial.x, _flw_uberFogIndex, _flw_uberCutoutIndex);
_flw_unpackMaterialProperties(_flw_packedMaterial.y, flw_material);
_flw_main();
_flw_main(_flw_instanceID);
_flw_out_instanceID = _flw_instanceID;
}

View file

@ -5,8 +5,8 @@
#include "flywheel:internal/indirect/light.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer TargetBuffer {
uint _flw_instanceIndices[];
layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict readonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};
layout(std430, binding = _FLW_DRAW_BUFFER_BINDING) restrict readonly buffer DrawBuffer {
@ -21,8 +21,14 @@ layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffe
uniform uint _flw_baseDraw;
// We read the visibility buffer for all culling groups into a single shared buffer.
// This offset is used to know where each culling group starts.
uniform uint _flw_visibilityWriteOffsetInstances = 0;
flat out uvec2 _flw_packedMaterial;
flat out uint _flw_instanceID;
#if __VERSION__ < 460
#define flw_baseInstance gl_BaseInstanceARB
#define flw_drawId gl_DrawIDARB
@ -46,10 +52,13 @@ void main() {
#ifdef _FLW_CRUMBLING
uint instanceIndex = flw_baseInstance;
#else
uint instanceIndex = _flw_instanceIndices[flw_baseInstance + gl_InstanceID];
uint instanceIndex = _flw_drawIndices[flw_baseInstance + gl_InstanceID];
#endif
FlwInstance instance = _flw_unpackInstance(instanceIndex);
_flw_main(instance, instanceIndex);
_flw_main(instance);
// Add 1 because a 0 instance id means null.
_flw_instanceID = _flw_visibilityWriteOffsetInstances + instanceIndex + 1;
}

View file

@ -0,0 +1,64 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
// 256 invocations per work group; main() maps them onto a 16x16 grid of 2x2 texel quads.
layout(local_size_x = 256) in;
// Unsigned integer texture holding one instance id per texel; an id of 0 is treated as "no instance".
layout(binding = 0) uniform usampler2D visBuffer;
// Bitset with one bit per instance: word = (id - 1) >> 5, bit = (id - 1) & 31. Bits are set with atomicOr.
layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buffer LastFrameVisibilityBuffer {
uint _flw_lastFrameVisibility[];
};
// Returns `count` bits of `e` starting at bit `offset`, moved down to bit 0.
uint extractBits(uint e, uint offset, uint count) {
    uint lowMask = (1u << count) - 1u;
    uint shifted = e >> offset;
    return shifted & lowMask;
}
// Returns `e` with the `count` bits starting at bit `offset` replaced by the low `count` bits of `newbits`.
uint insertBits(uint e, uint newbits, uint offset, uint count) {
    // Mask covering exactly the field being replaced.
    uint fieldMask = ((1u << count) - 1u) << offset;
    // Clear the field in the original value first, then drop the new bits in.
    uint cleared = e & ~fieldMask;
    uint field = (newbits << offset) & fieldMask;
    return cleared | field;
}
// Scatters the bits of `a` into a 2D coordinate:
//   x takes bits 0, 3 and 4 of `a` (as its bits 0, 1 and 2),
//   y takes bits 1, 2 and 5 of `a` (as its bits 0, 1 and 2).
uvec2 remap_for_wave_reduction(uint a) {
    // Bits 2..4 of `a`, with the lowest one overwritten by bit 0 of `a`.
    uint xHigh = extractBits(a, 2u, 3u);
    uint x = insertBits(xHigh, a, 0u, 1u);

    // Bits 3..5 of `a`, with the lowest two overwritten by bits 1..2 of `a`.
    uint yHigh = extractBits(a, 3u, 3u);
    uint yLow = extractBits(a, 1u, 2u);
    uint y = insertBits(yHigh, yLow, 0u, 2u);

    return uvec2(x, y);
}
// Marks one instance as visible in _flw_lastFrameVisibility.
// instanceID: id read from the visibility texture; 0 is the null id and is ignored.
void emit(uint instanceID) {
    if (instanceID != 0u) {
        // Ids are stored offset by one to leave room for null, so undo that to get the real index.
        uint instanceIndex = instanceID - 1;

        // 32 instances per word: high bits select the word, low 5 bits select the bit.
        uint word = instanceIndex >> 5;
        uint bit = 1u << (instanceIndex & 31u);

        atomicOr(_flw_lastFrameVisibility[word], bit);
    }
}
// Reads one 2x2 quad of instance ids from visBuffer per invocation and flags each id as visible.
// A work group covers a 32x32 texel tile: 256 invocations arranged as 16x16 quads.
// NOTE(review): no check against textureSize(visBuffer, 0) is made here; if the dispatch covers
// texels past the texture edge those fetches are out of range -- confirm the host sizes the
// dispatch or the texture so that cannot happen.
void main() {
    uint local = gl_LocalInvocationIndex;

    // The low 6 bits pick a cell in an 8x8 block; bits 6 and 7 pick which 8x8 block in x and y.
    uvec2 cell = remap_for_wave_reduction(local % 64u);
    uint quadX = cell.x + 8u * ((local >> 6u) % 2u);
    uint quadY = cell.y + 8u * (local >> 7u);

    ivec2 origin = ivec2(gl_WorkGroupID.xy) * 32 + ivec2(quadX, quadY) * 2;

    uint id00 = texelFetch(visBuffer, origin, 0).r;
    uint id01 = texelFetchOffset(visBuffer, origin, 0, ivec2(0, 1)).r;
    uint id10 = texelFetchOffset(visBuffer, origin, 0, ivec2(1, 0)).r;
    uint id11 = texelFetchOffset(visBuffer, origin, 0, ivec2(1, 1)).r;

    // The first texel is always reported.
    emit(id00);

    // When the whole quad shows the same instance, one report is enough.
    bool uniformQuad = (id00 == id01) && (id01 == id10) && (id10 == id11);
    if (!uniformQuad) {
        emit(id01);
        emit(id10);
        emit(id11);
    }
}

View file

@ -0,0 +1,18 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
#include "flywheel:internal/indirect/model_descriptor.glsl"
// Work group width comes from _FLW_SUBGROUP_SIZE, which is defined outside this file.
layout(local_size_x = _FLW_SUBGROUP_SIZE) in;
// Model descriptors; this shader only writes their instanceCount member.
layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict writeonly buffer ModelBuffer {
ModelDescriptor models[];
};
// Resets the instance count of one model per invocation.
void main() {
    uint slot = gl_GlobalInvocationID.x;

    // Invocations past the end of the model array have nothing to reset.
    if (slot < models.length()) {
        models[slot].instanceCount = 0;
    }
}

View file

@ -3,9 +3,11 @@
uniform uvec2 _flw_packedMaterial;
flat in uint _flw_instanceID;
void main() {
_flw_unpackUint2x16(_flw_packedMaterial.x, _flw_uberFogIndex, _flw_uberCutoutIndex);
_flw_unpackMaterialProperties(_flw_packedMaterial.y, flw_material);
_flw_main();
_flw_main(_flw_instanceID);
}

View file

@ -10,6 +10,8 @@ uniform mat4 _flw_modelMatrixUniform;
uniform mat3 _flw_normalMatrixUniform;
#endif
flat out uint _flw_instanceID;
void main() {
_flw_unpackMaterialProperties(_flw_packedMaterial.y, flw_material);
@ -20,5 +22,7 @@ void main() {
_flw_normalMatrix = _flw_normalMatrixUniform;
#endif
_flw_main(instance, uint(gl_InstanceID));
_flw_main(instance);
_flw_instanceID = gl_InstanceID;
}