Seeing blue

- Optimize read visibility by having each invocation read a 2x2 area and
  coalescing atomicOrs when all 4 texels are equal
- Also use the fancy remap function for better texture cache locality
This commit is contained in:
Jozufozu 2024-09-13 22:39:31 -07:00
parent 0151364b8a
commit ba3d84b5ae
2 changed files with 41 additions and 4 deletions

View file

@ -16,7 +16,7 @@ import it.unimi.dsi.fastutil.ints.IntSet;
import net.minecraft.client.Minecraft; import net.minecraft.client.Minecraft;
public class VisibilityBuffer { public class VisibilityBuffer {
private static final int READ_GROUP_SIZE = 16; private static final int READ_GROUP_SIZE = 32;
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1; private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
private final GlProgram readVisibilityProgram; private final GlProgram readVisibilityProgram;

View file

@ -1,6 +1,6 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl" #include "flywheel:internal/indirect/buffer_bindings.glsl"
layout(local_size_x = 16, local_size_y = 16) in; layout(local_size_x = 256) in;
layout(binding = 0) uniform usampler2D visBuffer; layout(binding = 0) uniform usampler2D visBuffer;
@ -8,9 +8,24 @@ layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buf
uint _flw_lastFrameVisibility[]; uint _flw_lastFrameVisibility[];
}; };
void main() { uint extractBits(uint e, uint offset, uint count) {
uint instanceID = texelFetch(visBuffer, ivec2(gl_GlobalInvocationID.xy), 0).r; return (e >> offset) & ((1u << count) - 1u);
}
// Returns `e` with the `count`-bit field starting at bit `offset` replaced by
// the low `count` bits of `newbits`. Bits of `e` outside that field are kept.
// NOTE: the mask is built with plain shifts, so `count` and `offset` are
// expected to stay below 32 (larger shift amounts are undefined in GLSL).
uint insertBits(uint e, uint newbits, uint offset, uint count) {
    // Mask covering the destination field, already moved into position.
    uint fieldMask = ((1u << count) - 1u) << offset;
    // Clear the destination field, then drop the new bits into the gap.
    uint kept = e & ~fieldMask;
    uint inserted = (newbits << offset) & fieldMask;
    return kept | inserted;
}
// Scatters the bits of a linear index `a` into a 2D coordinate.
// Reading the bit operations below, with a[i] meaning bit i of `a`:
//   x = { a[4], a[3], a[0] }   (x bit 2, bit 1, bit 0)
//   y = { a[5], a[2], a[1] }   (y bit 2, bit 1, bit 0)
// For `a` in [0, 64) both components therefore land in [0, 8).
uvec2 remap_for_wave_reduction(uint a) {
    // Bits 2..4 of `a`; the lowest of them is overwritten with a[0] below.
    uint xUpper = extractBits(a, 2u, 3u);
    // Bits 3..5 of `a`; the lowest two are overwritten with a[1..2] below.
    uint yUpper = extractBits(a, 3u, 3u);
    uint yLower = extractBits(a, 1u, 2u);

    uint x = insertBits(xUpper, a, 0u, 1u);
    uint y = insertBits(yUpper, yLower, 0u, 2u);
    return uvec2(x, y);
}
void emit(uint instanceID) {
// Null instance id. // Null instance id.
if (instanceID == 0) { if (instanceID == 0) {
return; return;
@ -25,3 +40,25 @@ void main() {
atomicOr(_flw_lastFrameVisibility[index], mask); atomicOr(_flw_lastFrameVisibility[index], mask);
} }
// Each invocation inspects one 2x2 block of texels in `visBuffer` and passes
// the instance ids it finds to emit(). If all four texels hold the same id,
// emit() is called once instead of four times.
void main() {
    uint lane = gl_LocalInvocationIndex;

    // The low 6 bits of the index pick a position inside an 8x8 tile of
    // invocations; bit 6 and the bits from 7 up pick which tile (x and y).
    uvec2 inTile = remap_for_wave_reduction(lane % 64u);
    uvec2 tileOrigin = uvec2(8u * ((lane >> 6u) % 2u), 8u * (lane >> 7u));
    uvec2 cell = inTile + tileOrigin;

    // A work group advances 32 texels per axis, and each invocation covers a
    // 2x2 texel footprint inside it.
    // NOTE(review): no bounds check against the texture size is visible here;
    // confirm the dispatch size / texture dimensions keep every fetch in range.
    ivec2 tex = ivec2(gl_WorkGroupID.xy) * 32 + ivec2(cell) * 2;

    uint id00 = texelFetch(visBuffer, tex, 0).r;
    uint id10 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 0)).r;
    uint id01 = texelFetchOffset(visBuffer, tex, 0, ivec2(0, 1)).r;
    uint id11 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 1)).r;

    // The top-left texel is reported in every case.
    emit(id00);

    // Only report the remaining three when the quad is not uniform.
    bool uniformQuad = id00 == id01 && id01 == id10 && id10 == id11;
    if (!uniformQuad) {
        emit(id01);
        emit(id10);
        emit(id11);
    }
}