0
0
Fork 0
mirror of https://github.com/Jozufozu/Flywheel.git synced 2025-02-19 00:15:33 +01:00

Seeing blue

- Optimize read visibility by having each invocation read a 2x2 area and
  coalescing atomicOrs when all 4 texels are equal
- Also use the fancy remap function for better texture cache locality
This commit is contained in:
Jozufozu 2024-09-13 22:39:31 -07:00
parent 0151364b8a
commit ba3d84b5ae
2 changed files with 41 additions and 4 deletions
common/src/backend
java/dev/engine_room/flywheel/backend/engine/indirect
resources/assets/flywheel/flywheel/internal/indirect

View file

@ -16,7 +16,7 @@ import it.unimi.dsi.fastutil.ints.IntSet;
import net.minecraft.client.Minecraft;
public class VisibilityBuffer {
private static final int READ_GROUP_SIZE = 16;
private static final int READ_GROUP_SIZE = 32;
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
private final GlProgram readVisibilityProgram;

View file

@ -1,6 +1,6 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
layout(local_size_x = 16, local_size_y = 16) in;
layout(local_size_x = 256) in;
layout(binding = 0) uniform usampler2D visBuffer;
@ -8,9 +8,24 @@ layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buf
uint _flw_lastFrameVisibility[];
};
void main() {
uint instanceID = texelFetch(visBuffer, ivec2(gl_GlobalInvocationID.xy), 0).r;
// Returns the `count`-bit wide field of `e` that starts at bit `offset`,
// shifted down so the field occupies the low bits of the result.
// NOTE(review): the mask is built from (1u << count); GLSL leaves shifts by 32
// or more undefined, so callers are presumably expected to pass count < 32.
uint extractBits(uint e, uint offset, uint count) {
    uint fieldMask = (1u << count) - 1u;
    uint aligned = e >> offset;
    return aligned & fieldMask;
}
// Returns `e` with the `count`-bit wide field starting at bit `offset`
// overwritten by the low `count` bits of `newbits`. Bits of `e` outside the
// field are passed through unchanged.
uint insertBits(uint e, uint newbits, uint offset, uint count) {
    uint fieldMask = (1u << count) - 1u;
    // Clear the destination field so the OR below cannot mix old and new bits.
    uint cleared = e & ~(fieldMask << offset);
    // Drop any bits of newbits above the field width, then move them into place.
    uint field = (newbits & fieldMask) << offset;
    return cleared | field;
}
// Rearranges the bits of a linear index `a` into a 2D coordinate.
// Writing a as bits a5..a0, the result is:
//   x = (a4, a3, a0)
//   y = (a5, a2, a1)
// so for a in [0, 64) both components lie in [0, 8), and every run of four
// consecutive indices (a1, a0 varying) covers one 2x2 square.
uvec2 remap_for_wave_reduction(uint a) {
    // Start from bits 2..4 of a, then replace the lowest bit with bit 0 of a.
    uint x = insertBits(extractBits(a, 2u, 3u), a, 0u, 1u);
    // Start from bits 3..5 of a, then replace the two lowest bits with bits 1..2 of a.
    uint y = insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u);
    return uvec2(x, y);
}
void emit(uint instanceID) {
// Null instance id.
if (instanceID == 0) {
return;
@ -25,3 +40,25 @@ void main() {
atomicOr(_flw_lastFrameVisibility[index], mask);
}
// Each invocation reads a 2x2 quad of texels from visBuffer and forwards the
// instance ids it finds to emit(). When all four texels hold the same id only
// one emit() call is made for the whole quad.
void main() {
    uint localIndex = gl_LocalInvocationIndex;

    // The low 6 bits pick a cell inside an 8x8 sub-block; bit 6 selects the
    // sub-block column and the remaining high bits select the sub-block row.
    // For a local index in [0, 256) this puts each component in [0, 16).
    uvec2 cell = remap_for_wave_reduction(localIndex % 64u);
    cell.x += 8u * ((localIndex >> 6u) % 2u);
    cell.y += 8u * (localIndex >> 7u);

    // A work group advances 32 texels per axis and each cell owns a 2x2 quad.
    // NOTE(review): nothing here bounds-checks against the texture size —
    // presumably the dispatch/texture dimensions keep these fetches in range; confirm.
    ivec2 quadOrigin = ivec2(gl_WorkGroupID.xy) * 32 + ivec2(cell) * 2;

    uint id00 = texelFetch(visBuffer, quadOrigin, 0).r;
    uint id10 = texelFetchOffset(visBuffer, quadOrigin, 0, ivec2(1, 0)).r;
    uint id01 = texelFetchOffset(visBuffer, quadOrigin, 0, ivec2(0, 1)).r;
    uint id11 = texelFetchOffset(visBuffer, quadOrigin, 0, ivec2(1, 1)).r;

    // The top-left id is emitted in every case.
    emit(id00);

    // The remaining three are only emitted when the quad is not uniform.
    bool quadIsUniform = id00 == id01 && id01 == id10 && id10 == id11;
    if (!quadIsUniform) {
        emit(id01);
        emit(id10);
        emit(id11);
    }
}