mirror of
https://github.com/Jozufozu/Flywheel.git
synced 2025-02-19 00:15:33 +01:00
Seeing blue
- Optimize read visibility by having each invocation read a 2x2 area and coalescing atomicOrs when all 4 texels are equal
- Also use the fancy remap function for better texture cache locality
This commit is contained in:
parent
0151364b8a
commit
ba3d84b5ae
2 changed files with 41 additions and 4 deletions
common/src/backend
java/dev/engine_room/flywheel/backend/engine/indirect
resources/assets/flywheel/flywheel/internal/indirect
|
@ -16,7 +16,7 @@ import it.unimi.dsi.fastutil.ints.IntSet;
|
|||
import net.minecraft.client.Minecraft;
|
||||
|
||||
public class VisibilityBuffer {
|
||||
private static final int READ_GROUP_SIZE = 16;
|
||||
private static final int READ_GROUP_SIZE = 32;
|
||||
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
|
||||
|
||||
private final GlProgram readVisibilityProgram;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include "flywheel:internal/indirect/buffer_bindings.glsl"
|
||||
|
||||
layout(local_size_x = 16, local_size_y = 16) in;
|
||||
layout(local_size_x = 256) in;
|
||||
|
||||
layout(binding = 0) uniform usampler2D visBuffer;
|
||||
|
||||
|
@ -8,9 +8,24 @@ layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buf
|
|||
uint _flw_lastFrameVisibility[];
|
||||
};
|
||||
|
||||
void main() {
|
||||
uint instanceID = texelFetch(visBuffer, ivec2(gl_GlobalInvocationID.xy), 0).r;
|
||||
// Returns the `count`-bit wide field of `e` that starts at bit `offset`,
// shifted down so the field's lowest bit lands in bit 0 of the result.
// `count` must be below 32: the mask is built with `1u << count`.
uint extractBits(uint e, uint offset, uint count) {
    uint lowMask = (1u << count) - 1u;
    uint shifted = e >> offset;
    return shifted & lowMask;
}
|
||||
|
||||
// Returns `e` with the `count`-bit wide field starting at bit `offset`
// replaced by the low `count` bits of `newbits`. All other bits of `e`
// are passed through untouched.
// `count` must be below 32: the mask is built with `1u << count`.
uint insertBits(uint e, uint newbits, uint offset, uint count) {
    uint lowMask = (1u << count) - 1u;

    // Clear the destination field so stale bits cannot leak into the result.
    uint cleared = e & ~(lowMask << offset);

    // Keep only the bits that fit in the field, then move them into place.
    uint placed = (newbits & lowMask) << offset;

    return cleared | placed;
}
|
||||
|
||||
// Scatters the low six bits of `a` into a 2D coordinate:
//   x = (a[4], a[3], a[0])   (most significant bit first)
//   y = (a[5], a[2], a[1])
// For a in [0, 64) this yields every cell of an 8x8 grid exactly once, and
// each run of four consecutive values of `a` covers one aligned 2x2 quad
// (0 -> (0,0), 1 -> (1,0), 2 -> (0,1), 3 -> (1,1), ...).
uvec2 remap_for_wave_reduction(uint a) {
    // Start from bits 2..4 of `a`, then overwrite the lowest bit with a[0].
    uint xUpper = extractBits(a, 2u, 3u);
    uint x = insertBits(xUpper, a, 0u, 1u);

    // Start from bits 3..5 of `a`, then overwrite the two lowest bits with a[1..2].
    uint yUpper = extractBits(a, 3u, 3u);
    uint yLower = extractBits(a, 1u, 2u);
    uint y = insertBits(yUpper, yLower, 0u, 2u);

    return uvec2(x, y);
}
|
||||
|
||||
void emit(uint instanceID) {
|
||||
// Null instance id.
|
||||
if (instanceID == 0) {
|
||||
return;
|
||||
|
@ -25,3 +40,25 @@ void main() {
|
|||
|
||||
atomicOr(_flw_lastFrameVisibility[index], mask);
|
||||
}
|
||||
|
||||
// Entry point: each invocation reads one 2x2 quad of the visibility buffer and
// reports the instance IDs found there through emit().
//
// Layout: the local invocation index is split into four groups of 64. Each
// group is remapped onto an 8x8 grid of quads, and the four groups are tiled
// 2x2, so (x, y) spans a 16x16 grid of quads. With two texels per quad along
// each axis, one work group covers a 32x32 texel tile.
void main() {
    uint lane = gl_LocalInvocationIndex;

    uvec2 sub_xy = remap_for_wave_reduction(lane % 64u);
    uint x = sub_xy.x + 8u * ((lane >> 6u) % 2u);
    uint y = sub_xy.y + 8u * (lane >> 7u);

    ivec2 tex = ivec2(gl_WorkGroupID.xy) * 32 + ivec2(x, y) * 2;

    // The tile size is fixed at 32x32 but the texture need not be a multiple
    // of that, so tiles on the right/bottom edge can reach past the texture
    // (NOTE(review): assumes the host rounds the dispatch up - confirm).
    // Out-of-range texel fetches give undefined results, which would then be
    // used as instance IDs, so keep every fetch inside the texture.
    ivec2 lastTexel = textureSize(visBuffer, 0) - 1;
    if (any(greaterThan(tex, lastTexel))) {
        // The whole quad lies outside the texture; nothing to report.
        return;
    }

    // When only part of the quad is outside, clamp the far corner onto the
    // last valid texel. That re-reads a texel already covered by this quad,
    // which is harmless: reporting the same instance twice sets the same bit.
    ivec2 far = min(tex + 1, lastTexel);

    uint instanceID01 = texelFetch(visBuffer, ivec2(tex.x, far.y), 0).r;
    uint instanceID11 = texelFetch(visBuffer, far, 0).r;
    uint instanceID10 = texelFetch(visBuffer, ivec2(far.x, tex.y), 0).r;
    uint instanceID00 = texelFetch(visBuffer, tex, 0).r;

    if (instanceID00 == instanceID01 && instanceID01 == instanceID10 && instanceID10 == instanceID11) {
        // All four texels agree: a single emit covers the whole quad.
        emit(instanceID00);
    } else {
        emit(instanceID00);
        emit(instanceID01);
        emit(instanceID10);
        emit(instanceID11);
    }
}
|
||||
|
|
Loading…
Add table
Reference in a new issue