mirror of
https://github.com/Jozufozu/Flywheel.git
synced 2025-02-20 17:05:32 +01:00
Seeing blue
- Optimize read visibility by having each invocation read a 2x2 area and coalescing atomicOrs when all 4 texels are equal
- Also use the fancy remap function for better texture cache locality
This commit is contained in:
parent
0151364b8a
commit
ba3d84b5ae
2 changed files with 41 additions and 4 deletions
|
@ -16,7 +16,7 @@ import it.unimi.dsi.fastutil.ints.IntSet;
|
||||||
import net.minecraft.client.Minecraft;
|
import net.minecraft.client.Minecraft;
|
||||||
|
|
||||||
public class VisibilityBuffer {
|
public class VisibilityBuffer {
|
||||||
private static final int READ_GROUP_SIZE = 16;
|
private static final int READ_GROUP_SIZE = 32;
|
||||||
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
|
private static final int ATTACHMENT = GL30.GL_COLOR_ATTACHMENT1;
|
||||||
|
|
||||||
private final GlProgram readVisibilityProgram;
|
private final GlProgram readVisibilityProgram;
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
#include "flywheel:internal/indirect/buffer_bindings.glsl"
|
#include "flywheel:internal/indirect/buffer_bindings.glsl"
|
||||||
|
|
||||||
layout(local_size_x = 16, local_size_y = 16) in;
|
layout(local_size_x = 256) in;
|
||||||
|
|
||||||
layout(binding = 0) uniform usampler2D visBuffer;
|
layout(binding = 0) uniform usampler2D visBuffer;
|
||||||
|
|
||||||
|
@ -8,9 +8,24 @@ layout(std430, binding = _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING) restrict buf
|
||||||
uint _flw_lastFrameVisibility[];
|
uint _flw_lastFrameVisibility[];
|
||||||
};
|
};
|
||||||
|
|
||||||
void main() {
|
// Returns the `count`-bit field of `e` that starts at bit `offset`, shifted
// down so the field occupies the low bits of the result.
// The mask is built as (1u << count) - 1u, so `count` must be below 32;
// every call visible in this shader passes 3 or less.
uint extractBits(uint e, uint offset, uint count) {
|
||||||
uint instanceID = texelFetch(visBuffer, ivec2(gl_GlobalInvocationID.xy), 0).r;
|
return (e >> offset) & ((1u << count) - 1u);
|
||||||

}
|
||||||
|
|
||||||
|
// Returns `e` with the `count`-bit field starting at bit `offset` replaced by
// the low `count` bits of `newbits`; all other bits of `e` are preserved.
// Bits of `newbits` above `count` are masked off before insertion.
// The mask is built as (1u << count) - 1u, so `count` must be below 32;
// every call visible in this shader passes 1 or 2.
uint insertBits(uint e, uint newbits, uint offset, uint count) {
|
||||||
|
uint countMask = ((1u << count) - 1u);
|
||||||
|
// zero out the bits we're going to replace first
|
||||||
|
return (e & ~(countMask << offset)) | ((newbits & countMask) << offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maps a linear index `a` to an (x, y) coordinate whose components are each
// 3 bits wide (0..7), i.e. a position inside an 8x8 tile. Only bits 0..5 of
// `a` influence the result.
//
// Writing a's bits as a5 a4 a3 a2 a1 a0:
//   x = a4 a3 a0
//   y = a5 a2 a1
// so four consecutive indices (varying a1 a0) land on a 2x2 quad, and each
// group of 16 consecutive indices covers a 2-wide by 8-tall column.
uvec2 remap_for_wave_reduction(uint a) {
|
||||||
|
return uvec2(
|
||||||
|
// x: take bits [4:2] of a, then overwrite the lowest bit with a's bit 0.
insertBits(extractBits(a, 2u, 3u), a, 0u, 1u),
|
||||||
|
// y: take bits [5:3] of a, then overwrite the low two bits with a's bits [2:1].
insertBits(extractBits(a, 3u, 3u), extractBits(a, 1u, 2u), 0u, 2u)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
void emit(uint instanceID) {
|
||||||
// Null instance id.
|
// Null instance id.
|
||||||
if (instanceID == 0) {
|
if (instanceID == 0) {
|
||||||
return;
|
return;
|
||||||
|
@ -25,3 +40,25 @@ void main() {
|
||||||
|
|
||||||
atomicOr(_flw_lastFrameVisibility[index], mask);
|
atomicOr(_flw_lastFrameVisibility[index], mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compute entry point. Each invocation reads a 2x2 block of texels from
// visBuffer and passes the instance ids found there to emit().
//
// With the 256-wide workgroup declared at the top of this shader, the local
// index is split into four groups of 64; each group is remapped onto an 8x8
// tile and the four tiles are arranged 2x2, giving a 16x16 grid of
// invocations. Every invocation owns a 2x2 texel block, so one workgroup
// covers a 32x32 texel region.
//
// NOTE(review): no bounds check against the texture size is visible here;
// confirm the dispatch dimensions and buffer size keep every fetch in range.
void main() {
|
||||||
|
// Position inside this invocation's 8x8 tile, from the low 6 bits of the local index.
uvec2 sub_xy = remap_for_wave_reduction(gl_LocalInvocationIndex % 64u);
|
||||||
|
// Bit 6 of the local index selects the tile column, bit 7 and above the tile row.
uint x = sub_xy.x + 8u * ((gl_LocalInvocationIndex >> 6u) % 2u);
|
||||||
|
uint y = sub_xy.y + 8u * (gl_LocalInvocationIndex >> 7u);
|
||||||
|

||||||
|
// Top-left texel of this invocation's 2x2 block: 32 texels per workgroup, 2 per invocation.
ivec2 tex = ivec2(gl_WorkGroupID.xy) * 32 + ivec2(x, y) * 2;
|
||||||
|

||||||
|
// Fetch the four texels of the block; the suffix is the (x, y) offset from tex.
uint instanceID01 = texelFetchOffset(visBuffer, tex, 0, ivec2(0, 1)).r;
|
||||||
|
uint instanceID11 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 1)).r;
|
||||||
|
uint instanceID10 = texelFetchOffset(visBuffer, tex, 0, ivec2(1, 0)).r;
|
||||||
|
uint instanceID00 = texelFetch(visBuffer, tex, 0).r;
|
||||||
|

||||||
|
// When all four texels hold the same id, a single emit() is enough.
if (instanceID00 == instanceID01 && instanceID01 == instanceID10 && instanceID10 == instanceID11) {
|
||||||
|
emit(instanceID00);
|
||||||
|
} else {
|
||||||
|
// Mixed block: emit every texel's id individually.
emit(instanceID00);
|
||||||
|
emit(instanceID01);
|
||||||
|
emit(instanceID10);
|
||||||
|
emit(instanceID11);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue