mirror of
https://github.com/Jozufozu/Flywheel.git
synced 2024-11-14 22:43:56 +01:00
Rapid descent
- Implement single (but actually 2) pass downsampling
This commit is contained in:
parent
0bfaac7154
commit
861009ed11
@ -34,7 +34,9 @@ public class IndirectPrograms extends AtomicReferenceCounted {
|
||||
private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl");
|
||||
private static final ResourceLocation READ_VISIBILITY_SHADER_MAIN = Flywheel.rl("internal/indirect/read_visibility.glsl");
|
||||
private static final ResourceLocation ZERO_MODELS_SHADER_MAIN = Flywheel.rl("internal/indirect/zero_models.glsl");
|
||||
public static final List<ResourceLocation> UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN, ZERO_MODELS_SHADER_MAIN);
|
||||
// Location of the compute shader returned by getDownsampleFirstProgram().
private static final ResourceLocation DOWNSAMPLE_FIRST = Flywheel.rl("internal/indirect/downsample_first.glsl");
|
||||
// Location of the compute shader returned by getDownsampleSecondProgram().
private static final ResourceLocation DOWNSAMPLE_SECOND = Flywheel.rl("internal/indirect/downsample_second.glsl");
|
||||
// Every utility shader entry point, now including both downsample passes.
public static final List<ResourceLocation> UTIL_SHADERS = List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN, READ_VISIBILITY_SHADER_MAIN, ZERO_MODELS_SHADER_MAIN, DOWNSAMPLE_FIRST, DOWNSAMPLE_SECOND);
|
||||
|
||||
private static final Compile<InstanceType<?>> CULL = new Compile<>();
|
||||
private static final Compile<ResourceLocation> UTIL = new Compile<>();
|
||||
@ -205,6 +207,14 @@ public class IndirectPrograms extends AtomicReferenceCounted {
|
||||
return utils.get(DEPTH_REDUCE_SHADER_MAIN);
|
||||
}
|
||||
|
||||
public GlProgram getDownsampleFirstProgram() {
|
||||
return utils.get(DOWNSAMPLE_FIRST);
|
||||
}
|
||||
|
||||
public GlProgram getDownsampleSecondProgram() {
|
||||
return utils.get(DOWNSAMPLE_SECOND);
|
||||
}
|
||||
|
||||
public GlProgram getReadVisibilityProgram() {
|
||||
return utils.get(READ_VISIBILITY_SHADER_MAIN);
|
||||
}
|
||||
|
@ -14,14 +14,18 @@ public class DepthPyramid {
|
||||
private static final int GROUP_SIZE = 16;
|
||||
|
||||
private final GlProgram depthReduceProgram;
|
||||
private final GlProgram downsampleFirstProgram;
|
||||
private final GlProgram downsampleSecondProgram;
|
||||
|
||||
public int pyramidTextureId = -1;
|
||||
|
||||
private int lastWidth = -1;
|
||||
private int lastHeight = -1;
|
||||
|
||||
public DepthPyramid(GlProgram depthReduceProgram) {
|
||||
/**
 * Stores the compute programs this pyramid dispatches; nothing is allocated here.
 *
 * @param depthReduceProgram      program kept in {@code depthReduceProgram}
 *                                (presumably used by {@code generate()} - confirm).
 * @param downsampleFirstProgram  program bound for the first dispatch of {@code generateSPD()}.
 * @param downsampleSecondProgram program bound for the second dispatch of {@code generateSPD()}.
 */
public DepthPyramid(GlProgram depthReduceProgram, GlProgram downsampleFirstProgram, GlProgram downsampleSecondProgram) {
    this.downsampleSecondProgram = downsampleSecondProgram;
    this.downsampleFirstProgram = downsampleFirstProgram;
    this.depthReduceProgram = depthReduceProgram;
}
|
||||
|
||||
public void generate() {
|
||||
@ -61,6 +65,54 @@ public class DepthPyramid {
|
||||
}
|
||||
}
|
||||
|
||||
public void generateSPD() {
|
||||
var mainRenderTarget = Minecraft.getInstance()
|
||||
.getMainRenderTarget();
|
||||
|
||||
int width = mip0Size(mainRenderTarget.width);
|
||||
int height = mip0Size(mainRenderTarget.height);
|
||||
|
||||
int mipLevels = getImageMipLevels(width, height);
|
||||
|
||||
createPyramidMips(mipLevels, width, height);
|
||||
|
||||
int depthBufferId = mainRenderTarget.getDepthTextureId();
|
||||
|
||||
GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT);
|
||||
|
||||
GlTextureUnit.T0.makeActive();
|
||||
GlStateManager._bindTexture(depthBufferId);
|
||||
|
||||
downsampleFirstProgram.bind();
|
||||
downsampleFirstProgram.setUInt("max_mip_level", mipLevels);
|
||||
|
||||
for (int i = 0; i < Math.min(6, mipLevels); i++) {
|
||||
GL46.glBindImageTexture(i + 1, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F);
|
||||
}
|
||||
|
||||
GL46.glDispatchCompute(MoreMath.ceilingDiv(width << 1, 64), MoreMath.ceilingDiv(height << 1, 64), 1);
|
||||
|
||||
if (mipLevels < 7) {
|
||||
GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
GL46.glMemoryBarrier(GL46.GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
|
||||
|
||||
downsampleSecondProgram.bind();
|
||||
downsampleSecondProgram.setUInt("max_mip_level", mipLevels);
|
||||
|
||||
GL46.glBindImageTexture(0, pyramidTextureId, 5, false, 0, GL32.GL_READ_ONLY, GL32.GL_R32F);
|
||||
for (int i = 6; i < Math.min(12, mipLevels); i++) {
|
||||
GL46.glBindImageTexture(i - 5, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F);
|
||||
}
|
||||
|
||||
GL46.glDispatchCompute(1, 1, 1);
|
||||
|
||||
GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
||||
public void delete() {
|
||||
if (pyramidTextureId != -1) {
|
||||
GL32.glDeleteTextures(pyramidTextureId);
|
||||
|
@ -4,6 +4,7 @@ import static org.lwjgl.opengl.GL11.GL_TRIANGLES;
|
||||
import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT;
|
||||
import static org.lwjgl.opengl.GL30.glBindBufferRange;
|
||||
import static org.lwjgl.opengl.GL40.glDrawElementsIndirect;
|
||||
import static org.lwjgl.opengl.GL42.GL_BUFFER_UPDATE_BARRIER_BIT;
|
||||
import static org.lwjgl.opengl.GL42.glMemoryBarrier;
|
||||
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT;
|
||||
import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER;
|
||||
@ -67,7 +68,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
||||
lightBuffers = new LightBuffers();
|
||||
matrixBuffer = new MatrixBuffer();
|
||||
|
||||
depthPyramid = new DepthPyramid(programs.getDepthReduceProgram());
|
||||
depthPyramid = new DepthPyramid(programs.getDepthReduceProgram(), programs.getDownsampleFirstProgram(), programs.getDownsampleSecondProgram());
|
||||
visibilityBuffer = new VisibilityBuffer(programs.getReadVisibilityProgram());
|
||||
}
|
||||
|
||||
@ -124,7 +125,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
|
||||
|
||||
submitDraws();
|
||||
|
||||
depthPyramid.generate();
|
||||
depthPyramid.generateSPD();
|
||||
|
||||
programs.getZeroModelProgram()
|
||||
.bind();
|
||||
|
@ -0,0 +1,31 @@
|
||||
layout(local_size_x = 256) in;
|
||||
|
||||
uniform uint max_mip_level;
|
||||
|
||||
/// Generates a hierarchical depth buffer.
|
||||
/// Based on FidelityFX SPD v2.1 https://github.com/GPUOpen-LibrariesAndSDKs/FidelityFX-SDK/blob/d7531ae47d8b36a5d4025663e731a47a38be882f/sdk/include/FidelityFX/gpu/spd/ffx_spd.h#L528
|
||||
/// Based on Bevy's more readable implementation https://github.com/JMS55/bevy/blob/ca2c8e63b9562f88c8cd7e1d88a17a4eea20aaf4/crates/bevy_pbr/src/meshlet/downsample_depth.wgsl
|
||||
|
||||
shared float[16][16] intermediate_memory;
|
||||
|
||||
/// Returns `count` bits of `e` starting at bit `offset`, moved down to bit 0.
/// Uses the built-in bitfield op: the hand-written mask `(1u << count) - 1u`
/// is undefined for count == 32, whereas bitfieldExtract handles it.
uint extractBits(uint e, uint offset, uint count) {
    return bitfieldExtract(e, int(offset), int(count));
}
|
||||
|
||||
/// Returns `e` with the `count` bits starting at bit `offset` replaced by the
/// low `count` bits of `newbits`; all other bits of `e` are kept.
/// Uses the built-in bitfield op: the hand-written mask `(1u << count) - 1u`
/// is undefined for count == 32, whereas bitfieldInsert handles it.
uint insertBits(uint e, uint newbits, uint offset, uint count) {
    return bitfieldInsert(e, newbits, int(offset), int(count));
}
|
||||
|
||||
/// Rearranges the bits of a linear index into a 2D coordinate.
/// With a = {.., a5, a4, a3, a2, a1, a0} the result is
///   x = {a4, a3, a0}
///   y = {a5, a2, a1}
/// so for a in [0, 64) every run of four consecutive indices lands on a 2x2 quad of an 8x8 grid.
uvec2 remap_for_wave_reduction(uint a) {
    uint x_upper = extractBits(a, 2u, 3u);
    uint y_upper = extractBits(a, 3u, 3u);
    uint y_lower = extractBits(a, 1u, 2u);

    // Only the low bit of `a` is taken for x; insertBits masks the rest away.
    uint remapped_x = insertBits(x_upper, a, 0u, 1u);
    uint remapped_y = insertBits(y_upper, y_lower, 0u, 2u);

    return uvec2(remapped_x, remapped_y);
}
|
||||
|
||||
/// Returns the largest of the four components of `v`.
float reduce_4(vec4 v) {
    float max_xy = max(v.x, v.y);
    float max_zw = max(v.z, v.w);
    return max(max_xy, max_zw);
}
|
||||
|
@ -0,0 +1,150 @@
|
||||
#include "flywheel:internal/indirect/downsample.glsl"
|
||||
|
||||
layout(binding = 0) uniform sampler2D mip_0;
|
||||
layout(binding = 1, r32f) uniform writeonly image2D mip_1;
|
||||
layout(binding = 2, r32f) uniform writeonly image2D mip_2;
|
||||
layout(binding = 3, r32f) uniform writeonly image2D mip_3;
|
||||
layout(binding = 4, r32f) uniform writeonly image2D mip_4;
|
||||
layout(binding = 5, r32f) uniform writeonly image2D mip_5;
|
||||
layout(binding = 6, r32f) uniform writeonly image2D mip_6;
|
||||
|
||||
/// Gathers a 2x2 footprint of `mip_0` around texel `tex` and returns the max of the four values.
/// The normalised coordinate is derived from the size of `mip_1`, scaled by 0.5,
/// i.e. `mip_0` is treated as twice the size of `mip_1`.
float reduce_load_mip_0(uvec2 tex) {
    vec2 mip_1_size = vec2(imageSize(mip_1));
    vec2 texel_center = vec2(tex) + 0.5;
    vec2 uv = texel_center / mip_1_size * 0.5;
    vec4 footprint = textureGather(mip_0, uv);
    return reduce_4(footprint);
}
|
||||
|
||||
/// Writes this workgroup's 32x32 tile of `mip_1` and, if requested, its 16x16 tile of `mip_2`.
/// Every invocation handles four `mip_1` texels, one per 16x16 quadrant of the tile.
/// On exit (when max_mip_level > 1) the `mip_2` values are left in `intermediate_memory`
/// for the following reduction steps.
void downsample_mips_0_and_1(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) {
    vec4 v;

    // Quadrant order: (0,0), (1,0), (0,1), (1,1).
    for (uint q = 0u; q < 4u; q++) {
        uint qx = q % 2u;
        uint qy = q / 2u;

        ivec2 src = workgroup_id * 64 + ivec2(x * 2u + qx * 32u, y * 2u + qy * 32u);
        ivec2 dst = workgroup_id * 32 + ivec2(x + qx * 16u, y + qy * 16u);

        v[q] = reduce_load_mip_0(src);
        imageStore(mip_1, dst, vec4(v[q]));
    }

    if (max_mip_level <= 1u) { return; }

    bool is_reducer = local_invocation_index < 64u;

    // Reduce one quadrant at a time through shared memory: all 256 invocations
    // publish their value, then the first 64 each fold a 2x2 block into mip_2.
    for (uint i = 0u; i < 4u; i++) {
        intermediate_memory[x][y] = v[i];
        barrier();

        if (is_reducer) {
            uint sx = x * 2u;
            uint sy = y * 2u;
            v[i] = reduce_4(vec4(
                intermediate_memory[sx][sy],
                intermediate_memory[sx + 1u][sy],
                intermediate_memory[sx][sy + 1u],
                intermediate_memory[sx + 1u][sy + 1u]
            ));

            ivec2 quadrant_offset = ivec2((i % 2u) * 8u, (i / 2u) * 8u);
            ivec2 dst = (workgroup_id * 16) + ivec2(x, y) + quadrant_offset;
            imageStore(mip_2, dst, vec4(v[i]));
        }

        // Keep the next iteration's writes from racing with the reads above.
        barrier();
    }

    if (is_reducer) {
        // Lay the four 8x8 quadrants out as one 16x16 mip_2 tile.
        intermediate_memory[x][y] = v[0];
        intermediate_memory[x + 8u][y] = v[1];
        intermediate_memory[x][y + 8u] = v[2];
        intermediate_memory[x + 8u][y + 8u] = v[3];
    }
}
|
||||
|
||||
|
||||
/// First 64 invocations only: folds a 2x2 block of `intermediate_memory` into one
/// texel of `mip_3`, then stores the result back at [2x + y % 2][2y], a slot inside
/// the block this invocation just read.
void downsample_mip_2(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) {
    if (local_invocation_index >= 64u) { return; }

    uint sx = x * 2u;
    uint sy = y * 2u;

    float reduced = reduce_4(vec4(
        intermediate_memory[sx][sy],
        intermediate_memory[sx + 1u][sy],
        intermediate_memory[sx][sy + 1u],
        intermediate_memory[sx + 1u][sy + 1u]
    ));

    ivec2 dst = (workgroup_id * 8) + ivec2(x, y);
    imageStore(mip_3, dst, vec4(reduced));

    intermediate_memory[sx + y % 2u][sy] = reduced;
}
|
||||
|
||||
/// First 16 invocations only: folds four values of `intermediate_memory` into one
/// texel of `mip_4`. The read slots ([4x + {0,2}][4y] and [4x + {1,3}][4y + 2])
/// follow the swizzled layout in which downsample_mip_2 stored its results.
/// The result is stored back at [4x + y][4y].
void downsample_mip_3(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) {
    if (local_invocation_index >= 16u) { return; }

    uint sx = x * 4u;
    uint sy = y * 4u;

    float reduced = reduce_4(vec4(
        intermediate_memory[sx][sy],
        intermediate_memory[sx + 2u][sy],
        intermediate_memory[sx + 1u][sy + 2u],
        intermediate_memory[sx + 3u][sy + 2u]
    ));

    ivec2 dst = (workgroup_id * 4) + ivec2(x, y);
    imageStore(mip_4, dst, vec4(reduced));

    intermediate_memory[sx + y][sy] = reduced;
}
|
||||
|
||||
/// First 4 invocations only: folds four values of `intermediate_memory` into one
/// texel of `mip_5`. The read slots follow the layout in which downsample_mip_3
/// stored its results. The result is stored at [x + 2y][0].
void downsample_mip_4(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) {
    if (local_invocation_index >= 4u) { return; }

    uint sx = x * 8u + y * 2u;
    uint sy = y * 8u;

    float reduced = reduce_4(vec4(
        intermediate_memory[sx][sy],
        intermediate_memory[sx + 4u][sy],
        intermediate_memory[sx + 1u][sy + 4u],
        intermediate_memory[sx + 5u][sy + 4u]
    ));

    ivec2 dst = (workgroup_id * 2) + ivec2(x, y);
    imageStore(mip_5, dst, vec4(reduced));

    intermediate_memory[x + y * 2u][0u] = reduced;
}
|
||||
|
||||
/// Invocation 0 only: folds intermediate_memory[0..3][0] into this workgroup's
/// single texel of `mip_6`.
void downsample_mip_5(ivec2 workgroup_id, uint local_invocation_index) {
    if (local_invocation_index >= 1u) { return; }

    vec4 last_four = vec4(
        intermediate_memory[0u][0u],
        intermediate_memory[1u][0u],
        intermediate_memory[2u][0u],
        intermediate_memory[3u][0u]
    );
    float reduced = reduce_4(last_four);

    imageStore(mip_6, workgroup_id, vec4(reduced));
}
|
||||
|
||||
/// Runs the remaining shared-memory reduction steps, stopping once `max_mip_level`
/// is reached. Each step is preceded by a barrier so that the previous step's
/// writes to `intermediate_memory` are complete. `max_mip_level` is a uniform,
/// so every invocation takes the same branches and hits the same barriers.
void downsample_mips_2_to_5(uint x, uint y, ivec2 workgroup_id, uint local_invocation_index) {
    if (max_mip_level > 2u) {
        barrier();
        downsample_mip_2(x, y, workgroup_id, local_invocation_index);

        if (max_mip_level > 3u) {
            barrier();
            downsample_mip_3(x, y, workgroup_id, local_invocation_index);

            if (max_mip_level > 4u) {
                barrier();
                downsample_mip_4(x, y, workgroup_id, local_invocation_index);

                if (max_mip_level > 5u) {
                    barrier();
                    downsample_mip_5(workgroup_id, local_invocation_index);
                }
            }
        }
    }
}
|
||||
|
||||
/// Maps the linear invocation index to a coordinate: the low 6 bits are
/// remapped into an 8x8 block, and bits 6 and 7 pick which 8x8 block along x
/// and y respectively. Then runs both stages of the first pass.
void downsample_depth_first() {
    uint index = gl_LocalInvocationIndex;
    ivec2 group = ivec2(gl_WorkGroupID.xy);

    uvec2 sub_xy = remap_for_wave_reduction(index % 64u);
    uint block_x = (index >> 6u) % 2u;
    uint block_y = index >> 7u;

    uint x = sub_xy.x + 8u * block_x;
    uint y = sub_xy.y + 8u * block_y;

    downsample_mips_0_and_1(x, y, group, index);

    downsample_mips_2_to_5(x, y, group, index);
}

/// Shader entry point.
void main() {
    downsample_depth_first();
}
|
@ -0,0 +1,134 @@
|
||||
#include "flywheel:internal/indirect/downsample.glsl"
|
||||
|
||||
layout(binding = 0, r32f) uniform readonly image2D mip_6;
|
||||
layout(binding = 1, r32f) uniform writeonly image2D mip_7;
|
||||
layout(binding = 2, r32f) uniform writeonly image2D mip_8;
|
||||
layout(binding = 3, r32f) uniform writeonly image2D mip_9;
|
||||
layout(binding = 4, r32f) uniform writeonly image2D mip_10;
|
||||
layout(binding = 5, r32f) uniform writeonly image2D mip_11;
|
||||
layout(binding = 6, r32f) uniform writeonly image2D mip_12;
|
||||
|
||||
/// Loads the 2x2 block of `mip_6` whose first texel is `tex` and returns the max of the four values.
float reduce_load_mip_6(ivec2 tex) {
    float at_00 = imageLoad(mip_6, tex + ivec2(0, 0)).r;
    float at_01 = imageLoad(mip_6, tex + ivec2(0, 1)).r;
    float at_10 = imageLoad(mip_6, tex + ivec2(1, 0)).r;
    float at_11 = imageLoad(mip_6, tex + ivec2(1, 1)).r;

    return reduce_4(vec4(at_00, at_01, at_10, at_11));
}
|
||||
|
||||
/// Each invocation reduces a 4x4 region of `mip_6` into a 2x2 region of `mip_7`.
/// If more levels are requested, it also folds those four values into one texel
/// of `mip_8` and leaves that value in intermediate_memory[x][y].
void downsample_mips_6_and_7(uint x, uint y) {
    vec4 v;

    // Sub-block order: (0,0), (1,0), (0,1), (1,1).
    for (uint q = 0u; q < 4u; q++) {
        uint qx = q % 2u;
        uint qy = q / 2u;

        ivec2 src = ivec2(x * 4u + qx * 2u, y * 4u + qy * 2u);
        ivec2 dst = ivec2(x * 2u + qx, y * 2u + qy);

        v[q] = reduce_load_mip_6(src);
        imageStore(mip_7, dst, vec4(v[q]));
    }

    if (max_mip_level <= 7u) { return; }

    float folded = reduce_4(v);
    imageStore(mip_8, ivec2(x, y), vec4(folded));
    intermediate_memory[x][y] = folded;
}
|
||||
|
||||
|
||||
/// First 64 invocations only: folds a 2x2 block of `intermediate_memory` into
/// texel (x, y) of `mip_9`, then stores the result back at [2x + y % 2][2y],
/// a slot inside the block this invocation just read.
void downsample_mip_8(uint x, uint y, uint local_invocation_index) {
    if (local_invocation_index >= 64u) { return; }

    uint sx = x * 2u;
    uint sy = y * 2u;

    float reduced = reduce_4(vec4(
        intermediate_memory[sx][sy],
        intermediate_memory[sx + 1u][sy],
        intermediate_memory[sx][sy + 1u],
        intermediate_memory[sx + 1u][sy + 1u]
    ));

    imageStore(mip_9, ivec2(x, y), vec4(reduced));

    intermediate_memory[sx + y % 2u][sy] = reduced;
}
|
||||
|
||||
/// First 16 invocations only: folds four values of `intermediate_memory` into
/// texel (x, y) of `mip_10`. The read slots ([4x + {0,2}][4y] and
/// [4x + {1,3}][4y + 2]) follow the swizzled layout in which downsample_mip_8
/// stored its results. The result is stored back at [4x + y][4y].
void downsample_mip_9(uint x, uint y, uint local_invocation_index) {
    if (local_invocation_index >= 16u) { return; }

    uint sx = x * 4u;
    uint sy = y * 4u;

    float reduced = reduce_4(vec4(
        intermediate_memory[sx][sy],
        intermediate_memory[sx + 2u][sy],
        intermediate_memory[sx + 1u][sy + 2u],
        intermediate_memory[sx + 3u][sy + 2u]
    ));

    imageStore(mip_10, ivec2(x, y), vec4(reduced));

    intermediate_memory[sx + y][sy] = reduced;
}
|
||||
|
||||
/// First 4 invocations only: folds four values of `intermediate_memory` into
/// texel (x, y) of `mip_11`. The read slots follow the layout in which
/// downsample_mip_9 stored its results. The result is stored at [x + 2y][0].
void downsample_mip_10(uint x, uint y, uint local_invocation_index) {
    if (local_invocation_index >= 4u) { return; }

    uint sx = x * 8u + y * 2u;
    uint sy = y * 8u;

    float reduced = reduce_4(vec4(
        intermediate_memory[sx][sy],
        intermediate_memory[sx + 4u][sy],
        intermediate_memory[sx + 1u][sy + 4u],
        intermediate_memory[sx + 5u][sy + 4u]
    ));

    imageStore(mip_11, ivec2(x, y), vec4(reduced));

    intermediate_memory[x + y * 2u][0u] = reduced;
}
|
||||
|
||||
/// Invocation 0 only: folds intermediate_memory[0..3][0] into texel (0, 0) of `mip_12`.
void downsample_mip_11(uint local_invocation_index) {
    if (local_invocation_index >= 1u) { return; }

    vec4 last_four = vec4(
        intermediate_memory[0u][0u],
        intermediate_memory[1u][0u],
        intermediate_memory[2u][0u],
        intermediate_memory[3u][0u]
    );
    float reduced = reduce_4(last_four);

    imageStore(mip_12, ivec2(0, 0), vec4(reduced));
}
|
||||
|
||||
|
||||
/// Runs the remaining shared-memory reduction steps, stopping once `max_mip_level`
/// is reached. Each step is preceded by a barrier so that the previous step's
/// writes to `intermediate_memory` are complete. `max_mip_level` is a uniform,
/// so every invocation takes the same branches and hits the same barriers.
void downsample_mips_8_to_11(uint x, uint y, uint local_invocation_index) {
    if (max_mip_level > 8u) {
        barrier();
        downsample_mip_8(x, y, local_invocation_index);

        if (max_mip_level > 9u) {
            barrier();
            downsample_mip_9(x, y, local_invocation_index);

            if (max_mip_level > 10u) {
                barrier();
                downsample_mip_10(x, y, local_invocation_index);

                if (max_mip_level > 11u) {
                    barrier();
                    downsample_mip_11(local_invocation_index);
                }
            }
        }
    }
}
|
||||
|
||||
/// Maps the linear invocation index to a coordinate: the low 6 bits are
/// remapped into an 8x8 block, and bits 6 and 7 pick which 8x8 block along x
/// and y respectively. Then runs both stages of the second pass.
void downsample_depth_second() {
    uint index = gl_LocalInvocationIndex;

    uvec2 sub_xy = remap_for_wave_reduction(index % 64u);
    uint block_x = (index >> 6u) % 2u;
    uint block_y = index >> 7u;

    uint x = sub_xy.x + 8u * block_x;
    uint y = sub_xy.y + 8u * block_y;

    downsample_mips_6_and_7(x, y);

    downsample_mips_8_to_11(x, y, index);
}

/// Shader entry point.
void main() {
    downsample_depth_second();
}
|
Loading…
Reference in New Issue
Block a user