mirror of
https://github.com/Jozufozu/Flywheel.git
synced 2025-02-24 02:45:34 +01:00
Poking and prodding
- Invert image size on CPU to avoid divisions on GPU
- Increase depth reduce group size to 16x16
- Early-out in uploadInstances based on changed cardinality
  - Much faster to calculate cardinality than it is to clear an AtomicBitSet, so the check is worth it
- Upload scatter list directly in the staging buffer if there's room
This commit is contained in:
parent
f12aa15dae
commit
0bfaac7154
5 changed files with 32 additions and 8 deletions
common/src/backend
java/dev/engine_room/flywheel/backend/engine
resources/assets/flywheel/flywheel/internal/indirect
|
@ -35,7 +35,6 @@ public final class MaterialRenderState {
|
|||
setupBackfaceCulling(material.backfaceCulling());
|
||||
setupPolygonOffset(material.polygonOffset());
|
||||
setupDepthTest(material.depthTest());
|
||||
// setupDepthTest(DepthTest.OFF);
|
||||
setupTransparency(material.transparency());
|
||||
setupWriteMask(material.writeMask());
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@ import dev.engine_room.flywheel.lib.math.MoreMath;
|
|||
import net.minecraft.client.Minecraft;
|
||||
|
||||
public class DepthPyramid {
|
||||
private static final int GROUP_SIZE = 16;
|
||||
|
||||
private final GlProgram depthReduceProgram;
|
||||
|
||||
public int pyramidTextureId = -1;
|
||||
|
@ -50,10 +52,10 @@ public class DepthPyramid {
|
|||
|
||||
GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F);
|
||||
|
||||
depthReduceProgram.setVec2("imageSize", mipWidth, mipHeight);
|
||||
depthReduceProgram.setVec2("oneOverImageSize", 1f / (float) mipWidth, 1f / (float) mipHeight);
|
||||
depthReduceProgram.setInt("lod", Math.max(0, i - 1));
|
||||
|
||||
GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1);
|
||||
GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, GROUP_SIZE), MoreMath.ceilingDiv(mipHeight, GROUP_SIZE), 1);
|
||||
|
||||
GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
|
|
@ -82,6 +82,11 @@ public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I>
|
|||
}
|
||||
|
||||
public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) {
|
||||
if (changedPages.cardinality() == 0) {
|
||||
// Early return because checking the cardinality is faster than clearing.
|
||||
return;
|
||||
}
|
||||
|
||||
int numPages = mapping.pageCount();
|
||||
|
||||
var instanceCount = instances.size();
|
||||
|
|
|
@ -23,6 +23,8 @@ public class StagingBuffer {
|
|||
private static final int STORAGE_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_CLIENT_STORAGE_BIT;
|
||||
private static final int MAP_FLAGS = GL45C.GL_MAP_PERSISTENT_BIT | GL45C.GL_MAP_WRITE_BIT | GL45C.GL_MAP_FLUSH_EXPLICIT_BIT | GL45C.GL_MAP_INVALIDATE_BUFFER_BIT;
|
||||
|
||||
private static final int SSBO_ALIGNMENT = GL45.glGetInteger(GL45.GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
|
||||
private final int vbo;
|
||||
private final long map;
|
||||
private final long capacity;
|
||||
|
@ -254,7 +256,6 @@ public class StagingBuffer {
|
|||
scatterProgram.bind();
|
||||
|
||||
// These bindings don't change between dstVbos.
|
||||
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle());
|
||||
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 1, vbo);
|
||||
|
||||
int dstVbo;
|
||||
|
@ -276,7 +277,24 @@ public class StagingBuffer {
|
|||
}
|
||||
|
||||
private void dispatchScatter(int dstVbo) {
|
||||
scatterBuffer.upload(scatterList.ptr(), scatterList.usedBytes());
|
||||
var scatterSize = scatterList.usedBytes();
|
||||
|
||||
long alignedPos = pos + SSBO_ALIGNMENT - 1 - (pos + SSBO_ALIGNMENT - 1) % SSBO_ALIGNMENT;
|
||||
|
||||
long remaining = capacity - alignedPos;
|
||||
if (scatterSize <= remaining && scatterSize <= totalAvailable) {
|
||||
MemoryUtil.memCopy(scatterList.ptr(), map + alignedPos, scatterSize);
|
||||
GL45.glBindBufferRange(GL45C.GL_SHADER_STORAGE_BUFFER, 0, vbo, alignedPos, scatterSize);
|
||||
|
||||
long alignmentCost = alignedPos - pos;
|
||||
|
||||
usedCapacity += scatterSize + alignmentCost;
|
||||
totalAvailable -= scatterSize + alignmentCost;
|
||||
pos += scatterSize + alignmentCost;
|
||||
} else {
|
||||
scatterBuffer.upload(scatterList.ptr(), scatterSize);
|
||||
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 0, scatterBuffer.handle());
|
||||
}
|
||||
|
||||
GL45.glBindBufferBase(GL45C.GL_SHADER_STORAGE_BUFFER, 2, dstVbo);
|
||||
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
layout(local_size_x = 8, local_size_y = 8) in;
|
||||
layout(local_size_x = 16, local_size_y = 16) in;
|
||||
|
||||
layout(binding = 0, r32f) uniform writeonly image2D outImage;
|
||||
layout(binding = 1) uniform sampler2D inImage;
|
||||
|
||||
uniform vec2 imageSize;
|
||||
uniform vec2 oneOverImageSize;
|
||||
uniform int lod;
|
||||
|
||||
uniform int useMin = 0;
|
||||
|
@ -13,7 +13,7 @@ void main() {
|
|||
|
||||
// Map the output texel to an input texel. Properly do the division because generating mip0 maps from the actual
|
||||
// full resolution depth buffer and the aspect ratio may be different from our Po2 pyramid.
|
||||
ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(textureSize(inImage, lod)) / imageSize));
|
||||
ivec2 samplePos = ivec2(floor(vec2(pos) * vec2(textureSize(inImage, lod)) * oneOverImageSize));
|
||||
|
||||
float depth01 = texelFetchOffset(inImage, samplePos, lod, ivec2(0, 1)).r;
|
||||
float depth11 = texelFetchOffset(inImage, samplePos, lod, ivec2(1, 1)).r;
|
||||
|
|
Loading…
Add table
Reference in a new issue