Embeds your embeddings

- Optimize embeddings on indirect backend by uploading all matrices in
  an SSBO
- Allocate matrices in an arena
- Flatten IndirectCullingGroups to only be parameterized by
  InstanceType, so now all instances from all embeddings get culled in
  the same dispatch
- Sort indirect draws by whether they're embedded, right after visual type
- Include an "embedded" boolean in the MultiDraw record to decide which
  shader to use
- Include "matrixIndex" field in model descriptor and indirect draw
  structs
- Use matrixIndex == 0 to indicate that a matrix is the identity to
  avoid unnecessary work in the cull shader
- Add helper to write a mat3 as 3 vec4s
This commit is contained in:
Jozufozu 2024-08-15 11:41:33 -07:00
parent b7d2b2ac7c
commit 7a7d58adf2
24 changed files with 199 additions and 64 deletions

View file

@ -16,6 +16,7 @@ import dev.engine_room.flywheel.api.model.Model;
import dev.engine_room.flywheel.api.visualization.VisualType;
import dev.engine_room.flywheel.backend.FlwBackend;
import dev.engine_room.flywheel.backend.engine.embed.Environment;
import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage;
import dev.engine_room.flywheel.lib.util.Pair;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
@ -40,7 +41,7 @@ public abstract class DrawManager<N extends AbstractInstancer<?>> {
return (Instancer<I>) instancers.computeIfAbsent(new InstancerKey<>(environment, type, model, visualType, bias), this::createAndDeferInit);
}
public void flush(LightStorage lightStorage) {
public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) {
// Thread safety: flush is called from the render thread after all visual updates have been made,
// so there are no:tm: threads we could be racing with.
for (var instancer : initializationQueue) {

View file

@ -89,7 +89,7 @@ public class EngineImpl implements Engine {
try (var state = GlStateTracker.getRestoreState()) {
Uniforms.update(context);
environmentStorage.flush();
drawManager.flush(lightStorage);
drawManager.flush(lightStorage, environmentStorage);
}
}
@ -107,6 +107,7 @@ public class EngineImpl implements Engine {
public void delete() {
drawManager.delete();
lightStorage.delete();
environmentStorage.delete();
}
public <I extends Instance> Instancer<I> instancer(Environment environment, InstanceType<I> type, Model model, VisualType visualType, int bias) {

View file

@ -16,6 +16,7 @@ import dev.engine_room.flywheel.api.visualization.VisualType;
import dev.engine_room.flywheel.backend.compile.ContextShader;
import dev.engine_room.flywheel.backend.engine.EngineImpl;
import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
import dev.engine_room.flywheel.lib.util.ExtraMemoryOps;
import net.minecraft.core.Vec3i;
public class EmbeddedEnvironment implements VisualEmbedding, Environment {
@ -31,6 +32,8 @@ public class EmbeddedEnvironment implements VisualEmbedding, Environment {
private final Matrix4f poseComposed = new Matrix4f();
private final Matrix3f normalComposed = new Matrix3f();
public int matrixIndex = 0;
private boolean deleted = false;
public EmbeddedEnvironment(EngineImpl engine, VisualType visualType, Vec3i renderOrigin, @Nullable EmbeddedEnvironment parent) {
@ -81,23 +84,25 @@ public class EmbeddedEnvironment implements VisualEmbedding, Environment {
return ContextShader.EMBEDDED;
}
@Override
public void setupCull(GlProgram program) {
program.setBool(EmbeddingUniforms.USE_MODEL_MATRIX, true);
program.setMat4(EmbeddingUniforms.MODEL_MATRIX, poseComposed);
}
@Override
public void setupDraw(GlProgram program) {
program.setMat4(EmbeddingUniforms.MODEL_MATRIX, poseComposed);
program.setMat3(EmbeddingUniforms.NORMAL_MATRIX, normalComposed);
}
public void flush() {
@Override
public int matrixIndex() {
return matrixIndex;
}
public void flush(long ptr) {
poseComposed.identity();
normalComposed.identity();
composeMatrices(poseComposed, normalComposed);
ExtraMemoryOps.putMatrix4f(ptr, poseComposed);
ExtraMemoryOps.putMatrix3fPadded(ptr + 16 * Float.BYTES, normalComposed);
}
private void composeMatrices(Matrix4f pose, Matrix3f normal) {

View file

@ -1,12 +1,8 @@
package dev.engine_room.flywheel.backend.engine.embed;
public final class EmbeddingUniforms {
/**
* Only used by cull shaders.
*/
public static final String USE_MODEL_MATRIX = "_flw_useModelMatrix";
public static final String MODEL_MATRIX = "_flw_modelMatrix";
public static final String NORMAL_MATRIX = "_flw_normalMatrix";
public static final String MODEL_MATRIX = "_flw_modelMatrixUniform";
public static final String NORMAL_MATRIX = "_flw_normalMatrixUniform";
private EmbeddingUniforms() {
}

View file

@ -6,7 +6,7 @@ import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
public interface Environment {
ContextShader contextShader();
void setupCull(GlProgram cullProgram);
void setupDraw(GlProgram drawProgram);
int matrixIndex();
}

View file

@ -1,18 +1,46 @@
package dev.engine_room.flywheel.backend.engine.embed;
import dev.engine_room.flywheel.backend.engine.Arena;
import it.unimi.dsi.fastutil.objects.ReferenceLinkedOpenHashSet;
import it.unimi.dsi.fastutil.objects.ReferenceSet;
import it.unimi.dsi.fastutil.objects.ReferenceSets;
public class EnvironmentStorage {
protected final ReferenceSet<EmbeddedEnvironment> environments = ReferenceSets.synchronize(new ReferenceLinkedOpenHashSet<>());
public static final int MATRIX_SIZE_BYTES = (16 + 12) * Float.BYTES;
protected final Object lock = new Object();
protected final ReferenceSet<EmbeddedEnvironment> environments = new ReferenceLinkedOpenHashSet<>();
// Note that the arena starts indexing at zero, but we reserve index zero for the identity matrix.
// The reservation is made by allocating slot zero up front, so every tracked environment gets an index of at least one.
public final Arena arena = new Arena(MATRIX_SIZE_BYTES, 32);
{
arena.alloc(); // Reserve the identity matrix.
}
public void track(EmbeddedEnvironment environment) {
environments.add(environment);
synchronized (lock) {
if (environments.add(environment)) {
environment.matrixIndex = arena.alloc();
}
}
}
public void flush() {
environments.removeIf(EmbeddedEnvironment::isDeleted);
environments.forEach(EmbeddedEnvironment::flush);
environments.removeIf(embeddedEnvironment -> {
var deleted = embeddedEnvironment.isDeleted();
if (deleted && embeddedEnvironment.matrixIndex > 0) {
arena.free(embeddedEnvironment.matrixIndex);
}
return deleted;
});
for (EmbeddedEnvironment environment : environments) {
environment.flush(arena.indexToPointer(environment.matrixIndex));
}
}
public void delete() {
arena.delete();
}
}

View file

@ -15,11 +15,11 @@ public class GlobalEnvironment implements Environment {
}
@Override
public void setupCull(GlProgram cullProgram) {
cullProgram.setBool(EmbeddingUniforms.USE_MODEL_MATRIX, false);
public void setupDraw(GlProgram drawProgram) {
}
@Override
public void setupDraw(GlProgram drawProgram) {
public int matrixIndex() {
return 0;
}
}

View file

@ -8,6 +8,7 @@ public final class BufferBindings {
public static final int DRAW = 4;
public static final int LIGHT_LUT = 5;
public static final int LIGHT_SECTION = 6;
public static final int MATRICES = 7;
private BufferBindings() {
}

View file

@ -16,10 +16,10 @@ public class IndirectBuffers {
public static final long INT_SIZE = Integer.BYTES;
public static final long PTR_SIZE = Pointer.POINTER_SIZE;
public static final long MODEL_STRIDE = 24;
public static final long MODEL_STRIDE = 28;
// Byte size of a draw command, plus our added mesh data.
public static final long DRAW_COMMAND_STRIDE = 40;
public static final long DRAW_COMMAND_STRIDE = 44;
public static final long DRAW_COMMAND_OFFSET = 0;
// Offsets to the 3 segments

View file

@ -24,7 +24,6 @@ import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.engine.InstancerKey;
import dev.engine_room.flywheel.backend.engine.MaterialRenderState;
import dev.engine_room.flywheel.backend.engine.MeshPool;
import dev.engine_room.flywheel.backend.engine.embed.Environment;
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
import dev.engine_room.flywheel.backend.gl.GlCompat;
import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
@ -33,6 +32,7 @@ import dev.engine_room.flywheel.lib.math.MoreMath;
public class IndirectCullingGroup<I extends Instance> {
private static final Comparator<IndirectDraw> DRAW_COMPARATOR = Comparator.comparing(IndirectDraw::visualType)
.thenComparing(IndirectDraw::isEmbedded)
.thenComparing(IndirectDraw::bias)
.thenComparing(IndirectDraw::indexOfMeshInModel)
.thenComparing(IndirectDraw::material, MaterialRenderState.COMPARATOR);
@ -40,7 +40,6 @@ public class IndirectCullingGroup<I extends Instance> {
private static final int DRAW_BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT;
private final InstanceType<I> instanceType;
private final Environment environment;
private final long instanceStride;
private final IndirectBuffers buffers;
private final List<IndirectInstancer<I>> instancers = new ArrayList<>();
@ -55,9 +54,8 @@ public class IndirectCullingGroup<I extends Instance> {
private boolean needsDrawSort;
private int instanceCountThisFrame;
IndirectCullingGroup(InstanceType<I> instanceType, Environment environment, IndirectPrograms programs) {
IndirectCullingGroup(InstanceType<I> instanceType, IndirectPrograms programs) {
this.instanceType = instanceType;
this.environment = environment;
instanceStride = MoreMath.align4(instanceType.layout()
.byteSize());
buffers = new IndirectBuffers(instanceStride);
@ -124,8 +122,6 @@ public class IndirectCullingGroup<I extends Instance> {
Uniforms.bindAll();
cullProgram.bind();
environment.setupCull(cullProgram);
buffers.bindForCompute();
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
glDispatchCompute(GlCompat.getComputeGroupCount(instanceCountThisFrame), 1, 1);
@ -161,7 +157,7 @@ public class IndirectCullingGroup<I extends Instance> {
// if the next draw call has a different VisualType, embedded state, or Material, start a new MultiDraw
if (i == indirectDraws.size() - 1 || incompatibleDraws(draw1, indirectDraws.get(i + 1))) {
multiDraws.computeIfAbsent(draw1.visualType(), s -> new ArrayList<>())
.add(new MultiDraw(draw1.material(), start, i + 1));
.add(new MultiDraw(draw1.material(), draw1.isEmbedded(), start, i + 1));
start = i + 1;
}
}
@ -171,6 +167,10 @@ public class IndirectCullingGroup<I extends Instance> {
if (draw1.visualType() != draw2.visualType()) {
return true;
}
if (draw1.isEmbedded() != draw2.isEmbedded()) {
return true;
}
return !MaterialRenderState.materialEquals(draw1.material(), draw2.material());
}
@ -209,13 +209,12 @@ public class IndirectCullingGroup<I extends Instance> {
int baseDrawUniformLoc = -1;
for (var multiDraw : multiDraws.get(visualType)) {
var drawProgram = programs.getIndirectProgram(instanceType, environment.contextShader(), multiDraw.material.light());
var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material.light());
if (drawProgram != lastProgram) {
lastProgram = drawProgram;
// Don't need to do this unless the program changes.
drawProgram.bind();
environment.setupDraw(drawProgram);
baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw");
}
@ -300,7 +299,7 @@ public class IndirectCullingGroup<I extends Instance> {
return out;
}
private record MultiDraw(Material material, int start, int end) {
private record MultiDraw(Material material, boolean embedded, int start, int end) {
private void submit() {
GlCompat.safeMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, this.start * IndirectBuffers.DRAW_COMMAND_STRIDE, this.end - this.start, (int) IndirectBuffers.DRAW_COMMAND_STRIDE);
}

View file

@ -7,6 +7,7 @@ import dev.engine_room.flywheel.api.visualization.VisualType;
import dev.engine_room.flywheel.backend.MaterialShaderIndices;
import dev.engine_room.flywheel.backend.engine.MaterialEncoder;
import dev.engine_room.flywheel.backend.engine.MeshPool;
import dev.engine_room.flywheel.backend.engine.embed.EmbeddedEnvironment;
public class IndirectDraw {
private final IndirectInstancer<?> instancer;
@ -46,6 +47,10 @@ public class IndirectDraw {
return material;
}
public boolean isEmbedded() {
return instancer.environment instanceof EmbeddedEnvironment;
}
public MeshPool.PooledMesh mesh() {
return mesh;
}
@ -71,10 +76,12 @@ public class IndirectDraw {
MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex
MemoryUtil.memPutInt(ptr + 24, materialVertexIndex); // materialVertexIndex
MemoryUtil.memPutInt(ptr + 28, materialFragmentIndex); // materialFragmentIndex
MemoryUtil.memPutInt(ptr + 32, packedFogAndCutout); // packedFogAndCutout
MemoryUtil.memPutInt(ptr + 36, packedMaterialProperties); // packedMaterialProperties
MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex
MemoryUtil.memPutInt(ptr + 28, materialVertexIndex); // materialVertexIndex
MemoryUtil.memPutInt(ptr + 32, materialFragmentIndex); // materialFragmentIndex
MemoryUtil.memPutInt(ptr + 36, packedFogAndCutout); // packedFogAndCutout
MemoryUtil.memPutInt(ptr + 40, packedMaterialProperties); // packedMaterialProperties
}
public void writeWithOverrides(long ptr, int instanceIndex, Material materialOverride) {
@ -86,10 +93,12 @@ public class IndirectDraw {
MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex
MemoryUtil.memPutInt(ptr + 24, MaterialShaderIndices.vertexIndex(materialOverride.shaders())); // materialVertexIndex
MemoryUtil.memPutInt(ptr + 28, MaterialShaderIndices.fragmentIndex(materialOverride.shaders())); // materialFragmentIndex
MemoryUtil.memPutInt(ptr + 32, MaterialEncoder.packUberShader(materialOverride)); // packedFogAndCutout
MemoryUtil.memPutInt(ptr + 36, MaterialEncoder.packProperties(materialOverride)); // packedMaterialProperties
MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex
MemoryUtil.memPutInt(ptr + 28, MaterialShaderIndices.vertexIndex(materialOverride.shaders())); // materialVertexIndex
MemoryUtil.memPutInt(ptr + 32, MaterialShaderIndices.fragmentIndex(materialOverride.shaders())); // materialFragmentIndex
MemoryUtil.memPutInt(ptr + 36, MaterialEncoder.packUberShader(materialOverride)); // packedFogAndCutout
MemoryUtil.memPutInt(ptr + 40, MaterialEncoder.packProperties(materialOverride)); // packedMaterialProperties
}
public void delete() {

View file

@ -12,18 +12,19 @@ import java.util.Map;
import dev.engine_room.flywheel.api.backend.Engine;
import dev.engine_room.flywheel.api.instance.Instance;
import dev.engine_room.flywheel.api.instance.InstanceType;
import dev.engine_room.flywheel.api.visualization.VisualType;
import dev.engine_room.flywheel.backend.Samplers;
import dev.engine_room.flywheel.backend.compile.ContextShader;
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.engine.CommonCrumbling;
import dev.engine_room.flywheel.backend.engine.DrawManager;
import dev.engine_room.flywheel.backend.engine.GroupKey;
import dev.engine_room.flywheel.backend.engine.InstancerKey;
import dev.engine_room.flywheel.backend.engine.LightStorage;
import dev.engine_room.flywheel.backend.engine.MaterialRenderState;
import dev.engine_room.flywheel.backend.engine.MeshPool;
import dev.engine_room.flywheel.backend.engine.TextureBinder;
import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage;
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
import dev.engine_room.flywheel.backend.gl.GlStateTracker;
import dev.engine_room.flywheel.backend.gl.array.GlVertexArray;
@ -38,9 +39,10 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
private final StagingBuffer stagingBuffer;
private final MeshPool meshPool;
private final GlVertexArray vertexArray;
private final Map<GroupKey<?>, IndirectCullingGroup<?>> cullingGroups = new HashMap<>();
private final Map<InstanceType<?>, IndirectCullingGroup<?>> cullingGroups = new HashMap<>();
private final GlBuffer crumblingDrawBuffer = new GlBuffer();
private final LightBuffers lightBuffers;
private final MatrixBuffer matrixBuffer;
public IndirectDrawManager(IndirectPrograms programs) {
this.programs = programs;
@ -51,6 +53,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
vertexArray = GlVertexArray.create();
meshPool.bind(vertexArray);
lightBuffers = new LightBuffers();
matrixBuffer = new MatrixBuffer();
}
@Override
@ -61,8 +64,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
@SuppressWarnings("unchecked")
@Override
protected <I extends Instance> void initialize(InstancerKey<I> key, IndirectInstancer<?> instancer) {
var groupKey = new GroupKey<>(key.type(), key.environment());
var group = (IndirectCullingGroup<I>) cullingGroups.computeIfAbsent(groupKey, t -> new IndirectCullingGroup<>(t.instanceType(), t.environment(), programs));
var group = (IndirectCullingGroup<I>) cullingGroups.computeIfAbsent(key.type(), t -> new IndirectCullingGroup<>(t, programs));
group.add((IndirectInstancer<I>) instancer, key, meshPool);
}
@ -85,6 +87,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
vertexArray.bindForDraw();
lightBuffers.bind();
matrixBuffer.bind();
Uniforms.bindAll();
for (var group : cullingGroups.values()) {
@ -97,8 +100,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
}
@Override
public void flush(LightStorage lightStorage) {
super.flush(lightStorage);
public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) {
super.flush(lightStorage, environmentStorage);
for (var group : cullingGroups.values()) {
group.flushInstancers();
@ -116,6 +119,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
lightBuffers.flush(stagingBuffer, lightStorage);
matrixBuffer.flush(stagingBuffer, environmentStorage);
for (var group : cullingGroups.values()) {
group.upload(stagingBuffer);
}

View file

@ -49,10 +49,11 @@ public class IndirectInstancer<I extends Instance> extends AbstractInstancer<I>
public void writeModel(long ptr) {
MemoryUtil.memPutInt(ptr, 0); // instanceCount - to be incremented by the cull shader
MemoryUtil.memPutInt(ptr + 4, baseInstance); // baseInstance
MemoryUtil.memPutFloat(ptr + 8, boundingSphere.x()); // boundingSphere
MemoryUtil.memPutFloat(ptr + 12, boundingSphere.y());
MemoryUtil.memPutFloat(ptr + 16, boundingSphere.z());
MemoryUtil.memPutFloat(ptr + 20, boundingSphere.w());
MemoryUtil.memPutInt(ptr + 8, environment.matrixIndex()); // matrixIndex
MemoryUtil.memPutFloat(ptr + 12, boundingSphere.x()); // boundingSphere
MemoryUtil.memPutFloat(ptr + 16, boundingSphere.y());
MemoryUtil.memPutFloat(ptr + 20, boundingSphere.z());
MemoryUtil.memPutFloat(ptr + 24, boundingSphere.w());
}
public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) {

View file

@ -0,0 +1,33 @@
package dev.engine_room.flywheel.backend.engine.indirect;
import org.lwjgl.opengl.GL46;
import org.lwjgl.system.MemoryUtil;
import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage;
/**
 * GPU-side storage for the matrices held by an {@link EnvironmentStorage} arena.
 *
 * <p>{@link #flush} copies the arena's whole backing memory into a resizable storage array,
 * and {@link #bind} exposes that array as a shader storage buffer at
 * {@link BufferBindings#MATRICES}.
 */
public class MatrixBuffer {
    private final ResizableStorageArray matrices = new ResizableStorageArray(EnvironmentStorage.MATRIX_SIZE_BYTES);

    /**
     * Enqueues a copy of the entire matrix arena into the GPU buffer.
     *
     * @param stagingBuffer the staging buffer the copy is enqueued on
     * @param environmentStorage the storage whose arena is uploaded
     */
    public void flush(StagingBuffer stagingBuffer, EnvironmentStorage environmentStorage) {
        var slotCount = environmentStorage.arena.capacity();

        if (slotCount != 0) {
            // Grow the GPU-side array first so the handle passed below is the one that will be written.
            matrices.ensureCapacity(slotCount);

            stagingBuffer.enqueueCopy(arenaByteSize(environmentStorage), matrices.handle(), 0,
                    dst -> MemoryUtil.memCopy(environmentStorage.arena.indexToPointer(0), dst, arenaByteSize(environmentStorage)));
        }
    }

    /**
     * Binds the full matrix buffer range to the matrices binding point.
     * Does nothing while the buffer has no capacity.
     */
    public void bind() {
        if (matrices.capacity() != 0) {
            GL46.glBindBufferRange(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.MATRICES, matrices.handle(), 0, matrices.byteCapacity());
        }
    }

    // Size in bytes of the arena's full capacity, widened to long before multiplying.
    private static long arenaByteSize(EnvironmentStorage storage) {
        return (long) storage.arena.capacity() * EnvironmentStorage.MATRIX_SIZE_BYTES;
    }
}

View file

@ -23,6 +23,7 @@ import dev.engine_room.flywheel.backend.engine.MaterialEncoder;
import dev.engine_room.flywheel.backend.engine.MaterialRenderState;
import dev.engine_room.flywheel.backend.engine.MeshPool;
import dev.engine_room.flywheel.backend.engine.TextureBinder;
import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage;
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
import dev.engine_room.flywheel.backend.gl.GlStateTracker;
import dev.engine_room.flywheel.backend.gl.TextureBuffer;
@ -59,8 +60,8 @@ public class InstancedDrawManager extends DrawManager<InstancedInstancer<?>> {
}
@Override
public void flush(LightStorage lightStorage) {
super.flush(lightStorage);
public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) {
super.flush(lightStorage, environmentStorage);
this.instancers.values()
.removeIf(instancer -> {

View file

@ -67,8 +67,8 @@ vec2 getCrumblingTexCoord() {
#endif
#ifdef FLW_EMBEDDED
uniform mat4 _flw_modelMatrix;
uniform mat3 _flw_normalMatrix;
mat4 _flw_modelMatrix;
mat3 _flw_normalMatrix;
#endif
flat out uint _flw_instanceID;

View file

@ -5,3 +5,4 @@
#define _FLW_DRAW_BUFFER_BINDING 4
#define _FLW_LIGHT_LUT_BUFFER_BINDING 5
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6
#define _FLW_MATRIX_BUFFER_BINDING 7

View file

@ -2,6 +2,7 @@
#include "flywheel:internal/indirect/model_descriptor.glsl"
#include "flywheel:internal/uniforms/uniforms.glsl"
#include "flywheel:util/matrix.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
layout(local_size_x = _FLW_SUBGROUP_SIZE) in;
@ -17,8 +18,9 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer
ModelDescriptor _flw_models[];
};
uniform mat4 _flw_modelMatrix;
uniform bool _flw_useModelMatrix = false;
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer {
Matrices _flw_matrices[];
};
// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
// Only uses 6 fmas and some boolean ops.
@ -35,6 +37,7 @@ bool _flw_testSphere(vec3 center, float radius) {
bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
BoundingSphere sphere = _flw_models[modelIndex].boundingSphere;
uint matrixIndex = _flw_models[modelIndex].matrixIndex;
vec3 center;
float radius;
@ -44,8 +47,8 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
flw_transformBoundingSphere(instance, center, radius);
if (_flw_useModelMatrix) {
transformBoundingSphere(_flw_modelMatrix, center, radius);
if (matrixIndex > 0) {
transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
}
return _flw_testSphere(center, radius);

View file

@ -6,6 +6,7 @@ struct MeshDrawCommand {
uint baseInstance;
uint modelIndex;
uint matrixIndex;
uint materialVertexIndex;
uint materialFragmentIndex;

View file

@ -3,6 +3,7 @@
#include "flywheel:internal/indirect/buffer_bindings.glsl"
#include "flywheel:internal/indirect/draw_command.glsl"
#include "flywheel:internal/indirect/light.glsl"
#include "flywheel:internal/indirect/matrices.glsl"
layout(std430, binding = _FLW_TARGET_BUFFER_BINDING) restrict readonly buffer TargetBuffer {
uint _flw_instanceIndices[];
@ -12,6 +13,12 @@ layout(std430, binding = _FLW_DRAW_BUFFER_BINDING) restrict readonly buffer Draw
MeshDrawCommand _flw_drawCommands[];
};
#ifdef FLW_EMBEDDED
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer {
Matrices _flw_matrices[];
};
#endif
uniform uint _flw_baseDraw;
flat out uvec3 _flw_packedMaterial;
@ -29,7 +36,13 @@ void main() {
_flw_unpackMaterialProperties(packedMaterialProperties, flw_material);
_flw_packedMaterial = uvec3(draw.materialFragmentIndex, draw.packedFogAndCutout, packedMaterialProperties);
#if __VERSION__ < 460
#ifdef FLW_EMBEDDED
_flw_unpackMatrices(_flw_matrices[draw.matrixIndex], _flw_modelMatrix, _flw_normalMatrix);
// _flw_modelMatrix = mat4(1.);
// _flw_normalMatrix = mat3(1.);
#endif
#if __VERSION__ < 460
uint instanceIndex = _flw_instanceIndices[gl_BaseInstanceARB + gl_InstanceID];
#else
uint instanceIndex = _flw_instanceIndices[gl_BaseInstance + gl_InstanceID];

View file

@ -0,0 +1,11 @@
// One entry of the matrix storage buffer: a pose matrix plus a normal matrix.
// The normal matrix is carried as three vec4s rather than a mat3; only the xyz of each
// is read when unpacking, so the w components act as padding.
// The total is 16 + 12 floats, matching the (16 + 12) * Float.BYTES entry size used on the Java side.
struct Matrices {
// Model (pose) transform.
mat4 pose;
// First, second and third column of the normal matrix (w unused).
vec4 normalA;
vec4 normalB;
vec4 normalC;
};
// Expands a packed Matrices entry into a full pose matrix and a 3x3 normal matrix.
// The normal matrix is rebuilt column by column from the xyz of the three padded vec4s.
void _flw_unpackMatrices(in Matrices mats, out mat4 pose, out mat3 normal) {
    pose = mats.pose;

    normal[0] = mats.normalA.xyz;
    normal[1] = mats.normalB.xyz;
    normal[2] = mats.normalC.xyz;
}

View file

@ -8,6 +8,7 @@ struct BoundingSphere {
struct ModelDescriptor {
uint instanceCount;
uint baseInstance;
uint matrixIndex;
BoundingSphere boundingSphere;
};

View file

@ -5,11 +5,21 @@
uniform uvec4 _flw_packedMaterial;
uniform int _flw_baseInstance = 0;
#ifdef FLW_EMBEDDED
uniform mat4 _flw_modelMatrixUniform;
uniform mat3 _flw_normalMatrixUniform;
#endif
void main() {
_flw_uberMaterialVertexIndex = _flw_packedMaterial.x;
_flw_unpackMaterialProperties(_flw_packedMaterial.w, flw_material);
FlwInstance instance = _flw_unpackInstance(_flw_baseInstance + gl_InstanceID);
#ifdef FLW_EMBEDDED
_flw_modelMatrix = _flw_modelMatrixUniform;
_flw_normalMatrix = _flw_normalMatrixUniform;
#endif
_flw_main(instance, uint(gl_InstanceID));
}

View file

@ -61,6 +61,21 @@ public final class ExtraMemoryOps {
MemoryUtil.memPutFloat(ptr + 32, matrix.m22());
}
/**
 * Writes a 3x3 matrix to native memory as three groups of four floats (48 bytes total).
 *
 * <p>Each group holds the three elements {@code mN0, mN1, mN2} followed by a single
 * {@code 0.0f} pad float, so every group occupies 16 bytes.
 *
 * @param ptr the address to start writing at
 * @param matrix the matrix to write
 */
public static void putMatrix3fPadded(long ptr, Matrix3fc matrix) {
    final float pad = 0.0f;

    // Base address of each 16-byte group.
    final long first = ptr;
    final long second = ptr + 16;
    final long third = ptr + 32;

    MemoryUtil.memPutFloat(first, matrix.m00());
    MemoryUtil.memPutFloat(first + 4, matrix.m01());
    MemoryUtil.memPutFloat(first + 8, matrix.m02());
    MemoryUtil.memPutFloat(first + 12, pad);

    MemoryUtil.memPutFloat(second, matrix.m10());
    MemoryUtil.memPutFloat(second + 4, matrix.m11());
    MemoryUtil.memPutFloat(second + 8, matrix.m12());
    MemoryUtil.memPutFloat(second + 12, pad);

    MemoryUtil.memPutFloat(third, matrix.m20());
    MemoryUtil.memPutFloat(third + 4, matrix.m21());
    MemoryUtil.memPutFloat(third + 8, matrix.m22());
    MemoryUtil.memPutFloat(third + 12, pad);
}
public static void putMatrix4f(long ptr, Matrix4fc matrix) {
MemoryUtil.memPutFloat(ptr, matrix.m00());
MemoryUtil.memPutFloat(ptr + 4, matrix.m01());