From 76a4b35ce6b52ec6b3640f3dc1784b39a58f1d84 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sat, 10 Aug 2024 12:54:15 -0700 Subject: [PATCH 1/6] Assimilate Backend Config - Merge flywheel-backend config into an object within the base flywheel config - On forge, push a path in the toml - On fabric, serialize a nested json object - Still expose the BackendConfig via FlwBackendXplat, but have the impl set a static field in the xplat impl - Revert debug shulker box changes in previous commit --- .../flywheel/backend/BackendConfig.java | 3 - .../flywheel/vanilla/ShulkerBoxVisual.java | 27 +---- .../flywheel/backend/FabricBackendConfig.java | 106 ------------------ .../flywheel/backend/FlwBackendXplatImpl.java | 7 +- .../flywheel/impl/FabricFlwConfig.java | 68 +++++++++++ .../flywheel/impl/FlwCommands.java | 7 +- .../flywheel/impl/mixin/MinecraftMixin.java | 2 - .../flywheel/backend/FlwBackendXplatImpl.java | 7 +- .../flywheel/backend/ForgeBackendConfig.java | 39 ------- .../flywheel/impl/FlwCommands.java | 3 +- .../flywheel/impl/FlywheelForge.java | 2 - .../flywheel/impl/ForgeFlwConfig.java | 26 +++++ 12 files changed, 112 insertions(+), 185 deletions(-) delete mode 100644 fabric/src/backend/java/dev/engine_room/flywheel/backend/FabricBackendConfig.java delete mode 100644 forge/src/backend/java/dev/engine_room/flywheel/backend/ForgeBackendConfig.java diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/BackendConfig.java b/common/src/backend/java/dev/engine_room/flywheel/backend/BackendConfig.java index 1bc137773..779ab2f9c 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/BackendConfig.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/BackendConfig.java @@ -8,9 +8,6 @@ public interface BackendConfig { /** * How smooth/accurate our flw_light impl is. * - *

This makes more sense here as a backend-specific config because it's tightly coupled to - * our backend's implementation. 3rd party backend may have different approaches and configurations. - * * @return The current light smoothness setting. */ LightSmoothness lightSmoothness(); diff --git a/common/src/main/java/dev/engine_room/flywheel/vanilla/ShulkerBoxVisual.java b/common/src/main/java/dev/engine_room/flywheel/vanilla/ShulkerBoxVisual.java index 716b8a317..9e9e17925 100644 --- a/common/src/main/java/dev/engine_room/flywheel/vanilla/ShulkerBoxVisual.java +++ b/common/src/main/java/dev/engine_room/flywheel/vanilla/ShulkerBoxVisual.java @@ -8,12 +8,10 @@ import com.mojang.blaze3d.vertex.PoseStack; import com.mojang.math.Axis; import dev.engine_room.flywheel.api.instance.Instance; -import dev.engine_room.flywheel.api.visual.ShaderLightVisual; import dev.engine_room.flywheel.api.visualization.VisualizationContext; import dev.engine_room.flywheel.lib.instance.InstanceTypes; import dev.engine_room.flywheel.lib.instance.TransformedInstance; import dev.engine_room.flywheel.lib.material.CutoutShaders; -import dev.engine_room.flywheel.lib.material.LightShaders; import dev.engine_room.flywheel.lib.material.SimpleMaterial; import dev.engine_room.flywheel.lib.model.ModelCache; import dev.engine_room.flywheel.lib.model.SingleMeshModel; @@ -21,20 +19,17 @@ import dev.engine_room.flywheel.lib.model.part.ModelPartConverter; import dev.engine_room.flywheel.lib.transform.TransformStack; import dev.engine_room.flywheel.lib.visual.AbstractBlockEntityVisual; import dev.engine_room.flywheel.lib.visual.SimpleDynamicVisual; -import it.unimi.dsi.fastutil.longs.LongArraySet; import net.minecraft.client.model.geom.ModelLayers; import net.minecraft.client.renderer.Sheets; import net.minecraft.client.resources.model.Material; import net.minecraft.core.Direction; -import net.minecraft.core.SectionPos; import net.minecraft.world.item.DyeColor; import 
net.minecraft.world.level.block.ShulkerBoxBlock; import net.minecraft.world.level.block.entity.ShulkerBoxBlockEntity; -public class ShulkerBoxVisual extends AbstractBlockEntityVisual implements SimpleDynamicVisual, ShaderLightVisual { +public class ShulkerBoxVisual extends AbstractBlockEntityVisual implements SimpleDynamicVisual { private static final dev.engine_room.flywheel.api.material.Material MATERIAL = SimpleMaterial.builder() .cutout(CutoutShaders.ONE_TENTH) - .light(LightShaders.SMOOTH) .texture(Sheets.SHULKER_SHEET) .mipmap(false) .backfaceCulling(false) @@ -72,7 +67,6 @@ public class ShulkerBoxVisual extends AbstractBlockEntityVisual { - var oldValue = FabricBackendConfig.INSTANCE.lightSmoothness; + var oldValue = FabricFlwConfig.INSTANCE.backendConfig.lightSmoothness; var newValue = context.getArgument("mode", LightSmoothness.class); if (oldValue != newValue) { - FabricBackendConfig.INSTANCE.lightSmoothness = newValue; - FabricBackendConfig.INSTANCE.save(); + FabricFlwConfig.INSTANCE.backendConfig.lightSmoothness = newValue; + FabricFlwConfig.INSTANCE.save(); Minecraft.getInstance() .reloadResourcePacks(); } diff --git a/fabric/src/main/java/dev/engine_room/flywheel/impl/mixin/MinecraftMixin.java b/fabric/src/main/java/dev/engine_room/flywheel/impl/mixin/MinecraftMixin.java index 79ec1986b..f96a68595 100644 --- a/fabric/src/main/java/dev/engine_room/flywheel/impl/mixin/MinecraftMixin.java +++ b/fabric/src/main/java/dev/engine_room/flywheel/impl/mixin/MinecraftMixin.java @@ -11,7 +11,6 @@ import org.spongepowered.asm.mixin.injection.Inject; import org.spongepowered.asm.mixin.injection.callback.CallbackInfo; import dev.engine_room.flywheel.api.event.EndClientResourceReloadCallback; -import dev.engine_room.flywheel.backend.FabricBackendConfig; import dev.engine_room.flywheel.impl.FabricFlwConfig; import dev.engine_room.flywheel.impl.FlwImpl; import net.minecraft.client.Minecraft; @@ -29,7 +28,6 @@ abstract class MinecraftMixin { // Load the config after we 
freeze registries, // so we can find third party backends. FabricFlwConfig.INSTANCE.load(); - FabricBackendConfig.INSTANCE.load(); } @Inject(method = "method_24040", at = @At("HEAD")) diff --git a/forge/src/backend/java/dev/engine_room/flywheel/backend/FlwBackendXplatImpl.java b/forge/src/backend/java/dev/engine_room/flywheel/backend/FlwBackendXplatImpl.java index 6c6bd2c68..c8eb18fb7 100644 --- a/forge/src/backend/java/dev/engine_room/flywheel/backend/FlwBackendXplatImpl.java +++ b/forge/src/backend/java/dev/engine_room/flywheel/backend/FlwBackendXplatImpl.java @@ -1,10 +1,15 @@ package dev.engine_room.flywheel.backend; +import org.jetbrains.annotations.UnknownNullability; + import net.minecraft.core.BlockPos; import net.minecraft.world.level.BlockGetter; import net.minecraft.world.level.block.state.BlockState; public class FlwBackendXplatImpl implements FlwBackendXplat { + @UnknownNullability + public static BackendConfig CONFIG; + @Override public int getLightEmission(BlockState state, BlockGetter level, BlockPos pos) { return state.getLightEmission(level, pos); @@ -12,6 +17,6 @@ public class FlwBackendXplatImpl implements FlwBackendXplat { @Override public BackendConfig getConfig() { - return ForgeBackendConfig.INSTANCE; + return CONFIG; } } diff --git a/forge/src/backend/java/dev/engine_room/flywheel/backend/ForgeBackendConfig.java b/forge/src/backend/java/dev/engine_room/flywheel/backend/ForgeBackendConfig.java deleted file mode 100644 index 3d3e64963..000000000 --- a/forge/src/backend/java/dev/engine_room/flywheel/backend/ForgeBackendConfig.java +++ /dev/null @@ -1,39 +0,0 @@ -package dev.engine_room.flywheel.backend; - -import org.apache.commons.lang3.tuple.Pair; - -import dev.engine_room.flywheel.backend.compile.LightSmoothness; -import net.minecraftforge.common.ForgeConfigSpec; -import net.minecraftforge.fml.ModLoadingContext; -import net.minecraftforge.fml.config.ModConfig; - -public class ForgeBackendConfig implements BackendConfig { - public static 
final ForgeBackendConfig INSTANCE = new ForgeBackendConfig(); - - public final ClientConfig client; - private final ForgeConfigSpec clientSpec; - - private ForgeBackendConfig() { - Pair clientPair = new ForgeConfigSpec.Builder().configure(ClientConfig::new); - this.client = clientPair.getLeft(); - clientSpec = clientPair.getRight(); - } - - @Override - public LightSmoothness lightSmoothness() { - return client.lightSmoothness.get(); - } - - public void registerSpecs(ModLoadingContext context) { - context.registerConfig(ModConfig.Type.CLIENT, clientSpec, "flywheel-backend.toml"); - } - - public static class ClientConfig { - public final ForgeConfigSpec.EnumValue lightSmoothness; - - private ClientConfig(ForgeConfigSpec.Builder builder) { - lightSmoothness = builder.comment("How smooth flywheel's shader-based lighting should be. May have a large performance impact.") - .defineEnum("lightSmoothness", LightSmoothness.SMOOTH); - } - } -} diff --git a/forge/src/main/java/dev/engine_room/flywheel/impl/FlwCommands.java b/forge/src/main/java/dev/engine_room/flywheel/impl/FlwCommands.java index 640225eca..870680a66 100644 --- a/forge/src/main/java/dev/engine_room/flywheel/impl/FlwCommands.java +++ b/forge/src/main/java/dev/engine_room/flywheel/impl/FlwCommands.java @@ -6,7 +6,6 @@ import com.mojang.brigadier.builder.LiteralArgumentBuilder; import dev.engine_room.flywheel.api.backend.Backend; import dev.engine_room.flywheel.api.backend.BackendManager; -import dev.engine_room.flywheel.backend.ForgeBackendConfig; import dev.engine_room.flywheel.backend.LightSmoothnessArgument; import dev.engine_room.flywheel.backend.compile.LightSmoothness; import dev.engine_room.flywheel.backend.engine.uniform.DebugMode; @@ -124,7 +123,7 @@ public final class FlwCommands { return Command.SINGLE_SUCCESS; }))); - var lightSmoothnessValue = ForgeBackendConfig.INSTANCE.client.lightSmoothness; + var lightSmoothnessValue = ForgeFlwConfig.INSTANCE.client.backendConfig.lightSmoothness; 
command.then(Commands.literal("lightSmoothness") .then(Commands.argument("mode", LightSmoothnessArgument.INSTANCE) .executes(context -> { diff --git a/forge/src/main/java/dev/engine_room/flywheel/impl/FlywheelForge.java b/forge/src/main/java/dev/engine_room/flywheel/impl/FlywheelForge.java index 57756aa47..f8ba8d83b 100644 --- a/forge/src/main/java/dev/engine_room/flywheel/impl/FlywheelForge.java +++ b/forge/src/main/java/dev/engine_room/flywheel/impl/FlywheelForge.java @@ -6,7 +6,6 @@ import org.jetbrains.annotations.UnknownNullability; import dev.engine_room.flywheel.api.Flywheel; import dev.engine_room.flywheel.api.event.EndClientResourceReloadEvent; import dev.engine_room.flywheel.api.event.ReloadLevelRendererEvent; -import dev.engine_room.flywheel.backend.ForgeBackendConfig; import dev.engine_room.flywheel.backend.LightSmoothnessArgument; import dev.engine_room.flywheel.backend.compile.FlwProgramsReloader; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; @@ -54,7 +53,6 @@ public final class FlywheelForge { .getModEventBus(); ForgeFlwConfig.INSTANCE.registerSpecs(modLoadingContext); - ForgeBackendConfig.INSTANCE.registerSpecs(modLoadingContext); DistExecutor.unsafeRunWhenOn(Dist.CLIENT, () -> () -> FlywheelForge.clientInit(forgeEventBus, modEventBus)); } diff --git a/forge/src/main/java/dev/engine_room/flywheel/impl/ForgeFlwConfig.java b/forge/src/main/java/dev/engine_room/flywheel/impl/ForgeFlwConfig.java index 4f24a7f64..b19ae9fea 100644 --- a/forge/src/main/java/dev/engine_room/flywheel/impl/ForgeFlwConfig.java +++ b/forge/src/main/java/dev/engine_room/flywheel/impl/ForgeFlwConfig.java @@ -5,6 +5,9 @@ import org.jetbrains.annotations.Nullable; import dev.engine_room.flywheel.api.backend.Backend; import dev.engine_room.flywheel.api.backend.BackendManager; +import dev.engine_room.flywheel.backend.BackendConfig; +import dev.engine_room.flywheel.backend.FlwBackendXplatImpl; +import dev.engine_room.flywheel.backend.compile.LightSmoothness; import 
net.minecraft.ResourceLocationException; import net.minecraft.resources.ResourceLocation; import net.minecraftforge.common.ForgeConfigSpec; @@ -21,6 +24,8 @@ public class ForgeFlwConfig implements FlwConfig { Pair clientPair = new ForgeConfigSpec.Builder().configure(ClientConfig::new); this.client = clientPair.getLeft(); clientSpec = clientPair.getRight(); + + FlwBackendXplatImpl.CONFIG = client.backendConfig; } @Override @@ -72,6 +77,8 @@ public class ForgeFlwConfig implements FlwConfig { public final ForgeConfigSpec.BooleanValue limitUpdates; public final ForgeConfigSpec.IntValue workerThreads; + public final ForgeBackendConfig backendConfig; + private ClientConfig(ForgeConfigSpec.Builder builder) { backend = builder.comment("Select the backend to use.") .define("backend", Backend.REGISTRY.getIdOrThrow(BackendManager.defaultBackend()).toString()); @@ -82,6 +89,25 @@ public class ForgeFlwConfig implements FlwConfig { workerThreads = builder.comment("The number of worker threads to use. Set to -1 to let Flywheel decide. Set to 0 to disable parallelism. Requires a game restart to take effect.") .defineInRange("workerThreads", -1, -1, Runtime.getRuntime() .availableProcessors()); + + builder.comment("Config options for flywheel's build-in backends.") + .push("flw_backends"); + + backendConfig = new ForgeBackendConfig(builder); + } + } + + public static class ForgeBackendConfig implements BackendConfig { + public final ForgeConfigSpec.EnumValue lightSmoothness; + + public ForgeBackendConfig(ForgeConfigSpec.Builder builder) { + lightSmoothness = builder.comment("How smooth flywheel's shader-based lighting should be. 
May have a large performance impact.") + .defineEnum("lightSmoothness", LightSmoothness.SMOOTH); + } + + @Override + public LightSmoothness lightSmoothness() { + return lightSmoothness.get(); } } } From 2d37c3894d6989cfe546c20d6767eb9280efe2aa Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Mon, 12 Aug 2024 15:24:43 -0700 Subject: [PATCH 2/6] We (un)roll - Manually unroll all loops in light_lut with the help of macros - Pretty significant perf gains on my 5600G - I tried assembling a bitmask of the blocks we actually want to fetch and branching in each _FLW_LIGHT_FETCH in an attempt to reduce the bandwidth required but that turned out much slower. Perhaps there's still some middle-ground to be found for axis-aligned normals - Re-order the 8-arrays in _flw_lightForDirection to be xzy to be consistent with everything else and improve the memory access pattern --- .../flywheel/flywheel/internal/light_lut.glsl | 271 +++++++++++------- 1 file changed, 172 insertions(+), 99 deletions(-) diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl index d537f4515..355d2290f 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/light_lut.glsl @@ -11,14 +11,13 @@ const uint _FLW_LIGHT_SECTION_SIZE_INTS = _FLW_LIGHT_SECTION_SIZE_BYTES / 4; const uint _FLW_COMPLETELY_SOLID = 0x7FFFFFFu; const float _FLW_EPSILON = 1e-5; +const uint _FLW_LOWER_10_BITS = 0x3FFu; +const uint _FLW_UPPER_10_BITS = 0xFFF00000u; + uint _flw_indexLut(uint index); uint _flw_indexLight(uint index); -// Adding this option takes my test world from ~800 to ~1250 FPS on my 3060ti. -// I have not taken it to a profiler otherwise. -#pragma optionNV (unroll all) - /// Find the index for the next step in the LUT. 
/// @param base The base index in the LUT, should point to the start of a coordinate span. /// @param coord The coordinate to look for. @@ -103,57 +102,120 @@ bool flw_lightFetch(ivec3 blockPos, out vec2 lightCoord) { return true; } -/// Premtively collect all light in a 3x3x3 area centered on our block. -/// Depending on the normal, we won't use all the data, but fetching on demand will have many duplicated fetches. -/// -/// The output is a 3-component vector packed into a single uint to save -/// memory and ALU ops later on. 10 bits are used for each component. This allows 4 such packed ints to be added -/// together with room to spare before overflowing into the next component. -uint[27] _flw_fetchLight3x3x3(uint sectionOffset, ivec3 blockInSectionPos, uint solid) { - uint[27] lights; - - uint index = 0u; - uint mask = 1u; - for (int y = -1; y <= 1; y++) { - for (int z = -1; z <= 1; z++) { - for (int x = -1; x <= 1; x++) { - // 0 if the block is solid, 1 if it's not. - uint notSolid = uint((solid & mask) == 0u); - uvec2 light = _flw_lightAt(sectionOffset, uvec3(blockInSectionPos + ivec3(x, y, z))); - - lights[index] = light.x; - lights[index] |= (light.y) << 10; - lights[index] |= (notSolid) << 20; - - index++; - mask <<= 1; - } - } - } - - return lights; -} uint _flw_fetchSolid3x3x3(uint sectionOffset, ivec3 blockInSectionPos) { uint ret = 0; - uint index = 0; - for (int y = -1; y <= 1; y++) { - for (int z = -1; z <= 1; z++) { - for (int x = -1; x <= 1; x++) { - bool flag = _flw_isSolid(sectionOffset, uvec3(blockInSectionPos + ivec3(x, y, z))); - ret |= uint(flag) << index; + // The formatter does NOT like these macros + // @formatter:off - index++; - } - } + #define _FLW_FETCH_SOLID(x, y, z, i) { \ + bool flag = _flw_isSolid(sectionOffset, uvec3(blockInSectionPos + ivec3(x, y, z))); \ + ret |= uint(flag) << i; \ } + /// fori y, z, x: unrolled + _FLW_FETCH_SOLID(-1, -1, -1, 0) + _FLW_FETCH_SOLID(0, -1, -1, 1) + _FLW_FETCH_SOLID(1, -1, -1, 2) + + 
_FLW_FETCH_SOLID(-1, -1, 0, 3) + _FLW_FETCH_SOLID(0, -1, 0, 4) + _FLW_FETCH_SOLID(1, -1, 0, 5) + + _FLW_FETCH_SOLID(-1, -1, 1, 6) + _FLW_FETCH_SOLID(0, -1, 1, 7) + _FLW_FETCH_SOLID(1, -1, 1, 8) + + _FLW_FETCH_SOLID(-1, 0, -1, 9) + _FLW_FETCH_SOLID(0, 0, -1, 10) + _FLW_FETCH_SOLID(1, 0, -1, 11) + + _FLW_FETCH_SOLID(-1, 0, 0, 12) + _FLW_FETCH_SOLID(0, 0, 0, 13) + _FLW_FETCH_SOLID(1, 0, 0, 14) + + _FLW_FETCH_SOLID(-1, 0, 1, 15) + _FLW_FETCH_SOLID(0, 0, 1, 16) + _FLW_FETCH_SOLID(1, 0, 1, 17) + + _FLW_FETCH_SOLID(-1, 1, -1, 18) + _FLW_FETCH_SOLID(0, 1, -1, 19) + _FLW_FETCH_SOLID(1, 1, -1, 20) + + _FLW_FETCH_SOLID(-1, 1, 0, 21) + _FLW_FETCH_SOLID(0, 1, 0, 22) + _FLW_FETCH_SOLID(1, 1, 0, 23) + + _FLW_FETCH_SOLID(-1, 1, 1, 24) + _FLW_FETCH_SOLID(0, 1, 1, 25) + _FLW_FETCH_SOLID(1, 1, 1, 26) + + // @formatter:on + return ret; } +/// Premtively collect all light in a 3x3x3 area centered on our block. +/// Depending on the normal, we won't use all the data, but fetching on demand will have many duplicated fetches. +/// Only fetching what we'll actually use using a bitmask turned out significantly slower, but perhaps a less +/// granular approach could see wins. +/// +/// The output is a 3-component vector packed into a single uint to save +/// memory and ALU ops later on. 10 bits are used for each component. This allows 4 such packed ints to be added +/// together with room to spare before overflowing into the next component. 
+uint[27] _flw_fetchLight3x3x3(uint sectionOffset, ivec3 blockInSectionPos, uint solidMask) { + uint[27] lights; + + // @formatter:off + #define _FLW_FETCH_LIGHT(_x, _y, _z, i) { \ + uvec2 light = _flw_lightAt(sectionOffset, uvec3(blockInSectionPos + ivec3(_x, _y, _z))); \ + lights[i] = (light.x) | ((light.y) << 10) | (uint((solidMask & (1u << i)) == 0u) << 20); \ + } + + /// fori y, z, x: unrolled + _FLW_FETCH_LIGHT(-1, -1, -1, 0) + _FLW_FETCH_LIGHT(0, -1, -1, 1) + _FLW_FETCH_LIGHT(1, -1, -1, 2) + + _FLW_FETCH_LIGHT(-1, -1, 0, 3) + _FLW_FETCH_LIGHT(0, -1, 0, 4) + _FLW_FETCH_LIGHT(1, -1, 0, 5) + + _FLW_FETCH_LIGHT(-1, -1, 1, 6) + _FLW_FETCH_LIGHT(0, -1, 1, 7) + _FLW_FETCH_LIGHT(1, -1, 1, 8) + + _FLW_FETCH_LIGHT(-1, 0, -1, 9) + _FLW_FETCH_LIGHT(0, 0, -1, 10) + _FLW_FETCH_LIGHT(1, 0, -1, 11) + + _FLW_FETCH_LIGHT(-1, 0, 0, 12) + _FLW_FETCH_LIGHT(0, 0, 0, 13) + _FLW_FETCH_LIGHT(1, 0, 0, 14) + + _FLW_FETCH_LIGHT(-1, 0, 1, 15) + _FLW_FETCH_LIGHT(0, 0, 1, 16) + _FLW_FETCH_LIGHT(1, 0, 1, 17) + + _FLW_FETCH_LIGHT(-1, 1, -1, 18) + _FLW_FETCH_LIGHT(0, 1, -1, 19) + _FLW_FETCH_LIGHT(1, 1, -1, 20) + + _FLW_FETCH_LIGHT(-1, 1, 0, 21) + _FLW_FETCH_LIGHT(0, 1, 0, 22) + _FLW_FETCH_LIGHT(1, 1, 0, 23) + + _FLW_FETCH_LIGHT(-1, 1, 1, 24) + _FLW_FETCH_LIGHT(0, 1, 1, 25) + _FLW_FETCH_LIGHT(1, 1, 1, 26) + + // @formatter:on + + return lights; +} + #define _flw_index3x3x3(x, y, z) ((x) + (z) * 3u + (y) * 9u) -#define _flw_index3x3x3v(p) _flw_index3x3x3((p).x, (p).y, (p).z) #define _flw_validCountToAo(validCount) (1. - (4. - (validCount)) * 0.2) /// Calculate the light for a direction by averaging the light at the corners of the block. @@ -167,65 +229,73 @@ uint _flw_fetchSolid3x3x3(uint sectionOffset, ivec3 blockInSectionPos) { /// @param interpolant The position within the center block. /// @param c00..c11 4 offsets to determine which "direction" we are averaging. 
/// @param oppositeMask A bitmask telling this function which bit to flip to get the opposite index for a given corner -vec3 _flw_lightForDirection(uint[27] lights, vec3 interpolant, uvec3 c00, uvec3 c01, uvec3 c10, uvec3 c11, uint oppositeMask) { - // Constant propatation should inline all of these index calculations, - // but since they're distributive we can lay them out more nicely. - uint ic00 = _flw_index3x3x3v(c00); - uint ic01 = _flw_index3x3x3v(c01); - uint ic10 = _flw_index3x3x3v(c10); - uint ic11 = _flw_index3x3x3v(c11); - - const uint[8] corners = uint[]( - _flw_index3x3x3(0u, 0u, 0u), - _flw_index3x3x3(0u, 0u, 1u), - _flw_index3x3x3(0u, 1u, 0u), - _flw_index3x3x3(0u, 1u, 1u), - _flw_index3x3x3(1u, 0u, 0u), - _flw_index3x3x3(1u, 0u, 1u), - _flw_index3x3x3(1u, 1u, 0u), - _flw_index3x3x3(1u, 1u, 1u) - ); - - // Division and branching are both kinda expensive, so use this table for the valid block normalization - const float[5] normalizers = float[](0., 1., 1. / 2., 1. / 3., 1. / 4.); - +vec3 _flw_lightForDirection(uint[27] lights, vec3 interpolant, uint c00, uint c01, uint c10, uint c11, uint oppositeMask) { // Sum up the light and number of valid blocks in each corner for this direction uint[8] summed; - for (uint i = 0; i < 8; i++) { - uint corner = corners[i]; - summed[i] = lights[ic00 + corner] + lights[ic01 + corner] + lights[ic10 + corner] + lights[ic11 + corner]; + + // @formatter:off + + #define _FLW_SUM_CORNER(_x, _y, _z, i) { \ + const uint corner = _flw_index3x3x3(_x, _y, _z); \ + summed[i] = lights[c00 + corner] + lights[c01 + corner] + lights[c10 + corner] + lights[c11 + corner]; \ } + _FLW_SUM_CORNER(0u, 0u, 0u, 0) + _FLW_SUM_CORNER(1u, 0u, 0u, 1) + _FLW_SUM_CORNER(0u, 0u, 1u, 2) + _FLW_SUM_CORNER(1u, 0u, 1u, 3) + _FLW_SUM_CORNER(0u, 1u, 0u, 4) + _FLW_SUM_CORNER(1u, 1u, 0u, 5) + _FLW_SUM_CORNER(0u, 1u, 1u, 6) + _FLW_SUM_CORNER(1u, 1u, 1u, 7) + + // @formatter:on + // The final light and number of valid blocks for each corner. 
vec3[8] adjusted; - for (uint i = 0; i < 8; i++) { - #ifdef _FLW_INNER_FACE_CORRECTION - // If the current corner has no valid blocks, use the opposite - // corner's light based on which direction we're evaluating. - // Because of how our corners are indexed, moving along one axis is the same as flipping a bit. - uint cornerIndex = (summed[i] & 0xFFF00000u) == 0u ? i ^ oppositeMask : i; - #else - uint cornerIndex = i; - #endif - uint corner = summed[cornerIndex]; - uvec3 unpacked = uvec3(corner & 0x3FFu, (corner >> 10u) & 0x3FFu, corner >> 20u); + #ifdef _FLW_INNER_FACE_CORRECTION + // If the current corner has no valid blocks, use the opposite + // corner's light based on which direction we're evaluating. + // Because of how our corners are indexed, moving along one axis is the same as flipping a bit. + #define _FLW_CORNER_INDEX(i) ((summed[i] & _FLW_UPPER_10_BITS) == 0u ? i ^ oppositeMask : i) + #else + #define _FLW_CORNER_INDEX(i) i + #endif - // Normalize by the number of valid blocks. - adjusted[i].xy = vec2(unpacked.xy) * normalizers[unpacked.z]; - adjusted[i].z = float(unpacked.z); + // Division and branching (to avoid dividing by zero) are both kinda expensive, so use this table for the valid block normalization + const float[5] normalizers = float[](0., 1., 1. / 2., 1. / 3., 1. 
/ 4.); + + // @formatter:off + + #define _FLW_ADJUST_CORNER(i) { \ + uint corner = summed[_FLW_CORNER_INDEX(i)]; \ + uint validCount = corner >> 20u; \ + adjusted[i].xy = vec2(corner & _FLW_LOWER_10_BITS, (corner >> 10u) & _FLW_LOWER_10_BITS) * normalizers[validCount]; \ + adjusted[i].z = float(validCount); \ } + _FLW_ADJUST_CORNER(0) + _FLW_ADJUST_CORNER(1) + _FLW_ADJUST_CORNER(2) + _FLW_ADJUST_CORNER(3) + _FLW_ADJUST_CORNER(4) + _FLW_ADJUST_CORNER(5) + _FLW_ADJUST_CORNER(6) + _FLW_ADJUST_CORNER(7) + + // @formatter:on + // Trilinear interpolation, including valid count - vec3 light00 = mix(adjusted[0], adjusted[1], interpolant.z); - vec3 light01 = mix(adjusted[2], adjusted[3], interpolant.z); - vec3 light10 = mix(adjusted[4], adjusted[5], interpolant.z); - vec3 light11 = mix(adjusted[6], adjusted[7], interpolant.z); + vec3 light00 = mix(adjusted[0], adjusted[1], interpolant.x); + vec3 light01 = mix(adjusted[2], adjusted[3], interpolant.x); + vec3 light10 = mix(adjusted[4], adjusted[5], interpolant.x); + vec3 light11 = mix(adjusted[6], adjusted[7], interpolant.x); - vec3 light0 = mix(light00, light01, interpolant.y); - vec3 light1 = mix(light10, light11, interpolant.y); + vec3 light0 = mix(light00, light01, interpolant.z); + vec3 light1 = mix(light10, light11, interpolant.z); - vec3 light = mix(light0, light1, interpolant.x); + vec3 light = mix(light0, light1, interpolant.y); // Normalize the light coords light.xy *= 1. / 15.; @@ -251,7 +321,8 @@ bool flw_light(vec3 worldPos, vec3 normal, out FlwLightAo light) { // The block's position in the section adjusted into 18x18x18 space ivec3 blockInSectionPos = (blockPos & 0xF) + 1; - #if _FLW_LIGHT_SMOOTHNESS == 1// Directly trilerp as if sampling a texture + // Directly trilerp as if sampling a texture + #if _FLW_LIGHT_SMOOTHNESS == 1 // The lowest corner of the 2x2x2 area we'll be trilinear interpolating. // The ugly bit on the end evaluates to -1 or 0 depending on which side of 0.5 we are. 
@@ -283,7 +354,8 @@ bool flw_light(vec3 worldPos, vec3 normal, out FlwLightAo light) { light.light = mix(light0, light1, interpolant.x) / 15.; light.ao = 1.; - #elif _FLW_LIGHT_SMOOTHNESS == 2// Lighting and AO accurate to chunk baking + // Lighting and AO accurate to chunk baking + #elif _FLW_LIGHT_SMOOTHNESS == 2 uint solid = _flw_fetchSolid3x3x3(sectionOffset, blockInSectionPos); @@ -304,27 +376,27 @@ bool flw_light(vec3 worldPos, vec3 normal, out FlwLightAo light) { vec3 lightX; if (normal.x > _FLW_EPSILON) { - lightX = _flw_lightForDirection(lights, interpolant, uvec3(1u, 0u, 0u), uvec3(1u, 0u, 1u), uvec3(1u, 1u, 0u), uvec3(1u, 1u, 1u), 4u); + lightX = _flw_lightForDirection(lights, interpolant, _flw_index3x3x3(1u, 0u, 0u), _flw_index3x3x3(1u, 0u, 1u), _flw_index3x3x3(1u, 1u, 0u), _flw_index3x3x3(1u, 1u, 1u), 1u); } else if (normal.x < -_FLW_EPSILON) { - lightX = _flw_lightForDirection(lights, interpolant, uvec3(0u, 0u, 0u), uvec3(0u, 0u, 1u), uvec3(0u, 1u, 0u), uvec3(0u, 1u, 1u), 4u); + lightX = _flw_lightForDirection(lights, interpolant, _flw_index3x3x3(0u, 0u, 0u), _flw_index3x3x3(0u, 0u, 1u), _flw_index3x3x3(0u, 1u, 0u), _flw_index3x3x3(0u, 1u, 1u), 1u); } else { lightX = vec3(0.); } vec3 lightZ; if (normal.z > _FLW_EPSILON) { - lightZ = _flw_lightForDirection(lights, interpolant, uvec3(0u, 0u, 1u), uvec3(0u, 1u, 1u), uvec3(1u, 0u, 1u), uvec3(1u, 1u, 1u), 1u); + lightZ = _flw_lightForDirection(lights, interpolant, _flw_index3x3x3(0u, 0u, 1u), _flw_index3x3x3(0u, 1u, 1u), _flw_index3x3x3(1u, 0u, 1u), _flw_index3x3x3(1u, 1u, 1u), 2u); } else if (normal.z < -_FLW_EPSILON) { - lightZ = _flw_lightForDirection(lights, interpolant, uvec3(0u, 0u, 0u), uvec3(0u, 1u, 0u), uvec3(1u, 0u, 0u), uvec3(1u, 1u, 0u), 1u); + lightZ = _flw_lightForDirection(lights, interpolant, _flw_index3x3x3(0u, 0u, 0u), _flw_index3x3x3(0u, 1u, 0u), _flw_index3x3x3(1u, 0u, 0u), _flw_index3x3x3(1u, 1u, 0u), 2u); } else { lightZ = vec3(0.); } vec3 lightY; if (normal.y > _FLW_EPSILON) { - 
lightY = _flw_lightForDirection(lights, interpolant, uvec3(0u, 1u, 0u), uvec3(0u, 1u, 1u), uvec3(1u, 1u, 0u), uvec3(1u, 1u, 1u), 2u); + lightY = _flw_lightForDirection(lights, interpolant, _flw_index3x3x3(0u, 1u, 0u), _flw_index3x3x3(0u, 1u, 1u), _flw_index3x3x3(1u, 1u, 0u), _flw_index3x3x3(1u, 1u, 1u), 4u); } else if (normal.y < -_FLW_EPSILON) { - lightY = _flw_lightForDirection(lights, interpolant, uvec3(0u, 0u, 0u), uvec3(0u, 0u, 1u), uvec3(1u, 0u, 0u), uvec3(1u, 0u, 1u), 2u); + lightY = _flw_lightForDirection(lights, interpolant, _flw_index3x3x3(0u, 0u, 0u), _flw_index3x3x3(0u, 0u, 1u), _flw_index3x3x3(1u, 0u, 0u), _flw_index3x3x3(1u, 0u, 1u), 4u); } else { lightY = vec3(0.); } @@ -335,7 +407,8 @@ bool flw_light(vec3 worldPos, vec3 normal, out FlwLightAo light) { light.light = lightAo.xy; light.ao = lightAo.z; - #else// Entirely flat lighting, the lowest setting and a fallback in case an invalid option is set + // Entirely flat lighting, the lowest setting and a fallback in case an invalid option is set + #else light.light = vec2(_flw_lightAt(sectionOffset, blockInSectionPos)) / 15.; light.ao = 1.; From b7d2b2ac7ceb71a4915500df9c6bc2cb60718dc1 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Mon, 12 Aug 2024 17:35:31 -0700 Subject: [PATCH 3/6] Ubern't - De-uberify the light shader - Remove lightSources index - Include LightShader in PipelineProgramKey and parameterize the pipeline fragment shader by it - Profiling suggests that specializing the shaders uses significantly less GPU time, and we may want to do this for actual user-authored material shaders (and cutout?) 
as well - Sort LightShader highest in the material comparator - Implement a materialEquals method so IndirectCullingGroup can bucket draws on more that just material reference equality - Do not store any particular draw program in IndirectCullingGroup --- .../backend/MaterialShaderIndices.java | 14 ------- .../flywheel/backend/compile/FlwPrograms.java | 24 +++-------- .../backend/compile/IndirectPrograms.java | 5 ++- .../backend/compile/InstancingPrograms.java | 5 ++- .../backend/compile/PipelineCompiler.java | 5 ++- .../backend/compile/PipelineProgramKey.java | 4 +- .../backend/engine/MaterialEncoder.java | 3 +- .../backend/engine/MaterialRenderState.java | 18 +++++++- .../engine/indirect/IndirectCullingGroup.java | 42 ++++++++++++------- .../instancing/InstancedDrawManager.java | 3 +- .../instancing/InstancedRenderStage.java | 11 ++--- .../flywheel/internal/components_header.frag | 1 - .../flywheel/internal/indirect/main.frag | 2 +- .../flywheel/internal/instancing/main.frag | 2 +- .../flywheel/internal/packed_material.glsl | 6 --- 15 files changed, 73 insertions(+), 72 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/MaterialShaderIndices.java b/common/src/backend/java/dev/engine_room/flywheel/backend/MaterialShaderIndices.java index a67daaadb..8592a9e65 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/MaterialShaderIndices.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/MaterialShaderIndices.java @@ -8,7 +8,6 @@ import org.jetbrains.annotations.Unmodifiable; import dev.engine_room.flywheel.api.material.CutoutShader; import dev.engine_room.flywheel.api.material.FogShader; -import dev.engine_room.flywheel.api.material.LightShader; import dev.engine_room.flywheel.api.material.MaterialShaders; import dev.engine_room.flywheel.api.registry.Registry; import it.unimi.dsi.fastutil.objects.Object2IntMap; @@ -26,8 +25,6 @@ public final class MaterialShaderIndices { private static Index fogSources; 
@Nullable private static Index cutoutSources; - @Nullable - private static Index lightSources; private MaterialShaderIndices() { } @@ -60,13 +57,6 @@ public final class MaterialShaderIndices { return cutoutSources; } - public static Index lightSources() { - if (lightSources == null) { - lightSources = indexFromRegistry(LightShader.REGISTRY, LightShader::source); - } - return lightSources; - } - public static int vertexIndex(MaterialShaders shaders) { return vertexSources().index(shaders.vertexSource()); } @@ -83,10 +73,6 @@ public final class MaterialShaderIndices { return cutoutSources().index(cutoutShader.source()); } - public static int lightIndex(LightShader lightShader) { - return lightSources().index(lightShader.source()); - } - private static Index indexFromRegistry(Registry registry, Function sourceFunc) { if (!registry.isFrozen()) { throw new IllegalStateException("Cannot create index from registry that is not frozen!"); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwPrograms.java index 6681d4676..4bb7a5066 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/FlwPrograms.java @@ -10,6 +10,7 @@ import com.google.common.collect.ImmutableList; import dev.engine_room.flywheel.api.Flywheel; import dev.engine_room.flywheel.api.instance.InstanceType; +import dev.engine_room.flywheel.api.material.LightShader; import dev.engine_room.flywheel.backend.MaterialShaderIndices; import dev.engine_room.flywheel.backend.compile.component.UberShaderComponent; import dev.engine_room.flywheel.backend.compile.core.CompilerStats; @@ -46,16 +47,15 @@ public final class FlwPrograms { var fragmentMaterialComponent = createFragmentMaterialComponent(loader); var fogComponent = createFogComponent(loader); var cutoutComponent = createCutoutComponent(loader); 
- var lightComponent = createLightComponent(loader); - if (stats.errored() || vertexComponentsHeader == null || fragmentComponentsHeader == null || vertexMaterialComponent == null || fragmentMaterialComponent == null || fogComponent == null || cutoutComponent == null || lightComponent == null) { + if (stats.errored() || vertexComponentsHeader == null || fragmentComponentsHeader == null || vertexMaterialComponent == null || fragmentMaterialComponent == null || fogComponent == null || cutoutComponent == null) { // Probably means the shader sources are missing. stats.emitErrorLog(); return; } List vertexComponents = List.of(vertexComponentsHeader, vertexMaterialComponent); - List fragmentComponents = List.of(fragmentComponentsHeader, fragmentMaterialComponent, fogComponent, cutoutComponent, lightComponent); + List fragmentComponents = List.of(fragmentComponentsHeader, fragmentMaterialComponent, fogComponent, cutoutComponent); var pipelineKeys = createPipelineKeys(); InstancingPrograms.reload(sources, pipelineKeys, vertexComponents, fragmentComponents); @@ -66,7 +66,9 @@ public final class FlwPrograms { ImmutableList.Builder builder = ImmutableList.builder(); for (ContextShader contextShader : ContextShader.values()) { for (InstanceType instanceType : InstanceType.REGISTRY) { - builder.add(new PipelineProgramKey(instanceType, contextShader)); + for (LightShader light : LightShader.REGISTRY.getAll()) { + builder.add(new PipelineProgramKey(instanceType, contextShader, light)); + } } } return builder.build(); @@ -119,18 +121,4 @@ public final class FlwPrograms { .switchOn(GlslExpr.variable("_flw_uberCutoutIndex")) .build(loader); } - - // TODO: Do not uber this component. 
Shader compile times are very high now - @Nullable - private static UberShaderComponent createLightComponent(SourceLoader loader) { - return UberShaderComponent.builder(Flywheel.rl("light")) - .materialSources(MaterialShaderIndices.lightSources() - .all()) - .adapt(FnSignature.create() - .returnType("void") - .name("flw_shaderLight") - .build()) - .switchOn(GlslExpr.variable("_flw_uberLightIndex")) - .build(loader); - } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index a2c7ed16c..3b705754a 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -9,6 +9,7 @@ import com.google.common.collect.ImmutableList; import dev.engine_room.flywheel.api.Flywheel; import dev.engine_room.flywheel.api.instance.InstanceType; +import dev.engine_room.flywheel.api.material.LightShader; import dev.engine_room.flywheel.backend.compile.component.InstanceStructComponent; import dev.engine_room.flywheel.backend.compile.component.SsboInstanceComponent; import dev.engine_room.flywheel.backend.compile.core.CompilationHarness; @@ -167,8 +168,8 @@ public class IndirectPrograms extends AtomicReferenceCounted { return instance != null; } - public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader) { - return pipeline.get(new PipelineProgramKey(instanceType, contextShader)); + public GlProgram getIndirectProgram(InstanceType instanceType, ContextShader contextShader, LightShader light) { + return pipeline.get(new PipelineProgramKey(instanceType, contextShader, light)); } public GlProgram getCullingProgram(InstanceType instanceType) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java 
b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java index f91483469..6b9fcfad9 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/InstancingPrograms.java @@ -8,6 +8,7 @@ import org.jetbrains.annotations.Nullable; import com.google.common.collect.ImmutableList; import dev.engine_room.flywheel.api.instance.InstanceType; +import dev.engine_room.flywheel.api.material.LightShader; import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; import dev.engine_room.flywheel.backend.glsl.GlslVersion; @@ -78,8 +79,8 @@ public class InstancingPrograms extends AtomicReferenceCounted { return instance != null; } - public GlProgram get(InstanceType instanceType, ContextShader contextShader) { - return pipeline.get(new PipelineProgramKey(instanceType, contextShader)); + public GlProgram get(InstanceType instanceType, ContextShader contextShader, LightShader light) { + return pipeline.get(new PipelineProgramKey(instanceType, contextShader, light)); } @Override diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java index bc303f8f9..405ca7ffe 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineCompiler.java @@ -56,7 +56,8 @@ public final class PipelineCompiler { .nameMapper(key -> { var context = key.contextShader() .nameLowerCase(); - return "pipeline/" + pipeline.compilerMarker() + "/" + context; + return "pipeline/" + pipeline.compilerMarker() + "/" + ResourceUtil.toDebugFileNameNoExtension(key.light() + .source()) + "_" + context; }) .requireExtensions(extensions) .enableExtension("GL_ARB_conservative_depth") @@ 
-65,6 +66,8 @@ public final class PipelineCompiler { .onCompile((key, comp) -> lightSmoothness.onCompile(comp)) .withResource(API_IMPL_FRAG) .withComponents(fragmentComponents) + .withResource(key -> key.light() + .source()) .withResource(pipeline.fragmentMain())) .preLink((key, program) -> { program.bindAttribLocation("_flw_aPos", 0); diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineProgramKey.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineProgramKey.java index 691e026b5..3fe6a1033 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineProgramKey.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/PipelineProgramKey.java @@ -1,12 +1,14 @@ package dev.engine_room.flywheel.backend.compile; import dev.engine_room.flywheel.api.instance.InstanceType; +import dev.engine_room.flywheel.api.material.LightShader; /** * Represents the entire context of a program's usage. * * @param instanceType The instance shader to use. * @param contextShader The context shader to use. + * @param light The light shader to use. 
*/ -public record PipelineProgramKey(InstanceType instanceType, ContextShader contextShader) { +public record PipelineProgramKey(InstanceType instanceType, ContextShader contextShader, LightShader light) { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialEncoder.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialEncoder.java index 36e7c6ba7..fc45d402e 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialEncoder.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialEncoder.java @@ -55,8 +55,7 @@ public final class MaterialEncoder { public static int packUberShader(Material material) { var fog = MaterialShaderIndices.fogIndex(material.fog()); var cutout = MaterialShaderIndices.cutoutIndex(material.cutout()); - var light = MaterialShaderIndices.lightIndex(material.light()); - return (light & 0x3FF) | (cutout & 0x3FF) << 10 | (fog & 0x3FF) << 20; + return (cutout & 0xFFFF) | (fog & 0xFFFF) << 16; } // Packed format: diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java index de0637ddb..0ddf8e1e7 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/MaterialRenderState.java @@ -16,7 +16,9 @@ import net.minecraft.client.Minecraft; import net.minecraft.client.renderer.texture.AbstractTexture; public final class MaterialRenderState { - public static final Comparator COMPARATOR = Comparator.comparing(Material::texture) + public static final Comparator COMPARATOR = Comparator.comparing((Material m) -> m.light() + .source()) + .thenComparing(Material::texture) .thenComparing(Material::blur) .thenComparing(Material::mipmap) .thenComparing(Material::backfaceCulling) @@ -177,4 +179,18 @@ public final 
class MaterialRenderState { RenderSystem.depthMask(true); RenderSystem.colorMask(true, true, true, true); } + + public static boolean materialEquals(Material lhs, Material rhs) { + if (lhs == rhs) { + return true; + } + + // Not here because ubershader: useLight, useOverlay, diffuse, shaders, fog shader, and cutout shader + // Everything in the comparator should be here. + return lhs.blur() == rhs.blur() && lhs.mipmap() == rhs.mipmap() && lhs.backfaceCulling() == rhs.backfaceCulling() && lhs.polygonOffset() == rhs.polygonOffset() && lhs.light() + .source() + .equals(rhs.light() + .source()) && lhs.texture() + .equals(rhs.texture()) && lhs.depthTest() == rhs.depthTest() && lhs.transparency() == rhs.transparency() && lhs.writeMask() == rhs.writeMask(); + } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 5dbcbe3c8..213b9016c 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -28,6 +28,7 @@ import dev.engine_room.flywheel.backend.engine.embed.Environment; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.material.LightShaders; import dev.engine_room.flywheel.lib.math.MoreMath; public class IndirectCullingGroup { @@ -49,7 +50,6 @@ public class IndirectCullingGroup { private final IndirectPrograms programs; private final GlProgram cullProgram; private final GlProgram applyProgram; - private final GlProgram drawProgram; private boolean needsDrawBarrier; private boolean needsDrawSort; @@ -65,7 +65,6 @@ public class IndirectCullingGroup { this.programs = programs; cullProgram 
= programs.getCullingProgram(instanceType); applyProgram = programs.getApplyProgram(); - drawProgram = programs.getIndirectProgram(instanceType, environment.contextShader()); } public void flushInstancers() { @@ -158,20 +157,23 @@ public class IndirectCullingGroup { for (int start = 0, i = 0; i < indirectDraws.size(); i++) { var draw1 = indirectDraws.get(i); - var material1 = draw1.material(); - var visualType1 = draw1.visualType(); // if the next draw call has a different VisualType or Material, start a new MultiDraw - if (i == indirectDraws.size() - 1 || visualType1 != indirectDraws.get(i + 1) - .visualType() || !material1.equals(indirectDraws.get(i + 1) - .material())) { - multiDraws.computeIfAbsent(visualType1, s -> new ArrayList<>()) - .add(new MultiDraw(material1, start, i + 1)); + if (i == indirectDraws.size() - 1 || incompatibleDraws(draw1, indirectDraws.get(i + 1))) { + multiDraws.computeIfAbsent(draw1.visualType(), s -> new ArrayList<>()) + .add(new MultiDraw(draw1.material(), start, i + 1)); start = i + 1; } } } + private boolean incompatibleDraws(IndirectDraw draw1, IndirectDraw draw2) { + if (draw1.visualType() != draw2.visualType()) { + return true; + } + return !MaterialRenderState.materialEquals(draw1.material(), draw2.material()); + } + public boolean hasVisualType(VisualType visualType) { return multiDraws.containsKey(visualType); } @@ -199,17 +201,25 @@ public class IndirectCullingGroup { return; } - drawProgram.bind(); buffers.bindForDraw(); - environment.setupDraw(drawProgram); - drawBarrier(); - var flwBaseDraw = drawProgram.getUniformLocation("_flw_baseDraw"); + GlProgram lastProgram = null; + int baseDrawUniformLoc = -1; for (var multiDraw : multiDraws.get(visualType)) { - glUniform1ui(flwBaseDraw, multiDraw.start); + var drawProgram = programs.getIndirectProgram(instanceType, environment.contextShader(), multiDraw.material.light()); + if (drawProgram != lastProgram) { + lastProgram = drawProgram; + + // Don't need to do this unless the 
program changes. + drawProgram.bind(); + environment.setupDraw(drawProgram); + baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw"); + } + + glUniform1ui(baseDrawUniformLoc, multiDraw.start); MaterialRenderState.setup(multiDraw.material); @@ -218,7 +228,7 @@ public class IndirectCullingGroup { } public void bindWithContextShader(ContextShader override) { - var program = programs.getIndirectProgram(instanceType, override); + var program = programs.getIndirectProgram(instanceType, override, LightShaders.SMOOTH_WHEN_EMBEDDED); program.bind(); @@ -226,7 +236,7 @@ public class IndirectCullingGroup { drawBarrier(); - var flwBaseDraw = drawProgram.getUniformLocation("_flw_baseDraw"); + var flwBaseDraw = program.getUniformLocation("_flw_baseDraw"); glUniform1ui(flwBaseDraw, 0); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java index dfdc57808..93d68e11d 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java @@ -28,6 +28,7 @@ import dev.engine_room.flywheel.backend.gl.GlStateTracker; import dev.engine_room.flywheel.backend.gl.TextureBuffer; import dev.engine_room.flywheel.backend.gl.array.GlVertexArray; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.material.LightShaders; import dev.engine_room.flywheel.lib.material.SimpleMaterial; import net.minecraft.client.resources.model.ModelBakery; @@ -170,7 +171,7 @@ public class InstancedDrawManager extends DrawManager> { GroupKey shader = groupEntry.getKey(); - var program = programs.get(shader.instanceType(), ContextShader.CRUMBLING); + var program = programs.get(shader.instanceType(), ContextShader.CRUMBLING, 
LightShaders.SMOOTH_WHEN_EMBEDDED); program.bind(); for (var progressEntry : byProgress.int2ObjectEntrySet()) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java index 5e62e9718..73b1fdddf 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedRenderStage.java @@ -54,12 +54,13 @@ public class InstancedRenderStage { var environment = shader.environment(); - var program = programs.get(shader.instanceType(), environment.contextShader()); - program.bind(); - - environment.setupDraw(program); - for (var drawCall : drawCalls.draws) { + var program = programs.get(shader.instanceType(), environment.contextShader(), drawCall.material() + .light()); + program.bind(); + + environment.setupDraw(program); + var material = drawCall.material(); uploadMaterialUniform(program, material); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/components_header.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/components_header.frag index 5c0550d60..ef45f6d68 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/components_header.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/components_header.frag @@ -1,4 +1,3 @@ uint _flw_uberMaterialFragmentIndex; uint _flw_uberFogIndex; uint _flw_uberCutoutIndex; -uint _flw_uberLightIndex; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag index a67cb47a2..7d528ce24 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag +++ 
b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.frag @@ -6,7 +6,7 @@ flat in uvec3 _flw_packedMaterial; void main() { _flw_uberMaterialFragmentIndex = _flw_packedMaterial.x; - _flw_unpackUint3x10(_flw_packedMaterial.y, _flw_uberFogIndex, _flw_uberCutoutIndex, _flw_uberLightIndex); + _flw_unpackUint2x16(_flw_packedMaterial.y, _flw_uberFogIndex, _flw_uberCutoutIndex); _flw_unpackMaterialProperties(_flw_packedMaterial.z, flw_material); _flw_main(); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag index 6ada1a130..cfd7dfea1 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.frag @@ -5,7 +5,7 @@ uniform uvec4 _flw_packedMaterial; void main() { _flw_uberMaterialFragmentIndex = _flw_packedMaterial.y; - _flw_unpackUint3x10(_flw_packedMaterial.z, _flw_uberFogIndex, _flw_uberCutoutIndex, _flw_uberLightIndex); + _flw_unpackUint2x16(_flw_packedMaterial.z, _flw_uberFogIndex, _flw_uberCutoutIndex); _flw_unpackMaterialProperties(_flw_packedMaterial.w, flw_material); _flw_main(); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/packed_material.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/packed_material.glsl index 892795e66..b56828c1f 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/packed_material.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/packed_material.glsl @@ -53,9 +53,3 @@ void _flw_unpackUint2x16(uint s, out uint hi, out uint lo) { hi = (s >> 16) & 0xFFFFu; lo = s & 0xFFFFu; } - -void _flw_unpackUint3x10(uint s, out uint hi, out uint mi, out uint lo) { - hi = (s >> 20) & 0x3FFu; - mi = (s >> 10) & 0x3FFu; - lo = s & 0x3FFu; -} From 7a7d58adf2892634944c3b72bfe9e20fde13cbca Mon 
Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 15 Aug 2024 11:41:33 -0700 Subject: [PATCH 4/6] Embeds your embeddings - Optimize embeddings on indirect backend by uploading all matrices in an SSBO - Allocate matrices in an arena - Flatten IndirectCullingGroups to only be parameterized by InstanceType, so now all instances from all embeddings get culled in the same dispatch - Sort indirect draws by whether they're embedded before anything else - Include an "embedded" boolean in the MultiDraw record to decide which shader to use - Include "matrixIndex" field in model descriptor and indirect draw structs - Use matrixIndex == 0 to indicate that a matrix is the identity to avoid unnecessary work in the cull shader - Add helper to write a mat3 as 3 vec4s --- .../flywheel/backend/engine/DrawManager.java | 3 +- .../flywheel/backend/engine/EngineImpl.java | 3 +- .../engine/embed/EmbeddedEnvironment.java | 19 ++++++---- .../engine/embed/EmbeddingUniforms.java | 8 +--- .../backend/engine/embed/Environment.java | 4 +- .../engine/embed/EnvironmentStorage.java | 38 ++++++++++++++++--- .../engine/embed/GlobalEnvironment.java | 6 +-- .../engine/indirect/BufferBindings.java | 1 + .../engine/indirect/IndirectBuffers.java | 4 +- .../engine/indirect/IndirectCullingGroup.java | 19 +++++----- .../backend/engine/indirect/IndirectDraw.java | 25 ++++++++---- .../engine/indirect/IndirectDrawManager.java | 17 ++++++--- .../engine/indirect/IndirectInstancer.java | 9 +++-- .../backend/engine/indirect/MatrixBuffer.java | 33 ++++++++++++++++ .../instancing/InstancedDrawManager.java | 5 ++- .../flywheel/flywheel/internal/common.vert | 4 +- .../internal/indirect/buffer_bindings.glsl | 1 + .../flywheel/internal/indirect/cull.glsl | 11 ++++-- .../internal/indirect/draw_command.glsl | 1 + .../flywheel/internal/indirect/main.vert | 15 +++++++- .../flywheel/internal/indirect/matrices.glsl | 11 ++++++ .../internal/indirect/model_descriptor.glsl | 1 + .../flywheel/internal/instancing/main.vert | 10 +++++ 
.../flywheel/lib/util/ExtraMemoryOps.java | 15 ++++++++ 24 files changed, 199 insertions(+), 64 deletions(-) create mode 100644 common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java create mode 100644 common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java index cf07a4597..476a9abc4 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/DrawManager.java @@ -16,6 +16,7 @@ import dev.engine_room.flywheel.api.model.Model; import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.FlwBackend; import dev.engine_room.flywheel.backend.engine.embed.Environment; +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.lib.util.Pair; import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; @@ -40,7 +41,7 @@ public abstract class DrawManager> { return (Instancer) instancers.computeIfAbsent(new InstancerKey<>(environment, type, model, visualType, bias), this::createAndDeferInit); } - public void flush(LightStorage lightStorage) { + public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) { // Thread safety: flush is called from the render thread after all visual updates have been made, // so there are no:tm: threads we could be racing with. 
for (var instancer : initializationQueue) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java index 36ab2b11f..394085a70 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/EngineImpl.java @@ -89,7 +89,7 @@ public class EngineImpl implements Engine { try (var state = GlStateTracker.getRestoreState()) { Uniforms.update(context); environmentStorage.flush(); - drawManager.flush(lightStorage); + drawManager.flush(lightStorage, environmentStorage); } } @@ -107,6 +107,7 @@ public class EngineImpl implements Engine { public void delete() { drawManager.delete(); lightStorage.delete(); + environmentStorage.delete(); } public Instancer instancer(Environment environment, InstanceType type, Model model, VisualType visualType, int bias) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java index 89957dce9..09dff65c8 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddedEnvironment.java @@ -16,6 +16,7 @@ import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.engine.EngineImpl; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; +import dev.engine_room.flywheel.lib.util.ExtraMemoryOps; import net.minecraft.core.Vec3i; public class EmbeddedEnvironment implements VisualEmbedding, Environment { @@ -31,6 +32,8 @@ public class EmbeddedEnvironment implements VisualEmbedding, Environment { private final Matrix4f poseComposed = new Matrix4f(); private 
final Matrix3f normalComposed = new Matrix3f(); + public int matrixIndex = 0; + private boolean deleted = false; public EmbeddedEnvironment(EngineImpl engine, VisualType visualType, Vec3i renderOrigin, @Nullable EmbeddedEnvironment parent) { @@ -81,23 +84,25 @@ public class EmbeddedEnvironment implements VisualEmbedding, Environment { return ContextShader.EMBEDDED; } - @Override - public void setupCull(GlProgram program) { - program.setBool(EmbeddingUniforms.USE_MODEL_MATRIX, true); - program.setMat4(EmbeddingUniforms.MODEL_MATRIX, poseComposed); - } - @Override public void setupDraw(GlProgram program) { program.setMat4(EmbeddingUniforms.MODEL_MATRIX, poseComposed); program.setMat3(EmbeddingUniforms.NORMAL_MATRIX, normalComposed); } - public void flush() { + @Override + public int matrixIndex() { + return matrixIndex; + } + + public void flush(long ptr) { poseComposed.identity(); normalComposed.identity(); composeMatrices(poseComposed, normalComposed); + + ExtraMemoryOps.putMatrix4f(ptr, poseComposed); + ExtraMemoryOps.putMatrix3fPadded(ptr + 16 * Float.BYTES, normalComposed); } private void composeMatrices(Matrix4f pose, Matrix3f normal) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java index e62f0b018..e1c4c1978 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EmbeddingUniforms.java @@ -1,12 +1,8 @@ package dev.engine_room.flywheel.backend.engine.embed; public final class EmbeddingUniforms { - /** - * Only used by cull shaders. 
- */ - public static final String USE_MODEL_MATRIX = "_flw_useModelMatrix"; - public static final String MODEL_MATRIX = "_flw_modelMatrix"; - public static final String NORMAL_MATRIX = "_flw_normalMatrix"; + public static final String MODEL_MATRIX = "_flw_modelMatrixUniform"; + public static final String NORMAL_MATRIX = "_flw_normalMatrixUniform"; private EmbeddingUniforms() { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java index 83fa60d62..8afcf6125 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/Environment.java @@ -6,7 +6,7 @@ import dev.engine_room.flywheel.backend.gl.shader.GlProgram; public interface Environment { ContextShader contextShader(); - void setupCull(GlProgram cullProgram); - void setupDraw(GlProgram drawProgram); + + int matrixIndex(); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java index 9206b9cf3..942fc3623 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java @@ -1,18 +1,46 @@ package dev.engine_room.flywheel.backend.engine.embed; +import dev.engine_room.flywheel.backend.engine.Arena; import it.unimi.dsi.fastutil.objects.ReferenceLinkedOpenHashSet; import it.unimi.dsi.fastutil.objects.ReferenceSet; -import it.unimi.dsi.fastutil.objects.ReferenceSets; public class EnvironmentStorage { - protected final ReferenceSet environments = ReferenceSets.synchronize(new ReferenceLinkedOpenHashSet<>()); + public static final int MATRIX_SIZE_BYTES = (16 + 12) * Float.BYTES; + + protected 
final Object lock = new Object(); + + protected final ReferenceSet environments = new ReferenceLinkedOpenHashSet<>(); + + // Note than the arena starts indexing at zero, but we reserve zero for the identity matrix. + // Any time an ID from the arena is written we want to add one to it. + public final Arena arena = new Arena(MATRIX_SIZE_BYTES, 32); + + { + arena.alloc(); // Reserve the identity matrix. + } public void track(EmbeddedEnvironment environment) { - environments.add(environment); + synchronized (lock) { + if (environments.add(environment)) { + environment.matrixIndex = arena.alloc(); + } + } } public void flush() { - environments.removeIf(EmbeddedEnvironment::isDeleted); - environments.forEach(EmbeddedEnvironment::flush); + environments.removeIf(embeddedEnvironment -> { + var deleted = embeddedEnvironment.isDeleted(); + if (deleted && embeddedEnvironment.matrixIndex > 0) { + arena.free(embeddedEnvironment.matrixIndex); + } + return deleted; + }); + for (EmbeddedEnvironment environment : environments) { + environment.flush(arena.indexToPointer(environment.matrixIndex)); + } + } + + public void delete() { + arena.delete(); } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java index aaac1ca35..7b9919baf 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/GlobalEnvironment.java @@ -15,11 +15,11 @@ public class GlobalEnvironment implements Environment { } @Override - public void setupCull(GlProgram cullProgram) { - cullProgram.setBool(EmbeddingUniforms.USE_MODEL_MATRIX, false); + public void setupDraw(GlProgram drawProgram) { } @Override - public void setupDraw(GlProgram drawProgram) { + public int matrixIndex() { + return 0; } } diff --git 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java index a0ae93a28..479eaed74 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java @@ -8,6 +8,7 @@ public final class BufferBindings { public static final int DRAW = 4; public static final int LIGHT_LUT = 5; public static final int LIGHT_SECTION = 6; + public static final int MATRICES = 7; private BufferBindings() { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 90ecd4149..b0766e171 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -16,10 +16,10 @@ public class IndirectBuffers { public static final long INT_SIZE = Integer.BYTES; public static final long PTR_SIZE = Pointer.POINTER_SIZE; - public static final long MODEL_STRIDE = 24; + public static final long MODEL_STRIDE = 28; // Byte size of a draw command, plus our added mesh data. 
- public static final long DRAW_COMMAND_STRIDE = 40; + public static final long DRAW_COMMAND_STRIDE = 44; public static final long DRAW_COMMAND_OFFSET = 0; // Offsets to the 3 segments diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 213b9016c..5afa7c46d 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -24,7 +24,6 @@ import dev.engine_room.flywheel.backend.compile.IndirectPrograms; import dev.engine_room.flywheel.backend.engine.InstancerKey; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; -import dev.engine_room.flywheel.backend.engine.embed.Environment; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; @@ -33,6 +32,7 @@ import dev.engine_room.flywheel.lib.math.MoreMath; public class IndirectCullingGroup { private static final Comparator DRAW_COMPARATOR = Comparator.comparing(IndirectDraw::visualType) + .thenComparing(IndirectDraw::isEmbedded) .thenComparing(IndirectDraw::bias) .thenComparing(IndirectDraw::indexOfMeshInModel) .thenComparing(IndirectDraw::material, MaterialRenderState.COMPARATOR); @@ -40,7 +40,6 @@ public class IndirectCullingGroup { private static final int DRAW_BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT; private final InstanceType instanceType; - private final Environment environment; private final long instanceStride; private final IndirectBuffers buffers; private final List> instancers = new ArrayList<>(); @@ -55,9 +54,8 @@ public class IndirectCullingGroup { private boolean 
needsDrawSort; private int instanceCountThisFrame; - IndirectCullingGroup(InstanceType instanceType, Environment environment, IndirectPrograms programs) { + IndirectCullingGroup(InstanceType instanceType, IndirectPrograms programs) { this.instanceType = instanceType; - this.environment = environment; instanceStride = MoreMath.align4(instanceType.layout() .byteSize()); buffers = new IndirectBuffers(instanceStride); @@ -124,8 +122,6 @@ public class IndirectCullingGroup { Uniforms.bindAll(); cullProgram.bind(); - environment.setupCull(cullProgram); - buffers.bindForCompute(); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDispatchCompute(GlCompat.getComputeGroupCount(instanceCountThisFrame), 1, 1); @@ -161,7 +157,7 @@ public class IndirectCullingGroup { // if the next draw call has a different VisualType or Material, start a new MultiDraw if (i == indirectDraws.size() - 1 || incompatibleDraws(draw1, indirectDraws.get(i + 1))) { multiDraws.computeIfAbsent(draw1.visualType(), s -> new ArrayList<>()) - .add(new MultiDraw(draw1.material(), start, i + 1)); + .add(new MultiDraw(draw1.material(), draw1.isEmbedded(), start, i + 1)); start = i + 1; } } @@ -171,6 +167,10 @@ public class IndirectCullingGroup { if (draw1.visualType() != draw2.visualType()) { return true; } + + if (draw1.isEmbedded() != draw2.isEmbedded()) { + return true; + } return !MaterialRenderState.materialEquals(draw1.material(), draw2.material()); } @@ -209,13 +209,12 @@ public class IndirectCullingGroup { int baseDrawUniformLoc = -1; for (var multiDraw : multiDraws.get(visualType)) { - var drawProgram = programs.getIndirectProgram(instanceType, environment.contextShader(), multiDraw.material.light()); + var drawProgram = programs.getIndirectProgram(instanceType, multiDraw.embedded ? ContextShader.EMBEDDED : ContextShader.DEFAULT, multiDraw.material.light()); if (drawProgram != lastProgram) { lastProgram = drawProgram; // Don't need to do this unless the program changes. 
drawProgram.bind(); - environment.setupDraw(drawProgram); baseDrawUniformLoc = drawProgram.getUniformLocation("_flw_baseDraw"); } @@ -300,7 +299,7 @@ public class IndirectCullingGroup { return out; } - private record MultiDraw(Material material, int start, int end) { + private record MultiDraw(Material material, boolean embedded, int start, int end) { private void submit() { GlCompat.safeMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, this.start * IndirectBuffers.DRAW_COMMAND_STRIDE, this.end - this.start, (int) IndirectBuffers.DRAW_COMMAND_STRIDE); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java index 9494b6ade..fb763d006 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDraw.java @@ -7,6 +7,7 @@ import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.MaterialShaderIndices; import dev.engine_room.flywheel.backend.engine.MaterialEncoder; import dev.engine_room.flywheel.backend.engine.MeshPool; +import dev.engine_room.flywheel.backend.engine.embed.EmbeddedEnvironment; public class IndirectDraw { private final IndirectInstancer instancer; @@ -46,6 +47,10 @@ public class IndirectDraw { return material; } + public boolean isEmbedded() { + return instancer.environment instanceof EmbeddedEnvironment; + } + public MeshPool.PooledMesh mesh() { return mesh; } @@ -71,10 +76,12 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex - MemoryUtil.memPutInt(ptr + 24, materialVertexIndex); // materialVertexIndex - MemoryUtil.memPutInt(ptr + 28, materialFragmentIndex); // materialFragmentIndex - MemoryUtil.memPutInt(ptr + 32, packedFogAndCutout); // packedFogAndCutout - MemoryUtil.memPutInt(ptr + 
36, packedMaterialProperties); // packedMaterialProperties + MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex + + MemoryUtil.memPutInt(ptr + 28, materialVertexIndex); // materialVertexIndex + MemoryUtil.memPutInt(ptr + 32, materialFragmentIndex); // materialFragmentIndex + MemoryUtil.memPutInt(ptr + 36, packedFogAndCutout); // packedFogAndCutout + MemoryUtil.memPutInt(ptr + 40, packedMaterialProperties); // packedMaterialProperties } public void writeWithOverrides(long ptr, int instanceIndex, Material materialOverride) { @@ -86,10 +93,12 @@ public class IndirectDraw { MemoryUtil.memPutInt(ptr + 20, instancer.modelIndex); // modelIndex - MemoryUtil.memPutInt(ptr + 24, MaterialShaderIndices.vertexIndex(materialOverride.shaders())); // materialVertexIndex - MemoryUtil.memPutInt(ptr + 28, MaterialShaderIndices.fragmentIndex(materialOverride.shaders())); // materialFragmentIndex - MemoryUtil.memPutInt(ptr + 32, MaterialEncoder.packUberShader(materialOverride)); // packedFogAndCutout - MemoryUtil.memPutInt(ptr + 36, MaterialEncoder.packProperties(materialOverride)); // packedMaterialProperties + MemoryUtil.memPutInt(ptr + 24, instancer.environment.matrixIndex()); // matrixIndex + + MemoryUtil.memPutInt(ptr + 28, MaterialShaderIndices.vertexIndex(materialOverride.shaders())); // materialVertexIndex + MemoryUtil.memPutInt(ptr + 32, MaterialShaderIndices.fragmentIndex(materialOverride.shaders())); // materialFragmentIndex + MemoryUtil.memPutInt(ptr + 36, MaterialEncoder.packUberShader(materialOverride)); // packedFogAndCutout + MemoryUtil.memPutInt(ptr + 40, MaterialEncoder.packProperties(materialOverride)); // packedMaterialProperties } public void delete() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 2066554f0..073850193 100644 --- 
a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -12,18 +12,19 @@ import java.util.Map; import dev.engine_room.flywheel.api.backend.Engine; import dev.engine_room.flywheel.api.instance.Instance; +import dev.engine_room.flywheel.api.instance.InstanceType; import dev.engine_room.flywheel.api.visualization.VisualType; import dev.engine_room.flywheel.backend.Samplers; import dev.engine_room.flywheel.backend.compile.ContextShader; import dev.engine_room.flywheel.backend.compile.IndirectPrograms; import dev.engine_room.flywheel.backend.engine.CommonCrumbling; import dev.engine_room.flywheel.backend.engine.DrawManager; -import dev.engine_room.flywheel.backend.engine.GroupKey; import dev.engine_room.flywheel.backend.engine.InstancerKey; import dev.engine_room.flywheel.backend.engine.LightStorage; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; import dev.engine_room.flywheel.backend.engine.TextureBinder; +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlStateTracker; import dev.engine_room.flywheel.backend.gl.array.GlVertexArray; @@ -38,9 +39,10 @@ public class IndirectDrawManager extends DrawManager> { private final StagingBuffer stagingBuffer; private final MeshPool meshPool; private final GlVertexArray vertexArray; - private final Map, IndirectCullingGroup> cullingGroups = new HashMap<>(); + private final Map, IndirectCullingGroup> cullingGroups = new HashMap<>(); private final GlBuffer crumblingDrawBuffer = new GlBuffer(); private final LightBuffers lightBuffers; + private final MatrixBuffer matrixBuffer; public IndirectDrawManager(IndirectPrograms programs) { this.programs = programs; @@ -51,6 +53,7 @@ 
public class IndirectDrawManager extends DrawManager> { vertexArray = GlVertexArray.create(); meshPool.bind(vertexArray); lightBuffers = new LightBuffers(); + matrixBuffer = new MatrixBuffer(); } @Override @@ -61,8 +64,7 @@ public class IndirectDrawManager extends DrawManager> { @SuppressWarnings("unchecked") @Override protected void initialize(InstancerKey key, IndirectInstancer instancer) { - var groupKey = new GroupKey<>(key.type(), key.environment()); - var group = (IndirectCullingGroup) cullingGroups.computeIfAbsent(groupKey, t -> new IndirectCullingGroup<>(t.instanceType(), t.environment(), programs)); + var group = (IndirectCullingGroup) cullingGroups.computeIfAbsent(key.type(), t -> new IndirectCullingGroup<>(t, programs)); group.add((IndirectInstancer) instancer, key, meshPool); } @@ -85,6 +87,7 @@ public class IndirectDrawManager extends DrawManager> { vertexArray.bindForDraw(); lightBuffers.bind(); + matrixBuffer.bind(); Uniforms.bindAll(); for (var group : cullingGroups.values()) { @@ -97,8 +100,8 @@ public class IndirectDrawManager extends DrawManager> { } @Override - public void flush(LightStorage lightStorage) { - super.flush(lightStorage); + public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) { + super.flush(lightStorage, environmentStorage); for (var group : cullingGroups.values()) { group.flushInstancers(); @@ -116,6 +119,8 @@ public class IndirectDrawManager extends DrawManager> { lightBuffers.flush(stagingBuffer, lightStorage); + matrixBuffer.flush(stagingBuffer, environmentStorage); + for (var group : cullingGroups.values()) { group.upload(stagingBuffer); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java index b0ac9bc98..75dc2b8e3 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java +++ 
b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectInstancer.java @@ -49,10 +49,11 @@ public class IndirectInstancer extends AbstractInstancer public void writeModel(long ptr) { MemoryUtil.memPutInt(ptr, 0); // instanceCount - to be incremented by the cull shader MemoryUtil.memPutInt(ptr + 4, baseInstance); // baseInstance - MemoryUtil.memPutFloat(ptr + 8, boundingSphere.x()); // boundingSphere - MemoryUtil.memPutFloat(ptr + 12, boundingSphere.y()); - MemoryUtil.memPutFloat(ptr + 16, boundingSphere.z()); - MemoryUtil.memPutFloat(ptr + 20, boundingSphere.w()); + MemoryUtil.memPutInt(ptr + 8, environment.matrixIndex()); // matrixIndex + MemoryUtil.memPutFloat(ptr + 12, boundingSphere.x()); // boundingSphere + MemoryUtil.memPutFloat(ptr + 16, boundingSphere.y()); + MemoryUtil.memPutFloat(ptr + 20, boundingSphere.z()); + MemoryUtil.memPutFloat(ptr + 24, boundingSphere.w()); } public void uploadInstances(StagingBuffer stagingBuffer, int instanceVbo) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java new file mode 100644 index 000000000..3e17d386d --- /dev/null +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java @@ -0,0 +1,33 @@ +package dev.engine_room.flywheel.backend.engine.indirect; + +import org.lwjgl.opengl.GL46; +import org.lwjgl.system.MemoryUtil; + +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; + +public class MatrixBuffer { + private final ResizableStorageArray matrices = new ResizableStorageArray(EnvironmentStorage.MATRIX_SIZE_BYTES); + + public void flush(StagingBuffer stagingBuffer, EnvironmentStorage environmentStorage) { + var arena = environmentStorage.arena; + var capacity = arena.capacity(); + + if (capacity == 0) { + return; + } + + matrices.ensureCapacity(capacity); + + stagingBuffer.enqueueCopy((long) 
arena.capacity() * EnvironmentStorage.MATRIX_SIZE_BYTES, matrices.handle(), 0, ptr -> { + MemoryUtil.memCopy(arena.indexToPointer(0), ptr, (long) arena.capacity() * EnvironmentStorage.MATRIX_SIZE_BYTES); + }); + } + + public void bind() { + if (matrices.capacity() == 0) { + return; + } + + GL46.glBindBufferRange(GL46.GL_SHADER_STORAGE_BUFFER, BufferBindings.MATRICES, matrices.handle(), 0, matrices.byteCapacity()); + } +} diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java index 93d68e11d..8b241f6c0 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/instancing/InstancedDrawManager.java @@ -23,6 +23,7 @@ import dev.engine_room.flywheel.backend.engine.MaterialEncoder; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; import dev.engine_room.flywheel.backend.engine.TextureBinder; +import dev.engine_room.flywheel.backend.engine.embed.EnvironmentStorage; import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlStateTracker; import dev.engine_room.flywheel.backend.gl.TextureBuffer; @@ -59,8 +60,8 @@ public class InstancedDrawManager extends DrawManager> { } @Override - public void flush(LightStorage lightStorage) { - super.flush(lightStorage); + public void flush(LightStorage lightStorage, EnvironmentStorage environmentStorage) { + super.flush(lightStorage, environmentStorage); this.instancers.values() .removeIf(instancer -> { diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert index 075dae3f6..214ab12d2 100644 --- 
a/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/common.vert @@ -67,8 +67,8 @@ vec2 getCrumblingTexCoord() { #endif #ifdef FLW_EMBEDDED -uniform mat4 _flw_modelMatrix; -uniform mat3 _flw_normalMatrix; +mat4 _flw_modelMatrix; +mat3 _flw_normalMatrix; #endif flat out uint _flw_instanceID; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl index c37db3502..346adfa93 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl @@ -5,3 +5,4 @@ #define _FLW_DRAW_BUFFER_BINDING 4 #define _FLW_LIGHT_LUT_BUFFER_BINDING 5 #define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 6 +#define _FLW_MATRIX_BUFFER_BINDING 7 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 76b75bff1..58a893cbc 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -2,6 +2,7 @@ #include "flywheel:internal/indirect/model_descriptor.glsl" #include "flywheel:internal/uniforms/uniforms.glsl" #include "flywheel:util/matrix.glsl" +#include "flywheel:internal/indirect/matrices.glsl" layout(local_size_x = _FLW_SUBGROUP_SIZE) in; @@ -17,8 +18,9 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer ModelDescriptor _flw_models[]; }; -uniform mat4 _flw_modelMatrix; -uniform bool _flw_useModelMatrix = false; +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; // Disgustingly vectorized 
sphere frustum intersection taking advantage of ahead of time packing. // Only uses 6 fmas and some boolean ops. @@ -35,6 +37,7 @@ bool _flw_testSphere(vec3 center, float radius) { bool _flw_isVisible(uint instanceIndex, uint modelIndex) { BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; + uint matrixIndex = _flw_models[modelIndex].matrixIndex; vec3 center; float radius; @@ -44,8 +47,8 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { flw_transformBoundingSphere(instance, center, radius); - if (_flw_useModelMatrix) { - transformBoundingSphere(_flw_modelMatrix, center, radius); + if (matrixIndex > 0) { + transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); } return _flw_testSphere(center, radius); diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl index e8575930c..40ed9119b 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/draw_command.glsl @@ -6,6 +6,7 @@ struct MeshDrawCommand { uint baseInstance; uint modelIndex; + uint matrixIndex; uint materialVertexIndex; uint materialFragmentIndex; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert index a95d31662..e53dff313 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/main.vert @@ -3,6 +3,7 @@ #include "flywheel:internal/indirect/buffer_bindings.glsl" #include "flywheel:internal/indirect/draw_command.glsl" #include "flywheel:internal/indirect/light.glsl" +#include "flywheel:internal/indirect/matrices.glsl" layout(std430, binding = 
_FLW_TARGET_BUFFER_BINDING) restrict readonly buffer TargetBuffer { uint _flw_instanceIndices[]; @@ -12,6 +13,12 @@ layout(std430, binding = _FLW_DRAW_BUFFER_BINDING) restrict readonly buffer Draw MeshDrawCommand _flw_drawCommands[]; }; +#ifdef FLW_EMBEDDED +layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer { + Matrices _flw_matrices[]; +}; +#endif + uniform uint _flw_baseDraw; flat out uvec3 _flw_packedMaterial; @@ -29,7 +36,13 @@ void main() { _flw_unpackMaterialProperties(packedMaterialProperties, flw_material); _flw_packedMaterial = uvec3(draw.materialFragmentIndex, draw.packedFogAndCutout, packedMaterialProperties); -#if __VERSION__ < 460 + #ifdef FLW_EMBEDDED + _flw_unpackMatrices(_flw_matrices[draw.matrixIndex], _flw_modelMatrix, _flw_normalMatrix); + // _flw_modelMatrix = mat4(1.); + // _flw_normalMatrix = mat3(1.); + #endif + + #if __VERSION__ < 460 uint instanceIndex = _flw_instanceIndices[gl_BaseInstanceARB + gl_InstanceID]; #else uint instanceIndex = _flw_instanceIndices[gl_BaseInstance + gl_InstanceID]; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl new file mode 100644 index 000000000..efbc80b8d --- /dev/null +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/matrices.glsl @@ -0,0 +1,11 @@ +struct Matrices { + mat4 pose; + vec4 normalA; + vec4 normalB; + vec4 normalC; +}; + +void _flw_unpackMatrices(in Matrices mats, out mat4 pose, out mat3 normal) { + pose = mats.pose; + normal = mat3(mats.normalA.xyz, mats.normalB.xyz, mats.normalC.xyz); +} diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl index a873a92a8..c5416f9dd 100644 --- 
a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/model_descriptor.glsl @@ -8,6 +8,7 @@ struct BoundingSphere { struct ModelDescriptor { uint instanceCount; uint baseInstance; + uint matrixIndex; BoundingSphere boundingSphere; }; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert index 4b6914eae..30a863917 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/instancing/main.vert @@ -5,11 +5,21 @@ uniform uvec4 _flw_packedMaterial; uniform int _flw_baseInstance = 0; +#ifdef FLW_EMBEDDED +uniform mat4 _flw_modelMatrixUniform; +uniform mat3 _flw_normalMatrixUniform; +#endif + void main() { _flw_uberMaterialVertexIndex = _flw_packedMaterial.x; _flw_unpackMaterialProperties(_flw_packedMaterial.w, flw_material); FlwInstance instance = _flw_unpackInstance(_flw_baseInstance + gl_InstanceID); + #ifdef FLW_EMBEDDED + _flw_modelMatrix = _flw_modelMatrixUniform; + _flw_normalMatrix = _flw_normalMatrixUniform; + #endif + _flw_main(instance, uint(gl_InstanceID)); } diff --git a/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java b/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java index 3840129be..45b752640 100644 --- a/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java +++ b/common/src/lib/java/dev/engine_room/flywheel/lib/util/ExtraMemoryOps.java @@ -61,6 +61,21 @@ public final class ExtraMemoryOps { MemoryUtil.memPutFloat(ptr + 32, matrix.m22()); } + public static void putMatrix3fPadded(long ptr, Matrix3fc matrix) { + MemoryUtil.memPutFloat(ptr, matrix.m00()); + MemoryUtil.memPutFloat(ptr + 4, matrix.m01()); + MemoryUtil.memPutFloat(ptr + 8, 
matrix.m02()); + MemoryUtil.memPutFloat(ptr + 12, 0.0f); + MemoryUtil.memPutFloat(ptr + 16, matrix.m10()); + MemoryUtil.memPutFloat(ptr + 20, matrix.m11()); + MemoryUtil.memPutFloat(ptr + 24, matrix.m12()); + MemoryUtil.memPutFloat(ptr + 28, 0.0f); + MemoryUtil.memPutFloat(ptr + 32, matrix.m20()); + MemoryUtil.memPutFloat(ptr + 36, matrix.m21()); + MemoryUtil.memPutFloat(ptr + 40, matrix.m22()); + MemoryUtil.memPutFloat(ptr + 44, 0.0f); + } + public static void putMatrix4f(long ptr, Matrix4fc matrix) { MemoryUtil.memPutFloat(ptr, matrix.m00()); MemoryUtil.memPutFloat(ptr + 4, matrix.m01()); From a5f49c6738b97828fc8c05e917e21cfe9ae6192e Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 15 Aug 2024 20:57:28 -0700 Subject: [PATCH 5/6] Hol up - Use way fewer memory barriers - I didn't realize that GL_SHADER_STORAGE_BARRIER_BIT was global instead of operating only on the currently bound buffers. Oh, well - Move apply program binding to IndirectDrawManager - Fix embedded instances flickering when first loading a world. Need to actually bind the matrix buffer for the cull shader. 
Not sure how it worked at all before - Minor styling/cleanup --- .../flywheel/backend/engine/Arena.java | 4 ++++ .../engine/embed/EnvironmentStorage.java | 3 ++- .../engine/indirect/IndirectCullingGroup.java | 12 +++------- .../engine/indirect/IndirectDrawManager.java | 23 +++++++++++++++++++ .../backend/engine/indirect/MatrixBuffer.java | 4 ++-- .../flywheel/internal/indirect/cull.glsl | 2 +- 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java index 101cc0013..e7aa67071 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/Arena.java @@ -51,4 +51,8 @@ public class Arena { public int capacity() { return top; } + + public long byteCapacity() { + return memoryBlock.size(); + } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java index 942fc3623..2b707a3b8 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/embed/EnvironmentStorage.java @@ -16,7 +16,8 @@ public class EnvironmentStorage { public final Arena arena = new Arena(MATRIX_SIZE_BYTES, 32); { - arena.alloc(); // Reserve the identity matrix. + // Reserve the identity matrix. Burns a few bytes but oh well. 
+ arena.alloc(); } public void track(EmbeddedEnvironment environment) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 5afa7c46d..9a376ab14 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -5,7 +5,6 @@ import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT; import static org.lwjgl.opengl.GL30.glUniform1ui; import static org.lwjgl.opengl.GL42.GL_COMMAND_BARRIER_BIT; import static org.lwjgl.opengl.GL42.glMemoryBarrier; -import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT; import static org.lwjgl.opengl.GL43.glDispatchCompute; import java.util.ArrayList; @@ -37,8 +36,6 @@ public class IndirectCullingGroup { .thenComparing(IndirectDraw::indexOfMeshInModel) .thenComparing(IndirectDraw::material, MaterialRenderState.COMPARATOR); - private static final int DRAW_BARRIER_BITS = GL_SHADER_STORAGE_BARRIER_BIT | GL_COMMAND_BARRIER_BIT; - private final InstanceType instanceType; private final long instanceStride; private final IndirectBuffers buffers; @@ -48,7 +45,6 @@ public class IndirectCullingGroup { private final IndirectPrograms programs; private final GlProgram cullProgram; - private final GlProgram applyProgram; private boolean needsDrawBarrier; private boolean needsDrawSort; @@ -62,7 +58,6 @@ public class IndirectCullingGroup { this.programs = programs; cullProgram = programs.getCullingProgram(instanceType); - applyProgram = programs.getApplyProgram(); } public void flushInstancers() { @@ -123,7 +118,6 @@ public class IndirectCullingGroup { cullProgram.bind(); buffers.bindForCompute(); - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDispatchCompute(GlCompat.getComputeGroupCount(instanceCountThisFrame), 1, 1); } @@ 
-132,9 +126,7 @@ public class IndirectCullingGroup { return; } - applyProgram.bind(); buffers.bindForCompute(); - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glDispatchCompute(GlCompat.getComputeGroupCount(indirectDraws.size()), 1, 1); } @@ -241,7 +233,9 @@ public class IndirectCullingGroup { private void drawBarrier() { if (needsDrawBarrier) { - glMemoryBarrier(DRAW_BARRIER_BITS); + // In theory all command buffer writes will be protected by + // the shader storage barrier bit, but better safe than sorry. + glMemoryBarrier(GL_COMMAND_BARRIER_BIT); needsDrawBarrier = false; } } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index 073850193..adabbf653 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -4,6 +4,8 @@ import static org.lwjgl.opengl.GL11.GL_TRIANGLES; import static org.lwjgl.opengl.GL11.GL_UNSIGNED_INT; import static org.lwjgl.opengl.GL30.glBindBufferRange; import static org.lwjgl.opengl.GL40.glDrawElementsIndirect; +import static org.lwjgl.opengl.GL42.glMemoryBarrier; +import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BARRIER_BIT; import static org.lwjgl.opengl.GL43.GL_SHADER_STORAGE_BUFFER; import java.util.HashMap; @@ -44,6 +46,8 @@ public class IndirectDrawManager extends DrawManager> { private final LightBuffers lightBuffers; private final MatrixBuffer matrixBuffer; + private boolean needsBarrier = false; + public IndirectDrawManager(IndirectPrograms programs) { this.programs = programs; programs.acquire(); @@ -90,6 +94,11 @@ public class IndirectDrawManager extends DrawManager> { matrixBuffer.bind(); Uniforms.bindAll(); + if (needsBarrier) { + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + needsBarrier = false; + 
} + for (var group : cullingGroups.values()) { group.submit(visualType); } @@ -127,13 +136,27 @@ public class IndirectDrawManager extends DrawManager> { stagingBuffer.flush(); + // We could probably save some driver calls here when there are + // actually zero instances, but that feels like a very rare case + + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + matrixBuffer.bind(); + for (var group : cullingGroups.values()) { group.dispatchCull(); } + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + + programs.getApplyProgram() + .bind(); + for (var group : cullingGroups.values()) { group.dispatchApply(); } + + needsBarrier = true; } @Override diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java index 3e17d386d..ce579c465 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/MatrixBuffer.java @@ -18,8 +18,8 @@ public class MatrixBuffer { matrices.ensureCapacity(capacity); - stagingBuffer.enqueueCopy((long) arena.capacity() * EnvironmentStorage.MATRIX_SIZE_BYTES, matrices.handle(), 0, ptr -> { - MemoryUtil.memCopy(arena.indexToPointer(0), ptr, (long) arena.capacity() * EnvironmentStorage.MATRIX_SIZE_BYTES); + stagingBuffer.enqueueCopy(arena.byteCapacity(), matrices.handle(), 0, ptr -> { + MemoryUtil.memCopy(arena.indexToPointer(0), ptr, arena.byteCapacity()); }); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl index 58a893cbc..65d5baae0 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/cull.glsl @@ -36,8 +36,8 @@ bool _flw_testSphere(vec3 
center, float radius) { } bool _flw_isVisible(uint instanceIndex, uint modelIndex) { - BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; uint matrixIndex = _flw_models[modelIndex].matrixIndex; + BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; vec3 center; float radius; From 1a8ed8db2831c73d0676990214abbdf92bb738c3 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Thu, 15 Aug 2024 21:45:14 -0700 Subject: [PATCH 6/6] Keeping our priorities straight - Implement backend priority system - Give indirect priority 1000 and instancing 500 - Generate the sorted list of backends on demand in case one changes priority at runtime --- .../flywheel/api/backend/Backend.java | 10 +++- .../flywheel/backend/Backends.java | 3 +- .../flywheel/lib/backend/SimpleBackend.java | 26 ++++------- .../flywheel/impl/BackendManagerImpl.java | 46 +++++++++++++++---- 4 files changed, 56 insertions(+), 29 deletions(-) diff --git a/common/src/api/java/dev/engine_room/flywheel/api/backend/Backend.java b/common/src/api/java/dev/engine_room/flywheel/api/backend/Backend.java index ca63659c7..6cc6228f9 100644 --- a/common/src/api/java/dev/engine_room/flywheel/api/backend/Backend.java +++ b/common/src/api/java/dev/engine_room/flywheel/api/backend/Backend.java @@ -14,9 +14,15 @@ public interface Backend { Engine createEngine(LevelAccessor level); /** - * Get a fallback backend in case this backend is not supported. + * The priority of this backend. + *

<p>The backend with the highest priority upon first launch will be chosen as the default backend. + * + *
<p>
If the selected backend becomes unavailable for whatever reason, the next supported backend + * with a LOWER priority than the selected one will be chosen. + * + * @return The priority of this backend. */ - Backend findFallback(); + int priority(); /** * Check if this backend is supported. diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/Backends.java b/common/src/backend/java/dev/engine_room/flywheel/backend/Backends.java index 59e255cd5..81cdf2408 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/Backends.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/Backends.java @@ -17,6 +17,7 @@ public final class Backends { */ public static final Backend INSTANCING = SimpleBackend.builder() .engineFactory(level -> new EngineImpl(level, new InstancedDrawManager(InstancingPrograms.get()), 256)) + .priority(500) .supported(() -> GlCompat.SUPPORTS_INSTANCING && InstancingPrograms.allLoaded() && !ShadersModHandler.isShaderPackInUse()) .register(Flywheel.rl("instancing")); @@ -25,7 +26,7 @@ public final class Backends { */ public static final Backend INDIRECT = SimpleBackend.builder() .engineFactory(level -> new EngineImpl(level, new IndirectDrawManager(IndirectPrograms.get()), 256)) - .fallback(() -> Backends.INSTANCING) + .priority(1000) .supported(() -> GlCompat.SUPPORTS_INDIRECT && IndirectPrograms.allLoaded() && !ShadersModHandler.isShaderPackInUse()) .register(Flywheel.rl("indirect")); diff --git a/common/src/lib/java/dev/engine_room/flywheel/lib/backend/SimpleBackend.java b/common/src/lib/java/dev/engine_room/flywheel/lib/backend/SimpleBackend.java index cdec1433f..bc09227c1 100644 --- a/common/src/lib/java/dev/engine_room/flywheel/lib/backend/SimpleBackend.java +++ b/common/src/lib/java/dev/engine_room/flywheel/lib/backend/SimpleBackend.java @@ -3,22 +3,20 @@ package dev.engine_room.flywheel.lib.backend; import java.util.Objects; import java.util.function.BooleanSupplier; import java.util.function.Function; 
-import java.util.function.Supplier; import dev.engine_room.flywheel.api.backend.Backend; -import dev.engine_room.flywheel.api.backend.BackendManager; import dev.engine_room.flywheel.api.backend.Engine; import net.minecraft.resources.ResourceLocation; import net.minecraft.world.level.LevelAccessor; public final class SimpleBackend implements Backend { private final Function engineFactory; - private final Supplier fallback; + private final int priority; private final BooleanSupplier isSupported; - public SimpleBackend(Function engineFactory, Supplier fallback, BooleanSupplier isSupported) { + public SimpleBackend(int priority, Function engineFactory, BooleanSupplier isSupported) { + this.priority = priority; this.engineFactory = engineFactory; - this.fallback = fallback; this.isSupported = isSupported; } @@ -32,13 +30,8 @@ public final class SimpleBackend implements Backend { } @Override - public Backend findFallback() { - if (isSupported()) { - return this; - } else { - return fallback.get() - .findFallback(); - } + public int priority() { + return priority; } @Override @@ -48,7 +41,7 @@ public final class SimpleBackend implements Backend { public static final class Builder { private Function engineFactory; - private Supplier fallback = BackendManager::offBackend; + private int priority = 0; private BooleanSupplier isSupported; public Builder engineFactory(Function engineFactory) { @@ -56,8 +49,8 @@ public final class SimpleBackend implements Backend { return this; } - public Builder fallback(Supplier fallback) { - this.fallback = fallback; + public Builder priority(int priority) { + this.priority = priority; return this; } @@ -68,10 +61,9 @@ public final class SimpleBackend implements Backend { public Backend register(ResourceLocation id) { Objects.requireNonNull(engineFactory); - Objects.requireNonNull(fallback); Objects.requireNonNull(isSupported); - return Backend.REGISTRY.registerAndGet(id, new SimpleBackend(engineFactory, fallback, isSupported)); + return 
Backend.REGISTRY.registerAndGet(id, new SimpleBackend(priority, engineFactory, isSupported)); } } } diff --git a/common/src/main/java/dev/engine_room/flywheel/impl/BackendManagerImpl.java b/common/src/main/java/dev/engine_room/flywheel/impl/BackendManagerImpl.java index bd2ae4a78..75133adeb 100644 --- a/common/src/main/java/dev/engine_room/flywheel/impl/BackendManagerImpl.java +++ b/common/src/main/java/dev/engine_room/flywheel/impl/BackendManagerImpl.java @@ -1,8 +1,9 @@ package dev.engine_room.flywheel.impl; +import java.util.ArrayList; + import dev.engine_room.flywheel.api.Flywheel; import dev.engine_room.flywheel.api.backend.Backend; -import dev.engine_room.flywheel.backend.Backends; import dev.engine_room.flywheel.impl.visualization.VisualizationManagerImpl; import dev.engine_room.flywheel.lib.backend.SimpleBackend; import net.minecraft.client.multiplayer.ClientLevel; @@ -31,21 +32,48 @@ public final class BackendManagerImpl { return backend != OFF_BACKEND; } + // Don't store this statically because backends can theoretically change their priorities at runtime. + private static ArrayList backendsByPriority() { + var backends = new ArrayList<>(Backend.REGISTRY.getAll()); + + // Sort with keys backwards so that the highest priority is first. + backends.sort((a, b) -> Integer.compare(b.priority(), a.priority())); + return backends; + } + private static Backend findDefaultBackend() { - // TODO: Automatically select the best default config based on the user's driver - // TODO: Figure out how this will work if custom backends are registered and without hardcoding the default backends - return Backends.INDIRECT; + var backendsByPriority = backendsByPriority(); + if (backendsByPriority.isEmpty()) { + // This probably shouldn't happen, but fail gracefully. 
+ FlwImpl.LOGGER.warn("No backends registered, defaulting to 'flywheel:off'"); + return OFF_BACKEND; + } + + return backendsByPriority.get(0); } private static void chooseBackend() { var preferred = FlwConfig.INSTANCE.backend(); - var actual = preferred.findFallback(); - - if (preferred != actual) { - FlwImpl.LOGGER.warn("Flywheel backend fell back from '{}' to '{}'", Backend.REGISTRY.getIdOrThrow(preferred), Backend.REGISTRY.getIdOrThrow(actual)); + if (preferred.isSupported()) { + backend = preferred; + return; } - backend = actual; + var backendsByPriority = backendsByPriority(); + + var startIndex = backendsByPriority.indexOf(preferred) + 1; + + // For safety in case we don't find anything + backend = OFF_BACKEND; + for (int i = startIndex; i < backendsByPriority.size(); i++) { + var candidate = backendsByPriority.get(i); + if (candidate.isSupported()) { + backend = candidate; + break; + } + } + + FlwImpl.LOGGER.warn("Flywheel backend fell back from '{}' to '{}'", Backend.REGISTRY.getIdOrThrow(preferred), Backend.REGISTRY.getIdOrThrow(backend)); } public static String getBackendString() {