Wave goodbye to order

- Implement wavelet OIT
- Needed to do the same normalization step as in MBOIT but that's not
  described in the blog post I followed
- Use an expensive pseudo blue noise function to slightly correct
  banding artifacts
This commit is contained in:
Jozufozu 2025-02-17 23:16:39 -08:00
parent 4eba43b2d6
commit 3f2086e588
10 changed files with 386 additions and 1391 deletions

View file

@ -11,7 +11,6 @@ public class Samplers {
public static final GlTextureUnit LIGHT_LUT = GlTextureUnit.T5;
public static final GlTextureUnit LIGHT_SECTIONS = GlTextureUnit.T6;
public static final GlTextureUnit ZEROTH_MOMENT = GlTextureUnit.T7;
public static final GlTextureUnit MOMENTS0 = GlTextureUnit.T8;
public static final GlTextureUnit MOMENTS1 = GlTextureUnit.T9;
public static final GlTextureUnit DEPTH_RANGE = GlTextureUnit.T7;
public static final GlTextureUnit COEFFICIENTS = GlTextureUnit.T8;
}

View file

@ -230,8 +230,9 @@ public final class PipelineCompiler {
public enum OitMode {
OFF("", ""),
GENERATE("_FLW_GENERATE_MOMENTS", "_generate"),
RESOLVE("_FLW_RESOLVE_MOMENTS", "_resolve"),
DEPTH_RANGE("_FLW_DEPTH_RANGE", "_depth_range"),
GENERATE_COEFFICIENTS("_FLW_COLLECT_COEFFS", "_generate_coefficients"),
EVALUATE("_FLW_EVALUATE", "_resolve"),
;
public final String define;

View file

@ -49,7 +49,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
private final DepthPyramid depthPyramid;
private final MboitFramebuffer wboitFrameBuffer;
private final OitFramebuffer wboitFrameBuffer;
public IndirectDrawManager(IndirectPrograms programs) {
this.programs = programs;
@ -66,7 +66,7 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
depthPyramid = new DepthPyramid(programs);
wboitFrameBuffer = new MboitFramebuffer(programs);
wboitFrameBuffer = new OitFramebuffer(programs);
}
@Override
@ -146,16 +146,26 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
group.submitSolid();
}
wboitFrameBuffer.generateMoments();
wboitFrameBuffer.depthRange();
for (var group : cullingGroups.values()) {
group.submitTransparent(PipelineCompiler.OitMode.GENERATE);
group.submitTransparent(PipelineCompiler.OitMode.DEPTH_RANGE);
}
wboitFrameBuffer.resolveMoments();
wboitFrameBuffer.renderTransmittance();
for (var group : cullingGroups.values()) {
group.submitTransparent(PipelineCompiler.OitMode.RESOLVE);
group.submitTransparent(PipelineCompiler.OitMode.GENERATE_COEFFICIENTS);
}
// wboitFrameBuffer.adjustBackgroundForTotalTransmittance();
// vertexArray.bindForDraw();
wboitFrameBuffer.shade();
for (var group : cullingGroups.values()) {
group.submitTransparent(PipelineCompiler.OitMode.EVALUATE);
}
wboitFrameBuffer.composite();

View file

@ -10,29 +10,27 @@ import dev.engine_room.flywheel.backend.Samplers;
import dev.engine_room.flywheel.backend.compile.IndirectPrograms;
import dev.engine_room.flywheel.backend.gl.GlTextureUnit;
import net.minecraft.client.Minecraft;
import net.minecraft.util.Mth;
public class MboitFramebuffer {
public class OitFramebuffer {
public final int fbo;
private final IndirectPrograms programs;
private final int vao;
public int zerothMoment;
public int moments0;
public int moments1;
public int depthBounds;
public int coefficients;
public int accumulate;
private int lastWidth = -1;
private int lastHeight = -1;
public MboitFramebuffer(IndirectPrograms programs) {
public OitFramebuffer(IndirectPrograms programs) {
this.programs = programs;
fbo = GL46.glCreateFramebuffers();
vao = GL46.glCreateVertexArrays();
}
public void generateMoments() {
public void depthRange() {
var mainRenderTarget = Minecraft.getInstance()
.getMainRenderTarget();
@ -42,36 +40,55 @@ public class MboitFramebuffer {
RenderSystem.depthMask(false);
RenderSystem.enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE);
RenderSystem.blendEquation(GL46.GL_FUNC_ADD);
RenderSystem.blendEquation(GL46.GL_MAX);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_DEPTH_ATTACHMENT, mainRenderTarget.getDepthTextureId(), 0);
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0, GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2});
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT0});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, new float[]{0, 0, 0, 0});
var far = Minecraft.getInstance().gameRenderer.getDepthFar();
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{-far, -far, 0, 0});
GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo);
}
public void resolveMoments() {
public void renderTransmittance() {
// No depth writes, but we'll still use the depth test
RenderSystem.depthMask(false);
RenderSystem.enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE);
RenderSystem.blendEquation(GL46.GL_FUNC_ADD);
Samplers.ZEROTH_MOMENT.makeActive();
GlStateManager._bindTexture(zerothMoment);
Samplers.DEPTH_RANGE.makeActive();
GlStateManager._bindTexture(depthBounds);
Samplers.MOMENTS0.makeActive();
GlStateManager._bindTexture(moments0);
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT1, GL46.GL_COLOR_ATTACHMENT2, GL46.GL_COLOR_ATTACHMENT3, GL46.GL_COLOR_ATTACHMENT4});
Samplers.MOMENTS1.makeActive();
GlStateManager._bindTexture(moments1);
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 1, new float[]{0, 0, 0, 0});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 2, new float[]{0, 0, 0, 0});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 3, new float[]{0, 0, 0, 0});
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT3});
GlStateManager._glBindFramebuffer(GL46.GL_FRAMEBUFFER, fbo);
}
public void shade() {
// No depth writes, but we'll still use the depth test
RenderSystem.depthMask(false);
RenderSystem.enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE, GlStateManager.DestFactor.ONE);
RenderSystem.blendEquation(GL46.GL_FUNC_ADD);
Samplers.DEPTH_RANGE.makeActive();
GlStateManager._bindTexture(depthBounds);
Samplers.COEFFICIENTS.makeActive();
GlStateManager._bindTexture(0);
GL46.glBindTextureUnit(Samplers.COEFFICIENTS.number, coefficients);
GL46.glNamedFramebufferDrawBuffers(fbo, new int[]{GL46.GL_COLOR_ATTACHMENT5});
GL46.glClearNamedFramebufferfv(fbo, GL46.GL_COLOR, 0, new float[]{0, 0, 0, 0});
@ -79,26 +96,27 @@ public class MboitFramebuffer {
}
public void composite() {
// No depth writes, but we'll still use the depth test
RenderSystem.depthMask(false);
RenderSystem.enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA);
RenderSystem.blendEquation(GL46.GL_FUNC_ADD);
var mainRenderTarget = Minecraft.getInstance()
.getMainRenderTarget();
mainRenderTarget.bindWrite(false);
var oitCompositeProgram = programs.getOitCompositeProgram();
GlStateManager._depthMask(false);
GlStateManager._depthFunc(GL46.GL_ALWAYS);
GlStateManager._enableBlend();
RenderSystem.blendFunc(GlStateManager.SourceFactor.ONE_MINUS_SRC_ALPHA, GlStateManager.DestFactor.SRC_ALPHA);
oitCompositeProgram.bind();
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(zerothMoment);
GlStateManager._bindTexture(0);
GL46.glBindTextureUnit(0, coefficients);
GlTextureUnit.T1.makeActive();
GlStateManager._bindTexture(accumulate);
programs.getOitCompositeProgram()
.bind();
// Empty VAO, the actual full screen triangle is generated in the vertex shader
GlStateManager._glBindVertexArray(vao);
@ -112,9 +130,8 @@ public class MboitFramebuffer {
}
private void deleteTextures() {
GL46.glDeleteTextures(zerothMoment);
GL46.glDeleteTextures(moments0);
GL46.glDeleteTextures(moments1);
GL46.glDeleteTextures(depthBounds);
GL46.glDeleteTextures(coefficients);
GL46.glDeleteTextures(accumulate);
}
@ -128,14 +145,12 @@ public class MboitFramebuffer {
deleteTextures();
zerothMoment = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
moments0 = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
moments1 = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
depthBounds = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
coefficients = GL46.glCreateTextures(GL46.GL_TEXTURE_2D_ARRAY);
accumulate = GL46.glCreateTextures(GL46.GL_TEXTURE_2D);
GL46.glTextureStorage2D(zerothMoment, 1, GL32.GL_R16F, width, height);
GL46.glTextureStorage2D(moments0, 1, GL32.GL_RGBA16F, width, height);
GL46.glTextureStorage2D(moments1, 1, GL32.GL_RGBA16F, width, height);
GL46.glTextureStorage2D(depthBounds, 1, GL32.GL_RG32F, width, height);
GL46.glTextureStorage3D(coefficients, 1, GL32.GL_RGBA16F, width, height, 4);
GL46.glTextureStorage2D(accumulate, 1, GL32.GL_RGBA16F, width, height);
@ -147,39 +162,11 @@ public class MboitFramebuffer {
// GL46.glTextureParameteri(tex, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
// }
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, zerothMoment, 0);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT1, moments0, 0);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT2, moments1, 0);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT3, accumulate, 0);
}
float circleToParameter(float angle) {
float x = Mth.cos(angle);
float y = Mth.sin(angle);
float result = Mth.abs(y) - Mth.abs(x);
result = (x < 0.0f) ? (2.0f - result) : result;
result = (y < 0.0f) ? (6.0f - result) : result;
result += (angle >= 2.0f * Mth.PI) ? 8.0f : 0.0f;
return result;
}
void computeWrappingZoneParameters(float[] out) {
computeWrappingZoneParameters(out, 0.1f * Mth.PI);
}
/*! Given an angle in radians providing the size of the wrapping zone, this
function computes all constants required by the shader.*/
void computeWrappingZoneParameters(float[] p_out_wrapping_zone_parameters, float new_wrapping_zone_angle) {
p_out_wrapping_zone_parameters[0] = new_wrapping_zone_angle;
p_out_wrapping_zone_parameters[1] = Mth.PI - 0.5f * new_wrapping_zone_angle;
if (new_wrapping_zone_angle <= 0.0f) {
p_out_wrapping_zone_parameters[2] = 0.0f;
p_out_wrapping_zone_parameters[3] = 0.0f;
} else {
float zone_end_parameter = 7;
float zone_begin_parameter = circleToParameter(2.0f * Mth.PI - new_wrapping_zone_angle);
p_out_wrapping_zone_parameters[2] = 1.0f / (zone_end_parameter - zone_begin_parameter);
p_out_wrapping_zone_parameters[3] = 1.0f - zone_end_parameter * p_out_wrapping_zone_parameters[2];
}
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT0, depthBounds, 0);
GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT1, coefficients, 0, 0);
GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT2, coefficients, 0, 1);
GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT3, coefficients, 0, 2);
GL46.glNamedFramebufferTextureLayer(fbo, GL46.GL_COLOR_ATTACHMENT4, coefficients, 0, 3);
GL46.glNamedFramebufferTexture(fbo, GL46.GL_COLOR_ATTACHMENT5, accumulate, 0);
}
}

View file

@ -1,7 +1,6 @@
#include "flywheel:internal/packed_material.glsl"
#include "flywheel:internal/diffuse.glsl"
#include "flywheel:internal/colorizer.glsl"
#include "flywheel:internal/mboit/moment_oit.glsl"
// optimize discard usage
#if defined(GL_ARB_conservative_depth) && defined(_FLW_USE_DISCARD)
@ -19,14 +18,222 @@ flat in uvec2 _flw_ids;
#endif
#ifdef _FLW_OIT
#ifdef _FLW_GENERATE_MOMENTS
layout (location = 0) out float _flw_zerothMoment_out;
layout (location = 1) out vec4 _flw_moments0_out;
layout (location = 2) out vec4 _flw_moments1_out;
#define TRANSPARENCY_WAVELET_RANK 3
#define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16
#define floatN float
#define all(e) (e)
#define mad fma
#define lerp mix
#define Coefficients_Out vec4[4]
#define Coefficients_In sampler2DArray
layout (binding = 7) uniform sampler2D _flw_depthRange;
layout (binding = 8) uniform sampler2DArray _flw_coefficients;
#define REMOVE_SIGNAL true
#ifdef _FLW_DEPTH_RANGE
layout (location = 0) out vec2 _flw_depthRange_out;
#endif
#ifdef _FLW_RESOLVE_MOMENTS
layout (location = 0) out vec4 _flw_accumulate_out;
#ifdef _FLW_COLLECT_COEFFS
layout (location = 0) out vec4 _flw_coeffs0;
layout (location = 1) out vec4 _flw_coeffs1;
layout (location = 2) out vec4 _flw_coeffs2;
layout (location = 3) out vec4 _flw_coeffs3;
void add_to_index(inout Coefficients_Out coefficients, uint index, floatN addend) {
coefficients[index >> 2][index & 3u] = addend;
}
void add_event_to_wavelets(inout Coefficients_Out coefficients, floatN signal, float depth)
{
depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT;
int index = clamp(int(floor(depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1);
index += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1;
for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i)
{
int power = TRANSPARENCY_WAVELET_RANK - i;
int new_index = (index - 1) >> 1;
float k = float((new_index + 1) & ((1 << power) - 1));
int wavelet_sign = ((index & 1) << 1) - 1;
float wavelet_phase = ((index + 1) & 1) * exp2(-power);
floatN addend = mad(mad(-exp2(-power), k, depth), wavelet_sign, wavelet_phase) * exp2(power * 0.5) * signal;
add_to_index(coefficients, new_index, addend);
index = new_index;
}
floatN addend = mad(signal, -depth, signal);
add_to_index(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1, addend);
}
void add_transmittance_event_to_wavelets(inout Coefficients_Out coefficients, floatN transmittance, float depth)
{
float absorbance = -log(max(transmittance, 0.00001));// transforming the signal from multiplicative transmittance to additive absorbance
add_event_to_wavelets(coefficients, absorbance, depth);
}
#endif
#ifdef _FLW_EVALUATE
layout (location = 0) out vec4 _flw_accumulate;
floatN get_coefficients(in Coefficients_In coefficients, uint index) {
return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u];
}
floatN evaluate_wavelets(in Coefficients_In coefficients, float depth, floatN signal)
{
floatN scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1);
if (all(scale_coefficient == 0))
{
return 0;
}
if (REMOVE_SIGNAL)
{
floatN scale_coefficient_addend = mad(signal, -depth, signal);
scale_coefficient -= scale_coefficient_addend;
}
depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT;
float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT;
int index_b = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1);
bool sample_a = index_b >= 1;
int index_a = sample_a ? (index_b - 1) : index_b;
index_b += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1;
index_a += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1;
floatN b = scale_coefficient;
floatN a = sample_a ? scale_coefficient : 0;
for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i)
{
int power = TRANSPARENCY_WAVELET_RANK - i;
int new_index_b = (index_b - 1) >> 1;
int wavelet_sign_b = ((index_b & 1) << 1) - 1;
floatN coeff_b = get_coefficients(coefficients, new_index_b);
if (REMOVE_SIGNAL)
{
float wavelet_phase_b = ((index_b + 1) & 1) * exp2(-power);
float k = float((new_index_b + 1) & ((1 << power) - 1));
floatN addend = mad(mad(-exp2(-power), k, depth), wavelet_sign_b, wavelet_phase_b) * exp2(power * 0.5) * signal;
coeff_b -= addend;
}
b -= exp2(float(power) * 0.5) * coeff_b * wavelet_sign_b;
index_b = new_index_b;
if (sample_a)
{
int new_index_a = (index_a - 1) >> 1;
int wavelet_sign_a = ((index_a & 1) << 1) - 1;
floatN coeff_a = (new_index_a == new_index_b) ? coeff_b : get_coefficients(coefficients, new_index_a);// No addend here on purpose, the original signal didn't contribute to this coefficient
a -= exp2(float(power) * 0.5) * coeff_a * wavelet_sign_a;
index_a = new_index_a;
}
}
float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth);
return lerp(a, b, t);
}
floatN evaluate_transmittance_wavelets(in Coefficients_In coefficients, float depth, floatN signal)
{
floatN absorbance = evaluate_wavelets(coefficients, depth, signal);
return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance
}
#endif
// TODO: blue noise texture
uint HilbertIndex(uvec2 p) {
uint i = 0u;
for (uint l = 0x4000u; l > 0u; l >>= 1u) {
uvec2 r = min(p & l, 1u);
i = (i << 2u) | ((r.x * 3u) ^ r.y);
p = r.y == 0u ? (0x7FFFu * r.x) ^ p.yx : p;
}
return i;
}
uint ReverseBits(uint x) {
x = ((x & 0xaaaaaaaau) >> 1) | ((x & 0x55555555u) << 1);
x = ((x & 0xccccccccu) >> 2) | ((x & 0x33333333u) << 2);
x = ((x & 0xf0f0f0f0u) >> 4) | ((x & 0x0f0f0f0fu) << 4);
x = ((x & 0xff00ff00u) >> 8) | ((x & 0x00ff00ffu) << 8);
return (x >> 16) | (x << 16);
}
// from: https://psychopath.io/post/2021_01_30_building_a_better_lk_hash
uint OwenHash(uint x, uint seed) { // seed is any random number
x ^= x * 0x3d20adeau;
x += seed;
x *= (seed >> 16) | 1u;
x ^= x * 0x05526c56u;
x ^= x * 0x53a22864u;
return x;
}
// https://www.shadertoy.com/view/ssBBW1
float blue() {
uint m = HilbertIndex(uvec2(gl_FragCoord.xy));// map pixel coords to hilbert curve index
m = OwenHash(ReverseBits(m), 0xe7843fbfu);// owen-scramble hilbert index
m = OwenHash(ReverseBits(m), 0x8d8fb1e0u);// map hilbert index to sobol sequence and owen-scramble
float mask = float(ReverseBits(m)) / 4294967296.0;// convert to float
return mask;
}
uniform vec3 _flw_depthAdjust;
float adjust_depth(float normalizedDepth) {
float tentIn = abs(normalizedDepth * 2. - 1);
float tentIn2 = tentIn * tentIn;
float tentIn4 = tentIn2 * tentIn2;
float tent = 1 - (tentIn2 * tentIn4);
float b = blue();
return normalizedDepth - b * tent * 0.08;
}
float linearize_depth(float d, float zNear, float zFar) {
float z_n = 2.0 * d - 1.0;
return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear));
}
float linear_depth() {
return linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar);
}
float depth() {
float linearDepth = linear_depth();
vec2 depthRange = texelFetch(_flw_depthRange, ivec2(gl_FragCoord.xy), 0).rg;
float depth = (linearDepth + depthRange.x) / (depthRange.x + depthRange.y);
return adjust_depth(depth);
}
#else
out vec4 _flw_outputColor;
@ -47,11 +254,6 @@ float _flw_diffuseFactor() {
}
}
float linearize_depth(float d, float zNear, float zFar) {
float z_n = 2.0 * d - 1.0;
return 2.0 * zNear * zFar / (zFar + zNear - z_n * (zFar - zNear));
}
void _flw_main() {
flw_sampleColor = texture(flw_diffuseTex, flw_vertexTexCoord);
flw_fragColor = flw_vertexColor * flw_sampleColor;
@ -121,39 +323,41 @@ void _flw_main() {
color = flw_fogFilter(color);
#ifdef _FLW_OIT
float linearDepth = linearize_depth(gl_FragCoord.z, _flw_cullData.znear, _flw_cullData.zfar);
float lnNear = log(_flw_cullData.znear);
float lnFar = log(_flw_cullData.zfar);
#ifdef _FLW_DEPTH_RANGE
float linearDepth = linear_depth();
float depth = (log(linearDepth) - lnNear);
// Pad the depth by some unbalanced epsilons because minecraft has a lot of single-quad tranparency.
// The unbalance means our fragment will be considered closer to the screen in the normalization,
// which helps prevent unnecessary noise as it'll be closer to the edge of our tent function.
_flw_depthRange_out = vec2(-linearDepth + 1e-5, linearDepth + 1e-2);
#endif
depth /= lnFar - lnNear;
#ifdef _FLW_COLLECT_COEFFS
depth = clamp(depth * 2. - 1., -1., 1.);
Coefficients_Out result;
result[0] = vec4(0.);
result[1] = vec4(0.);
result[2] = vec4(0.);
result[3] = vec4(0.);
#ifdef _FLW_GENERATE_MOMENTS
add_transmittance_event_to_wavelets(result, 1. - color.a, depth());
generateMoments(depth, 1 - color.a, _flw_zerothMoment_out, _flw_moments0_out, _flw_moments1_out);
_flw_coeffs0 = result[0];
_flw_coeffs1 = result[1];
_flw_coeffs2 = result[2];
_flw_coeffs3 = result[3];
#endif
#ifdef _FLW_RESOLVE_MOMENTS
float tt;
float td;
resolveMoments(td, tt, depth, gl_FragCoord.xy);
#ifdef _FLW_EVALUATE
if (abs(td) < 1e-5) {
discard;
}
floatN transmittance = evaluate_transmittance_wavelets(_flw_coefficients, depth(), 1. - color.a);
color.rgb *= color.a;
color *= td;
_flw_accumulate_out = color;
_flw_accumulate = vec4(color.rgb * color.a, color.a) * transmittance;
#endif
#else
_flw_outputColor = color;

View file

@ -1,20 +1,79 @@
layout (location = 0) out vec4 frag;
layout (binding = 0) uniform sampler2D zerothMoment;
layout (binding = 1) uniform sampler2D accumulate;
layout (binding = 0) uniform sampler2DArray _flw_coefficients;
layout (binding = 1) uniform sampler2D _flw_accumulate;
#define TRANSPARENCY_WAVELET_RANK 3
#define TRANSPARENCY_WAVELET_COEFFICIENT_COUNT 16
#define floatN float
#define all(e) (e)
#define mad fma
#define lerp mix
#define Coefficients_Out vec4[4]
#define Coefficients_In sampler2DArray
floatN get_coefficients(in Coefficients_In coefficients, uint index) {
return texelFetch(coefficients, ivec3(gl_FragCoord.xy, index >> 2), 0)[index & 3u];
}
floatN evaluate_wavelet_index(in Coefficients_In coefficients, int index)
{
floatN result = 0;
index += TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1;
for (int i = 0; i < (TRANSPARENCY_WAVELET_RANK+1); ++i)
{
int power = TRANSPARENCY_WAVELET_RANK - i;
int new_index = (index - 1) >> 1;
floatN coeff = get_coefficients(coefficients, new_index);
int wavelet_sign = ((index & 1) << 1) - 1;
result -= exp2(float(power) * 0.5) * coeff * wavelet_sign;
index = new_index;
}
return result;
}
floatN evaluate_wavelets(in Coefficients_In coefficients, float depth)
{
floatN scale_coefficient = get_coefficients(coefficients, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1);
if (all(scale_coefficient == 0))
{
return 0;
}
depth *= float(TRANSPARENCY_WAVELET_COEFFICIENT_COUNT-1) / TRANSPARENCY_WAVELET_COEFFICIENT_COUNT;
float coefficient_depth = depth * TRANSPARENCY_WAVELET_COEFFICIENT_COUNT;
int index = clamp(int(floor(coefficient_depth)), 0, TRANSPARENCY_WAVELET_COEFFICIENT_COUNT - 1);
floatN a = 0;
floatN b = scale_coefficient + evaluate_wavelet_index(coefficients, index);
if (index > 0) { a = scale_coefficient + evaluate_wavelet_index(coefficients, index - 1); }
float t = coefficient_depth >= TRANSPARENCY_WAVELET_COEFFICIENT_COUNT ? 1.0 : fract(coefficient_depth);
floatN signal = lerp(a, b, t);// You can experiment here with different types of interpolation as well
return signal;
}
floatN evaluate_transmittance_wavelets(in Coefficients_In coefficients, float depth)
{
floatN absorbance = evaluate_wavelets(coefficients, depth);
return clamp(exp(-absorbance), 0., 1.);// undoing the transformation from absorbance back to transmittance
}
const float infinity = 1. / 0.;
void main() {
ivec2 coords = ivec2(gl_FragCoord.xy);
vec4 texel = texelFetch(_flw_accumulate, ivec2(gl_FragCoord.xy), 0);
float b0 = texelFetch(zerothMoment, coords, 0).r;
if (b0 < 1e-5) {
if (texel.a < 1e-5) {
discard;
}
vec4 accumulation = texelFetch(accumulate, coords, 0);
floatN total_transmittance = evaluate_transmittance_wavelets(_flw_coefficients, infinity);
vec3 normalizedAccumulation = accumulation.rgb / max(accumulation.a, 1e-5);
frag = vec4(normalizedAccumulation, exp(-b0));
frag = vec4(texel.rgb / texel.a, total_transmittance);
}

View file

@ -1,213 +0,0 @@
/*! \file
This header defines utility functions to deal with complex numbers and
complex polynomials.*/
void sincos(float theta, out float s, out float c) {
s = sin(theta);
c = cos(theta);
}
float saturate(float a) {
return clamp(a, 0., 1.);
}
/*! Returns the complex conjugate of the given complex number (i.e. it changes
the sign of the y-component).*/
vec2 Conjugate(vec2 Z){
return vec2(Z.x, -Z.y);
}
/*! This function implements complex multiplication.*/
vec2 Multiply(vec2 LHS, vec2 RHS){
return vec2(LHS.x*RHS.x-LHS.y*RHS.y, LHS.x*RHS.y+LHS.y*RHS.x);
}
/*! This function computes the magnitude of the given complex number.*/
float Magnitude(vec2 Z){
return sqrt(dot(Z, Z));
}
/*! This function computes the quotient of two complex numbers. The denominator
must not be zero.*/
vec2 Divide(vec2 Numerator, vec2 Denominator){
return vec2(Numerator.x*Denominator.x+Numerator.y*Denominator.y, -Numerator.x*Denominator.y+Numerator.y*Denominator.x)/dot(Denominator, Denominator);
}
/*! This function divides a real number by a complex number. The denominator
must not be zero.*/
vec2 Divide(float Numerator, vec2 Denominator){
return vec2(Numerator*Denominator.x, -Numerator*Denominator.y)/dot(Denominator, Denominator);
}
/*! This function implements computation of the reciprocal of the given non-
zero complex number.*/
vec2 Reciprocal(vec2 Z){
return vec2(Z.x, -Z.y)/dot(Z, Z);
}
/*! This utility function implements complex squaring.*/
vec2 Square(vec2 Z){
return vec2(Z.x*Z.x-Z.y*Z.y, 2.0f*Z.x*Z.y);
}
/*! This utility function implements complex computation of the third power.*/
vec2 Cube(vec2 Z){
return Multiply(Square(Z), Z);
}
/*! This utility function computes one square root of the given complex value.
The other one can be found using the unary minus operator.
\warning This function is continuous but not defined on the negative real
axis (and cannot be continued continuously there).
\sa SquareRoot() */
vec2 SquareRootUnsafe(vec2 Z){
float ZLengthSq=dot(Z, Z);
float ZLengthInv=inversesqrt(ZLengthSq);
vec2 UnnormalizedRoot=Z*ZLengthInv+vec2(1.0f, 0.0f);
float UnnormalizedRootLengthSq=dot(UnnormalizedRoot, UnnormalizedRoot);
float NormalizationFactorInvSq=UnnormalizedRootLengthSq*ZLengthInv;
float NormalizationFactor=inversesqrt(NormalizationFactorInvSq);
return NormalizationFactor*UnnormalizedRoot;
}
/*! This utility function computes one square root of the given complex value.
The other one can be found using the unary minus operator.
\note This function has discontinuities for values with real part zero.
\sa SquareRootUnsafe() */
vec2 SquareRoot(vec2 Z){
vec2 ZPositiveRealPart=vec2(abs(Z.x), Z.y);
vec2 ComputedRoot=SquareRootUnsafe(ZPositiveRealPart);
return (Z.x>=0.0)?ComputedRoot:ComputedRoot.yx;
}
/*! This utility function computes one cubic root of the given complex value. The
other roots can be found by multiplication by cubic roots of unity.
\note This function has various discontinuities.*/
vec2 CubicRoot(vec2 Z){
float Argument=atan(Z.y, Z.x);
float NewArgument=Argument/3.0f;
vec2 NormalizedRoot;
sincos(NewArgument, NormalizedRoot.y, NormalizedRoot.x);
return NormalizedRoot*pow(dot(Z, Z), 1.0f/6.0f);
}
/*! @{
Returns the complex conjugate of the given complex vector (i.e. it changes the
second column resp the y-component).*/
mat2x2 Conjugate(mat2x2 Vector){
return mat2x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y);
}
mat3x2 Conjugate(mat3x2 Vector){
return mat3x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y);
}
mat4x2 Conjugate(mat4x2 Vector){
return mat4x2(Vector[0].x, -Vector[0].y, Vector[1].x, -Vector[1].y, Vector[2].x, -Vector[2].y, Vector[3].x, -Vector[3].y);
}
void Conjugate(out vec2 OutConjugateVector[5], vec2 Vector[5]){
for (int i=0;i!=5;++i){
OutConjugateVector[i]=vec2(Vector[i].x, -Vector[i].x);
}
}
//!@}
/*! Returns the real part of a complex number as real.*/
float RealPart(vec2 Z){
return Z.x;
}
/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function
outputs its two complex roots.*/
void SolveQuadratic(out vec2 pOutRoot[2], vec2 A, vec2 B, vec2 C)
{
// Normalize the coefficients
vec2 InvA=Reciprocal(A);
B=Multiply(B, InvA);
C=Multiply(C, InvA);
// Divide the middle coefficient by two
B*=0.5f;
// Apply the quadratic formula
vec2 DiscriminantRoot=SquareRoot(Square(B)-C);
pOutRoot[0]=-B-DiscriminantRoot;
pOutRoot[1]=-B+DiscriminantRoot;
}
/*! Given coefficients of a cubic polynomial A*x^3+B*x^2+C*x+D, this function
outputs its three complex roots.*/
void SolveCubicBlinn(out vec2 pOutRoot[3], vec2 A, vec2 B, vec2 C, vec2 D)
{
// Normalize the polynomial
vec2 InvA=Reciprocal(A);
B=Multiply(B, InvA);
C=Multiply(C, InvA);
D=Multiply(D, InvA);
// Divide middle coefficients by three
B/=3.0f;
C/=3.0f;
// Compute the Hessian and the discriminant
vec2 Delta00=-Square(B)+C;
vec2 Delta01=-Multiply(C, B)+D;
vec2 Delta11=Multiply(B, D)-Square(C);
vec2 Discriminant=4.0f*Multiply(Delta00, Delta11)-Square(Delta01);
// Compute coefficients of the depressed cubic
// (third is zero, fourth is one)
vec2 DepressedD=-2.0f*Multiply(B, Delta00)+Delta01;
vec2 DepressedC=Delta00;
// Take the cubic root of a complex number avoiding cancellation
vec2 DiscriminantRoot=SquareRoot(-Discriminant);
DiscriminantRoot=faceforward(DiscriminantRoot, DiscriminantRoot, DepressedD);
vec2 CubedRoot=DiscriminantRoot-DepressedD;
vec2 FirstRoot=CubicRoot(0.5f*CubedRoot);
vec2 pCubicRoot[3]={
FirstRoot,
Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), FirstRoot),
Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), FirstRoot)
};
// Also compute the reciprocal cubic roots
vec2 InvFirstRoot=Reciprocal(FirstRoot);
vec2 pInvCubicRoot[3]={
InvFirstRoot,
Multiply(vec2(-0.5f, 0.5f*sqrt(3.0f)), InvFirstRoot),
Multiply(vec2(-0.5f, -0.5f*sqrt(3.0f)), InvFirstRoot)
};
// Turn them into roots of the depressed cubic and revert the depression
// transform
for (int i=0;i!=3;++i)
{
pOutRoot[i]=pCubicRoot[i]-Multiply(DepressedC, pInvCubicRoot[i])-B;
}
}
/*! Given coefficients of a quartic polynomial A*x^4+B*x^3+C*x^2+D*x+E, this
function outputs its four complex roots.*/
void SolveQuarticNeumark(out vec2 pOutRoot[4], vec2 A, vec2 B, vec2 C, vec2 D, vec2 E)
{
// Normalize the polynomial
vec2 InvA=Reciprocal(A);
B=Multiply(B, InvA);
C=Multiply(C, InvA);
D=Multiply(D, InvA);
E=Multiply(E, InvA);
// Construct a normalized cubic
vec2 P=-2.0f*C;
vec2 Q=Square(C)+Multiply(B, D)-4.0f*E;
vec2 R=Square(D)+Multiply(Square(B), E)-Multiply(Multiply(B, C), D);
// Compute a root that is not the smallest of the cubic
vec2 pCubicRoot[3];
SolveCubicBlinn(pCubicRoot, vec2(1.0f, 0.0f), P, Q, R);
vec2 y=(dot(pCubicRoot[1], pCubicRoot[1])>dot(pCubicRoot[0], pCubicRoot[0]))?pCubicRoot[1]:pCubicRoot[0];
// Solve a quadratic to obtain linear coefficients for quadratic polynomials
vec2 BB=Square(B);
vec2 fy=4.0f*y;
vec2 BB_fy=BB-fy;
vec2 tmp=SquareRoot(BB_fy);
vec2 G=(B+tmp)*0.5f;
vec2 g=(B-tmp)*0.5f;
// Construct the corresponding constant coefficients
vec2 Z=C-y;
tmp=Divide(0.5f*Multiply(B, Z)-D, tmp);
vec2 H=Z*0.5f+tmp;
vec2 h=Z*0.5f-tmp;
// Compute the roots
vec2 pQuadraticRoot[2];
SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), G, H);
pOutRoot[0]=pQuadraticRoot[0];
pOutRoot[1]=pQuadraticRoot[1];
SolveQuadratic(pQuadraticRoot, vec2(1.0f, 0.0f), g, h);
pOutRoot[2]=pQuadraticRoot[0];
pOutRoot[3]=pQuadraticRoot[1];
}

View file

@ -1,490 +0,0 @@
/*! \file
This header provides utility functions to reconstruct the transmittance
from a given vector of power moments (4, 6 or 8 power moments) at a
specified depth. As prerequisite, utility functions for computing the real
roots of polynomials up to degree four are defined.
*/
#include "flywheel:internal/mboit/trigonometric_moment_math.glsl"
/*! Given coefficients of a quadratic polynomial A*x^2+B*x+C, this function
outputs its two real roots.*/
vec2 solveQuadratic(vec3 coeffs)
{
coeffs[1] *= 0.5;
float x1, x2, tmp;
tmp = (coeffs[1] * coeffs[1] - coeffs[0] * coeffs[2]);
if (coeffs[1] >= 0) {
tmp = sqrt(tmp);
x1 = (-coeffs[2]) / (coeffs[1] + tmp);
x2 = (-coeffs[1] - tmp) / coeffs[0];
} else {
tmp = sqrt(tmp);
x1 = (-coeffs[1] + tmp) / coeffs[0];
x2 = coeffs[2] / (-coeffs[1] + tmp);
}
return vec2(x1, x2);
}
/*! Code taken from the blog "Moments in Graphics" by Christoph Peters.
http://momentsingraphics.de/?p=105
This function computes the three real roots of a cubic polynomial
Coefficient[0]+Coefficient[1]*x+Coefficient[2]*x^2+Coefficient[3]*x^3.*/
vec3 SolveCubic(vec4 Coefficient) {
// Normalize the polynomial
Coefficient.xyz /= Coefficient.w;
// Divide middle coefficients by three
Coefficient.yz /= 3.0f;
// Compute the Hessian and the discrimant
vec3 Delta = vec3(
fma(-Coefficient.z, Coefficient.z, Coefficient.y),
fma(-Coefficient.y, Coefficient.z, Coefficient.x),
dot(vec2(Coefficient.z, -Coefficient.y), Coefficient.xy)
);
float Discriminant = dot(vec2(4.0f*Delta.x, -Delta.y), Delta.zy);
// Compute coefficients of the depressed cubic
// (third is zero, fourth is one)
vec2 Depressed = vec2(
fma(-2.0f*Coefficient.z, Delta.x, Delta.y),
Delta.x
);
// Take the cubic root of a normalized complex number
float Theta = atan(sqrt(Discriminant), -Depressed.x) / 3.0f;
vec2 CubicRoot;
sincos(Theta, CubicRoot.y, CubicRoot.x);
// Compute the three roots, scale appropriately and
// revert the depression transform
vec3 Root = vec3(
CubicRoot.x,
dot(vec2(-0.5f, -0.5f*sqrt(3.0f)), CubicRoot),
dot(vec2(-0.5f, 0.5f*sqrt(3.0f)), CubicRoot)
);
Root = fma(vec3(2.0f*sqrt(-Depressed.y)), Root, vec3(-Coefficient.z));
return Root;
}
/*! Given coefficients of a cubic polynomial
coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3 with three real roots,
this function returns the root of least magnitude.*/
float solveCubicBlinnSmallest(vec4 coeffs)
{
coeffs.xyz /= coeffs.w;
coeffs.yz /= 3.0;
vec3 delta = vec3(fma(-coeffs.z, coeffs.z, coeffs.y), fma(-coeffs.z, coeffs.y, coeffs.x), coeffs.z * coeffs.x - coeffs.y * coeffs.y);
float discriminant = 4.0 * delta.x * delta.z - delta.y * delta.y;
vec2 depressed = vec2(delta.z, -coeffs.x * delta.y + 2.0 * coeffs.y * delta.z);
float theta = abs(atan(coeffs.x * sqrt(discriminant), -depressed.y)) / 3.0;
vec2 sin_cos;
sincos(theta, sin_cos.x, sin_cos.y);
float tmp = 2.0 * sqrt(-depressed.x);
vec2 x = vec2(tmp * sin_cos.y, tmp * (-0.5 * sin_cos.y - 0.5 * sqrt(3.0) * sin_cos.x));
vec2 s = (x.x + x.y < 2.0 * coeffs.y) ? vec2(-coeffs.x, x.x + coeffs.y) : vec2(-coeffs.x, x.y + coeffs.y);
return s.x / s.y;
}
/*! Given coefficients of a quartic polynomial
coeffs[0]+coeffs[1]*x+coeffs[2]*x^2+coeffs[3]*x^3+coeffs[4]*x^4 with four
real roots, this function returns all roots.*/
vec4 solveQuarticNeumark(float coeffs[5])
{
// Normalization
float B = coeffs[3] / coeffs[4];
float C = coeffs[2] / coeffs[4];
float D = coeffs[1] / coeffs[4];
float E = coeffs[0] / coeffs[4];
// Compute coefficients of the cubic resolvent
float P = -2.0*C;
float Q = C*C + B*D - 4.0*E;
float R = D*D + B*B*E -B*C*D;
// Obtain the smallest cubic root
float y = solveCubicBlinnSmallest(vec4(R, Q, P, 1.0));
float BB = B*B;
float fy = 4.0 * y;
float BB_fy = BB - fy;
float Z = C - y;
float ZZ = Z*Z;
float fE = 4.0 * E;
float ZZ_fE = ZZ - fE;
float G, g, H, h;
// Compute the coefficients of the quadratics adaptively using the two
// proposed factorizations by Neumark. Choose the appropriate
// factorizations using the heuristic proposed by Herbison-Evans.
if (y < 0 || (ZZ + fE) * BB_fy > ZZ_fE * (BB + fy)) {
float tmp = sqrt(BB_fy);
G = (B + tmp) * 0.5;
g = (B - tmp) * 0.5;
tmp = (B*Z - 2.0*D) / (2.0*tmp);
H = fma(Z, 0.5, tmp);
h = fma(Z, 0.5, -tmp);
} else {
float tmp = sqrt(ZZ_fE);
H = (Z + tmp) * 0.5;
h = (Z - tmp) * 0.5;
tmp = (B*Z - 2.0*D) / (2.0*tmp);
G = fma(B, 0.5, tmp);
g = fma(B, 0.5, -tmp);
}
// Solve the quadratics
return vec4(solveQuadratic(vec3(1.0, G, H)), solveQuadratic(vec3(1.0, g, h)));
}
/*! Definition of utility functions for quantization and dequantization of
power moments stored in 16 bits per moment. */
void offsetMoments(inout vec2 b_even, inout vec2 b_odd, float sign)
{
b_odd += 0.5 * sign;
}
void quantizeMoments(out vec2 b_even_q, out vec2 b_odd_q, vec2 b_even, vec2 b_odd)
{
b_odd_q = b_odd * mat2x2(1.5f, sqrt(3.0f)*0.5f, -2.0f, -sqrt(3.0f)*2.0f / 9.0f);
b_even_q = b_even * mat2x2(4.0f, 0.5f, -4.0f, 0.5f);
}
void offsetAndDequantizeMoments(out vec2 b_even, out vec2 b_odd, vec2 b_even_q, vec2 b_odd_q)
{
offsetMoments(b_even_q, b_odd_q, -1.0);
b_odd = b_odd_q * mat2x2(-1.0f / 3.0f, -0.75f, sqrt(3.0f), 0.75f*sqrt(3.0f));
b_even = b_even_q * mat2x2(0.125f, -0.125f, 1.0f, 1.0f);
}
void offsetMoments(inout vec3 b_even, inout vec3 b_odd, float sign)
{
b_odd += 0.5 * sign;
b_even.z += 0.018888946f * sign;
}
void quantizeMoments(out vec3 b_even_q, out vec3 b_odd_q, vec3 b_even, vec3 b_odd)
{
const mat3x3 QuantizationMatrixOdd = mat3x3(
2.5f, -1.87499864450f, 1.26583039016f,
-10.0f, 4.20757543111f, -1.47644882902f,
8.0f, -1.83257678661f, 0.71061660238f);
const mat3x3 QuantizationMatrixEven = mat3x3(
4.0f, 9.0f, -0.57759806484f,
-4.0f, -24.0f, 4.61936647543f,
0.0f, 16.0f, -3.07953906655f);
b_odd_q = b_odd * QuantizationMatrixOdd;
b_even_q = b_even * QuantizationMatrixEven;
}
void offsetAndDequantizeMoments(out vec3 b_even, out vec3 b_odd, vec3 b_even_q, vec3 b_odd_q)
{
const mat3x3 QuantizationMatrixOdd = mat3x3(
-0.02877789192f, 0.09995235706f, 0.25893353755f,
0.47635550422f, 0.84532580931f, 0.90779616657f,
1.55242808973f, 1.05472570761f, 0.83327335647f);
const mat3x3 QuantizationMatrixEven = mat3x3(
0.00001253044f, -0.24998746956f, -0.37498825271f,
0.16668494186f, 0.16668494186f, 0.21876713299f,
0.86602540579f, 0.86602540579f, 0.81189881793f);
offsetMoments(b_even_q, b_odd_q, -1.0);
b_odd = b_odd_q * QuantizationMatrixOdd;
b_even = b_even_q * QuantizationMatrixEven;
}
void offsetMoments(inout vec4 b_even, inout vec4 b_odd, float sign)
{
b_odd += 0.5 * sign;
b_even += vec4(0.972481993925964, 1.0, 0.999179192513328, 0.991778293073131) * sign;
}
void quantizeMoments(out vec4 b_even_q, out vec4 b_odd_q, vec4 b_even, vec4 b_odd)
{
const mat4x4 mat_odd = mat4x4(3.48044635732474, -27.5760737514826, 55.1267384344761, -31.5311110403183,
1.26797185782836, -0.928755808743913, -2.07520453231032, 1.23598848322588,
-2.1671560004294, 6.17950199592966, -0.276515571579297, -4.23583042392097,
0.974332879165755, -0.443426830933027, -0.360491648368785, 0.310149466050223);
const mat4x4 mat_even = mat4x4(0.280504133158527, -0.757633844606942, 0.392179589334688, -0.887531871812237,
-2.01362265883247, 0.221551373038988, -1.06107954265125, 2.83887201588367,
-7.31010494985321, 13.9855979699139, -0.114305766176437, -7.4361899359832,
-15.8954215629556, 79.6186327084103, -127.457278992502, 63.7349456687829);
b_odd_q = mat_odd * b_odd;
b_even_q = mat_even * b_even;
}
void offsetAndDequantizeMoments(out vec4 b_even, out vec4 b_odd, vec4 b_even_q, vec4 b_odd_q)
{
const mat4x4 mat_odd = mat4x4(-0.00482399708502382, -0.423201508674231, 0.0348312382605129, 1.67179208266592,
-0.0233402218644408, -0.832829097046478, 0.0193406040499625, 1.21021509068975,
-0.010888537031885, -0.926393772997063, -0.11723394414779, 0.983723301818275,
-0.0308713357806732, -0.937989172670245, -0.218033377677099, 0.845991731322996);
const mat4x4 mat_even = mat4x4(-0.976220278891035, -0.456139260269401, -0.0504335521016742, 0.000838800390651085,
-1.04828341778299, -0.229726640510149, 0.0259608334616091, -0.00133632693205861,
-1.03115268628604, -0.077844420809897, 0.00443408851014257, -0.0103744938457406,
-0.996038443434636, 0.0175438624416783, -0.0361414253243963, -0.00317839994022725);
offsetMoments(b_even_q, b_odd_q, -1.0);
b_odd = mat_odd * b_odd_q;
b_even = mat_even * b_even_q;
}
/*! This function reconstructs the transmittance at the given depth from four
normalized power moments and the given zeroth moment.*/
float computeTransmittanceAtDepthFrom4PowerMoments(float b_0, vec2 b_even, vec2 b_odd, float depth, float bias, float overestimation, vec4 bias_vector)
{
vec4 b = vec4(b_odd.x, b_even.x, b_odd.y, b_even.y);
// Bias input data to avoid artifacts
b = mix(b, bias_vector, bias);
vec3 z;
z[0] = depth;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-
// trivial entries or related products
float L21D11=fma(-b[0], b[1], b[2]);
float D11=fma(-b[0], b[0], b[1]);
float InvD11=1.0f/D11;
float L21=L21D11*InvD11;
float SquaredDepthVariance=fma(-b[1], b[1], b[3]);
float D22=fma(-L21D11, L21, SquaredDepthVariance);
// Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T
vec3 c=vec3(1.0f, z[0], z[0]*z[0]);
// Forward substitution to solve L*c1=bz
c[1]-=b.x;
c[2]-=b.y+L21*c[1];
// Scaling to solve D*c2=c1
c[1]*=InvD11;
c[2]/=D22;
// Backward substitution to solve L^T*c3=c2
c[1]-=L21*c[2];
c[0]-=dot(c.yz, b.xy);
// Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions
// z[1] and z[2]
float InvC2=1.0f/c[2];
float p=c[1]*InvC2;
float q=c[0]*InvC2;
float D=(p*p*0.25f)-q;
float r=sqrt(D);
z[1]=-p*0.5f-r;
z[2]=-p*0.5f+r;
// Compute the absorbance by summing the appropriate weights
vec3 polynomial;
vec3 weight_factor = vec3(overestimation, (z[1] < z[0])?1.0f:0.0f, (z[2] < z[0])?1.0f:0.0f);
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
float f01=(f1-f0)/(z[1]-z[0]);
float f12=(f2-f1)/(z[2]-z[1]);
float f012=(f12-f01)/(z[2]-z[0]);
polynomial[0]=f012;
polynomial[1]=polynomial[0];
polynomial[0]=f01-polynomial[0]*z[1];
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-polynomial[1]*z[0];
polynomial[0]=f0-polynomial[0]*z[0];
float absorbance = polynomial[0] + dot(b.xy, polynomial.yz);;
// Turn the normalized absorbance into transmittance
return saturate(exp(-b_0 * absorbance));
}
/*! This function reconstructs the transmittance at the given depth from six
normalized power moments and the given zeroth moment.*/
float computeTransmittanceAtDepthFrom6PowerMoments(float b_0, vec3 b_even, vec3 b_odd, float depth, float bias, float overestimation, float bias_vector[6])
{
float b[6] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z };
// Bias input data to avoid artifacts
for (int i = 0; i != 6; ++i) {
b[i] = mix(b[i], bias_vector[i], bias);
}
vec4 z;
z[0] = depth;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-
// trivial entries or related products
float InvD11 = 1.0f / fma(-b[0], b[0], b[1]);
float L21D11 = fma(-b[0], b[1], b[2]);
float L21 = L21D11*InvD11;
float D22 = fma(-L21D11, L21, fma(-b[1], b[1], b[3]));
float L31D11 = fma(-b[0], b[2], b[3]);
float L31 = L31D11*InvD11;
float InvD22 = 1.0f / D22;
float L32D22 = fma(-L21D11, L31, fma(-b[1], b[2], b[4]));
float L32 = L32D22*InvD22;
float D33 = fma(-b[2], b[2], b[5]) - dot(vec2(L31D11, L32D22), vec2(L31, L32));
float InvD33 = 1.0f / D33;
// Construct the polynomial whose roots have to be points of support of the
// canonical distribution: bz=(1,z[0],z[0]*z[0],z[0]*z[0]*z[0])^T
vec4 c;
c[0] = 1.0f;
c[1] = z[0];
c[2] = c[1] * z[0];
c[3] = c[2] * z[0];
// Forward substitution to solve L*c1=bz
c[1] -= b[0];
c[2] -= fma(L21, c[1], b[1]);
c[3] -= b[2] + dot(vec2(L31, L32), c.yz);
// Scaling to solve D*c2=c1
c.yzw *= vec3(InvD11, InvD22, InvD33);
// Backward substitution to solve L^T*c3=c2
c[2] -= L32*c[3];
c[1] -= dot(vec2(L21, L31), c.zw);
c[0] -= dot(vec3(b[0], b[1], b[2]), c.yzw);
// Solve the cubic equation
z.yzw = SolveCubic(c);
// Compute the absorbance by summing the appropriate weights
vec4 weigth_factor;
weigth_factor[0] = overestimation;
weigth_factor.yzw = vec3(greaterThan(z.yzw, z.xxx));
// Construct an interpolation polynomial
float f0 = weigth_factor[0];
float f1 = weigth_factor[1];
float f2 = weigth_factor[2];
float f3 = weigth_factor[3];
float f01 = (f1 - f0) / (z[1] - z[0]);
float f12 = (f2 - f1) / (z[2] - z[1]);
float f23 = (f3 - f2) / (z[3] - z[2]);
float f012 = (f12 - f01) / (z[2] - z[0]);
float f123 = (f23 - f12) / (z[3] - z[1]);
float f0123 = (f123 - f012) / (z[3] - z[0]);
vec4 polynomial;
// f012+f0123 *(z-z2)
polynomial[0] = fma(-f0123, z[2], f012);
polynomial[1] = f0123;
// *(z-z1) +f01
polynomial[2] = polynomial[1];
polynomial[1] = fma(polynomial[1], -z[1], polynomial[0]);
polynomial[0] = fma(polynomial[0], -z[1], f01);
// *(z-z0) +f0
polynomial[3] = polynomial[2];
polynomial[2] = fma(polynomial[2], -z[0], polynomial[1]);
polynomial[1] = fma(polynomial[1], -z[0], polynomial[0]);
polynomial[0] = fma(polynomial[0], -z[0], f0);
float absorbance = dot(polynomial, vec4 (1.0, b[0], b[1], b[2]));
// Turn the normalized absorbance into transmittance
return saturate(exp(-b_0 * absorbance));
}
/*! This function reconstructs the transmittance at the given depth from eight
normalized power moments and the given zeroth moment.*/
float computeTransmittanceAtDepthFrom8PowerMoments(float b_0, vec4 b_even, vec4 b_odd, float depth, float bias, float overestimation, float bias_vector[8])
{
float b[8] = { b_odd.x, b_even.x, b_odd.y, b_even.y, b_odd.z, b_even.z, b_odd.w, b_even.w };
// Bias input data to avoid artifacts
for (int i = 0; i != 8; ++i) {
b[i] = mix(b[i], bias_vector[i], bias);
}
float z[5];
z[0] = depth;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-trivial entries or related products
float D22 = fma(-b[0], b[0], b[1]);
float InvD22 = 1.0 / D22;
float L32D22 = fma(-b[1], b[0], b[2]);
float L32 = L32D22 * InvD22;
float L42D22 = fma(-b[2], b[0], b[3]);
float L42 = L42D22 * InvD22;
float L52D22 = fma(-b[3], b[0], b[4]);
float L52 = L52D22 * InvD22;
float D33 = fma(-L32, L32D22, fma(-b[1], b[1], b[3]));
float InvD33 = 1.0 / D33;
float L43D33 = fma(-L42, L32D22, fma(-b[2], b[1], b[4]));
float L43 = L43D33 * InvD33;
float L53D33 = fma(-L52, L32D22, fma(-b[3], b[1], b[5]));
float L53 = L53D33 * InvD33;
float D44 = fma(-b[2], b[2], b[5]) - dot(vec2(L42, L43), vec2(L42D22, L43D33));
float InvD44 = 1.0 / D44;
float L54D44 = fma(-b[3], b[2], b[6]) - dot(vec2(L52, L53), vec2(L42D22, L43D33));
float L54 = L54D44 * InvD44;
float D55 = fma(-b[3], b[3], b[7]) - dot(vec3(L52, L53, L54), vec3(L52D22, L53D33, L54D44));
float InvD55 = 1.0 / D55;
// Construct the polynomial whose roots have to be points of support of the
// Canonical distribution:
// bz = (1,z[0],z[0]^2,z[0]^3,z[0]^4)^T
float c[5];
c[0] = 1.0;
c[1] = z[0];
c[2] = c[1] * z[0];
c[3] = c[2] * z[0];
c[4] = c[3] * z[0];
// Forward substitution to solve L*c1 = bz
c[1] -= b[0];
c[2] -= fma(L32, c[1], b[1]);
c[3] -= b[2] + dot(vec2(L42, L43), vec2(c[1], c[2]));
c[4] -= b[3] + dot(vec3(L52, L53, L54), vec3(c[1], c[2], c[3]));
// Scaling to solve D*c2 = c1
//c = c .*[1, InvD22, InvD33, InvD44, InvD55];
c[1] *= InvD22;
c[2] *= InvD33;
c[3] *= InvD44;
c[4] *= InvD55;
// Backward substitution to solve L^T*c3 = c2
c[3] -= L54 * c[4];
c[2] -= dot(vec2(L53, L43), vec2(c[4], c[3]));
c[1] -= dot(vec3(L52, L42, L32), vec3(c[4], c[3], c[2]));
c[0] -= dot(vec4(b[3], b[2], b[1], b[0]), vec4(c[4], c[3], c[2], c[1]));
// Solve the quartic equation
vec4 zz = solveQuarticNeumark(c);
z[1] = zz[0];
z[2] = zz[1];
z[3] = zz[2];
z[4] = zz[3];
// Compute the absorbance by summing the appropriate weights
vec4 weigth_factor = vec4(lessThanEqual(vec4(z[1], z[2], z[3], z[4]), z[0].xxxx));
// Construct an interpolation polynomial
float f0 = overestimation;
float f1 = weigth_factor[0];
float f2 = weigth_factor[1];
float f3 = weigth_factor[2];
float f4 = weigth_factor[3];
float f01 = (f1 - f0) / (z[1] - z[0]);
float f12 = (f2 - f1) / (z[2] - z[1]);
float f23 = (f3 - f2) / (z[3] - z[2]);
float f34 = (f4 - f3) / (z[4] - z[3]);
float f012 = (f12 - f01) / (z[2] - z[0]);
float f123 = (f23 - f12) / (z[3] - z[1]);
float f234 = (f34 - f23) / (z[4] - z[2]);
float f0123 = (f123 - f012) / (z[3] - z[0]);
float f1234 = (f234 - f123) / (z[4] - z[1]);
float f01234 = (f1234 - f0123) / (z[4] - z[0]);
float Polynomial_0;
vec4 Polynomial;
// f0123 + f01234 * (z - z3)
Polynomial_0 = fma(-f01234, z[3], f0123);
Polynomial[0] = f01234;
// * (z - z2) + f012
Polynomial[1] = Polynomial[0];
Polynomial[0] = fma(-Polynomial[0], z[2], Polynomial_0);
Polynomial_0 = fma(-Polynomial_0, z[2], f012);
// * (z - z1) + f01
Polynomial[2] = Polynomial[1];
Polynomial[1] = fma(-Polynomial[1], z[1], Polynomial[0]);
Polynomial[0] = fma(-Polynomial[0], z[1], Polynomial_0);
Polynomial_0 = fma(-Polynomial_0, z[1], f01);
// * (z - z0) + f1
Polynomial[3] = Polynomial[2];
Polynomial[2] = fma(-Polynomial[2], z[0], Polynomial[1]);
Polynomial[1] = fma(-Polynomial[1], z[0], Polynomial[0]);
Polynomial[0] = fma(-Polynomial[0], z[0], Polynomial_0);
Polynomial_0 = fma(-Polynomial_0, z[0], f0);
float absorbance = Polynomial_0 + dot(Polynomial, vec4(b[0], b[1], b[2], b[3]));
// Turn the normalized absorbance into transmittance
return saturate(exp(-b_0 * absorbance));
}

View file

@ -1,251 +0,0 @@
/*! \file
This header provides the functionality to create the vectors of moments and
to blend surfaces together with an appropriately reconstructed
transmittance. It is needed for both additive passes of moment-based OIT.
*/
//cbuffer MomentOIT
//{
// struct {
// vec4 wrapping_zone_parameters;
// float overestimation;
// float moment_bias;
// }MomentOIT;
//};
#include "flywheel:internal/mboit/moment_math.glsl"
const float moment_bias = 0.25;
const float overestimation = 0.25;
const vec4 wrapping_zone_parameters = vec4(0.31415927, 2.984513, 2.7934167, -18.553917);
void clip(float a) {
if (a < 0.) {
discard;
}
}
// jozu: The trigonometric moments and higher order power moments rely on a second render target
// which the java side is not set up to support. Trying to enable them as is will cause compile errors also.
#define NUM_MOMENTS 8
#define SINGLE_PRECISION 1
#ifdef _FLW_GENERATE_MOMENTS
/*! Generation of moments in case that rasterizer ordered views are used.
This includes the case if moments are stored in 16 bits. */
/*! This functions relies on fixed function additive blending to compute the
vector of moments.moment vector. The shader that calls this function must
provide the required render targets.*/
#if NUM_MOMENTS == 4
void generateMoments(float depth, float transmittance, out float b_0, out vec4 b)
#elif NUM_MOMENTS == 6
#if USE_R_RG_RBBA_FOR_MBOIT6
void generateMoments(float depth, float transmittance, out float b_0, out vec2 b_12, out vec4 b_3456)
#else
void generateMoments(float depth, float transmittance, out float b_0, out vec2 b_12, out vec2 b_34, out vec2 b_56)
#endif
#elif NUM_MOMENTS == 8
void generateMoments(float depth, float transmittance, out float b_0, out vec4 b_even, out vec4 b_odd)
#endif
{
transmittance = max(transmittance, 0.000001);
float absorbance = -log(transmittance);
b_0 = absorbance;
#if TRIGONOMETRIC
float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point = vec2(sin(phase), cos(phase));
vec2 circle_point_pow2 = Multiply(circle_point, circle_point);
#if NUM_MOMENTS == 4
b = vec4(circle_point, circle_point_pow2) * absorbance;
#elif NUM_MOMENTS == 6
b_12 = circle_point * absorbance;
#if USE_R_RG_RBBA_FOR_MBOIT6
b_3456 = vec4(circle_point_pow2, Multiply(circle_point, circle_point_pow2)) * absorbance;
#else
b_34 = circle_point_pow2 * absorbance;
b_56 = Multiply(circle_point, circle_point_pow2) * absorbance;
#endif
#elif NUM_MOMENTS == 8
b_even = vec4(circle_point_pow2, Multiply(circle_point_pow2, circle_point_pow2)) * absorbance;
b_odd = vec4(circle_point, Multiply(circle_point, circle_point_pow2)) * absorbance;
#endif
#else
float depth_pow2 = depth * depth;
float depth_pow4 = depth_pow2 * depth_pow2;
#if NUM_MOMENTS == 4
b = vec4(depth, depth_pow2, depth_pow2 * depth, depth_pow4) * absorbance;
#elif NUM_MOMENTS == 6
b_12 = vec2(depth, depth_pow2) * absorbance;
#if USE_R_RG_RBBA_FOR_MBOIT6
b_3456 = vec4(depth_pow2 * depth, depth_pow4, depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance;
#else
b_34 = vec2(depth_pow2 * depth, depth_pow4) * absorbance;
b_56 = vec2(depth_pow4 * depth, depth_pow4 * depth_pow2) * absorbance;
#endif
#elif NUM_MOMENTS == 8
float depth_pow6 = depth_pow4 * depth_pow2;
b_even = vec4(depth_pow2, depth_pow4, depth_pow6, depth_pow6 * depth_pow2) * absorbance;
b_odd = vec4(depth, depth_pow2 * depth, depth_pow4 * depth, depth_pow6 * depth) * absorbance;
#endif
#endif
}
#else//MOMENT_GENERATION is disabled
layout (binding = 7) uniform sampler2D _flw_zeroth_moment_sampler;
layout (binding = 8) uniform sampler2D _flw_moments0_sampler;
layout (binding = 9) uniform sampler2D _flw_moments1_sampler;
/*! This function is to be called from the shader that composites the
transparent fragments. It reads the moments and calls the appropriate
function to reconstruct the transmittance at the specified depth.*/
void resolveMoments(out float transmittance_at_depth, out float total_transmittance, float depth, vec2 sv_pos)
{
ivec2 idx0 = ivec2(sv_pos);
ivec2 idx1 = idx0;
transmittance_at_depth = 1;
total_transmittance = 1;
float b_0 = texelFetch(_flw_zeroth_moment_sampler, idx0, 0).x;
clip(b_0 - 0.00100050033f);
total_transmittance = exp(-b_0);
#if NUM_MOMENTS == 4
#if TRIGONOMETRIC
vec4 b_tmp = texelFetch(_flw_moments0_sampler, idx0, 0);
vec2 trig_b[2];
trig_b[0] = b_tmp.xy;
trig_b[1] = b_tmp.zw;
#if SINGLE_PRECISION
trig_b[0] /= b_0;
trig_b[1] /= b_0;
#else
trig_b[0] = fma(trig_b[0], 2.0, -1.0);
trig_b[1] = fma(trig_b[1], 2.0, -1.0);
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom2TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters);
#else
vec4 b_1234 = texelFetch(_flw_moments0_sampler, idx0, 0).xyzw;
#if SINGLE_PRECISION
vec2 b_even = b_1234.yw;
vec2 b_odd = b_1234.xz;
b_even /= b_0;
b_odd /= b_0;
const vec4 bias_vector = vec4(0, 0.375, 0, 0.375);
#else
vec2 b_even_q = b_1234.yw;
vec2 b_odd_q = b_1234.xz;
// Dequantize the moments
vec2 b_even;
vec2 b_odd;
offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q);
const vec4 bias_vector = vec4(0, 0.628, 0, 0.628);
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom4PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector);
#endif
#elif NUM_MOMENTS == 6
ivec2 idx2 = idx0;
#if TRIGONOMETRIC
vec2 trig_b[3];
trig_b[0] = texelFetch(_flw_moments0_sampler, idx0, 0).xy;
#if USE_R_RG_RBBA_FOR_MBOIT6
vec4 tmp = texelFetch(extra_moments, idx0, 0);
trig_b[1] = tmp.xy;
trig_b[2] = tmp.zw;
#else
trig_b[1] = texelFetch(_flw_moments1_sampler, idx1, 0).xy;
trig_b[2] = texelFetch(_flw_moments0_sampler, idx2, 0).xy;
#endif
#if SINGLE_PRECISION
trig_b[0] /= b_0;
trig_b[1] /= b_0;
trig_b[2] /= b_0;
#else
trig_b[0] = fma(trig_b[0], 2.0, -1.0);
trig_b[1] = fma(trig_b[1], 2.0, -1.0);
trig_b[2] = fma(trig_b[2], 2.0, -1.0);
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom3TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters);
#else
vec2 b_12 = texelFetch(_flw_moments0_sampler, idx0, 0).xy;
#if USE_R_RG_RBBA_FOR_MBOIT6
vec4 tmp = texelFetch(extra_moments, idx0, 0);
vec2 b_34 = tmp.xy;
vec2 b_56 = tmp.zw;
#else
vec2 b_34 = texelFetch(_flw_moments1_sampler, idx1, 0).xy;
vec2 b_56 = texelFetch(_flw_moments0_sampler, idx2, 0).xy;
#endif
#if SINGLE_PRECISION
vec3 b_even = vec3(b_12.y, b_34.y, b_56.y);
vec3 b_odd = vec3(b_12.x, b_34.x, b_56.x);
b_even /= b_0;
b_odd /= b_0;
const float bias_vector[6] = { 0, 0.48, 0, 0.451, 0, 0.45 };
#else
vec3 b_even_q = vec3(b_12.y, b_34.y, b_56.y);
vec3 b_odd_q = vec3(b_12.x, b_34.x, b_56.x);
// Dequantize b_0 and the other moments
vec3 b_even;
vec3 b_odd;
offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q);
const float bias_vector[6] = { 0, 0.5566, 0, 0.489, 0, 0.47869382 };
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom6PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector);
#endif
#elif NUM_MOMENTS == 8
#if TRIGONOMETRIC
vec4 b_tmp = texelFetch(_flw_moments0_sampler, idx0, 0);
vec4 b_tmp2 = texelFetch(_flw_moments1_sampler, idx1, 0);
#if SINGLE_PRECISION
vec2 trig_b[4] = {
b_tmp2.xy / b_0,
b_tmp.xy / b_0,
b_tmp2.zw / b_0,
b_tmp.zw / b_0
};
#else
vec2 trig_b[4] = {
fma(b_tmp2.xy, 2.0, -1.0),
fma(b_tmp.xy, 2.0, -1.0),
fma(b_tmp2.zw, 2.0, -1.0),
fma(b_tmp.zw, 2.0, -1.0)
};
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom4TrigonometricMoments(b_0, trig_b, depth, moment_bias, overestimation, wrapping_zone_parameters);
#else
#if SINGLE_PRECISION
vec4 b_even = texelFetch(_flw_moments0_sampler, idx0, 0);
vec4 b_odd = texelFetch(_flw_moments1_sampler, idx1, 0);
b_even /= b_0;
b_odd /= b_0;
const float bias_vector[8] = { 0, 0.75, 0, 0.67666666666666664, 0, 0.63, 0, 0.60030303030303034 };
#else
vec4 b_even_q = texelFetch(_flw_moments0_sampler, idx0, 0);
vec4 b_odd_q = texelFetch(_flw_moments1_sampler, idx1, 0);
// Dequantize the moments
vec4 b_even;
vec4 b_odd;
offsetAndDequantizeMoments(b_even, b_odd, b_even_q, b_odd_q);
const float bias_vector[8] = { 0, 0.42474916387959866, 0, 0.22407802675585284, 0, 0.15369230769230768, 0, 0.12900440529089119 };
#endif
transmittance_at_depth = computeTransmittanceAtDepthFrom8PowerMoments(b_0, b_even, b_odd, depth, moment_bias, overestimation, bias_vector);
#endif
#endif
}
#endif

View file

@ -1,311 +0,0 @@
/*! \file
This header provides the utility functions to reconstruct the transmittance
from a given vector of trigonometric moments (2, 3 or 4 trigonometric
moments) at a specified depth.*/
#include "flywheel:internal/mboit/complex_algebra.glsl"
/*! This utility function turns a point on the unit circle into a scalar
parameter. It is guaranteed to grow monotonically for (cos(phi),sin(phi))
with phi in 0 to 2*pi. There are no other guarantees. In particular it is
not an arclength parametrization. If you change this function, you must
also change circleToParameter() in MomentOIT.cpp.*/
float circleToParameter(vec2 circle_point){
float result=abs(circle_point.y)-abs(circle_point.x);
result=(circle_point.x<0.0f)?(2.0f-result):result;
return (circle_point.y<0.0f)?(6.0f-result):result;
}
/*! This utility function returns the appropriate weight factor for a root at
the given location. Both inputs are supposed to be unit vectors. If a
circular arc going counter clockwise from (1.0,0.0) meets root first, it
returns 1.0, otherwise 0.0 or a linear ramp in the wrapping zone.*/
float getRootWeightFactor(float reference_parameter, float root_parameter, vec4 wrapping_zone_parameters){
float binary_weight_factor=(root_parameter<reference_parameter)?1.0f:0.0f;
float linear_weight_factor=saturate(fma(root_parameter, wrapping_zone_parameters.z, wrapping_zone_parameters.w));
return binary_weight_factor+linear_weight_factor;
}
/*! This function reconstructs the transmittance at the given depth from two
normalized trigonometric moments.*/
float computeTransmittanceAtDepthFrom2TrigonometricMoments(float b_0, vec2 trig_b[2], float depth, float bias, float overestimation, vec4 wrapping_zone_parameters)
{
// Apply biasing and reformat the inputs a little bit
float moment_scale = 1.0f - bias;
vec2 b[3] = {
vec2(1.0f, 0.0f),
trig_b[0] * moment_scale,
trig_b[1] * moment_scale
};
// Compute a Cholesky factorization of the Toeplitz matrix
float D00=RealPart(b[0]);
float InvD00=1.0f/D00;
vec2 L10=(b[1])*InvD00;
float D11=RealPart(b[0]-D00*Multiply(L10, Conjugate(L10)));
float InvD11=1.0f/D11;
vec2 L20=(b[2])*InvD00;
vec2 L21=(b[1]-D00*Multiply(L20, Conjugate(L10)))*InvD11;
float D22=RealPart(b[0]-D00*Multiply(L20, Conjugate(L20))-D11*Multiply(L21, Conjugate(L21)));
float InvD22=1.0f/D22;
// Solve a linear system to get the relevant polynomial
float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point;
sincos(phase, circle_point.y, circle_point.x);
vec2 c[3] = {
vec2(1.0f, 0.0f),
circle_point,
Multiply(circle_point, circle_point)
};
c[1]-=Multiply(L10, c[0]);
c[2]-=Multiply(L20, c[0])+Multiply(L21, c[1]);
c[0]*=InvD00;
c[1]*=InvD11;
c[2]*=InvD22;
c[1]-=Multiply(Conjugate(L21), c[2]);
c[0]-=Multiply(Conjugate(L10), c[1])+Multiply(Conjugate(L20), c[2]);
// Compute roots of the polynomial
vec2 pRoot[2];
SolveQuadratic(pRoot, Conjugate(c[2]), Conjugate(c[1]), Conjugate(c[0]));
// Figure out how to weight the weights
float depth_parameter = circleToParameter(circle_point);
vec3 weight_factor;
weight_factor[0] = overestimation;
for (int i = 0; i != 2; ++i)
{
float root_parameter = circleToParameter(pRoot[i]);
weight_factor[i+1] = getRootWeightFactor(depth_parameter, root_parameter, wrapping_zone_parameters);
}
// Compute the appropriate linear combination of weights
vec2 z[3] = { circle_point, pRoot[0], pRoot[1] };
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
vec2 f01=Divide(f1-f0, z[1]-z[0]);
vec2 f12=Divide(f2-f1, z[2]-z[1]);
vec2 f012=Divide(f12-f01, z[2]-z[0]);
vec2 polynomial[3];
polynomial[0]=f012;
polynomial[1]=polynomial[0];
polynomial[0]=f01-Multiply(polynomial[0], z[1]);
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[0]);
polynomial[0]=f0-Multiply(polynomial[0], z[0]);
float weight_sum=0.0f;
weight_sum+=RealPart(Multiply(b[0], polynomial[0]));
weight_sum+=RealPart(Multiply(b[1], polynomial[1]));
weight_sum+=RealPart(Multiply(b[2], polynomial[2]));
// Turn the normalized absorbance into transmittance
return exp(-b_0 * weight_sum);
}
/*! This function reconstructs the transmittance at the given depth from three
normalized trigonometric moments. */
float computeTransmittanceAtDepthFrom3TrigonometricMoments(float b_0, vec2 trig_b[3], float depth, float bias, float overestimation, vec4 wrapping_zone_parameters)
{
// Apply biasing and reformat the inputs a little bit
float moment_scale = 1.0f - bias;
vec2 b[4] = {
vec2(1.0f, 0.0f),
trig_b[0] * moment_scale,
trig_b[1] * moment_scale,
trig_b[2] * moment_scale
};
// Compute a Cholesky factorization of the Toeplitz matrix
float D00=RealPart(b[0]);
float InvD00=1.0f/D00;
vec2 L10=(b[1])*InvD00;
float D11=RealPart(b[0]-D00*Multiply(L10, Conjugate(L10)));
float InvD11=1.0f/D11;
vec2 L20=(b[2])*InvD00;
vec2 L21=(b[1]-D00*Multiply(L20, Conjugate(L10)))*InvD11;
float D22=RealPart(b[0]-D00*Multiply(L20, Conjugate(L20))-D11*Multiply(L21, Conjugate(L21)));
float InvD22=1.0f/D22;
vec2 L30=(b[3])*InvD00;
vec2 L31=(b[2]-D00*Multiply(L30, Conjugate(L10)))*InvD11;
vec2 L32=(b[1]-D00*Multiply(L30, Conjugate(L20))-D11*Multiply(L31, Conjugate(L21)))*InvD22;
float D33=RealPart(b[0]-D00*Multiply(L30, Conjugate(L30))-D11*Multiply(L31, Conjugate(L31))-D22*Multiply(L32, Conjugate(L32)));
float InvD33=1.0f/D33;
// Solve a linear system to get the relevant polynomial
float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point;
sincos(phase, circle_point.y, circle_point.x);
vec2 circle_point_pow2 = Multiply(circle_point, circle_point);
vec2 c[4] = {
vec2(1.0f, 0.0f),
circle_point,
circle_point_pow2,
Multiply(circle_point, circle_point_pow2)
};
c[1]-=Multiply(L10, c[0]);
c[2]-=Multiply(L20, c[0])+Multiply(L21, c[1]);
c[3]-=Multiply(L30, c[0])+Multiply(L31, c[1])+Multiply(L32, c[2]);
c[0]*=InvD00;
c[1]*=InvD11;
c[2]*=InvD22;
c[3]*=InvD33;
c[2]-=Multiply(Conjugate(L32), c[3]);
c[1]-=Multiply(Conjugate(L21), c[2])+Multiply(Conjugate(L31), c[3]);
c[0]-=Multiply(Conjugate(L10), c[1])+Multiply(Conjugate(L20), c[2])+Multiply(Conjugate(L30), c[3]);
// Compute roots of the polynomial
vec2 pRoot[3];
SolveCubicBlinn(pRoot, Conjugate(c[3]), Conjugate(c[2]), Conjugate(c[1]), Conjugate(c[0]));
// The roots are known to be normalized but for reasons of numerical
// stability it can be better to enforce that
//pRoot[0]=normalize(pRoot[0]);
//pRoot[1]=normalize(pRoot[1]);
//pRoot[2]=normalize(pRoot[2]);
// Figure out how to weight the weights
float depth_parameter = circleToParameter(circle_point);
vec4 weight_factor;
weight_factor[0] = overestimation;
for (int i = 0; i != 3; ++i)
{
float root_parameter = circleToParameter(pRoot[i]);
weight_factor[i+1] = getRootWeightFactor(depth_parameter, root_parameter, wrapping_zone_parameters);
}
// Compute the appropriate linear combination of weights
vec2 z[4] = { circle_point, pRoot[0], pRoot[1], pRoot[2] };
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
float f3=weight_factor[3];
vec2 f01=Divide(f1-f0, z[1]-z[0]);
vec2 f12=Divide(f2-f1, z[2]-z[1]);
vec2 f23=Divide(f3-f2, z[3]-z[2]);
vec2 f012=Divide(f12-f01, z[2]-z[0]);
vec2 f123=Divide(f23-f12, z[3]-z[1]);
vec2 f0123=Divide(f123-f012, z[3]-z[0]);
vec2 polynomial[4];
polynomial[0]=f0123;
polynomial[1]=polynomial[0];
polynomial[0]=f012-Multiply(polynomial[0], z[2]);
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[1]);
polynomial[0]=f01-Multiply(polynomial[0], z[1]);
polynomial[3]=polynomial[2];
polynomial[2]=polynomial[1]-Multiply(polynomial[2], z[0]);
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[0]);
polynomial[0]=f0-Multiply(polynomial[0], z[0]);
float weight_sum=0;
weight_sum+=RealPart(Multiply(b[0], polynomial[0]));
weight_sum+=RealPart(Multiply(b[1], polynomial[1]));
weight_sum+=RealPart(Multiply(b[2], polynomial[2]));
weight_sum+=RealPart(Multiply(b[3], polynomial[3]));
// Turn the normalized absorbance into transmittance
return exp(-b_0 * weight_sum);
}
/*! This function reconstructs the transmittance at the given depth from four
normalized trigonometric moments.*/
float computeTransmittanceAtDepthFrom4TrigonometricMoments(float b_0, vec2 trig_b[4], float depth, float bias, float overestimation, vec4 wrapping_zone_parameters)
{
// Apply biasing and reformat the inputs a little bit
float moment_scale = 1.0f - bias;
vec2 b[5] = {
vec2(1.0f, 0.0f),
trig_b[0] * moment_scale,
trig_b[1] * moment_scale,
trig_b[2] * moment_scale,
trig_b[3] * moment_scale
};
// Compute a Cholesky factorization of the Toeplitz matrix
float D00=RealPart(b[0]);
float InvD00=1.0/D00;
vec2 L10=(b[1])*InvD00;
float D11=RealPart(b[0]-D00*Multiply(L10, Conjugate(L10)));
float InvD11=1.0/D11;
vec2 L20=(b[2])*InvD00;
vec2 L21=(b[1]-D00*Multiply(L20, Conjugate(L10)))*InvD11;
float D22=RealPart(b[0]-D00*Multiply(L20, Conjugate(L20))-D11*Multiply(L21, Conjugate(L21)));
float InvD22=1.0/D22;
vec2 L30=(b[3])*InvD00;
vec2 L31=(b[2]-D00*Multiply(L30, Conjugate(L10)))*InvD11;
vec2 L32=(b[1]-D00*Multiply(L30, Conjugate(L20))-D11*Multiply(L31, Conjugate(L21)))*InvD22;
float D33=RealPart(b[0]-D00*Multiply(L30, Conjugate(L30))-D11*Multiply(L31, Conjugate(L31))-D22*Multiply(L32, Conjugate(L32)));
float InvD33=1.0/D33;
vec2 L40=(b[4])*InvD00;
vec2 L41=(b[3]-D00*Multiply(L40, Conjugate(L10)))*InvD11;
vec2 L42=(b[2]-D00*Multiply(L40, Conjugate(L20))-D11*Multiply(L41, Conjugate(L21)))*InvD22;
vec2 L43=(b[1]-D00*Multiply(L40, Conjugate(L30))-D11*Multiply(L41, Conjugate(L31))-D22*Multiply(L42, Conjugate(L32)))*InvD33;
float D44=RealPart(b[0]-D00*Multiply(L40, Conjugate(L40))-D11*Multiply(L41, Conjugate(L41))-D22*Multiply(L42, Conjugate(L42))-D33*Multiply(L43, Conjugate(L43)));
float InvD44=1.0/D44;
// Solve a linear system to get the relevant polynomial
float phase = fma(depth, wrapping_zone_parameters.y, wrapping_zone_parameters.y);
vec2 circle_point;
sincos(phase, circle_point.y, circle_point.x);
vec2 circle_point_pow2 = Multiply(circle_point, circle_point);
vec2 c[5] = {
vec2(1.0f, 0.0f),
circle_point,
circle_point_pow2,
Multiply(circle_point, circle_point_pow2),
Multiply(circle_point_pow2, circle_point_pow2)
};
c[1]-=Multiply(L10, c[0]);
c[2]-=Multiply(L20, c[0])+Multiply(L21, c[1]);
c[3]-=Multiply(L30, c[0])+Multiply(L31, c[1])+Multiply(L32, c[2]);
c[4]-=Multiply(L40, c[0])+Multiply(L41, c[1])+Multiply(L42, c[2])+Multiply(L43, c[3]);
c[0]*=InvD00;
c[1]*=InvD11;
c[2]*=InvD22;
c[3]*=InvD33;
c[4]*=InvD44;
c[3]-=Multiply(Conjugate(L43), c[4]);
c[2]-=Multiply(Conjugate(L32), c[3])+Multiply(Conjugate(L42), c[4]);
c[1]-=Multiply(Conjugate(L21), c[2])+Multiply(Conjugate(L31), c[3])+Multiply(Conjugate(L41), c[4]);
c[0]-=Multiply(Conjugate(L10), c[1])+Multiply(Conjugate(L20), c[2])+Multiply(Conjugate(L30), c[3])+Multiply(Conjugate(L40), c[4]);
// Compute roots of the polynomial
vec2 pRoot[4];
SolveQuarticNeumark(pRoot, Conjugate(c[4]), Conjugate(c[3]), Conjugate(c[2]), Conjugate(c[1]), Conjugate(c[0]));
// Figure out how to weight the weights
float depth_parameter = circleToParameter(circle_point);
float weight_factor[5];
weight_factor[0] = overestimation;
for (int i = 0; i != 4; ++i)
{
float root_parameter = circleToParameter(pRoot[i]);
weight_factor[i+1] = getRootWeightFactor(depth_parameter, root_parameter, wrapping_zone_parameters);
}
// Compute the appropriate linear combination of weights
vec2 z[5] = { circle_point, pRoot[0], pRoot[1], pRoot[2], pRoot[3] };
float f0=weight_factor[0];
float f1=weight_factor[1];
float f2=weight_factor[2];
float f3=weight_factor[3];
float f4=weight_factor[4];
vec2 f01=Divide(f1-f0, z[1]-z[0]);
vec2 f12=Divide(f2-f1, z[2]-z[1]);
vec2 f23=Divide(f3-f2, z[3]-z[2]);
vec2 f34=Divide(f4-f3, z[4]-z[3]);
vec2 f012=Divide(f12-f01, z[2]-z[0]);
vec2 f123=Divide(f23-f12, z[3]-z[1]);
vec2 f234=Divide(f34-f23, z[4]-z[2]);
vec2 f0123=Divide(f123-f012, z[3]-z[0]);
vec2 f1234=Divide(f234-f123, z[4]-z[1]);
vec2 f01234=Divide(f1234-f0123, z[4]-z[0]);
vec2 polynomial[5];
polynomial[0]=f01234;
polynomial[1]=polynomial[0];
polynomial[0]=f0123-Multiply(polynomial[0], z[3]);
polynomial[2]=polynomial[1];
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[2]);
polynomial[0]=f012-Multiply(polynomial[0], z[2]);
polynomial[3]=polynomial[2];
polynomial[2]=polynomial[1]-Multiply(polynomial[2], z[1]);
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[1]);
polynomial[0]=f01-Multiply(polynomial[0], z[1]);
polynomial[4]=polynomial[3];
polynomial[3]=polynomial[2]-Multiply(polynomial[3], z[0]);
polynomial[2]=polynomial[1]-Multiply(polynomial[2], z[0]);
polynomial[1]=polynomial[0]-Multiply(polynomial[1], z[0]);
polynomial[0]=f0-Multiply(polynomial[0], z[0]);
float weight_sum=0;
weight_sum+=RealPart(Multiply(b[0], polynomial[0]));
weight_sum+=RealPart(Multiply(b[1], polynomial[1]));
weight_sum+=RealPart(Multiply(b[2], polynomial[2]));
weight_sum+=RealPart(Multiply(b[3], polynomial[3]));
weight_sum+=RealPart(Multiply(b[4], polynomial[4]));
// Turn the normalized absorbance into transmittance
return exp(-b_0 * weight_sum);
}