|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						#version 460

						// Sized storage and explicit-width arithmetic types are marked `require`,
						// so compilation fails outright when any of them is unavailable.
						#extension GL_EXT_shader_8bit_storage                     : require
						#extension GL_EXT_shader_16bit_storage                    : require
						#extension GL_EXT_shader_explicit_arithmetic_types         : require
						#extension GL_EXT_shader_explicit_arithmetic_types_int8    : require
						#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
						#extension GL_EXT_shader_explicit_arithmetic_types_float32 : require

						// Enables the #include directives used further down in this file.
						#extension GL_GOOGLE_include_directive                    : enable
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Numeric identifiers that SCALE_MODE can be compared against.
						#define SCALE_1_0X 0
						#define SCALE_1_3X 1
						#define SCALE_1_5X 2
						#define SCALE_2_0X 3

						// Build switches for this variant:
						//  - HISTORY_CATMULL makes LoadWarpedHistory() call LoadHistoryCatmull()
						//    instead of the plain LoadHistory() fetch.
						//  - SCALE_MODE selects the colour-filtering path; only SCALE_2_0X has an
						//    implementation in this file, any other value hits the #error branch.
						#define HISTORY_CATMULL
						#define SCALE_MODE SCALE_2_0X

						// Project headers (not visible here); they presumably provide the type
						// aliases, helper functions and kernel LUT used below.
						#include "typedefs.h"
						#include "common.h"
						#include "kernel_lut.h"
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// ---- Descriptor set 0: sampled inputs ---------------------------------
						// Colour taps fetched with texelFetch in the colour-filtering path.
						layout (set = 0, binding = 0) uniform mediump sampler2D _ColourTex;
						// Motion vectors; only the first two channels are read (LoadMotion).
						layout (set = 0, binding = 1) uniform mediump sampler2D _MotionVectorTex;
						// Previous output, sampled with textureLod at LOD 0 (LoadHistory).
						layout (set = 0, binding = 2) uniform mediump sampler2D _HistoryTex;
						// Four quantised kernel-weight tensors, dequantised in LoadKPNWeight.
						layout (set = 0, binding = 3) uniform lowp    sampler2D _K0Tensor;
						layout (set = 0, binding = 4) uniform lowp    sampler2D _K1Tensor;
						layout (set = 0, binding = 5) uniform lowp    sampler2D _K2Tensor;
						layout (set = 0, binding = 6) uniform lowp    sampler2D _K3Tensor;
						// Quantised temporal parameters; first two channels are read
						// (LoadTemporalParameters).
						layout (set = 0, binding = 7) uniform lowp    sampler2D _TemporalTensor;
						// Encoded nearest-depth offset; first channel is read
						// (LoadNearestDepthOffset).
						layout (set = 0, binding = 8) uniform lowp    sampler2D _NearestDepthCoordTex;

						// ---- Descriptor set 1: output ------------------------------------------
						// Write-only destination image, stored as r11f_g11f_b10f.
						layout (set = 1, binding = 0, r11f_g11f_b10f) uniform writeonly mediump image2D _UpsampledColourOut;
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Per-dispatch constants. Every member carries an explicit byte offset;
						// the last member starts at byte 72 and is four bytes wide.
						layout(push_constant, std430) uniform PushConstants {
						    // 32-bit section.
						    layout(offset =  0) int32_t2 _OutputDims;     // output size in pixels (bounds check, history texel space)
						    layout(offset =  8) int32_t2 _InputDims;      // input size in pixels (tap clamping, input pixel lookup)
						    layout(offset = 16) float2   _InvOutputDims;  // multiplied with output texel coordinates to form UVs
						    layout(offset = 24) float2   _InvInputDims;   // not referenced in this file
						    layout(offset = 32) float2   _Scale;          // not referenced in this file
						    layout(offset = 40) float2   _InvScale;       // maps output texel positions to input tap positions

						    // 16-bit section.
						    layout(offset = 48) int16_t2 _IndexModulo;    // tiling period of the kernel LUT index
						    layout(offset = 52) half2    _QuantParams;    // default dequantisation parameters for all tensors
						    layout(offset = 56) int16_t2 _LutOffset;      // added to the output pixel before LUT tiling
						    layout(offset = 60) half2    _ExposurePair;   // .x / .y aliased below
						    layout(offset = 64) half2    _HistoryPad;     // .x aliased below
						    layout(offset = 68) half2    _MotionThreshPad;// .x aliased below
						    layout(offset = 72) int32_t  _Padding0;
						};

						// Scalar aliases for the packed half2 members above.
						#define _Exposure        _ExposurePair.x
						#define _InvExposure     _ExposurePair.y
						#define _NotHistoryReset _HistoryPad.x
						#define _MotionThresh    _MotionThreshPad.x
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Per-tensor dequantisation parameters. Each one can be pre-defined before
						// this point; when it is not, it falls back to the shared _QuantParams
						// push constant.
						#ifndef _K0QuantParams
						    #define _K0QuantParams _QuantParams.xy
						#endif

						#ifndef _K1QuantParams
						    #define _K1QuantParams _QuantParams.xy
						#endif

						#ifndef _K2QuantParams
						    #define _K2QuantParams _QuantParams.xy
						#endif

						#ifndef _K3QuantParams
						    #define _K3QuantParams _QuantParams.xy
						#endif

						#ifndef _TemporalQuantParams
						    #define _TemporalQuantParams _QuantParams.xy
						#endif
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Reads the first two channels of _MotionVectorTex at integer texel
						// `pixel` (mip level 0) and returns them as a half-precision vector.
						half2 LoadMotion(int32_t2 pixel)
						{
						    half2 motion_vector = half2(texelFetch(_MotionVectorTex, pixel, 0).xy);
						    return motion_vector;
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Samples _HistoryTex at normalised coordinate `uv` with an explicit LOD
						// of zero and returns the colour channels in half precision.
						half3 LoadHistory(float2 uv)
						{
						    half3 history_rgb = half3(textureLod(_HistoryTex, uv, 0.0).xyz);
						    return history_rgb;
						}
					
						
						|  |  | 
					
						
						// Samples the history texture at `uv` with a five-tap approximation of a
						// Catmull-Rom filter (a cross of taps: up, left, centre, right, down; the
						// four corner taps are omitted and the result is renormalised).
						//
						// NOTE(review): merging the two inner weights into one offset tap
						// presumably relies on _HistoryTex being bound with linear filtering —
						// confirm against the sampler state.
						//
						// Returns the filtered colour. If any channel comes out negative, the
						// result is clamped to the min/max of the five fetched taps.
						half3 LoadHistoryCatmull(float2 uv)
						{
						    // Sample position in history texels, and the texel centre at or just
						    // before it on each axis.
						    float2 texel_pos = uv * _OutputDims;
						    float2 anchor    = floor(texel_pos - 0.5) + 0.5;

						    // Fractional distance from the anchor, plus its square and cube.
						    half2 t  = half2(texel_pos - anchor);
						    half2 t2 = t * t;
						    half2 t3 = t2 * t;

						    // Catmull-Rom weights for the four texels along each axis. The third
						    // weight is derived from the others so the four sum to one.
						    half2 w_a = t2 - 0.5HF * (t3 + t);
						    half2 w_b = 1.5HF * t3 - 2.5HF * t2 + 1.0HF;
						    half2 w_d = 0.5HF * (t3 - t2);
						    half2 w_c = (1.0HF - w_a) - w_b - w_d;

						    // The two inner texels are covered by one tap: combined weight, and the
						    // sub-texel offset at which that tap is taken.
						    half2 w_bc  = w_b + w_c;
						    half2 shift = w_c / w_bc;

						    // Separable products for the five taps that are actually fetched.
						    half w_up     = w_bc.x * w_a.y;
						    half w_down   = w_bc.x * w_d.y;
						    half w_left   = w_a.x  * w_bc.y;
						    half w_right  = w_d.x  * w_bc.y;
						    half w_centre = w_bc.x * w_bc.y;

						    // Fetch the taps; alpha is forced to one so that the weighted sum's
						    // alpha ends up holding the total weight.
						    half4 tap_left   = half4(LoadHistory((anchor + float2(-1.0,    shift.y)) * _InvOutputDims), 1.HF);
						    half4 tap_up     = half4(LoadHistory((anchor + float2(shift.x, -1.0))    * _InvOutputDims), 1.HF);
						    half4 tap_centre = half4(LoadHistory((anchor + float2(shift.x, shift.y)) * _InvOutputDims), 1.HF);
						    half4 tap_right  = half4(LoadHistory((anchor + float2(2.0,     shift.y)) * _InvOutputDims), 1.HF);
						    half4 tap_down   = half4(LoadHistory((anchor + float2(shift.x, 2.0))     * _InvOutputDims), 1.HF);

						    // Weighted sum, accumulated in the order up, left, centre, right, down.
						    half4 sum = tap_up * w_up;
						    sum += tap_left   * w_left;
						    sum += tap_centre * w_centre;
						    sum += tap_right  * w_right;
						    sum += tap_down   * w_down;

						    // Per-channel bounds of the fetched neighbourhood.
						    half3 lo = min(tap_right.rgb, tap_down.rgb);
						    lo = min(tap_centre.rgb, lo);
						    lo = min(tap_left.rgb, lo);
						    lo = min(tap_up.rgb, lo);

						    half3 hi = max(tap_right.rgb, tap_down.rgb);
						    hi = max(tap_centre.rgb, hi);
						    hi = max(tap_left.rgb, hi);
						    hi = max(tap_up.rgb, hi);

						    // Renormalise by the total weight carried in alpha.
						    half3 result = sum.rgb * rcp(sum.w);

						    // The filter has negative lobes; only when a channel actually goes
						    // negative is the result clamped to the neighbourhood bounds.
						    if (any(lessThan(result, half3(0.HF))))
						    {
						        result = clamp(result, lo, hi);
						    }
						    return result;
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Reads the first channel of _NearestDepthCoordTex at `pixel`, converts
						// the normalised value to an integer code (scale by 255, add 0.5, then
						// truncate) and passes that code to DecodeNearestDepthCoord().
						int32_t2 LoadNearestDepthOffset(int32_t2 pixel)
						{
						    half normalised = half(texelFetch(_NearestDepthCoordTex, pixel, 0).x);

						    // Adding 0.5 before the integer conversion rounds to nearest.
						    int32_t offset_code = int32_t(normalised * 255.0 + 0.5);

						    return DecodeNearestDepthCoord(offset_code);
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Reprojects `uv` by the motion vector and fetches the history colour.
						//
						//   uv          - normalised coordinate of the current output pixel
						//   input_pixel - matching integer texel in the input-resolution textures
						//   onscreen    - out: 1 when the reprojected coordinate lies in [0, 1) on
						//                 both axes, otherwise 0
						//
						// Returns the history colour multiplied by _Exposure and passed through
						// SafeColour().
						half3 LoadWarpedHistory(float2 uv, int32_t2 input_pixel, out half onscreen)
						{
						    // Motion is read at the texel selected by the nearest-depth offset
						    // rather than at input_pixel itself.
						    int32_t2 depth_offset = LoadNearestDepthOffset(input_pixel);
						    half2 mv = LoadMotion(input_pixel + depth_offset);

						    // Zero the vector unless its squared length, measured in output
						    // pixels, exceeds _MotionThresh.
						    half2 mv_px       = mv * half2(_OutputDims);
						    half  keep_motion = half(dot(mv_px, mv_px) > _MotionThresh);
						    mv *= keep_motion;

						    float2 prev_uv = uv - float2(mv);

						    bool inside = all(greaterThanEqual(prev_uv, float2(0.0))) &&
						                  all(lessThan(prev_uv, float2(1.0)));
						    onscreen = half(inside);

						#ifdef HISTORY_CATMULL
						    half3 fetched = LoadHistoryCatmull(prev_uv);
						#else
						    half3 fetched = LoadHistory(prev_uv);
						#endif

						    return SafeColour(fetched * _Exposure);
						}
					
						
						|  |  | 
					
						
						|  | #if SCALE_MODE == SCALE_2_0X | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Samples the four kernel tensors at `uv`, dequantises them and returns
						// the four weights belonging to LUT entry `lut_idx`.
						//
						// Channel layout, as read by the swizzles below: index 0 takes the .x/.z
						// channels of K0 and K2, index 1 the .x/.z channels of K1 and K3, index 2
						// the .y/.w channels of K0 and K2, and any other index the .y/.w channels
						// of K1 and K3.
						half4 LoadKPNWeight(float2 uv, int16_t lut_idx)
						{
						    // All four tensors are fetched regardless of which index is selected.
						    half4 t0 = Dequantize(half4(textureLod(_K0Tensor, uv, 0)), _K0QuantParams);
						    half4 t1 = Dequantize(half4(textureLod(_K1Tensor, uv, 0)), _K1QuantParams);
						    half4 t2 = Dequantize(half4(textureLod(_K2Tensor, uv, 0)), _K2QuantParams);
						    half4 t3 = Dequantize(half4(textureLod(_K3Tensor, uv, 0)), _K3QuantParams);

						    if (lut_idx == 0)
						    {
						        return half4(t0.x, t2.x, t0.z, t2.z);
						    }
						    if (lut_idx == 1)
						    {
						        return half4(t1.x, t3.x, t1.z, t3.z);
						    }
						    if (lut_idx == 2)
						    {
						        return half4(t0.y, t2.y, t0.w, t2.w);
						    }
						    return half4(t1.y, t3.y, t1.w, t3.w);
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Fetches one input colour texel, applies _Exposure and SafeColour(), and
						// appends an alpha of one.
						half4 LoadExposedColourTap(int16_t2 coord)
						{
						    half3 exposed = half3(texelFetch(_ColourTex, coord, 0).rgb) * half3(_Exposure);
						    return half4(SafeColour(exposed), 1.HF);
						}

						// Filters the input colour for one output pixel using four taps from the
						// kernel LUT, weighted by the per-pixel kernel weights.
						//
						//   output_pixel - integer coordinate of the output pixel
						//   uv           - normalised coordinate used to sample the kernel tensors
						//   col_to_accum - out: the CENTER_TAP colour (alpha 1) when that tap's LUT
						//                  offset is (0, 0); all zeros otherwise
						//
						// Returns the weighted tap colour divided by the sum of the weights.
						half3 LoadAndFilterColour(int32_t2 output_pixel, float2 uv, out half4 col_to_accum)
						{
						    // Centre of the output texel.
						    float2 texel_centre = float2(output_pixel) + 0.5f;

						    // Tile the output pixel into the LUT; rows are _IndexModulo.x wide.
						    int16_t2 tile_pos = (int16_t2(output_pixel) + _LutOffset) % _IndexModulo;
						    int16_t  lut_idx  = tile_pos.y * _IndexModulo.x + tile_pos.x;
						    KernelTile tile   = kernelLUT[lut_idx];

						    // Weights are clamped to [EPS, 1], which keeps their sum positive.
						    half4 weights = clamp(LoadKPNWeight(uv, lut_idx), half4(EPS), half4(1.HF));

						    // Map each tap offset into input space and clamp it to the input image.
						    float4 src_x = (float4(texel_centre.x) + float4(tile.dx)) * _InvScale.x;
						    float4 src_y = (float4(texel_centre.y) + float4(tile.dy)) * _InvScale.y;
						    int16_t4 tap_x = clamp(int16_t4(floor(src_x)), int16_t4(0), int16_t4(_InputDims.x - 1));
						    int16_t4 tap_y = clamp(int16_t4(floor(src_y)), int16_t4(0), int16_t4(_InputDims.y - 1));

						    // One matrix column per tap: rgb is the colour, w is 1.
						    f16mat4x4 taps;
						    taps[0] = LoadExposedColourTap(int16_t2(tap_x.x, tap_y.x));
						    taps[1] = LoadExposedColourTap(int16_t2(tap_x.y, tap_y.y));
						    taps[2] = LoadExposedColourTap(int16_t2(tap_x.z, tap_y.z));
						    taps[3] = LoadExposedColourTap(int16_t2(tap_x.w, tap_y.w));

						    // 1 when the centre tap has a zero offset in the LUT, else 0.
						    half is_aligned = half(tile.dx[CENTER_TAP] == 0 && tile.dy[CENTER_TAP] == 0);
						    col_to_accum = taps[CENTER_TAP] * is_aligned;

						    // Matrix-vector product: rgb is the weighted colour sum, w the weight sum.
						    half4 weighted = taps * weights;

						    return half3(weighted.rgb * rcp(weighted.w));
						}
					
						
						|  | #else | 
					
						
						|  | #error "Unsupported SCALE_MODE" | 
					
						
						|  | #endif // SCALE_MODE == SCALE_2_0X | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Samples and dequantises the first two channels of _TemporalTensor at
						// `uv`.
						//
						//   theta - out: first channel multiplied by _NotHistoryReset
						//   alpha - out: second channel remapped as value * 0.35 + 0.05
						void LoadTemporalParameters(float2 uv, out half theta, out half alpha)
						{
						    half2 raw = half2(textureLod(_TemporalTensor, uv, 0).rg);
						    half2 params = Dequantize(raw, _TemporalQuantParams);

						    theta = params.x * _NotHistoryReset;
						    alpha = params.y * 0.35HF + 0.05HF;
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// Passes `colour` through SafeColour() and stores it, with alpha set to
						// one, at `pixel` in _UpsampledColourOut.
						void WriteUpsampledColour(int32_t2 pixel, half3 colour)
						{
						    imageStore(_UpsampledColourOut, pixel, half4(SafeColour(colour), 1.0));
						}
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						// One invocation per output pixel, in 16x16 work groups.
						layout(local_size_x = 16, local_size_y = 16) in;

						// Entry point: blends reprojected history with the filtered current-frame
						// colour for one output pixel and writes the result.
						void main()
						{
						    int32_t2 dst_pixel = int32_t2(gl_GlobalInvocationID.xy);

						    // Invocations outside the output image do nothing.
						    if (any(greaterThanEqual(dst_pixel, _OutputDims)))
						    {
						        return;
						    }

						    // Normalised coordinate of the output texel centre and the input texel
						    // that contains it.
						    float2   uv        = (float2(dst_pixel) + 0.5) * _InvOutputDims;
						    int32_t2 src_pixel = int32_t2(uv * _InputDims);

						    // Reprojected history and its on-screen flag.
						    half  on_screen;
						    half3 prev_colour = LoadWarpedHistory(uv, src_pixel, on_screen);

						    // Filtered current-frame colour, plus the centre tap when it lines up.
						    half4 centre_tap;
						    half3 filtered = LoadAndFilterColour(dst_pixel, uv, centre_tap);

						    // Per-pixel blend factors.
						    half theta, alpha;
						    LoadTemporalParameters(uv, theta, alpha);

						    // Move from the filtered colour towards history; history contributes
						    // nothing when the reprojected coordinate is off screen.
						    half  history_weight = theta * on_screen;
						    half3 rectified = lerp(filtered, prev_colour, history_weight);

						    // In tonemapped space, blend towards the centre tap; its alpha is zero
						    // when no aligned tap exists, which leaves `rectified` unchanged.
						    half  accum_weight = alpha * centre_tap.a;
						    half3 blended = lerp(Tonemap(rectified), Tonemap(centre_tap.rgb), accum_weight);

						    // Undo the tonemap and the exposure scaling before storing.
						    half3 linear_out = InverseTonemap(blended) * _InvExposure;
						    WriteUpsampledColour(dst_pixel, linear_out);
						}
					
						
						|  |  |