2021-09-09 20:42:29 -04:00

103 lines
4.0 KiB
Plaintext

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Builtin/BuiltinData.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
// Only compile this shader for the graphics APIs listed here.
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
// Two variants are built: one with ENABLE_MAX_BLENDING defined and one without.
#pragma multi_compile _ ENABLE_MAX_BLENDING
// Compute entry point.
#pragma kernel KMain
// CoC of the current frame; KMain only reads its .x channel.
TEXTURE2D_X(_InputCoCTexture);
// CoC history that KMain samples at the motion-compensated position; only .x is read.
TEXTURE2D_X(_InputHistoryCoCTexture);
// Single-channel destination written once per thread by KMain.
RW_TEXTURE2D_X(float, _OutputCoCTexture);
// SAMPLER_STATE is the sampler used for the dejittered center tap and for the history tap:
// point-clamp in the ENABLE_MAX_BLENDING variant, linear-clamp otherwise.
#ifdef ENABLE_MAX_BLENDING
SAMPLER(sampler_PointClamp);
#define SAMPLER_STATE sampler_PointClamp
#else
SAMPLER(sampler_LinearClamp);
#define SAMPLER_STATE sampler_LinearClamp
#endif
CBUFFER_START(cb0)
// Packed kernel parameters:
//   x  : lerp factor between the current and the history CoC (see MotionBlending)
//   yz : scale applied to the UV used to sample the history (see ScreenToTargetScaleHistory)
//   w  : not referenced by the code visible in this file
float4 _Params;
CBUFFER_END
// Named accessors for the packed _Params components.
#define MotionBlending _Params.x
#define ScreenToTargetScaleHistory _Params.yz
// Edge length of a thread group: KMain runs GROUP_SIZE x GROUP_SIZE x 1 threads per group.
#define GROUP_SIZE 8
// Picks the argument with the larger magnitude and returns it with its sign intact.
// When the magnitudes are equal (or the comparison fails), b is returned.
float MaxOp(float a, float b)
{
    const float magnitudeA = abs(a);
    const float magnitudeB = abs(b);

    if (magnitudeA > magnitudeB)
    {
        return a;
    }
    return b;
}
// Temporal stabilization of the CoC buffer.
//
// For each pixel the kernel:
//   1. reads the CoC of the four direct neighbors plus a dejittered center sample,
//   2. picks the sample with the smallest CoC value ("closest") and fetches the motion vector there,
//   3. samples the history CoC at the motion-compensated UV,
//   4. clamps the history to the neighborhood range and blends it with the current CoC,
//   5. writes the result to _OutputCoCTexture.
//
// dispatchThreadId.xy is the pixel coordinate; dispatchThreadId.z is forwarded to
// UNITY_XR_ASSIGN_VIEW_INDEX.
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KMain(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    PositionInputs posInputs = GetPositionInput(float2(dispatchThreadId.xy), _ScreenSize.zw, uint2(GROUP_SIZE, GROUP_SIZE));
    float2 uv = posInputs.positionNDC;

    // Neighbor coordinates are built in signed space and clamped to the viewport.
    // Subtracting 1 from the unsigned positionSS wraps around on the first row/column, and
    // adding 1 on the last row/column leaves the viewport; both produce out-of-range loads
    // whose result is not guaranteed to be the same on every target API.
    const int2 centerSS = int2(posInputs.positionSS);
    const int2 minSS = int2(0, 0);
    const int2 maxSS = max(int2(_ScreenSize.xy) - 1, minSS);

    // Four plain loads are used rather than Gather4: Gather4 showed no performance gain when
    // profiled on Nvidia, so loads are kept for the sake of maximum compatibility.
    float coc1 = LOAD_TEXTURE2D_X(_InputCoCTexture, clamp(centerSS + int2(-1,  0), minSS, maxSS)).x; // Left
    float coc2 = LOAD_TEXTURE2D_X(_InputCoCTexture, clamp(centerSS + int2( 0, -1), minSS, maxSS)).x; // Top
    float coc3 = LOAD_TEXTURE2D_X(_InputCoCTexture, clamp(centerSS + int2( 0,  1), minSS, maxSS)).x; // Bottom
    float coc4 = LOAD_TEXTURE2D_X(_InputCoCTexture, clamp(centerSS + int2( 1,  0), minSS, maxSS)).x; // Right

    // Dejittered center sample
    float coc0 = SAMPLE_TEXTURE2D_X_LOD(_InputCoCTexture, SAMPLER_STATE, ClampAndScaleUVForBilinear(uv - _TaaJitterStrength.zw), 0.0).x;

    // CoC dilation: determine the closest point among the center and its four neighbors.
    // closest.xy holds the pixel offset of the winning sample, closest.z its CoC.
    float3 closest = float3(0.0, 0.0, coc0);
    closest = coc1 < closest.z ? float3(-1.0,  0.0, coc1) : closest;
    closest = coc2 < closest.z ? float3( 0.0, -1.0, coc2) : closest;
    closest = coc3 < closest.z ? float3( 0.0,  1.0, coc3) : closest;
    closest = coc4 < closest.z ? float3( 1.0,  0.0, coc4) : closest;

    // Sample the history buffer with the motion vector at the closest point.
    // The same viewport clamp as the CoC taps is applied so that the motion vector is read
    // from the pixel the winning CoC actually came from.
    float2 motionVector;
    DecodeMotionVector(LOAD_TEXTURE2D_X(_CameraMotionVectorsTexture, clamp(centerSS + int2(closest.xy), minSS, maxSS)), motionVector);
    float cocHis = SAMPLE_TEXTURE2D_X_LOD(_InputHistoryCoCTexture, SAMPLER_STATE, (uv - motionVector) * ScreenToTargetScaleHistory.xy, 0.0).x;

    // Neighborhood clamping: keep the history inside [cocMin, cocMax] of the current frame.
    // NOTE(review): the variant without ENABLE_MAX_BLENDING additionally caps the lower bound
    // at 1.0; the rationale is not visible in this file.
#ifdef ENABLE_MAX_BLENDING
    float cocMin = closest.z;
#else
    float cocMin = min(closest.z, 1.0f);
#endif
    float cocMax = Max3(Max3(coc0, coc1, coc2), coc3, coc4);
    cocHis = clamp(cocHis, cocMin, cocMax);

    // Blend with the history
    float outputCoC = lerp(coc0, cocHis, MotionBlending);

#ifdef ENABLE_MAX_BLENDING
    // When current and history CoC have opposite signs, skip the blend and keep whichever
    // has the larger magnitude.
    if (coc0 * cocHis < 0.0f)
    {
        outputCoC = MaxOp(cocHis, coc0);
    }
#endif

#if defined(SHADER_API_XBOXONE)
    // In some cases, it looks like the compiler reorganizes code so that we end up with a NaN in
    // the history (disabling compiler optimizations gets rid of the NaN).
    // As a failsafe, we capture the situation here and reject history when that is the case.
    if (IsNaN(cocHis) || IsInf(cocHis))
        outputCoC = coc0;
#endif

    // Store the stabilized CoC for this pixel.
    _OutputCoCTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = outputCoC;
}