#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Builtin/BuiltinData.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#pragma multi_compile _ ENABLE_MAX_BLENDING
#pragma kernel KMain

// Inputs: CoC of the current frame and the CoC history that gets reprojected.
TEXTURE2D_X(_InputCoCTexture);
TEXTURE2D_X(_InputHistoryCoCTexture);

// Output: CoC after blending with the reprojected history.
RW_TEXTURE2D_X(float, _OutputCoCTexture);

// The max-blending variant samples with a point filter, the default variant with a bilinear one.
#ifdef ENABLE_MAX_BLENDING
SAMPLER(sampler_PointClamp);
#define SAMPLER_STATE sampler_PointClamp
#else
SAMPLER(sampler_LinearClamp);
#define SAMPLER_STATE sampler_LinearClamp
#endif

CBUFFER_START(cb0)
float4 _Params;
CBUFFER_END

// _Params.x  : lerp factor between the current CoC (0) and the history CoC (1)
// _Params.yz : scale applied to the reprojected UV before sampling the history texture
#define MotionBlending              _Params.x
#define ScreenToTargetScaleHistory  _Params.yz

#define GROUP_SIZE 8

// Returns whichever of the two values has the larger magnitude, keeping its sign.
// When both magnitudes are equal, b is returned.
float MaxOp(float a, float b)
{
    return abs(a) > abs(b) ? a : b;
}

// Reprojects the CoC history onto the current frame and blends it with the current CoC.
// One thread processes one pixel; dispatchThreadId.z is forwarded to UNITY_XR_ASSIGN_VIEW_INDEX.
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KMain(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    PositionInputs posInputs = GetPositionInput(float2(dispatchThreadId.xy), _ScreenSize.zw, uint2(GROUP_SIZE, GROUP_SIZE));
    float2 uv = posInputs.positionNDC;

    // Neighbor coordinates are computed with signed integers and clamped to the valid pixel range.
    // Subtracting from the unsigned positionSS would wrap around on the first column/row, and the
    // +1 offsets would step past the last column/row; in both cases the loaded value is not a valid
    // CoC and would corrupt the closest-point selection and the neighborhood clamp below.
    // NOTE(review): assumes _ScreenSize.xy is the viewport size in pixels (its .zw is passed above
    // as the inverse size) - confirm against ShaderVariables.hlsl.
    const int2 centerSS = int2(posInputs.positionSS);
    const int2 minSS    = int2(0, 0);
    const int2 maxSS    = int2(_ScreenSize.xy) - 1;

#if 0
    // Using Gather4 here doesn't show any performance gain (profiled on Nvidia) compared to doing
    // four loads so it's disabled for the sake of maximum compatibility
    float4 cocTL = GATHER_TEXTURE2D_X(_InputCoCTexture, sampler_LinearClamp, uv * _RTHandleScale.xy - _ScreenSize.zw * 0.5); // Top-Left
    float4 cocBR = GATHER_TEXTURE2D_X(_InputCoCTexture, sampler_LinearClamp, uv * _RTHandleScale.xy + _ScreenSize.zw * 0.5); // Bottom-Right
    float coc1 = cocTL.x;
    float coc2 = cocTL.z;
    float coc3 = cocBR.x;
    float coc4 = cocBR.z;
#else
    const int2 leftSS   = clamp(centerSS + int2(-1,  0), minSS, maxSS);
    const int2 topSS    = clamp(centerSS + int2( 0, -1), minSS, maxSS);
    const int2 bottomSS = clamp(centerSS + int2( 0,  1), minSS, maxSS);
    const int2 rightSS  = clamp(centerSS + int2( 1,  0), minSS, maxSS);

    float coc1 = LOAD_TEXTURE2D_X(_InputCoCTexture, leftSS).x;   // Left
    float coc2 = LOAD_TEXTURE2D_X(_InputCoCTexture, topSS).x;    // Top
    float coc3 = LOAD_TEXTURE2D_X(_InputCoCTexture, bottomSS).x; // Bottom
    float coc4 = LOAD_TEXTURE2D_X(_InputCoCTexture, rightSS).x;  // Right
#endif

    // Dejittered center sample
    float coc0 = SAMPLE_TEXTURE2D_X_LOD(_InputCoCTexture, SAMPLER_STATE, ClampAndScaleUVForBilinear(uv - _TaaJitterStrength.zw), 0.0).x;

    // CoC dilation: determine the closest point in the four neighbors.
    // closest.xy is the pixel offset of the selected sample, closest.z its CoC (the smallest one).
    float3 closest = float3(0.0, 0.0, coc0);
    closest = coc1 < closest.z ? float3(-1.0,  0.0, coc1) : closest;
    closest = coc2 < closest.z ? float3( 0.0, -1.0, coc2) : closest;
    closest = coc3 < closest.z ? float3( 0.0,  1.0, coc3) : closest;
    closest = coc4 < closest.z ? float3( 1.0,  0.0, coc4) : closest;

    // Sample the history buffer with the motion vector at the closest point.
    // The coordinate is clamped on both sides so the load never leaves the valid pixel range.
    const int2 closestSS = clamp(centerSS + int2(closest.xy), minSS, maxSS);
    float2 motionVector;
    DecodeMotionVector(LOAD_TEXTURE2D_X(_CameraMotionVectorsTexture, closestSS), motionVector);
    float cocHis = SAMPLE_TEXTURE2D_X_LOD(_InputHistoryCoCTexture, SAMPLER_STATE, (uv - motionVector) * ScreenToTargetScaleHistory.xy, 0.0).x;

    // Neighborhood clamping: restrict the history to the range spanned by the current samples
#ifdef ENABLE_MAX_BLENDING
    float cocMin = closest.z;
#else
    float cocMin = min(closest.z, 1.0f);
#endif
    float cocMax = Max3(Max3(coc0, coc1, coc2), coc3, coc4);
    cocHis = clamp(cocHis, cocMin, cocMax);

    // Blend with the history
    float outputCoC = lerp(coc0, cocHis, MotionBlending);

#ifdef ENABLE_MAX_BLENDING
    // When the current and history values have opposite signs, do not interpolate between them:
    // keep the one with the larger magnitude instead.
    if (coc0 * cocHis < 0.0f)
    {
        outputCoC = MaxOp(cocHis, coc0);
    }
#endif

#if defined(SHADER_API_XBOXONE)
    // In some cases, it looks like the compiler reorganizes code so that we end up at the end with a NaN in the history
    // (disabling compiler optimizations gets rid of the NaN).
    // As a failsafe, we capture the situation here and reject history when that is the case.
    if (IsNaN(cocHis) || IsInf(cocHis))
        outputCoC = coc0;
#endif

    // Store the result for this pixel
    _OutputCoCTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = outputCoC;
}