#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/PostProcessDefines.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/DepthOfFieldCommon.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl"

#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#pragma kernel KMain

#pragma multi_compile _ ENABLE_ALPHA

CBUFFER_START(cb0)
float4 _Params;
float4 _Params2;
CBUFFER_END

// Named views onto the packed constant-buffer parameters
#define NumRings            _Params.x
#define MaxCoCRadius        _Params.y
#define Anamorphism         _Params.z
#define MaxCoCMipLevel      _Params2.x
#define MaxColorMip         _Params2.w

// Input textures
TEXTURE2D_X(_InputTexture);
TEXTURE2D_X(_InputCoCTexture);
TEXTURE2D_X(_TileList);

// Output texture
RW_TEXTURE2D_X(CTYPE, _OutputTexture);

// Divisor applied to pixel coordinates when addressing _TileList
#define TILE_RES  8u

// A set of Defines to fine-tune the algorithm
#define ADAPTIVE_SAMPLING
#define STRATIFY
#define FORCE_POINT_SAMPLING
#define RING_OCCLUSION
#define PER_TILE_BG_FG_CLASSIFICATION
#define PHYSICAL_WEIGHTS

#define GROUP_RES  8u
#define GROUP_SIZE (GROUP_RES * GROUP_RES)

// Running sums for one layer (or one ring of a layer).
struct AccumData
{
    float4 color;       // xyz: weighted color sum, w: sum of weights
    float  alpha;       // weighted alpha sum (only written when ENABLE_ALPHA is set)
    float  destAlpha;   // remaining transmittance used by the front-to-back path
    float  CoC;         // weighted sum of |CoC|
};

// Per-pixel gather settings derived from the tile list.
struct DoFTile
{
    float maxRadius;    // search radius of the gather
    float layerBorder;  // CoC value at which the bg and fg layers are split
    int   numSamples;   // number of rings (and samples per ring) to gather
};

// One color + CoC tap.
struct SampleData
{
    CTYPE color;
    float CoC;
};

// Returns the x channel of the CoC texture at the given pixel position.
float GetCoCRadius(int2 positionSS)
{
    float CoCRadius = LOAD_TEXTURE2D_X(_InputCoCTexture, positionSS).x;
    return CoCRadius;
}

// Fetches a color tap. sampleTC is in pixels.
// With FORCE_POINT_SAMPLING defined (as it is above) the texel is loaded directly and lod is ignored.
CTYPE GetColorSample(float2 sampleTC, float lod)
{
#ifndef FORCE_POINT_SAMPLING
    return SAMPLE_TEXTURE2D_X_LOD(_InputTexture, s_trilinear_clamp_sampler, ClampAndScaleUVForBilinear(sampleTC * _ScreenSize.zw), lod).CTYPE_SWIZZLE;
#else
    return LOAD_TEXTURE2D_X(_InputTexture, sampleTC).CTYPE_SWIZZLE;
#endif
}

// Weight of a sample with the given CoC radius.
// With PHYSICAL_WEIGHTS this is the area of a disc of radius 0.7071 divided by the area of the
// CoC disc; the CoC radius is clamped to at least 0.7071, so the weight never exceeds 1.
float GetSampleWeight(float cocRadius)
{
#ifdef PHYSICAL_WEIGHTS
    float pixelRadius = 0.7071f;
    float radius = max(pixelRadius, abs(cocRadius));
    return PI * pixelRadius * pixelRadius * rcp(PI * radius * radius);
#else
    return 1.0f;
#endif
}

// Fills every field of tileData from the tile list entry covering sampleTC and from the center sample.
void LoadTileData(float2 sampleTC, SampleData centerSample, inout DoFTile tileData)
{
    float4 cocRanges = LOAD_TEXTURE2D_X(_TileList, sampleTC / TILE_RES);

    // Note: for the far-field, we don't need to search further than the central CoC.
    // If there is a larger CoC that overlaps the central pixel then it will have greater depth
    tileData.maxRadius = max(2 * abs(centerSample.CoC), -cocRanges.w);

    // Detect tiles that need more samples
    tileData.numSamples = NumRings;
    tileData.numSamples = tileData.maxRadius > 0 ? tileData.numSamples : 0;

#ifdef ADAPTIVE_SAMPLING
    // When maxRadius is 0 the sample count is already 0, so the outcome of this comparison does not matter.
    float minRadius = min(cocRanges.x, -cocRanges.z);
    tileData.numSamples = (minRadius / tileData.maxRadius < 0.1) ? tileData.numSamples * 4 : tileData.numSamples;
#endif

    // By default split the fg and bg layers at 0
    tileData.layerBorder = 0;
#ifdef PER_TILE_BG_FG_CLASSIFICATION
    if (cocRanges.w != 0 && cocRanges.y == 0)
    {
        // If there is no far field, then compute a splitting threshold that puts fg and bg in the near field
        // We do it this way because we don't want any layers that span both the near and far field (CoC < 0 & CoC > 0)
        tileData.layerBorder = (/*cocRanges.z*/ 0 + cocRanges.w) / 2;
    }
#endif
}

// Unit-circle direction for the given angle, scaled by (1 - Anamorphism) in x and (1 + Anamorphism) in y.
float2 PointInCircle(float angle)
{
    return float2(cos(angle), sin(angle)) * float2 (1 - Anamorphism, 1 + Anamorphism);
}

// Normalizes the accumulated color (and alpha, when ENABLE_ALPHA is set) by the accumulated weight.
// Falls back to defaultValue when no weight was accumulated. outColor.w itself is left untouched.
void ResolveColorAndAlpha(inout float4 outColor, inout float outAlpha, CTYPE defaultValue)
{
    outColor.xyz = outColor.w > 0 ? outColor.xyz / outColor.w : defaultValue.xyz;
#ifdef ENABLE_ALPHA
    outAlpha = outColor.w > 0 ? outAlpha / outColor.w : defaultValue.w;
#endif
}

// Adds one weighted sample to the running sums.
void AccumulateSample(SampleData sampleData, float weight, inout AccumData accumData)
{
    accumData.color += float4(sampleData.color.xyz * weight, weight);
    accumData.CoC += abs(sampleData.CoC) * weight;
#ifdef ENABLE_ALPHA
    accumData.alpha += sampleData.color.w * weight;
#endif
}

// Blends the center sample over the accumulated data, using its sample weight as the blend factor.
void AccumulateCenterSample(SampleData centerSample, inout AccumData accumData)
{
    float centerAlpha = GetSampleWeight(centerSample.CoC);

    accumData.color.xyz = accumData.color.xyz * (1 - centerAlpha) + centerAlpha * centerSample.color.xyz;
    accumData.color.w = accumData.color.w * (1 - centerAlpha) + centerAlpha;
#ifdef ENABLE_ALPHA
    accumData.alpha = accumData.alpha * (1 - centerAlpha) + centerAlpha * centerSample.color.w;
#endif
}

// Distributes a pair of samples between the current ring accumulator (ringAccum) and the layer
// accumulator (accumData) for either the foreground or the background layer.
// centerSample is currently not read by this function.
void AccumulateSampleData(SampleData sampleData[2], SampleData centerSample, float sampleRadius, float borderRadius, float layerBorder, const bool isForeground, inout AccumData ringAccum, inout AccumData accumData)
{
    UNITY_UNROLL
    for (int k = 0; k < 2; k++)
    {
        // saturate allows a small overlap between the layers, this helps conceal any continuity artifacts due to differences in sorting
        float w = saturate(sampleData[k].CoC - layerBorder);
        float layerWeight = isForeground ? 1.0 - w : w;

        float CoC = abs(sampleData[k].CoC);

        float sampleWeight = GetSampleWeight(CoC);
        // A sample only contributes when its CoC is at least as large as its distance to the center
        //float visibility = saturate(CoC - sampleRadius);
        float visibility = step(0.0, CoC - sampleRadius);

        // Check if the sample belongs to the current bucket
        float borderWeight = saturate(CoC - borderRadius);

#ifndef RING_OCCLUSION
        borderWeight = 0;
#endif
        float weight = layerWeight * visibility * sampleWeight;
        AccumulateSample(sampleData[k], borderWeight * weight, accumData);
        AccumulateSample(sampleData[k], (1.0 - borderWeight) * weight, ringAccum);

#if 0 // Disabled for now due to artifacts
        // Mirroring improves the near blur, but since the background reconstruction is not perfect, we limit the radius it is applied
        const float mirrorLimit = 2;
        const float radius = sampleRadius - CoC;
        if (ringData.isForeground && visibility == 0 && radius < mirrorLimit)
        {
            int pairIndex = k == 0 ? 1 : 0;
            float mirrorWeight = 1 ;
            //AccumulateSample(sampleData[pairIndex], mirrorWeight * sampleWeight, ringAccum);
        }
#endif
    }
}

// Merges the data gathered for one ring into the layer accumulator.
// The near-field path blends front-to-back using destAlpha, the other path blends back-to-front.
void AccumulateRingData(float numSamples, const bool isNearField, AccumData ringData, inout AccumData accumData)
{
    if (ringData.color.w == 0)
    {
        // nothing to accumulate
        return;
    }

    float ringAvgCoC = ringData.color.w > 0 ? ringData.CoC * rcp(ringData.color.w) : 0;
    float accumAvgCoC = accumData.color.w > 0 ? accumData.CoC * rcp(accumData.color.w) : 0;

    float ringOcclusion = saturate(accumAvgCoC - ringAvgCoC);
    //float ringOpacity = 1.0 - saturate(ringData.coverage * rcp(numSamples));
    float normCoC = ringData.CoC * rcp(ringData.color.w);
    float ringOpacity = saturate(ringData.color.w * rcp(GetSampleWeight(normCoC)) * rcp(numSamples));

    // Near-field is the region where CoC > 0. In this case sorting is reversed.
    if (isNearField)
    {
        const float occlusionWeight = 0.5;
        float accumOcclusion = occlusionWeight * (1 - saturate(ringAvgCoC - accumAvgCoC));

        // front-to-back blending
        float alpha = 1.0;
#ifdef RING_OCCLUSION
        alpha = accumData.destAlpha;
#endif
        accumData.color += alpha * ringData.color;
        accumData.alpha += alpha * ringData.alpha;
        accumData.CoC += alpha * ringData.CoC;

        accumData.destAlpha *= saturate(1 - 2 * ringOpacity * accumOcclusion);
    }
    else
    {
        // back-to-front blending
        float alpha = 0.0;
#ifdef RING_OCCLUSION
        alpha = (accumData.color.w > 0.0) ? ringOpacity * ringOcclusion : 1.0;
#endif
        accumData.color = accumData.color * (1.0 - alpha) + ringData.color;
        accumData.alpha = accumData.alpha * (1.0 - alpha) + ringData.alpha;
        accumData.CoC = accumData.CoC * (1.0 - alpha) + ringData.CoC;
    }
}

// Gathers the depth of field samples ring by ring (outermost ring first) into a background and a
// foreground layer, then blends the two layers into the final color and alpha.
void DoFGatherRings(PositionInputs posInputs, DoFTile tileData, SampleData centerSample, out float4 color, out float alpha)
{
    AccumData bgAccumData, fgAccumData;
    ZERO_INITIALIZE(AccumData, bgAccumData);
    ZERO_INITIALIZE(AccumData, fgAccumData);

    // Layers in the near field are using front-to-back accumulation (so start with a dest alpha of 1, ignored if in the far field)
    fgAccumData.destAlpha = 1;
    bgAccumData.destAlpha = 1;
    const bool isBgLayerInNearField = tileData.layerBorder < 0;

    // Note: when numSamples is 0 (or halfSamples is 0) the reciprocals below are not finite, but
    // dR, dAng and lod are only consumed inside the loops, which do not execute in that case.
    float dR = rcp((float)tileData.numSamples);
    int blueNoiseOffset = _TaaFrameInfo.w != 0.0 ? _TaaFrameInfo.z : 0;
    int halfSamples = tileData.numSamples >> 1;
    float dAng = PI * rcp(halfSamples);

    // Select the appropriate mip to sample based on the amount of samples. Lower sample counts will be faster at the cost of "leaking"
    float lod = min(MaxColorMip, log2(2 * PI * tileData.maxRadius * rcp(tileData.numSamples)));

    // Gather the DoF samples
    for (int ring = tileData.numSamples - 1; ring >= 0; ring--)
    {
        AccumData bgRingData, fgRingData;
        ZERO_INITIALIZE(AccumData, bgRingData);
        ZERO_INITIALIZE(AccumData, fgRingData);

        for (int i = 0; i < halfSamples; i++)
        {
            float r1 = GetBNDSequenceSample(posInputs.positionSS.xy, ring * tileData.numSamples + i + blueNoiseOffset, 0);
            float r2 = GetBNDSequenceSample(posInputs.positionSS.xy, ring * tileData.numSamples + i + blueNoiseOffset, 1);

#ifdef STRATIFY
            float sampleRadius = sqrt((ring + r1) * dR) * tileData.maxRadius;
#else
            float sampleRadius = sqrt(r2) * tileData.maxRadius;
#endif
            float borderRadius = sqrt((ring + 1.5) * dR) * tileData.maxRadius;

            // Each iteration takes two taps that are PI radians apart
            SampleData sampleData[2];
            const float offset[2] = {0, PI};

            UNITY_UNROLL
            for (int j = 0; j < 2; j++)
            {
#ifdef STRATIFY
                float2 sampleTC = posInputs.positionSS + sampleRadius * PointInCircle(offset[j] + (i + r2) * dAng);
#else
                float2 sampleTC = posInputs.positionSS + sampleRadius * PointInCircle(offset[j] + r2 * PI);
#endif
                sampleData[j].color = GetColorSample(sampleTC, lod);
                sampleData[j].CoC = GetCoCRadius(sampleTC);
            }

            // The layer split is offset per sample by the random value r2
            const float borderFudgingFactor = 9;
            float layerBorder = min(0, tileData.layerBorder - borderFudgingFactor * r2);

            AccumulateSampleData(sampleData, centerSample, sampleRadius, borderRadius, layerBorder, false, bgRingData, bgAccumData);
            AccumulateSampleData(sampleData, centerSample, sampleRadius, borderRadius, layerBorder, true, fgRingData, fgAccumData);
        }

        AccumulateRingData(tileData.numSamples, isBgLayerInNearField, bgRingData, bgAccumData);
        AccumulateRingData(tileData.numSamples, true, fgRingData, fgAccumData);
    }

    ResolveColorAndAlpha(bgAccumData.color, bgAccumData.alpha, centerSample.color);
    ResolveColorAndAlpha(fgAccumData.color, fgAccumData.alpha, centerSample.color);

    // Accumulate center sample in bg
    AccumulateCenterSample(centerSample, bgAccumData);

    // Compute the fg alpha. Needs to be normalized based on search radius.
    // If nothing was accumulated in the foreground (e.g. numSamples == 0) the normalization would be 0 / 0;
    // skip it and use a fully transparent foreground instead of relying on saturate() to squash a NaN,
    // which is not guaranteed on every target API.
    float fgAlpha = 0.0;
    if (fgAccumData.color.w > 0.0)
    {
        float normCoC = fgAccumData.CoC / fgAccumData.color.w;
        float scaleFactor = (normCoC * normCoC) / (tileData.maxRadius * tileData.maxRadius);
        float correctSamples = scaleFactor * (tileData.numSamples * tileData.numSamples);

        fgAlpha = saturate(2 * fgAccumData.color.w * rcp(GetSampleWeight(normCoC)) * rcp(correctSamples));
    }

    // Now blend the bg and fg layers
    color = bgAccumData.color * (1.0 - fgAlpha) + fgAlpha * fgAccumData.color;
    alpha = bgAccumData.alpha * (1.0 - fgAlpha) + fgAlpha * fgAccumData.alpha;
}

// Debug helper: tints pixels whose layer border is negative.
void DebugTiles(DoFTile tileData, inout float4 outColor)
{
    // Debug the tile type
    const bool isBgLayerInNearField = tileData.layerBorder < 0;
    if (isBgLayerInNearField)
    {
        outColor.xyz *= float3(0.5, 0.5, 1.0);
    }
}

// Kernel entry point: one thread per pixel, GROUP_RES x GROUP_RES threads per group.
[numthreads(GROUP_RES, GROUP_RES, 1)]
void KMain(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
    PositionInputs posInputs = GetPositionInput(float2(dispatchThreadId.xy), _ScreenSize.zw, uint2(GROUP_RES, GROUP_RES));

    SampleData centerSample;
    centerSample.color = GetColorSample(posInputs.positionSS, 0);
    centerSample.CoC = GetCoCRadius(posInputs.positionSS);

    // LoadTileData writes every field of tileData
    DoFTile tileData;
    LoadTileData(posInputs.positionSS, centerSample, tileData);

    float4 outColor;
    float outAlpha;

    // NOTE(review): the guard below is spelled FAST_APPROXIMAION (sic) and DoFGatherFast is not defined
    // in this file, so only the ring gather is compiled here - confirm before renaming or enabling it.
#ifdef FAST_APPROXIMAION
    DoFGatherFast(posInputs, tileData, centerSample, outColor, outAlpha);
#else
    DoFGatherRings(posInputs, tileData, centerSample, outColor, outAlpha);
#endif

    //DebugTiles(tileData, outColor);

#ifdef ENABLE_ALPHA
    // Preserve the original value of the pixels with zero alpha.
    // The second line with the lerp+smoothstep combination avoids a hard transition in edge cases
    //outColor.xyz = outAlpha > 0 ? outColor.xyz : originalColor.xyz;
    outColor.xyz = lerp(centerSample.color.xyz, outColor.xyz, smoothstep(0, 0.01, outAlpha));
    outColor.w = outAlpha;
#endif

    _OutputTexture[COORD_TEXTURE2D_X(posInputs.positionSS)] = (CTYPE) outColor;
}