// Parts of this shader are largely based on MiniEngine shader, with some modifications. The copyright of said shader is as follows:
//
// Copyright (c) Microsoft. All rights reserved.
// This code is licensed under the MIT License (MIT).
// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
//
// Developed by Minigraph
//
// Author: James Stanard
//

#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/GTAOCommon.hlsl"

#pragma kernel BlurUpsample BLUR_AND_UPSAMPLE
#pragma kernel BilateralUpsampling UPSAMPLE_KERNEL=BilateralUpsampling UPSAMPLE_ONLY
#pragma kernel BoxUpsampling UPSAMPLE_KERNEL=BoxUpsampling UPSAMPLE_ONLY BOX

// Currently not used.
#pragma kernel Blur BLUR_KERNEL_NAME=Blur BLUR
#pragma kernel Blur_FullRes BLUR_KERNEL_NAME=Blur_FullRes BLUR FULL_RES

// Packed AO + depth input, read through GatherAOData.
TEXTURE2D_X(_AOPackedData);
// Final AO output (written by the upsample kernels and by Blur_FullRes).
RW_TEXTURE2D_X(float, _OcclusionTexture);
// Re-packed blurred AO + depth output (written by the Blur kernel).
RW_TEXTURE2D_X(float, _AOPackedBlurred);

// Per-group 16x16 caches, addressed as x + y * 16.
groupshared float DepthCache[256];  // Reciprocal of the unpacked depth (see PrefetchData).
groupshared float AOCache1[256];    // Prefetched AO; overwritten with the final blurred AO by BlurVertically.
groupshared float AOCache2[256];    // Horizontally blurred AO.

// Edge-aware 5-tap blur centred on c.
// With every flag set the result is a/8 + b/4 + c/4 + d/4 + e/8. A cleared flag makes the taps on that
// side fall back to their inner neighbour, so a rejected tap contributes the value nearer to the centre.
float SmartBlur(float a, float b, float c, float d, float e, bool Left, bool Middle, bool Right)
{
    float innerLeft  = (Left | Middle) ? b : c;
    float outerLeft  = Left ? a : innerLeft;
    float innerRight = (Right | Middle) ? d : c;
    float outerRight = Right ? e : innerRight;

    return ((outerLeft + outerRight) / 2.0 + innerLeft + c + innerRight) / 4.0;
}

// Returns true when (d1 * d2 + _StepSize)^2 exceeds l1 * l2 * _BlurTolerance.
// Callers pass two consecutive depth deltas and l = d * d + _StepSize for each of them, so this is
// presumably a squared-cosine style test that the two slopes are similar enough to blur across.
bool CompareDeltas(float d1, float d2, float l1, float l2)
{
    float biasedProduct = d1 * d2 + _StepSize;
    return biasedProduct * biasedProduct > l1 * l2 * _BlurTolerance;
}

// Gathers one 2x2 quad of AO/depth at uv and stores it in the caches at index (first row) and
// index + 16 (next row). Depth is stored as its reciprocal.
// NOTE(review): the .w/.z/.x/.y placement assumes GatherAOData follows the usual Gather component order — confirm.
void PrefetchData(uint index, float2 uv)
{
    float4 gatheredAO, gatheredDepth;
    GatherAOData(_AOPackedData, uv, gatheredAO, gatheredDepth);

    AOCache1[index]      = gatheredAO.w;
    AOCache1[index + 1]  = gatheredAO.z;
    AOCache1[index + 16] = gatheredAO.x;
    AOCache1[index + 17] = gatheredAO.y;

    float4 invDepth = 1.0 / gatheredDepth;
    DepthCache[index]      = invDepth.w;
    DepthCache[index + 1]  = invDepth.z;
    DepthCache[index + 16] = invDepth.x;
    DepthCache[index + 17] = invDepth.y;
}

// Horizontal pass: reads 7 consecutive taps starting at leftMostIndex from AOCache1/DepthCache and
// writes 3 blurred values to AOCache2[leftMostIndex .. leftMostIndex + 2].
// Output i is centred on input tap i + 2.
void BlurHorizontally(uint leftMostIndex)
{
    float ao0 = AOCache1[leftMostIndex];
    float ao1 = AOCache1[leftMostIndex + 1];
    float ao2 = AOCache1[leftMostIndex + 2];
    float ao3 = AOCache1[leftMostIndex + 3];
    float ao4 = AOCache1[leftMostIndex + 4];
    float ao5 = AOCache1[leftMostIndex + 5];
    float ao6 = AOCache1[leftMostIndex + 6];

    float z0 = DepthCache[leftMostIndex];
    float z1 = DepthCache[leftMostIndex + 1];
    float z2 = DepthCache[leftMostIndex + 2];
    float z3 = DepthCache[leftMostIndex + 3];
    float z4 = DepthCache[leftMostIndex + 4];
    float z5 = DepthCache[leftMostIndex + 5];
    float z6 = DepthCache[leftMostIndex + 6];

    // Deltas between neighbouring cached depth values.
    float dz01 = z1 - z0;
    float dz12 = z2 - z1;
    float dz23 = z3 - z2;
    float dz34 = z4 - z3;
    float dz45 = z5 - z4;
    float dz56 = z6 - z5;

    // Squared deltas biased by _StepSize, as expected by CompareDeltas.
    float len01 = dz01 * dz01 + _StepSize;
    float len12 = dz12 * dz12 + _StepSize;
    float len23 = dz23 * dz23 + _StepSize;
    float len34 = dz34 * dz34 + _StepSize;
    float len45 = dz45 * dz45 + _StepSize;
    float len56 = dz56 * dz56 + _StepSize;

    // One flag per pair of adjacent deltas; each flag is shared by up to three outputs.
    bool keep02 = CompareDeltas(dz01, dz12, len01, len12);
    bool keep13 = CompareDeltas(dz12, dz23, len12, len23);
    bool keep24 = CompareDeltas(dz23, dz34, len23, len34);
    bool keep35 = CompareDeltas(dz34, dz45, len34, len45);
    bool keep46 = CompareDeltas(dz45, dz56, len45, len56);

    AOCache2[leftMostIndex]     = SmartBlur(ao0, ao1, ao2, ao3, ao4, keep02, keep13, keep24);
    AOCache2[leftMostIndex + 1] = SmartBlur(ao1, ao2, ao3, ao4, ao5, keep13, keep24, keep35);
    AOCache2[leftMostIndex + 2] = SmartBlur(ao2, ao3, ao4, ao5, ao6, keep24, keep35, keep46);
}

// Vertical pass: reads 6 taps down one column of AOCache2 (row stride 16) and writes 2 blurred values
// back to AOCache1[topMostIndex] and AOCache1[topMostIndex + 16].
// Depth is read at column + 2 because AOCache2[i] is centred on prefetched tap i + 2.
void BlurVertically(uint topMostIndex)
{
    float ao0 = AOCache2[topMostIndex];
    float ao1 = AOCache2[topMostIndex + 16];
    float ao2 = AOCache2[topMostIndex + 32];
    float ao3 = AOCache2[topMostIndex + 48];
    float ao4 = AOCache2[topMostIndex + 64];
    float ao5 = AOCache2[topMostIndex + 80];

    float z0 = DepthCache[topMostIndex + 2];
    float z1 = DepthCache[topMostIndex + 18];
    float z2 = DepthCache[topMostIndex + 34];
    float z3 = DepthCache[topMostIndex + 50];
    float z4 = DepthCache[topMostIndex + 66];
    float z5 = DepthCache[topMostIndex + 82];

    float dz01 = z1 - z0;
    float dz12 = z2 - z1;
    float dz23 = z3 - z2;
    float dz34 = z4 - z3;
    float dz45 = z5 - z4;

    float len01 = dz01 * dz01 + _StepSize;
    float len12 = dz12 * dz12 + _StepSize;
    float len23 = dz23 * dz23 + _StepSize;
    float len34 = dz34 * dz34 + _StepSize;
    float len45 = dz45 * dz45 + _StepSize;

    bool keep02 = CompareDeltas(dz01, dz12, len01, len12);
    bool keep13 = CompareDeltas(dz12, dz23, len12, len23);
    bool keep24 = CompareDeltas(dz23, dz34, len23, len34);
    bool keep35 = CompareDeltas(dz34, dz45, len34, len45);

    float upperResult = SmartBlur(ao0, ao1, ao2, ao3, ao4, keep02, keep13, keep24);
    float lowerResult = SmartBlur(ao1, ao2, ao3, ao4, ao5, keep13, keep24, keep35);

    AOCache1[topMostIndex]      = upperResult;
    AOCache1[topMostIndex + 16] = lowerResult;
}

// We essentially want 5 weights: 4 for each low-res pixel and 1 to blend in when none of the 4 really
// match. The filter strength is 1 / DeltaZTolerance. So a tolerance of 0.01 would yield a strength of 100.
// Note that a perfect match of low to high depths would yield a weight of 10^6, completely superseding any
// noise filtering. The noise filter is intended to soften the effects of shimmering when the high-res depth
// buffer has a lot of small holes in it causing the low-res depth buffer to inaccurately represent it.
// Depth-weighted blend of four low-res AO samples for one high-res pixel.
//   HiDepth   : depth of the high-res pixel being resolved.
//   LowDepths : depths of the four low-res samples. Callers swizzle them so that the fixed
//               (9, 3, 1, 3) base weights line up with the pixel being written.
//   LowAO     : AO of the same four samples, in the same (swizzled) order as LowDepths.
// Each base weight is divided by |HiDepth - LowDepth| + _UpsampleTolerance. _NoiseFilterStrength is
// added to both the weighted sum and the total weight, i.e. it behaves like one extra sample with AO = 1.
float BilateralUpsample(float HiDepth, float4 LowDepths, float4 LowAO)
{
    float4 weights = float4(9, 3, 1, 3) / (abs(HiDepth - LowDepths) + _UpsampleTolerance);
    float TotalWeight = dot(weights, 1) + _NoiseFilterStrength;
    float WeightedSum = dot(LowAO, weights) + _NoiseFilterStrength;
    return WeightedSum / TotalWeight;
}

#if BLUR_AND_UPSAMPLE

// Blurs the packed low-res AO in group-shared memory, then bilaterally upsamples it: each thread
// writes a 2x2 quad of _OcclusionTexture.
[numthreads(8,8,1)]
void BlurUpsample(uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(DTid.z);

    // Each of the 64 threads gathers one 2x2 quad, filling the 16x16 AO and depth caches.
    float2 UV = ClampAndScaleUVForBilinear((int2(DTid.xy + GTid.xy - 2)) * _AOBufferSize.zw, _AOBufferSize.zw);
    PrefetchData(GTid.x << 1 | GTid.y << 5, UV);
    GroupMemoryBarrierWithGroupSync();

    // Horizontal pass: 39 threads x 3 outputs = 13 rows x 9 columns into AOCache2.
    if (GI < 39)
        BlurHorizontally((GI / 3) * 16 + (GI % 3) * 3);
    GroupMemoryBarrierWithGroupSync();

    // Vertical pass: 45 threads x 2 outputs = 10 rows x 9 columns back into AOCache1.
    if (GI < 45)
        BlurVertically((GI / 9) * 32 + GI % 9);
    GroupMemoryBarrierWithGroupSync();

    // Blurred AO for the 2x2 low-res neighbourhood, ordered like a Gather result (x, y, z, w).
    uint Idx0 = GTid.x + GTid.y * 16;
    float4 LoSSAOs = float4(AOCache1[Idx0 + 16], AOCache1[Idx0 + 17], AOCache1[Idx0 + 1], AOCache1[Idx0]);

    // Each 5-tap pass is centred two taps in, so the blurred value at AOCache1[i] belongs to the
    // prefetched texel i + 2 + 2 * 16. DepthCache is never shifted, so it has to be read with that
    // offset for every depth to pair with the AO sample it weights.
    uint DepthIdx0 = Idx0 + 2 + 2 * 16;
    float4 LowDepth = rcp(float4(DepthCache[DepthIdx0 + 16], DepthCache[DepthIdx0 + 17], DepthCache[DepthIdx0 + 1], DepthCache[DepthIdx0]));

    // High-res depths of the 2x2 output quad.
    // NOTE(review): the 2/3 scale on y presumably accounts for the layout of _CameraDepthTexture — confirm.
    float2 UV1 = ClampAndScaleUVForPoint(DTid.xy * 2 * _ScreenSize.zw);
    UV1.y *= 2.0f / 3.0f;
    float4 highDepth = GATHER_TEXTURE2D_X(_CameraDepthTexture, s_point_clamp_sampler, UV1);

    // The swizzle rotates the low-res samples so the weight of 9 lands on the sample matching each output pixel.
    int2 OutST = DTid.xy << 1;
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(0, -1))] = OutputFinalAO(BilateralUpsample(highDepth.z, LowDepth.zwxy, LoSSAOs.zwxy));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST)] = OutputFinalAO(BilateralUpsample(highDepth.y, LowDepth.yzwx, LoSSAOs.yzwx));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, -1))] = OutputFinalAO(BilateralUpsample(highDepth.w, LowDepth.wxyz, LoSSAOs.wxyz));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, 0))] = OutputFinalAO(BilateralUpsample(highDepth.x, LowDepth.xyzw, LoSSAOs.xyzw));
}

#elif UPSAMPLE_ONLY

// Upsamples the packed low-res AO without blurring: each thread writes a 2x2 quad of _OcclusionTexture.
// With BOX defined all four pixels receive the plain average of the gathered AO; otherwise they are
// resolved with BilateralUpsample against the high-res depth.
[numthreads(8, 8, 1)]
void UPSAMPLE_KERNEL(uint3 DTid : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(DTid.z);

    float2 UV0 = ClampAndScaleUVForBilinear(DTid.xy * _AOBufferSize.zw, _AOBufferSize.zw);

    float4 UnpackedAOs, UnpackedDepths;
    GatherAOData(_AOPackedData, UV0, UnpackedAOs, UnpackedDepths);

    int2 OutST = DTid.xy << 1;

#ifdef BOX
    // The average is identical for the whole quad, so compute it once.
    float averagedAO = dot(0.25, UnpackedAOs);
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(0, -1))] = OutputFinalAO(averagedAO);
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST)] = OutputFinalAO(averagedAO);
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, -1))] = OutputFinalAO(averagedAO);
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, 0))] = OutputFinalAO(averagedAO);
#else
    // High-res depths of the 2x2 output quad (only needed by the bilateral path).
    // NOTE(review): the 2/3 scale on y presumably accounts for the layout of _CameraDepthTexture — confirm.
    float2 UV1 = ClampAndScaleUVForPoint(DTid.xy * 2 * _ScreenSize.zw);
    UV1.y *= 2.0f / 3.0f;
    float4 highDepth = GATHER_TEXTURE2D_X(_CameraDepthTexture, s_point_clamp_sampler, UV1);

    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(0, -1))] = OutputFinalAO(BilateralUpsample(highDepth.z, UnpackedDepths.zwxy, UnpackedAOs.zwxy));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST)] = OutputFinalAO(BilateralUpsample(highDepth.y, UnpackedDepths.yzwx, UnpackedAOs.yzwx));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, -1))] = OutputFinalAO(BilateralUpsample(highDepth.w, UnpackedDepths.wxyz, UnpackedAOs.wxyz));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, 0))] = OutputFinalAO(BilateralUpsample(highDepth.x, UnpackedDepths.xyzw, UnpackedAOs.xyzw));
#endif
}

#else

// Just Blur case: runs the same two-pass blur and writes one value per thread, either as final AO
// (FULL_RES) or re-packed with depth into _AOPackedBlurred.
[numthreads(8, 8, 1)]
void BLUR_KERNEL_NAME(uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 dispatchID : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchID.z);

    // Each of the 64 threads gathers one 2x2 quad, filling the 16x16 AO and depth caches.
    float2 UV = ClampAndScaleUVForBilinear(int2(dispatchID.xy + GTid.xy - 2) * _AOBufferSize.zw, _AOBufferSize.zw);
    PrefetchData(GTid.x << 1 | GTid.y << 5, UV);
    GroupMemoryBarrierWithGroupSync();

    // Horizontal pass: 39 threads x 3 outputs = 13 rows x 9 columns into AOCache2.
    if (GI < 39)
        BlurHorizontally((GI / 3) * 16 + (GI % 3) * 3);
    GroupMemoryBarrierWithGroupSync();

    // Vertical pass: 45 threads x 2 outputs = 10 rows x 9 columns back into AOCache1.
    if (GI < 45)
        BlurVertically((GI / 9) * 32 + GI % 9);
    GroupMemoryBarrierWithGroupSync();

    // Average the 2x2 blurred neighbourhood into a single value.
    uint Idx0 = GTid.x + GTid.y * 16;
    float4 LoSSAOs = float4(AOCache1[Idx0 + 16], AOCache1[Idx0 + 17], AOCache1[Idx0 + 1], AOCache1[Idx0]);
    float LoSSAO = dot(0.25, LoSSAOs);

#ifdef FULL_RES
    _OcclusionTexture[COORD_TEXTURE2D_X(dispatchID.xy)] = OutputFinalAO(LoSSAO);
#else
    // NOTE(review): blurred AOCache1[i] is centred on prefetched texel i + 2 + 2 * 16, but the depth
    // packed here is read without that shift. Left unchanged because the intended texel is unclear — confirm.
    _AOPackedBlurred[COORD_TEXTURE2D_X(dispatchID.xy)] = PackAOOutput(LoSSAO, 1.0f / DepthCache[Idx0 + 16]);
#endif
}

#endif