261 lines
10 KiB
Plaintext
261 lines
10 KiB
Plaintext
// Parts of this shader are largely based on the MiniEngine shader, with some modifications. The copyright notice of that shader is as follows:
|
|
//
|
|
// Copyright (c) Microsoft. All rights reserved.
|
|
// This code is licensed under the MIT License (MIT).
|
|
// THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
|
|
// ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
|
|
// IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
|
|
// PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
|
|
//
|
|
// Developed by Minigraph
|
|
//
|
|
// Author: James Stanard
|
|
//
|
|
|
|
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/GTAOCommon.hlsl"
|
|
|
|
#pragma kernel BlurUpsample BLUR_AND_UPSAMPLE
|
|
#pragma kernel BilateralUpsampling UPSAMPLE_KERNEL=BilateralUpsampling UPSAMPLE_ONLY
|
|
#pragma kernel BoxUpsampling UPSAMPLE_KERNEL=BoxUpsampling UPSAMPLE_ONLY BOX
|
|
// Currently not used.
|
|
#pragma kernel Blur BLUR_KERNEL_NAME=Blur BLUR
|
|
#pragma kernel Blur_FullRes BLUR_KERNEL_NAME=Blur_FullRes BLUR FULL_RES
|
|
|
|
// Input: packed AO/depth data, read through GatherAOData() by the kernels in this file.
TEXTURE2D_X(_AOPackedData);
|
|
// Output: final AO, written through OutputFinalAO() by the upsample kernels and the FULL_RES blur kernel.
RW_TEXTURE2D_X(float, _OcclusionTexture);
|
|
// Output of the non-FULL_RES blur kernel: blurred AO re-packed with PackAOOutput().
RW_TEXTURE2D_X(float, _AOPackedBlurred);
|
|
|
|
|
|
// Per-group scratch tiles, addressed with a row stride of 16 (16 * 16 = 256 entries).
// DepthCache holds 1.0 / depth for every prefetched sample (see PrefetchData).
groupshared float DepthCache[256];
|
|
// AOCache1 holds the prefetched AO, and is later overwritten with the result of the vertical blur pass.
groupshared float AOCache1[256];
|
|
// AOCache2 holds the result of the horizontal blur pass.
groupshared float AOCache2[256];
|
|
|
|
|
|
// 5-tap blur centred on c, with taps a b [c] d e.
// Left / Middle / Right are per-side flags: when a side's flags are false, the taps on that side are
// replaced by the next tap towards the centre before the taps are averaged.
// The outer taps (a, e) carry half the weight of the inner three.
float SmartBlur(float a, float b, float c, float d, float e, bool Left, bool Middle, bool Right)
{
    // Inner taps fall back to the centre value; outer taps fall back to the (already resolved) inner tap.
    float innerLeft  = (Left || Middle) ? b : c;
    float outerLeft  = Left ? a : innerLeft;
    float innerRight = (Right || Middle) ? d : c;
    float outerRight = Right ? e : innerRight;

    return ((outerLeft + outerRight) / 2.0 + innerLeft + c + innerRight) / 4.0;
}
|
|
|
|
// Returns true when (d1 * d2 + _StepSize)^2 exceeds l1 * l2 * _BlurTolerance.
// The callers in this file pass l = d * d + _StepSize for each delta.
// NOTE(review): this looks like a similarity test between two neighbouring depth slopes - confirm
// against the definitions of _StepSize and _BlurTolerance.
bool CompareDeltas(float d1, float d2, float l1, float l2)
{
    float lhs = d1 * d2 + _StepSize;
    float rhs = l1 * l2 * _BlurTolerance;
    return lhs * lhs > rhs;
}
|
|
|
|
// Gathers one 2x2 footprint of AO and depth at `uv` and stores it into the groupshared tiles.
// `index` addresses the footprint's first entry; the tile row stride is 16.
// Placement of the gathered components: w -> index, z -> index + 1, x -> index + 16, y -> index + 17.
// DepthCache receives the reciprocal of the gathered depths.
void PrefetchData(uint index, float2 uv)
{
    float4 gatheredAO, gatheredDepths;
    GatherAOData(_AOPackedData, uv, gatheredAO, gatheredDepths);

    float4 invDepths = 1.0 / gatheredDepths;

    // First row of the footprint.
    AOCache1[index]       = gatheredAO.w;
    DepthCache[index]     = invDepths.w;
    AOCache1[index + 1]   = gatheredAO.z;
    DepthCache[index + 1] = invDepths.z;

    // Second row of the footprint (one tile row further).
    AOCache1[index + 16]   = gatheredAO.x;
    DepthCache[index + 16] = invDepths.x;
    AOCache1[index + 17]   = gatheredAO.y;
    DepthCache[index + 17] = invDepths.y;
}
|
|
|
|
// Horizontal blur pass: reads 7 consecutive AO / depth entries starting at `leftMostIndex`
// and writes 3 blurred values to AOCache2[leftMostIndex .. leftMostIndex + 2].
// Output k is the 5-tap SmartBlur of taps k .. k + 4, i.e. it is centred on input tap k + 2.
void BlurHorizontally(uint leftMostIndex)
{
    float ao[7];
    float depth[7];
    [unroll]
    for (uint tap = 0; tap < 7; ++tap)
    {
        ao[tap]    = AOCache1[leftMostIndex + tap];
        depth[tap] = DepthCache[leftMostIndex + tap];
    }

    // Difference between neighbouring depth entries, and the matching d * d + _StepSize term.
    float delta[6];
    float span[6];
    [unroll]
    for (uint pair = 0; pair < 6; ++pair)
    {
        delta[pair] = depth[pair + 1] - depth[pair];
        span[pair]  = delta[pair] * delta[pair] + _StepSize;
    }

    // similar[k] compares the two deltas on either side of tap k + 1.
    bool similar[5];
    [unroll]
    for (uint mid = 0; mid < 5; ++mid)
    {
        similar[mid] = CompareDeltas(delta[mid], delta[mid + 1], span[mid], span[mid + 1]);
    }

    [unroll]
    for (uint dst = 0; dst < 3; ++dst)
    {
        AOCache2[leftMostIndex + dst] = SmartBlur(ao[dst], ao[dst + 1], ao[dst + 2], ao[dst + 3], ao[dst + 4],
                                                  similar[dst], similar[dst + 1], similar[dst + 2]);
    }
}
|
|
|
|
// Vertical blur pass: reads 6 entries of AOCache2 going down one column (row stride 16) starting at
// `topMostIndex`, and writes 2 blurred values to AOCache1[topMostIndex] and AOCache1[topMostIndex + 16].
// Depth is read with a +2 column offset: BlurHorizontally stores at index k the result centred on
// input tap k + 2, so this is the depth entry matching each AOCache2 value.
void BlurVertically(uint topMostIndex)
{
    float ao[6];
    float depth[6];
    [unroll]
    for (uint tap = 0; tap < 6; ++tap)
    {
        ao[tap]    = AOCache2[topMostIndex + tap * 16];
        depth[tap] = DepthCache[topMostIndex + 2 + tap * 16];
    }

    // Difference between vertically neighbouring depth entries, and the matching d * d + _StepSize term.
    float delta[5];
    float span[5];
    [unroll]
    for (uint pair = 0; pair < 5; ++pair)
    {
        delta[pair] = depth[pair + 1] - depth[pair];
        span[pair]  = delta[pair] * delta[pair] + _StepSize;
    }

    // similar[k] compares the two deltas on either side of tap k + 1.
    bool similar[4];
    [unroll]
    for (uint mid = 0; mid < 4; ++mid)
    {
        similar[mid] = CompareDeltas(delta[mid], delta[mid + 1], span[mid], span[mid + 1]);
    }

    [unroll]
    for (uint dst = 0; dst < 2; ++dst)
    {
        AOCache1[topMostIndex + dst * 16] = SmartBlur(ao[dst], ao[dst + 1], ao[dst + 2], ao[dst + 3], ao[dst + 4],
                                                      similar[dst], similar[dst + 1], similar[dst + 2]);
    }
}
|
|
|
|
// We essentially want 5 weights: 4 for each low-res pixel and 1 to blend in when none of the 4 really
|
|
// match. The filter strength is 1 / DeltaZTolerance. So a tolerance of 0.01 would yield a strength of 100.
|
|
// Note that a perfect match of low to high depths would yield a weight of 10^6, completely superseding any
|
|
// noise filtering. The noise filter is intended to soften the effects of shimmering when the high-res depth
|
|
// buffer has a lot of small holes in it causing the low-res depth buffer to inaccurately represent it.
|
|
// Blends four AO samples (LowAO) into one value for a pixel of depth HiDepth.
// Each sample's weight is a fixed factor (9, 3, 1, 3) divided by |HiDepth - sample depth| + _UpsampleTolerance.
// _NoiseFilterStrength is added to both the weighted sum and the total weight, which is equivalent
// to blending in one extra sample of value 1.0 with that weight.
float BilateralUpsample(float HiDepth, float4 LowDepths, float4 LowAO)
{
    float4 depthGap = abs(HiDepth - LowDepths) + _UpsampleTolerance;
    float4 tapWeights = float4(9, 3, 1, 3) / depthGap;

    float numerator   = dot(LowAO, tapWeights) + _NoiseFilterStrength;
    float denominator = dot(tapWeights, 1) + _NoiseFilterStrength;
    return numerator / denominator;
}
|
|
|
|
#if BLUR_AND_UPSAMPLE
|
|
|
|
// Kernel: blurs the AO tile horizontally then vertically through groupshared memory, then runs a
// bilateral upsample against _CameraDepthTexture and writes four texels of _OcclusionTexture around
// twice the dispatch-thread coordinate.
// Gid is unused but kept so the kernel signature stays unchanged.
[numthreads(8,8,1)]
void BlurUpsample(uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(DTid.z);

    // Every thread gathers a 2x2 footprint, so the 8x8 group fills the 16x16 groupshared tile.
    // (GTid.x << 1 | GTid.y << 5) is column 2 * GTid.x, row 2 * GTid.y of that tile.
    float2 UV = ClampAndScaleUVForBilinear((int2(DTid.xy + GTid.xy - 2)) * _AOBufferSize.zw, _AOBufferSize.zw);
    PrefetchData(GTid.x << 1 | GTid.y << 5, UV);
    GroupMemoryBarrierWithGroupSync();

    // 39 threads = 13 rows x 3 segments; each segment produces 3 columns (9 columns per row).
    if (GI < 39)
    {
        BlurHorizontally((GI / 3) * 16 + (GI % 3) * 3);
    }
    GroupMemoryBarrierWithGroupSync();

    // 45 threads = 5 row pairs x 9 columns; each call produces 2 rows.
    if (GI < 45)
    {
        BlurVertically((GI / 9) * 32 + GI % 9);
    }

    GroupMemoryBarrierWithGroupSync();

    // Fetch the 2x2 blurred neighbourhood in the same component order PrefetchData stores a gather:
    // x -> +16, y -> +17, z -> +1, w -> +0.
    uint Idx0 = GTid.x + GTid.y * 16;
    float4 LoSSAOs = float4(AOCache1[Idx0 + 16], AOCache1[Idx0 + 17], AOCache1[Idx0 + 1], AOCache1[Idx0]);

    // The previously computed UV0 was never read in this kernel and has been removed.
    float2 UV1 = ClampAndScaleUVForPoint(DTid.xy * 2 * _ScreenSize.zw);
    UV1.y *= 2.0f / 3.0f;

    float4 highDepth = GATHER_TEXTURE2D_X(_CameraDepthTexture, s_point_clamp_sampler, UV1);
    // DepthCache stores reciprocals (see PrefetchData); rcp() turns them back into the gathered depths.
    float4 LowDepth = rcp(float4(DepthCache[Idx0 + 16], DepthCache[Idx0 + 17], DepthCache[Idx0 + 1], DepthCache[Idx0]));

    int2 OutST = DTid.xy << 1;

    // The swizzles rotate the samples so that each output texel sees its own ordering of the four
    // low-res samples against the (9, 3, 1, 3) weights in BilateralUpsample.
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(0, -1))] = OutputFinalAO(BilateralUpsample(highDepth.z, LowDepth.zwxy, LoSSAOs.zwxy));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST)] = OutputFinalAO(BilateralUpsample(highDepth.y, LowDepth.yzwx, LoSSAOs.yzwx));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, -1))] = OutputFinalAO(BilateralUpsample(highDepth.w, LowDepth.wxyz, LoSSAOs.wxyz));
    _OcclusionTexture[COORD_TEXTURE2D_X(OutST + int2(-1, 0))] = OutputFinalAO(BilateralUpsample(highDepth.x, LowDepth.xyzw, LoSSAOs.xyzw));
}
|
|
|
|
#elif UPSAMPLE_ONLY
|
|
// Upsample-only kernel (no blur pass). Gathers one 2x2 footprint of packed AO / depth and writes
// four texels of _OcclusionTexture around twice the dispatch-thread coordinate.
// With BOX defined, all four texels receive the plain average of the gathered AO; otherwise each
// texel is resolved with BilateralUpsample against _CameraDepthTexture.
[numthreads(8, 8, 1)]
void UPSAMPLE_KERNEL(uint3 DTid : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(DTid.z);

    float2 aoUV = ClampAndScaleUVForBilinear(DTid.xy * _AOBufferSize.zw, _AOBufferSize.zw);
    float2 depthUV = ClampAndScaleUVForPoint(DTid.xy * 2 * _ScreenSize.zw);
    depthUV.y *= 2.0f / 3.0f;

    float4 lowAO, lowDepths;
    GatherAOData(_AOPackedData, aoUV, lowAO, lowDepths);

    int2 dstCoord = DTid.xy << 1;

#ifdef BOX
    // Same average for all four output texels.
    float boxAO = dot(0.25, lowAO);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord + int2(0, -1))] = OutputFinalAO(boxAO);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord)] = OutputFinalAO(boxAO);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord + int2(-1, -1))] = OutputFinalAO(boxAO);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord + int2(-1, 0))] = OutputFinalAO(boxAO);
#else
    float4 hiDepths = GATHER_TEXTURE2D_X(_CameraDepthTexture, s_point_clamp_sampler, depthUV);

    // Each texel uses its own rotation of the four low-res samples.
    float aoZ = BilateralUpsample(hiDepths.z, lowDepths.zwxy, lowAO.zwxy);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord + int2(0, -1))] = OutputFinalAO(aoZ);

    float aoY = BilateralUpsample(hiDepths.y, lowDepths.yzwx, lowAO.yzwx);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord)] = OutputFinalAO(aoY);

    float aoW = BilateralUpsample(hiDepths.w, lowDepths.wxyz, lowAO.wxyz);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord + int2(-1, -1))] = OutputFinalAO(aoW);

    float aoX = BilateralUpsample(hiDepths.x, lowDepths.xyzw, lowAO.xyzw);
    _OcclusionTexture[COORD_TEXTURE2D_X(dstCoord + int2(-1, 0))] = OutputFinalAO(aoX);
#endif
}
|
|
|
|
#else // Just Blur case
|
|
|
|
// Blur-only kernel: blurs the AO tile horizontally then vertically through groupshared memory and
// outputs the average of the 2x2 blurred neighbourhood.
// FULL_RES writes OutputFinalAO() to _OcclusionTexture; otherwise the value is re-packed with a depth
// into _AOPackedBlurred.
[numthreads(8, 8, 1)]
void BLUR_KERNEL_NAME(uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 dispatchID : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchID.z);

    // Every thread gathers a 2x2 footprint into column 2 * GTid.x, row 2 * GTid.y of the 16x16 tile.
    int2 tapCoord = int2(dispatchID.xy + GTid.xy - 2);
    float2 tapUV = ClampAndScaleUVForBilinear(tapCoord * _AOBufferSize.zw, _AOBufferSize.zw);
    uint tileSlot = (GTid.x << 1) | (GTid.y << 5);
    PrefetchData(tileSlot, tapUV);
    GroupMemoryBarrierWithGroupSync();

    // 39 threads = 13 rows x 3 segments of 3 output columns each.
    if (GI < 39)
    {
        uint tileRow = GI / 3;
        uint segment = GI % 3;
        BlurHorizontally(tileRow * 16 + segment * 3);
    }
    GroupMemoryBarrierWithGroupSync();

    // 45 threads = 5 row pairs x 9 columns.
    if (GI < 45)
    {
        uint rowPair = GI / 9;
        uint col = GI % 9;
        BlurVertically(rowPair * 32 + col);
    }
    GroupMemoryBarrierWithGroupSync();

    uint tileIdx = GTid.x + GTid.y * 16;
    float4 blurredAO = float4(AOCache1[tileIdx + 16], AOCache1[tileIdx + 17], AOCache1[tileIdx + 1], AOCache1[tileIdx]);
    float averagedAO = dot(0.25, blurredAO);

#ifdef FULL_RES
    _OcclusionTexture[COORD_TEXTURE2D_X(dispatchID.xy)] = OutputFinalAO(averagedAO);
#else
    // DepthCache stores reciprocals, so 1 / entry recovers the gathered depth for this position.
    _AOPackedBlurred[COORD_TEXTURE2D_X(dispatchID.xy)] = PackAOOutput(averagedAO, 1.0f / DepthCache[tileIdx + 16]);
#endif
}
|
|
#endif
|