// Definitions
//--------------------------------------------------------------------------------------------------

// #pragma enable_d3d11_debug_symbols
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch

#pragma kernel ScreenSpaceReflectionsTracing        SSR_TRACE
#pragma kernel ScreenSpaceReflectionsReprojection   SSR_REPROJECT
#pragma kernel ScreenSpaceReflectionsAccumulate     SSR_ACCUMULATE

#pragma multi_compile _ DEPTH_SOURCE_NOT_FROM_MIP_CHAIN
#pragma multi_compile _ SSR_APPROX

// Tweak parameters.
// Uncomment to declare _SsrDebugTexture and make WriteDebugInfo() store its value per pixel.
// #define DEBUG
// When defined, a ray that went below the MIP 0 depth is only reported as a miss if it is also
// inside the thickness slab of the surface; otherwise any ray below MIP 0 is a miss.
#define SSR_TRACE_BEHIND_OBJECTS
// When NOT defined, the tracing kernel kills rays whose depth direction points towards the eye.
#define SSR_TRACE_TOWARDS_EYE
// SAMPLES_VNDF selects SampleGGX_VNDF() over SampleGGXDir() in GetHitInfos().
// It is only enabled for the non-approximated variant.
#ifndef SSR_APPROX
    #define SAMPLES_VNDF
#endif
// Epsilon used by the tracing kernel to push the ray position off texel edges.
#define SSR_TRACE_EPS               0.000488281f // 2^-11, should be good up to 4K
// Clamp range applied to the GGX roughness before it is used for sampling/weighting.
#define MIN_GGX_ROUGHNESS           0.00001f
#define MAX_GGX_ROUGHNESS           0.99999f

//--------------------------------------------------------------------------------------------------
// Included headers
//--------------------------------------------------------------------------------------------------

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Packing.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/ScreenSpaceLighting.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/ScreenSpaceLighting/ShaderVariablesScreenSpaceReflection.cs.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Builtin/BuiltinData.hlsl"

#ifdef DEBUG_DISPLAY
    #include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Debug.hlsl"
    #include "Packages/com.unity.render-pipelines.high-definition/Runtime/Debug/DebugDisplay.hlsl"
#endif

#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/NormalBuffer.hlsl"

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/ImageBasedLighting.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/PreIntegratedFGD/PreIntegratedFGD.cs.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/PreIntegratedFGD/PreIntegratedFGD.hlsl"

#include "Packages/com.unity.render-pipelines.high-definition/Runtime/PostProcessing/Shaders/TemporalAntialiasing.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariablesGlobal.cs.hlsl"

//--------------------------------------------------------------------------------------------------
// Inputs & outputs
//--------------------------------------------------------------------------------------------------

// Debug output, only declared when the local DEBUG tweak is enabled (written by WriteDebugInfo()).
#ifdef DEBUG
    RW_TEXTURE2D(float4, _SsrDebugTexture);
#endif

// For opaque objects we do the following:
// - Render opaque objects into the depth buffer
// - Generate the depth pyramid from the opaque depth buffer
// - Trigger rays from positions recovered from the depth pyramid and raymarch with the depth pyramid
// For transparent reflections we chose not to regenerate a depth pyramid, to save performance. So we have:
// - Generate the depth pyramid from the opaque depth buffer
// - Trigger rays from positions recovered from the depth buffer (using the depth pyramid) and raymarch with the depth pyramid
// - Render transparent objects with reflection into the depth buffer in the transparent prepass
// - Trigger rays from positions recovered from the new depth buffer and raymarch with the opaque depth pyramid
// So we need separate textures for the mip chain and for the depth source when doing transparent reflections.
#ifdef DEPTH_SOURCE_NOT_FROM_MIP_CHAIN
TEXTURE2D_X(_DepthTexture);
#endif

// Per-kernel bindings. The hit point texture is written by the tracing kernel and only read by
// the reprojection and accumulation kernels.
#ifdef SSR_TRACE
    TEXTURE2D_X_UINT2(         _StencilTexture);
    RW_TEXTURE2D_X(float2, _SsrHitPointTexture);
#elif defined(SSR_REPROJECT)
       TEXTURE2D_X(        _SsrHitPointTexture);
    RW_TEXTURE2D_X(float4, _SSRAccumTexture);
#else //if defined(SSR_ACCUMULATE)
       TEXTURE2D_X(        _SsrHitPointTexture);
    RW_TEXTURE2D_X(float4, _SsrAccumPrev);
    RW_TEXTURE2D_X(float4, _SsrLightingTextureRW);
    RW_TEXTURE2D_X(float4, _SSRAccumTexture);
#endif

    // Shared by all kernels:
    // - _SsrClearCoatMaskTexture is read by GetNormalAndPerceptualRoughness().
    // - _DepthPyramidMipLevelOffsets gives the texel offset of each depth pyramid MIP (tracing kernel).
    // - _CoarseStencilBuffer is only read when USE_COARSE_STENCIL is enabled.
    TEXTURE2D_X(            _SsrClearCoatMaskTexture);
    StructuredBuffer<int2>  _DepthPyramidMipLevelOffsets;
    StructuredBuffer<uint>  _CoarseStencilBuffer;

//--------------------------------------------------------------------------------------------------
// Helpers
//--------------------------------------------------------------------------------------------------

// Returns the SSR sample weight (value / pdf) under the assumption that Fresnel == 1.
// With
//   value = D_GGX / (lambdaV + 1 + lambdaL)
//   pdf   = D_GGX / (lambdaV + 1)
// the D_GGX term cancels out and only the ratio of the Lambda terms remains.
float GetSSRSampleWeight(float3 V, float3 L, float roughness)
{
    const float viewTerm  = Lambda_GGX(roughness, V) + 1.0;
    const float lightTerm = Lambda_GGX(roughness, L);
    const float denominator = viewTerm + lightTerm;

    return viewTerm / denominator;
}

// GGX visible-normal sampling specialized for SSR, i.e. without the Fresnel term
// (see PathTracingBSDF.hlsl for the reference implementation).
//   roughness_   : GGX roughness, clamped to [MIN_GGX_ROUGHNESS, MAX_GGX_ROUGHNESS] before use.
//   localToWorld : frame used to bring the sampled direction back from local space.
//   V            : view direction handed to SampleGGXVisibleNormal().
//   inputSample  : 2D random sample.
//   outgoingDir  : sampled reflection direction (always written, even when the sample is rejected).
//   weight       : sample weight; stays 0 when the direction is rejected for being below the horizon.
// Returns false when the sample should be discarded.
bool SampleGGX_VNDF(float roughness_,
                    float3x3 localToWorld,
                    float3 V,
                    float2 inputSample,
                out float3 outgoingDir,
                out float weight)
{
    weight = 0.0f;

    const float alpha = clamp(roughness_, MIN_GGX_ROUGHNESS, MAX_GGX_ROUGHNESS);

    float3 viewLS;
    float3 halfLS;
    float  cosVH;
    SampleGGXVisibleNormal(inputSample.xy, V, localToWorld, alpha, viewLS, halfLS, cosVH);

    // Mirror the local view vector around the sampled half vector.
    const float3 lightLS = 2.0 * cosVH * halfLS - viewLS;
    outgoingDir = mul(lightLS, localToWorld);

    // Reject directions that are (almost) below the surface.
    if (lightLS.z < 0.001)
        return false;

    weight = GetSSRSampleWeight(viewLS, lightLS, alpha);

    // Negligible contributions are rejected as well.
    return !(weight < 0.001);
}

// Fade factor driven by the perceptual roughness: the roughness is remapped with Remap10()
// using the two precomputed fade constants and then smoothed with Smoothstep01().
float PerceptualRoughnessFade(float perceptualRoughness, float fadeRcpLength, float fadeEndTimesRcpLength)
{
    return Smoothstep01(Remap10(perceptualRoughness, fadeRcpLength, fadeEndTimesRcpLength));
}

// Gathers the data needed to build one stochastic SSR sample for the pixel 'positionSS'.
// Outputs:
//   hitPositionNDC         : value currently stored in _SsrHitPointTexture for this pixel.
//   srcPerceptualRoughness : perceptual roughness decoded from the normal buffer.
//   positionWS             : world space position reconstructed from the depth source.
//   weight                 : sample weight (value / pdf) of the sampled direction.
//   N, L, V                : normal, sampled reflection direction and view direction.
//   NdotL, NdotH, VdotH, NdotV : the corresponding dot products.
// With SAMPLES_VNDF the direction comes from SampleGGX_VNDF(); its boolean result is ignored here,
// so callers are expected to reject samples themselves by testing 'weight' and 'NdotL'.
void GetHitInfos(uint2 positionSS, out float2 hitPositionNDC, out float srcPerceptualRoughness, out float3 positionWS, out float weight, out float3 N, out float3 L, out float3 V, out float NdotL, out float NdotH, out float VdotH, out float NdotV)
{
    hitPositionNDC = _SsrHitPointTexture[COORD_TEXTURE2D_X(positionSS)].xy;

    // Per-pixel, per-frame 2D random sample.
    float2 Xi;
    Xi.x = GetBNDSequenceSample(positionSS, _FrameCount, 0);
    Xi.y = GetBNDSequenceSample(positionSS, _FrameCount, 1);

    NormalData normalData;
    DecodeFromNormalBuffer(positionSS, normalData);

    srcPerceptualRoughness = normalData.perceptualRoughness;

    float roughness = PerceptualRoughnessToRoughness(normalData.perceptualRoughness);
    float3x3 localToWorld = GetLocalFrame(normalData.normalWS);

    Xi.x = lerp(Xi.x, 0.0f, srcPerceptualRoughness * 0.7f); // 0.7 is an arbitrary bias to reduce noise for high roughness

#ifdef DEPTH_SOURCE_NOT_FROM_MIP_CHAIN
    float  deviceDepth = LOAD_TEXTURE2D_X(_DepthTexture, positionSS).r;
#else
    float  deviceDepth = LOAD_TEXTURE2D_X(_CameraDepthTexture, positionSS).r;
#endif

    // Reconstruct the world space position at the texel center.
    float2 positionNDC = positionSS * _ScreenSize.zw + (0.5 * _ScreenSize.zw);
    positionWS = ComputeWorldSpacePosition(positionNDC, deviceDepth, UNITY_MATRIX_I_VP);
    V = GetWorldSpaceNormalizeViewDir(positionWS);

    N = normalData.normalWS;

#ifdef SAMPLES_VNDF
    SampleGGX_VNDF(roughness,
        localToWorld,
        V,
        Xi,
        L,
        weight);

    NdotV = dot(normalData.normalWS, V);
    NdotL = dot(normalData.normalWS, L);
    float3 H = normalize(V + L);
    NdotH = dot(normalData.normalWS, H);
    VdotH = dot(V, H);
#else
    SampleGGXDir(Xi, V, localToWorld, roughness, L, NdotL, NdotH, VdotH);

    NdotV = dot(normalData.normalWS, V);
    float Vg = V_SmithJointGGX(NdotL, NdotV, roughness);

    weight = 4.0f * NdotL * VdotH * Vg / NdotH;
#endif
}

// Reprojects an NDC position into the previous frame by subtracting the motion vector
// sampled at that position.
float2 GetHitNDC(float2 positionNDC)
{
    // TODO: it's important to account for occlusion/disocclusion to avoid artifacts in motion.
    // This would require keeping the depth buffer from the previous frame.

    // Keep the lookup at least half a texel away from the upper edge before applying the RT handle scale.
    float2 clampedNDC = min(positionNDC, 1.0f - 0.5f * _ScreenSize.zw);
    float2 motionUV   = clampedNDC * _RTHandleScale.xy;

    float2 motionNDC;
    DecodeMotionVector(SAMPLE_TEXTURE2D_X_LOD(_CameraMotionVectorsTexture, s_linear_clamp_sampler, motionUV, 0), motionNDC);

    return positionNDC - motionNDC;
}

// Reconstructs the world space position of the pixel 'positionSS' from the depth source
// (_DepthTexture when DEPTH_SOURCE_NOT_FROM_MIP_CHAIN is set, _CameraDepthTexture otherwise).
float3 GetWorldSpacePosition(uint2 positionSS)
{
#ifdef DEPTH_SOURCE_NOT_FROM_MIP_CHAIN
    float  deviceDepth = LOAD_TEXTURE2D_X(_DepthTexture, positionSS).r;
#else
    float  deviceDepth = LOAD_TEXTURE2D_X(_CameraDepthTexture, positionSS).r;
#endif

    // NDC position of the texel center.
    float2 positionNDC = positionSS *_ScreenSize.zw + (0.5 * _ScreenSize.zw);

    return ComputeWorldSpacePosition(positionNDC, deviceDepth, UNITY_MATRIX_I_VP);
}

// Fetches the hit point stored for 'positionSS' and reconstructs two world space positions:
//   positionSrcWS : position of the pixel itself.
//   positionDstWS : position of the pixel the stored hit point refers to.
// Returns the stored hit point (NDC).
float2 GetWorldSpacePoint(uint2 positionSS, out float3 positionSrcWS, out float3 positionDstWS)
{
    const float2 storedHitNDC = _SsrHitPointTexture[COORD_TEXTURE2D_X(positionSS)].xy;

    // Undo the half-texel bias and go back from NDC to pixel coordinates.
    const float2 hitPixel = (storedHitNDC.xy - (0.5 * _ScreenSize.zw)) / _ScreenSize.zw;
    const uint2  hitSS    = uint2(hitPixel);

    positionSrcWS = GetWorldSpacePosition(positionSS);
    positionDstWS = GetWorldSpacePosition(hitSS);

    return storedHitNDC.xy;
}

// Samples the color pyramid at the reprojected hit position.
//   hitPositionNDC      : hit position in the current frame (NDC).
//   perceptualRoughness : drives the roughness fade of the opacity.
//   opacity             : edge-of-screen fade multiplied by the roughness fade.
//   mipLevel            : color pyramid MIP to sample.
float3 GetHitColor(float2 hitPositionNDC, float perceptualRoughness, out float opacity, int mipLevel = 0)
{
    const float2 reprojectedNDC = GetHitNDC(hitPositionNDC);
    const float2 reprojectedUV  = reprojectedNDC * _ColorPyramidUvScaleAndLimitPrevFrame.xy;

    const float roughnessFade = PerceptualRoughnessFade(perceptualRoughness, _SsrRoughnessFadeRcpLength, _SsrRoughnessFadeEndTimesRcpLength);
    const float edgeFade      = EdgeOfScreenFade(reprojectedNDC, _SsrEdgeFadeRcpLength);
    opacity = edgeFade * roughnessFade;

    return SAMPLE_TEXTURE2D_X_LOD(_ColorPyramidTexture, s_trilinear_clamp_sampler, reprojectedUV, mipLevel).rgb;
}

// Evaluates the SSR sample stored for the pixel 'positionSS'.
//   color   : color pyramid value at the reprojected hit position (MIP 0).
//   weight  : GetSSRSampleWeight() for the direction from the pixel towards its hit point.
//   opacity : fade factor returned by GetHitColor().
// Returns the hit point (NDC) stored for this pixel.
float2 GetSampleInfo(uint2 positionSS, out float3 color, out float weight, out float opacity)
{
    float3 positionSrcWS;
    float3 positionDstWS;
    float2 hitData = GetWorldSpacePoint(positionSS, positionSrcWS, positionDstWS);

    // The light direction is rebuilt from the two reconstructed positions.
    float3 V = GetWorldSpaceNormalizeViewDir(positionSrcWS);
    float3 L = normalize(positionDstWS - positionSrcWS);

    NormalData normalData;
    DecodeFromNormalBuffer(positionSS, normalData);

    float roughness = PerceptualRoughnessToRoughness(normalData.perceptualRoughness);

    roughness = clamp(roughness, MIN_GGX_ROUGHNESS, MAX_GGX_ROUGHNESS);

    weight = GetSSRSampleWeight(V, L, roughness);

    color = GetHitColor(hitData.xy, normalData.perceptualRoughness, opacity, 0);

    return hitData;
}

// Loads the world space normal and the perceptual roughness of the pixel 'positionSS'.
// Pixels flagged in the clear coat mask report CLEAR_COAT_PERCEPTUAL_ROUGHNESS instead of the
// roughness stored in the normal buffer.
void GetNormalAndPerceptualRoughness(uint2 positionSS, out float3 normalWS, out float perceptualRoughness)
{
    NormalData decoded;
    DecodeFromNormalBuffer(positionSS, decoded);

    const float4 coatMask = _SsrClearCoatMaskTexture[COORD_TEXTURE2D_X(positionSS)];

    normalWS = decoded.normalWS;

    if (HasClearCoatMask(coatMask))
    {
        perceptualRoughness = CLEAR_COAT_PERCEPTUAL_ROUGHNESS;
    }
    else
    {
        perceptualRoughness = decoded.perceptualRoughness;
    }
}

// Stores 'value' in the debug texture at 'positionSS'.
// Compiles to a no-op unless the local DEBUG tweak is defined.
void WriteDebugInfo(uint2 positionSS, float4 value)
{
#if defined(DEBUG)
    _SsrDebugTexture[positionSS.xy] = value;
#endif
}

//--------------------------------------------------------------------------------------------------
// Implementation
//--------------------------------------------------------------------------------------------------

#define USE_COARSE_STENCIL 0
#ifdef SSR_TRACE

// Tracing kernel.
// For every pixel whose stencil value has _SsrStencilBit set, builds a reflection ray and marches
// it in screen space through the depth pyramid, moving up and down the MIP chain as it goes.
// On a hit, the NDC position of the center of the hit texel is written to _SsrHitPointTexture;
// on a miss or an early-out, nothing is written for the pixel.
// With SSR_APPROX the ray is the mirror reflection of the view vector; otherwise the direction
// is the one sampled by GetHitInfos().
[numthreads(8, 8, 1)]
void ScreenSpaceReflectionsTracing(uint3 groupId          : SV_GroupID,
                                   uint3 dispatchThreadId : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
    uint2 positionSS = dispatchThreadId.xy;

    bool doesntReceiveSSR = false;

    // NOTE: Profiling showed that generating the HTile for SSR and using it is currently not worth the optimization.
    // However, if the generated HTile is used for something other than SSR, this should be re-enabled here and in C#.
#if USE_COARSE_STENCIL
    // Check HTile first. Note htileAddress should be already in scalar before WaveReadLaneFirst, but forcing it to be sure.
    // TODO: Verify the need of WaveReadLaneFirst
    uint htileAddress = WaveReadLaneFirst(Get1DAddressFromPixelCoord(groupId.xy, _CoarseStencilBufferSize.xy, groupId.z));
    uint htileValue   = _CoarseStencilBuffer[htileAddress];

    doesntReceiveSSR = (htileValue & _SsrStencilBit) == 0;
    if (doesntReceiveSSR)
    {
        WriteDebugInfo(positionSS, -1);
        return;
    }
#endif

    // Per-pixel stencil test: skip pixels that are not flagged to receive SSR.
    uint stencilValue = GetStencilValue(LOAD_TEXTURE2D_X(_StencilTexture, dispatchThreadId.xy));
    doesntReceiveSSR = (stencilValue & _SsrStencilBit) == 0;
    if (doesntReceiveSSR)
    {
        WriteDebugInfo(positionSS, -1);
        return;
    }

    // NOTE(review): 'normalData' is not referenced anywhere below in this kernel; the normal and
    // roughness used for tracing come from GetNormalAndPerceptualRoughness() / GetHitInfos().
    NormalData normalData;
    DecodeFromNormalBuffer(positionSS, normalData);

#ifdef DEPTH_SOURCE_NOT_FROM_MIP_CHAIN
    float  deviceDepth = LOAD_TEXTURE2D_X(_DepthTexture, positionSS).r;
#else
    float  deviceDepth = LOAD_TEXTURE2D_X(_CameraDepthTexture, positionSS).r;
#endif

#ifdef SSR_APPROX
    float2 positionNDC = positionSS * _ScreenSize.zw + (0.5 * _ScreenSize.zw); // Should we precompute the half-texel bias? We seem to use it a lot.
    float3 positionWS = ComputeWorldSpacePosition(positionNDC, deviceDepth, UNITY_MATRIX_I_VP); // Jittered
    float3 V = GetWorldSpaceNormalizeViewDir(positionWS);

    float3 N;
    float perceptualRoughness;
    GetNormalAndPerceptualRoughness(positionSS, N, perceptualRoughness);

    // Mirror reflection of the view vector.
    float3 R = reflect(-V, N);
#else
    float weight;
    float NdotL, NdotH, VdotH, NdotV;
    float3 R, V, N;
    float3 positionWS;
    float2 hitPositionNDC;
    float perceptualRoughness;
    // The sampled direction 'L' of GetHitInfos() is received in 'R'.
    GetHitInfos(positionSS, hitPositionNDC, perceptualRoughness, positionWS, weight, N, R, V, NdotL, NdotH, VdotH, NdotV);

    // Discard samples pointing (almost) below the surface or with a negligible weight.
    if (NdotL < 0.001f || weight < 0.001f)
    {
        WriteDebugInfo(positionSS, -1);
        return;
    }
#endif

    float3 camPosWS = GetCurrentViewPosition();

    // Apply normal bias with the magnitude dependent on the distance from the camera.
    // Unfortunately, we only have access to the shading normal, which is less than ideal...
    positionWS = camPosWS + (positionWS - camPosWS) * (1 - 0.001 * rcp(max(dot(N, V), FLT_EPS)));
    deviceDepth = ComputeNormalizedDeviceCoordinatesWithZ(positionWS, UNITY_MATRIX_VP).z;
    bool killRay = deviceDepth == UNITY_RAW_FAR_CLIP_VALUE;

    // Ref. #1: Michal Drobot - Quadtree Displacement Mapping with Height Blending.
    // Ref. #2: Yasin Uludag  - Hi-Z Screen-Space Cone-Traced Reflections.
    // Ref. #3: Jean-Philippe Grenier - Notes On Screen Space HIZ Tracing.
    // Warning: virtually all of the code below assumes reverse Z.

    // We start tracing from the center of the current pixel, and do so up to the far plane.
    float3 rayOrigin = float3(positionSS + 0.5, deviceDepth);

    // The ray direction in screen space (pixels, device depth) is obtained by projecting a second
    // point located one unit along R.
    float3 reflPosWS  = positionWS + R;
    float3 reflPosNDC = ComputeNormalizedDeviceCoordinatesWithZ(reflPosWS, UNITY_MATRIX_VP); // Jittered
    float3 reflPosSS  = float3(reflPosNDC.xy * _ScreenSize.xy, reflPosNDC.z);
    float3 rayDir     = reflPosSS - rayOrigin;
    float3 rcpRayDir  = rcp(rayDir);
    int2   rayStep    = int2(rcpRayDir.x >= 0 ? 1 : 0,
                             rcpRayDir.y >= 0 ? 1 : 0);
    float3 raySign  = float3(rcpRayDir.x >= 0 ? 1 : -1,
                             rcpRayDir.y >= 0 ? 1 : -1,
                             rcpRayDir.z >= 0 ? 1 : -1);
    bool   rayTowardsEye  =  rcpRayDir.z >= 0;

    // Note that we don't need to store or read the perceptualRoughness value
    // if we mark stencil during the G-Buffer pass with pixels which should receive SSR,
    // and sample the color pyramid during the lighting pass.
    killRay = killRay || (reflPosSS.z <= 0);
    killRay = killRay || (dot(N, V) <= 0);
    killRay = killRay || (perceptualRoughness > _SsrRoughnessFadeEnd);
#ifndef SSR_TRACE_TOWARDS_EYE
    killRay = killRay || rayTowardsEye;
#endif

    if (killRay)
    {
        WriteDebugInfo(positionSS, -1);
        return;
    }

    // Extend and clip the end point to the frustum.
    float tMax;
    {
        // Shrink the frustum by half a texel for efficiency reasons.
        const float halfTexel = 0.5;

        float3 bounds;
        bounds.x = (rcpRayDir.x >= 0) ? _ScreenSize.x - halfTexel : halfTexel;
        bounds.y = (rcpRayDir.y >= 0) ? _ScreenSize.y - halfTexel : halfTexel;
        // If we do not want to intersect the skybox, it is more efficient to not trace too far.
        float maxDepth = (_SsrReflectsSky != 0) ? -0.00000024 : 0.00000024; // 2^-22
        bounds.z = (rcpRayDir.z >= 0) ? 1 : maxDepth;

        float3 dist = bounds * rcpRayDir - (rayOrigin * rcpRayDir);
        tMax = Min3(dist.x, dist.y, dist.z);
    }

    // Clamp the MIP level to give the compiler more information to optimize.
    const int maxMipLevel = min(_SsrDepthPyramidMaxMip, 14);

    // Start ray marching from the next texel to avoid self-intersections.
    float t;
    {
        // 'rayOrigin' is the exact texel center.
        float2 dist = abs(0.5 * rcpRayDir.xy);
        t = min(dist.x, dist.y);
    }

    // NOTE(review): 'rayPos' is only assigned inside the loop below. If the loop body never runs
    // (t > tMax, or _SsrIterLimit <= 0), the reads of 'rayPos' after the loop use an unassigned value;
    // 'hit' is false in that case, so nothing is written to _SsrHitPointTexture.
    float3 rayPos;

    int  mipLevel  = 0;
    int  iterCount = 0;
    bool hit       = false;
    bool miss      = false;
    bool belowMip0 = false; // This value is set prior to entering the cell

    while (!(hit || miss) && (t <= tMax) && (iterCount < _SsrIterLimit))
    {
        rayPos = rayOrigin + t * rayDir;

        // Ray position often ends up on the edge. To determine (and look up) the right cell,
        // we need to bias the position by a small epsilon in the direction of the ray.
        float2 sgnEdgeDist = round(rayPos.xy) - rayPos.xy;
        float2 satEdgeDist = clamp(raySign.xy * sgnEdgeDist + SSR_TRACE_EPS, 0, SSR_TRACE_EPS);
        rayPos.xy += raySign.xy * satEdgeDist;

        // Cell of the current MIP containing the ray position, and the offset of that MIP inside
        // the depth pyramid texture.
        int2 mipCoord  = (int2)rayPos.xy >> mipLevel;
        int2 mipOffset = _DepthPyramidMipLevelOffsets[mipLevel];
        // Bounds define 4 faces of a cube:
        // 2 walls in front of the ray, and a floor and a base below it.
        float4 bounds;

        bounds.xy = (mipCoord + rayStep) << mipLevel;
        bounds.z  = LOAD_TEXTURE2D_X(_CameraDepthTexture, mipOffset + mipCoord).r;

        // We define the depth of the base as the depth value as:
        // b = DeviceDepth((1 + thickness) * LinearDepth(d))
        // b = ((f - n) * d + n * (1 - (1 + thickness))) / ((f - n) * (1 + thickness))
        // b = ((f - n) * d - n * thickness) / ((f - n) * (1 + thickness))
        // b = d / (1 + thickness) - n / (f - n) * (thickness / (1 + thickness))
        // b = d * k_s + k_b
        bounds.w = bounds.z * _SsrThicknessScale + _SsrThicknessBias;

        // Ray parameters at which the ray crosses each of the 4 faces.
        float4 dist      = bounds * rcpRayDir.xyzz - (rayOrigin.xyzz * rcpRayDir.xyzz);
        float  distWall  = min(dist.x, dist.y);
        float  distFloor = dist.z;
        float  distBase  = dist.w;

        // Note: 'rayPos' given by 't' can correspond to one of several depth values:
        // - above or exactly on the floor
        // - inside the floor (between the floor and the base)
        // - below the base
    #if 0
        bool belowFloor  = (raySign.z * (t - distFloor)) <  0;
        bool aboveBase   = (raySign.z * (t - distBase )) >= 0;
    #else
        bool belowFloor  = rayPos.z  < bounds.z;
        bool aboveBase   = rayPos.z >= bounds.w;
    #endif
        bool insideFloor = belowFloor && aboveBase;
        bool hitFloor    = (t <= distFloor) && (distFloor <= distWall);

        // Game rules:
        // * if the closest intersection is with the wall of the cell, switch to the coarser MIP, and advance the ray.
        // * if the closest intersection is with the heightmap below,  switch to the finer   MIP, and advance the ray.
        // * if the closest intersection is with the heightmap above,  switch to the finer   MIP, and do NOT advance the ray.
        // Victory conditions:
        // * See below. Do NOT reorder the statements!

    #ifdef SSR_TRACE_BEHIND_OBJECTS
        miss      = belowMip0 && insideFloor;
    #else
        miss      = belowMip0;
    #endif
        hit       = (mipLevel == 0) && (hitFloor || insideFloor);
        belowMip0 = (mipLevel == 0) && belowFloor;

        // 'distFloor' can be smaller than the current distance 't'.
        // We can also safely ignore 'distBase'.
        // If we hit the floor, it's always safe to jump there.
        // If we are at (mipLevel != 0) and we are below the floor, we should not move.
        t = hitFloor ? distFloor : (((mipLevel != 0) && belowFloor) ? t : distWall);
        rayPos.z = bounds.z; // Retain the depth of the potential intersection

        // Warning: both rays towards the eye, and tracing behind objects has linear
        // rather than logarithmic complexity! This is due to the fact that we only store
        // the maximum value of depth, and not the min-max.
        mipLevel += (hitFloor || belowFloor || rayTowardsEye) ? -1 : 1;
        mipLevel  = clamp(mipLevel, 0, maxMipLevel);

        // mipLevel = 0;

        iterCount++;
    }

    // Treat intersections with the sky as misses.
    miss = miss || ((_SsrReflectsSky == 0) && (rayPos.z == 0));
    hit  = hit && !miss;

    if (hit)
    {
        // Note that we are using 'rayPos' from the penultimate iteration, rather than
        // recompute it using the last value of 't', which would result in an overshoot.
        // It also needs to be precisely at the center of the pixel to avoid artifacts.
        float2 hitPositionNDC = floor(rayPos.xy) * _ScreenSize.zw + (0.5 * _ScreenSize.zw); // Should we precompute the half-texel bias? We seem to use it a lot.
        _SsrHitPointTexture[COORD_TEXTURE2D_X(positionSS)] = hitPositionNDC.xy;
    }

    // If we do not hit anything, 'rayPos.xy' provides an indication where we stopped the search.
    WriteDebugInfo(positionSS, float4(rayPos.xy, iterCount, hit ? 1 : 0));
}

#elif defined(SSR_REPROJECT)

// Reprojection kernel.
// Reads the hit point stored by the tracing kernel, reprojects it into the previous frame with
// the motion vectors and fetches the reflected color from the color pyramid.
// - SSR_APPROX: a single color pyramid tap whose MIP is chosen from the perceptual roughness.
// - Otherwise : a weighted average of the samples of the pixel and its 4 direct neighbors.
// The result (premultiplied by the opacity) is written to _SSRAccumTexture. Pixels without a
// hit, or whose reprojected position is off-screen, are left untouched.
[numthreads(8, 8, 1)]
void ScreenSpaceReflectionsReprojection(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    const uint2 positionSS0 = dispatchThreadId.xy;

    // Only the perceptual roughness is used below; N is a mandatory output of the helper.
    float3 N;
    float perceptualRoughness;
    GetNormalAndPerceptualRoughness(positionSS0, N, perceptualRoughness);

    float2 hitPositionNDC = LOAD_TEXTURE2D_X(_SsrHitPointTexture, positionSS0).xy;

    if (max(hitPositionNDC.x, hitPositionNDC.y) == 0)
    {
        // Miss.
        return;
    }

    // TODO: this texture is sparse (mostly black). Can we avoid reading every texel? How about using Hi-S?
    float2 motionVectorNDC;
    DecodeMotionVector(SAMPLE_TEXTURE2D_X_LOD(_CameraMotionVectorsTexture, s_linear_clamp_sampler, min(hitPositionNDC, 1.0f - 0.5f * _ScreenSize.zw) * _RTHandleScale.xy, 0), motionVectorNDC);
    float2 prevFrameNDC = hitPositionNDC - motionVectorNDC;
    float2 prevFrameUV = prevFrameNDC * _ColorPyramidUvScaleAndLimitPrevFrame.xy;

    // TODO: optimize with max().
    if ((prevFrameUV.x < 0) || (prevFrameUV.x > _ColorPyramidUvScaleAndLimitPrevFrame.z) ||
        (prevFrameUV.y < 0) || (prevFrameUV.y > _ColorPyramidUvScaleAndLimitPrevFrame.w))
    {
        // Off-Screen.
        return;
    }
    float  opacity  = EdgeOfScreenFade(prevFrameNDC, _SsrEdgeFadeRcpLength)
                    * PerceptualRoughnessFade(perceptualRoughness, _SsrRoughnessFadeRcpLength, _SsrRoughnessFadeEndTimesRcpLength);

#ifdef SSR_APPROX
    // TODO: filtering is quite awful. Needs to be non-Gaussian, bilateral and anisotropic.
    float  mipLevel = lerp(0, _SsrColorPyramidMaxMip, perceptualRoughness);

    // Note that the color pyramid uses its own viewport scale, since it lives on the camera.
    float3 color    = SAMPLE_TEXTURE2D_X_LOD(_ColorPyramidTexture, s_trilinear_clamp_sampler, prevFrameUV, mipLevel).rgb;

    // Disable SSR for negative, infinite and NaN history values.
    uint3 intCol   = asuint(color);
    bool  isPosFin = Max3(intCol.r, intCol.g, intCol.b) < 0x7F800000;

    color   = isPosFin ? color   : 0;
    opacity = isPosFin ? opacity : 0;

    _SSRAccumTexture[COORD_TEXTURE2D_X(positionSS0)] = float4(color, 1.0f) * opacity;
#else
#define BLOCK_SAMPLE_RADIUS 1
    float4 outputs = 0.0f;
    float wAll = 0.0f;
    for (int y = -BLOCK_SAMPLE_RADIUS; y <= BLOCK_SAMPLE_RADIUS; ++y)
    {
        for (int x = -BLOCK_SAMPLE_RADIUS; x <= BLOCK_SAMPLE_RADIUS; ++x)
        {
            // Skip the 4 diagonal neighbors: only the center and its direct neighbors contribute.
            if (abs(x) == abs(y) && abs(x) == 1)
                continue;

            uint2 positionSS = uint2(int2(positionSS0) + int2(x, y));

            float3 sampleColor;
            float sampleOpacity;
            float sampleWeight;
            float2 hitData = GetSampleInfo(positionSS, sampleColor, sampleWeight, sampleOpacity);
            if (max(hitData.x, hitData.y) != 0.0f && sampleOpacity > 0.0f)
            {
                // Disable SSR for negative, infinite and NaN history values.
                uint3 intCol   = asuint(sampleColor);
                bool  isPosFin = Max3(intCol.r, intCol.g, intCol.b) < 0x7F800000;

                sampleColor = isPosFin ? sampleColor : 0;

                outputs += sampleWeight * float4(sampleColor, 1.0f);
                wAll += sampleWeight;
            }
        }
    }
#undef BLOCK_SAMPLE_RADIUS

    if (wAll > 0.0f)
    {
        uint3 intCol = asuint(outputs.rgb);
        bool  isPosFin = Max3(intCol.r, intCol.g, intCol.b) < 0x7F800000;

        // Normalize by the accumulated weight and apply the opacity of the center pixel.
        // When the accumulated color is negative, infinite or NaN, write zero instead of the
        // normalized value: the total weight must not be zeroed before the division, as 0 / 0
        // would store NaN in the accumulation texture.
        float4 normalized = opacity * outputs / wAll;

        _SSRAccumTexture[COORD_TEXTURE2D_X(positionSS0)] = isPosFin ? normalized : float4(0.0f, 0.0f, 0.0f, 0.0f);
    }
#endif
}

#elif defined(SSR_ACCUMULATE)

// Accumulation kernel.
// Blends the reflection of the current frame (_SSRAccumTexture) with the history (_SsrAccumPrev)
// using an exponential moving average. The blend factor goes from _SsrAccumulationAmount towards 1
// (current frame only) as the motion at the pixel and at its hit point increases.
// The result is written to both _SsrLightingTextureRW and _SSRAccumTexture.
[numthreads(8, 8, 1)]
void ScreenSpaceReflectionsAccumulate(uint3 dispatchThreadId : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
    uint2 positionSS = dispatchThreadId.xy;

    float2 hitPositionNDC = LOAD_TEXTURE2D_X(_SsrHitPointTexture, positionSS).xy;

    // Ratio between the previous and the current history RT handle scales.
    float2 prevHistoryScale = _RTHandleScaleHistory.zw / _RTHandleScaleHistory.xy;

    float4 original = _SSRAccumTexture[COORD_TEXTURE2D_X(positionSS)];
    float4 previous = _SsrAccumPrev[COORD_TEXTURE2D_X(positionSS * prevHistoryScale + 0.5f / prevHistoryScale)];

    // Motion at the hit point.
    float2 motionVectorNDC;
    DecodeMotionVector(SAMPLE_TEXTURE2D_X_LOD(_CameraMotionVectorsTexture, s_linear_clamp_sampler, min(hitPositionNDC.xy, 1.0f - 0.5f * _ScreenSize.zw) * _RTHandleScale.xy, 0), motionVectorNDC);
    float speedDst = length(motionVectorNDC);

    // Motion at the pixel itself.
    float2 motionVectorCenterNDC;
    float2 positionNDC = positionSS * _ScreenSize.zw + (0.5 * _ScreenSize.zw);
    DecodeMotionVector(SAMPLE_TEXTURE2D_X_LOD(_CameraMotionVectorsTexture, s_linear_clamp_sampler, min(positionNDC, 1.0f - 0.5f * _ScreenSize.zw) * _RTHandleScale.xy, 0), motionVectorCenterNDC);
    float speedSrc = length(motionVectorCenterNDC);
    float speed = saturate((speedDst + speedSrc) * 128.0f); // 128 is arbitrary

    // The faster things move, the less history is kept.
    float coefExpAvg = lerp(_SsrAccumulationAmount, 1.0f, speed);

    float4 result = lerp(previous, original, coefExpAvg);

    // Discard negative, infinite and NaN results so that they do not pollute the history.
    uint3 intCol = asuint(result.rgb);
    bool  isPosFin = Max3(intCol.r, intCol.g, intCol.b) < 0x7F800000;

    result.rgb = isPosFin ? result.rgb : 0;
    result.w = isPosFin ? result.w : 0;

    _SsrLightingTextureRW[COORD_TEXTURE2D_X(positionSS)] = result;
    _SSRAccumTexture[COORD_TEXTURE2D_X(positionSS)] = result;
}

#endif

#undef MIN_GGX_ROUGHNESS
#undef MAX_GGX_ROUGHNESS