// HDRP generic includes
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Color.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Material.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/NormalBuffer.hlsl"
// Raytracing includes (should probably be in generic files)
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/ShaderVariablesRaytracing.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RaytracingSampling.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/Raytracing/Shaders/RayTracingCommon.hlsl"
// #pragma enable_d3d11_debug_symbols
#pragma kernel TraceGlobalIllumination TRACE_GLOBAL_ILLUMINATION=TraceGlobalIllumination GI_TRACE
#pragma kernel TraceGlobalIlluminationHalf TRACE_GLOBAL_ILLUMINATION=TraceGlobalIlluminationHalf GI_TRACE HALF_RES
#pragma kernel ReprojectGlobalIllumination REPROJECT_GLOBAL_ILLUMINATION=ReprojectGlobalIllumination GI_REPROJECT
#pragma kernel ReprojectGlobalIlluminationHalf REPROJECT_GLOBAL_ILLUMINATION=ReprojectGlobalIlluminationHalf GI_REPROJECT HALF_RES
#pragma kernel ConvertYCoCgToRGB CONVERT_YCOCG_TO_RGB=ConvertYCoCgToRGB
#pragma kernel ConvertYCoCgToRGBHalf CONVERT_YCOCG_TO_RGB=ConvertYCoCgToRGBHalf HALF_RES
// The dispatch tile resolution
#define INDIRECT_DIFFUSE_TILE_SIZE 8
// Epsilon value used to nudge ray positions off cell edges during the march (0.00024414 ≈ 2^-12)
#define GI_TRACE_EPS 0.00024414
#define PERCEPTUAL_SPACE
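// When defined, the lighting signal is stored in a tonemapped x / (1 + x) space to compress its range,
// and the mapping is inverted in the final convert pass (see the PERCEPTUAL_SPACE blocks below).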
// Input depth pyramid texture
TEXTURE2D_X(_DepthTexture);
// Input texture that holds the offset for every level of the depth pyramid
StructuredBuffer<int2> _DepthPyramidMipLevelOffsets;
// Flag value that defines whether a given pixel receives screen space global illumination or not
TEXTURE2D_X_UINT2(_StencilTexture);
// Constant buffer that holds all the scalar values that we need
CBUFFER_START(UnityScreenSpaceGlobalIllumination)
int _IndirectDiffuseSteps;
float _IndirectDiffuseThicknessScale;
float _IndirectDiffuseThicknessBias;
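// Following the derivation in RayMarch (b = d * k_s + k_b), these two are presumably bound on the CPU side as:
// _IndirectDiffuseThicknessScale = 1 / (1 + thickness)
// _IndirectDiffuseThicknessBias = -(n / (f - n)) * thickness / (1 + thickness)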
int _IndirectDiffuseProbeFallbackFlag;
int _IndirectDiffuseProbeFallbackBias;
float4 _ColorPyramidUvScaleAndLimitPrevFrame;
int _SsrStencilBit;
int _IndirectDiffuseFrameIndex;
CBUFFER_END
// Output texture that holds the hit point NDC coordinates
RW_TEXTURE2D_X(float2, _IndirectDiffuseHitPointTextureRW);
// TODO: It is not really possible to share all of this with SSR for a couple of reasons, but it should be possible to share part of it
bool RayMarch(float3 positionWS, float3 sampleDir, float3 normalWS, float2 positionSS, float deviceDepth, bool killRay, out float3 rayPos)
{
// Initialize ray pos to invalid value
rayPos = float3(-1.0, -1.0, -1.0);
// Returning early triggers a warning on Vulkan and Metal, so tracking the status in a variable is the only way we found to avoid it.
bool status = false;
// If the point is behind the camera or the ray is invalid, this ray should not be cast
if (!killRay)
{
// We start tracing from the center of the current pixel, and do so up to the far plane.
float3 rayOrigin = float3(positionSS + 0.5, deviceDepth);
float3 sampledPosWS = positionWS + sampleDir * 0.001;
float3 sampledPosNDC = ComputeNormalizedDeviceCoordinatesWithZ(sampledPosWS, UNITY_MATRIX_VP); // Jittered
float3 sampledPosSS = float3(sampledPosNDC.xy * _ScreenSize.xy, sampledPosNDC.z);
float3 rayDir = (sampledPosSS - rayOrigin);
float2 rayDirSS = normalize(rayDir.xy);
float tx = (rayDirSS.x > 0 ? (_ScreenSize.x - rayOrigin.x) : rayOrigin.x) / rayDirSS.x;
float ty = (rayDirSS.y > 0 ? (_ScreenSize.y - rayOrigin.y) : rayOrigin.y) / rayDirSS.y;
// Compute the actual ray direction
rayDir = min(abs(tx), abs(ty)) * normalize(rayDir);
// Compute the reciprocal of the direction (used by all the ray-plane intersection tests below)
float3 rcpRayDir = rcp(rayDir);
// Compute a ray step (adding an abs here looks better, but it may be wrong and needs more checking)
int2 rayStep = int2((rcpRayDir.x) >= 0 ? 1 : 0,
(rcpRayDir.y) >= 0 ? 1 : 0);
float3 raySign = float3(rcpRayDir.x >= 0 ? 1 : -1,
rcpRayDir.y >= 0 ? 1 : -1,
rcpRayDir.z >= 0 ? 1 : -1);
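// With a reversed-Z depth buffer, a ray whose device depth increases travels towards the near plane, i.e. the eye.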
bool rayTowardsEye = rcpRayDir.z >= 0;
// Build the bounds that start at the center of the pixel and travel to the edge of the screen
float tMax;
{
// Shrink the frustum by half a texel for efficiency reasons.
const float halfTexel = 0.5;
float3 bounds;
bounds.x = clamp(rayOrigin.x + rayDir.x, halfTexel, _ScreenSize.x - halfTexel);
bounds.y = clamp(rayOrigin.y + rayDir.y, halfTexel, _ScreenSize.y - halfTexel);
// If we do not want to intersect the skybox, it is more efficient to not trace too far.
float maxDepth = -0.00000024; // -2^-22
bounds.z = (rcpRayDir.z >= 0) ? 1 : maxDepth;
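// Distance along the ray to each bounding plane: t = (bound - origin) / dir, written in a factored form.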
float3 dist = bounds * rcpRayDir - (rayOrigin * rcpRayDir);
tMax = Min3(dist.x, dist.y, dist.z);
}
// Start ray marching from the next texel to avoid self-intersections.
float t;
{
// 'rayOrigin' is the exact texel center.
float2 dist = abs(0.5 * rcpRayDir.xy);
t = min(dist.x, dist.y);
}
int mipLevel = 0;
int2 mipOffset = _DepthPyramidMipLevelOffsets[mipLevel];
int iterCount = 0;
bool hit = false;
bool miss = false;
bool belowMip0 = false; // This value is set prior to entering the cell
while (!(hit || miss) && (t <= tMax) && (iterCount < _IndirectDiffuseSteps))
{
rayPos = rayOrigin + t * rayDir;
// Ray position often ends up on the edge. To determine (and look up) the right cell,
// we need to bias the position by a small epsilon in the direction of the ray.
float2 sgnEdgeDist = round(rayPos.xy) - rayPos.xy;
float2 satEdgeDist = clamp(raySign.xy * sgnEdgeDist + GI_TRACE_EPS, 0, GI_TRACE_EPS);
rayPos.xy += raySign.xy * satEdgeDist;
int2 mipCoord = (int2)rayPos.xy >> mipLevel;
// Bounds define 4 faces of a cube:
// 2 walls in front of the ray, and a floor and a base below it.
float4 bounds;
bounds.z = LOAD_TEXTURE2D_X(_CameraDepthTexture, mipOffset + mipCoord).r;
bounds.xy = (mipCoord + rayStep) << mipLevel;
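// rayStep selects the wall in front of the ray along each axis; the shift converts cell coordinates back to texel units.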
// We define the depth of the base as:
// b = DeviceDepth((1 + thickness) * LinearDepth(d))
// b = ((f - n) * d + n * (1 - (1 + thickness))) / ((f - n) * (1 + thickness))
// b = ((f - n) * d - n * thickness) / ((f - n) * (1 + thickness))
// b = d / (1 + thickness) - n / (f - n) * (thickness / (1 + thickness))
// b = d * k_s + k_b
bounds.w = bounds.z * _IndirectDiffuseThicknessScale + _IndirectDiffuseThicknessBias;
float4 dist = bounds * rcpRayDir.xyzz - (rayOrigin.xyzz * rcpRayDir.xyzz);
float distWall = min(dist.x, dist.y);
float distFloor = dist.z;
float distBase = dist.w;
// Note: 'rayPos' given by 't' can correspond to one of several depth values:
// - above or exactly on the floor
// - inside the floor (between the floor and the base)
// - below the base
bool belowFloor = rayPos.z < bounds.z;
bool aboveBase = rayPos.z >= bounds.w;
bool insideFloor = belowFloor && aboveBase;
bool hitFloor = (t <= distFloor) && (distFloor <= distWall);
// Game rules:
// * if the closest intersection is with the wall of the cell, switch to the coarser MIP, and advance the ray.
// * if the closest intersection is with the heightmap below, switch to the finer MIP, and advance the ray.
// * if the closest intersection is with the heightmap above, switch to the finer MIP, and do NOT advance the ray.
// Victory conditions:
// * See below. Do NOT reorder the statements!
miss = belowMip0 && insideFloor;
hit = (mipLevel == 0) && (hitFloor || insideFloor);
belowMip0 = (mipLevel == 0) && belowFloor;
// 'distFloor' can be smaller than the current distance 't'.
// We can also safely ignore 'distBase'.
// If we hit the floor, it's always safe to jump there.
// If we are at (mipLevel != 0) and we are below the floor, we should not move.
t = hitFloor ? distFloor : (((mipLevel != 0) && belowFloor) ? t : distWall);
rayPos.z = bounds.z; // Retain the depth of the potential intersection
// Warning: both rays towards the eye and tracing behind objects have linear
// rather than logarithmic complexity! This is due to the fact that we only store
// the maximum value of depth, and not the min-max.
mipLevel += (hitFloor || belowFloor || rayTowardsEye) ? -1 : 1;
mipLevel = clamp(mipLevel, 0, 6);
mipOffset = _DepthPyramidMipLevelOffsets[mipLevel];
// mipLevel = 0;
iterCount++;
}
// Treat intersections with the sky as misses.
miss = miss || (rayPos.z == 0);
status = hit && !miss;
}
return status;
}
[numthreads(INDIRECT_DIFFUSE_TILE_SIZE, INDIRECT_DIFFUSE_TILE_SIZE, 1)]
void TRACE_GLOBAL_ILLUMINATION(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
// Compute the pixel position to process
uint2 currentCoord = dispatchThreadId.xy;
#if HALF_RES
// Compute the full resolution pixel for the inputs that do not have a pyramid
currentCoord = currentCoord * 2;
#endif
// Read the depth value as early as possible
float deviceDepth = LOAD_TEXTURE2D_X(_DepthTexture, currentCoord).x;
// Initialize the hitpoint texture to a miss
_IndirectDiffuseHitPointTextureRW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = float2(99.0, 0.0);
// Read the pixel normal
NormalData normalData;
DecodeFromNormalBuffer(currentCoord.xy, normalData);
// Generate a new direction to follow
float2 newSample;
newSample.x = GetBNDSequenceSample(currentCoord.xy, _RaytracingFrameIndex, 0);
newSample.y = GetBNDSequenceSample(currentCoord.xy, _RaytracingFrameIndex, 1);
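// Note: the reprojection kernel regenerates this direction with _IndirectDiffuseFrameIndex, so the two
// frame index constants must be bound to the same per-frame value for the reconstruction to match.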
// Importance sample with a cosine lobe (direction that will be used for ray casting)
float3 sampleDir = SampleHemisphereCosine(newSample.x, newSample.y, normalData.normalWS);
// Compute the camera position
float3 camPosWS = GetCurrentViewPosition();
// If this is a background pixel, we flag the ray as dead (we also try to postpone using the depth value as long as possible)
bool killRay = deviceDepth == UNITY_RAW_FAR_CLIP_VALUE;
// Convert this to a world space position (camera relative)
PositionInputs posInput = GetPositionInput(currentCoord, _ScreenSize.zw, deviceDepth, UNITY_MATRIX_I_VP, GetWorldToViewMatrix(), 0);
// Compute the view direction (world space)
float3 viewWS = GetWorldSpaceNormalizeViewDir(posInput.positionWS);
// Apply normal bias with the magnitude dependent on the distance from the camera.
// Unfortunately, we only have access to the shading normal, which is less than ideal...
posInput.positionWS = camPosWS + (posInput.positionWS - camPosWS) * (1 - 0.001 * rcp(max(dot(normalData.normalWS, viewWS), FLT_EPS)));
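// The rcp(NdotV) term above makes the bias grow at grazing angles, where self-intersections are the most likely.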
deviceDepth = ComputeNormalizedDeviceCoordinatesWithZ(posInput.positionWS, UNITY_MATRIX_VP).z;
// Ray March along our ray
float3 rayPos;
bool hit = RayMarch(posInput.positionWS, sampleDir, normalData.normalWS, posInput.positionSS, deviceDepth, killRay, rayPos);
// If we had a hit, store the NDC position of the intersection point
if (hit)
{
// Note that we are using 'rayPos' from the penultimate iteration, rather than
// recompute it using the last value of 't', which would result in an overshoot.
// It also needs to be precisely at the center of the pixel to avoid artifacts.
float2 hitPositionNDC = floor(rayPos.xy) * _ScreenSize.zw + (0.5 * _ScreenSize.zw); // Should we precompute the half-texel bias? We seem to use it a lot.
_IndirectDiffuseHitPointTextureRW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = hitPositionNDC;
}
}
// Input hit point texture that holds the NDC position if an intersection was found
TEXTURE2D_X(_IndirectDiffuseHitPointTexture);
// Depth buffer of the previous frame (full resolution)
TEXTURE2D_X(_HistoryDepthTexture);
// Output indirect diffuse texture
RW_TEXTURE2D_X(float4, _IndirectDiffuseTexture0RW);
RW_TEXTURE2D_X(float4, _IndirectDiffuseTexture1RW);
// The maximal difference in depth that is considered acceptable to read from the color pyramid
#define DEPTH_DIFFERENCE_THRESHOLD 0.1
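// Note that this threshold compares raw device-depth values, so the effective world-space tolerance is not uniform with distance.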
void RGBToYCoCgUtil(float3 inColor, float3 inDirection, out float4 outYSH, out float2 outCoCg)
{
// Convert the color to ycocg space
float3 yCoCg = RGBToYCoCg(inColor);
// Compute the coeffs required for the projection
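// Y00 = 1 / (2 * sqrt(PI)) ≈ 0.282095 and sqrt(3 / (4 * PI)) ≈ 0.488603 are the first two
// spherical harmonics bands, evaluated here in the sample direction.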
float Y00 = 0.282095;
float Y11 = 0.488603 * inDirection.x;
float Y10 = 0.488603 * inDirection.z;
float Y1_1 = 0.488603 * inDirection.y;
// Output the values
outYSH.x = yCoCg.x * Y00;
outYSH.y = yCoCg.x * Y11;
outYSH.z = yCoCg.x * Y10;
outYSH.w = yCoCg.x * Y1_1;
outCoCg.x = yCoCg.y;
outCoCg.y = yCoCg.z;
}
[numthreads(INDIRECT_DIFFUSE_TILE_SIZE, INDIRECT_DIFFUSE_TILE_SIZE, 1)]
void REPROJECT_GLOBAL_ILLUMINATION(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
// Compute the pixel position to process
uint2 currentCoord = groupId * INDIRECT_DIFFUSE_TILE_SIZE + groupThreadId;
#if HALF_RES
// Compute the full resolution pixel for the inputs that do not have a pyramid
currentCoord = currentCoord * 2;
#endif
float deviceDepth = LOAD_TEXTURE2D_X(_DepthTexture, currentCoord).x;
// Read the hit point ndc position to fetch
float2 hitPositionNDC = LOAD_TEXTURE2D_X(_IndirectDiffuseHitPointTexture, dispatchThreadId.xy).xy;
// Grab the depth of the hit point
float hitPointDepth = LOAD_TEXTURE2D_X(_DepthTexture, hitPositionNDC * _ScreenSize.xy).x;
// Flag that tracks if this ray led to a valid result
bool invalid = false;
// If this missed (the trace kernel writes a miss as float2(99.0, 0.0)), we need to find something else to fall back on
if (hitPositionNDC.x > 1.0)
invalid = true;
// Fetch the motion vector of the current target pixel
float2 motionVectorNDC;
DecodeMotionVector(SAMPLE_TEXTURE2D_X_LOD(_CameraMotionVectorsTexture, s_linear_clamp_sampler, hitPositionNDC, 0), motionVectorNDC);
float2 prevFrameNDC = hitPositionNDC - motionVectorNDC;
float2 prevFrameUV = prevFrameNDC * _ColorPyramidUvScaleAndLimitPrevFrame.xy;
// If the previous value to read was out of screen, this is invalid, needs a fallback
if ((prevFrameUV.x < 0)
|| (prevFrameUV.x > _ColorPyramidUvScaleAndLimitPrevFrame.z)
|| (prevFrameUV.y < 0)
|| (prevFrameUV.y > _ColorPyramidUvScaleAndLimitPrevFrame.w))
invalid = true;
// Grab the depth of the hit point and reject the history buffer if the depth is too different
// TODO: Find a better metric
float hitPointHistoryDepth = LOAD_TEXTURE2D_X(_HistoryDepthTexture, prevFrameNDC * _ScreenSize.xy).x;
if (abs(hitPointHistoryDepth - hitPointDepth) > DEPTH_DIFFERENCE_THRESHOLD)
invalid = true;
// Based on whether the intersection was valid or not, pick a source for the lighting
float3 color = 0.0;
if (!invalid)
// The intersection was considered valid, we can read from the color pyramid
color = SAMPLE_TEXTURE2D_X_LOD(_ColorPyramidTexture, s_linear_clamp_sampler, prevFrameUV, 0).rgb;
// TODO: Remove me when you can find where the NaNs come from
if (AnyIsNaN(color))
color = 0.0f;
// We need to recreate the direction that was generated
float2 newSample;
newSample.x = GetBNDSequenceSample(currentCoord.xy, _IndirectDiffuseFrameIndex, 0);
newSample.y = GetBNDSequenceSample(currentCoord.xy, _IndirectDiffuseFrameIndex, 1);
// Read the pixel normal
NormalData normalData;
DecodeFromNormalBuffer(currentCoord.xy, normalData);
#ifdef PERCEPTUAL_SPACE
// We tone map the signal: due to the very small budget for denoising, we need to compress its range
color = color / (1.0 + color);
#endif
// Recompute the direction that was used for the generation
float3 sampleDir = SampleHemisphereCosine(newSample.x, newSample.y, normalData.normalWS);
// Convert the color to our target format
float4 outYSH;
float2 outCoCg;
RGBToYCoCgUtil(color, sampleDir, outYSH, outCoCg);
// We are simply interested in whether the intersected pixel was moving, so we multiply the motion vector length by a big number
// TODO: make this process not binary
// Write the outputs to the target pixel
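// Texture0 packs the four luma SH coefficients; Texture1 packs CoCg, the history validity flag, and the scaled motion length.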
_IndirectDiffuseTexture0RW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = outYSH;
_IndirectDiffuseTexture1RW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = float4(outCoCg, invalid ? 0.0 : 1.0, length(motionVectorNDC * 10000.0f));
}
void ConvertYCoCgToRGBUtil(float4 inYSH, float2 inCoCg, float3 inNormal, out float3 outColor)
{
// Compute the coeffs required for the projection
float Y00 = 0.282095;
float Y11 = 0.488603 * inNormal.x;
float Y10 = 0.488603 * inNormal.z;
float Y1_1 = 0.488603 * inNormal.y;
// Compute the Y value
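// 3.141593 ≈ PI and 2.094395 ≈ 2 * PI / 3 are the standard cosine-lobe convolution factors (A0 and A1)
// used to turn SH-projected radiance into irradiance along the normal.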
float y = 3.141593 * Y00 * inYSH.x
+ 2.094395 * Y1_1 * inYSH.w
+ 2.094395 * Y10 * inYSH.z
+ 2.094395 * Y11 * inYSH.y;
// Compute the output color
outColor = y != 0.0 ? YCoCgToRGB(float3(y, inCoCg.xy)) : 0.0;
outColor = max(outColor, 0.0);
}
TEXTURE2D_X(_IndirectDiffuseTexture1);
[numthreads(INDIRECT_DIFFUSE_TILE_SIZE, INDIRECT_DIFFUSE_TILE_SIZE, 1)]
void CONVERT_YCOCG_TO_RGB(uint3 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID, uint2 groupId : SV_GroupID)
{
UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
// Fetch the current pixel coordinate
uint2 currentCoord = dispatchThreadId.xy;
#if HALF_RES
currentCoord = currentCoord * 2;
#endif
// Fetch the depth of the current pixel
float deviceDepth = LOAD_TEXTURE2D_X(_DepthTexture, currentCoord).x;
// If the depth of this pixel is the depth of the background, we can end the process right away
if (deviceDepth == UNITY_RAW_FAR_CLIP_VALUE)
{
_IndirectDiffuseTexture0RW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = float4(0.0, 0.0, 0.0, 0.0);
return;
}
// Fetch the normal
NormalData normalData;
DecodeFromNormalBuffer(currentCoord.xy, normalData);
// Convert the signal back to a color
float3 color;
float4 ySH = _IndirectDiffuseTexture0RW[COORD_TEXTURE2D_X(dispatchThreadId.xy)];
float3 cocgB = LOAD_TEXTURE2D_X(_IndirectDiffuseTexture1, dispatchThreadId.xy).xyz;
ConvertYCoCgToRGBUtil(ySH, cocgB.xy, normalData.normalWS, color);
#ifdef PERCEPTUAL_SPACE
// Invert the x / (1 + x) tonemap applied in the reprojection pass
color = color / (1.0 - color);
// The multiplication is wrong, but with all the approximations involved we need to compensate a bit for
// the fact that the signal was significantly attenuated (due to blurring in tonemapped space to reduce the blobbiness).
// This has been tested experimentally; however, it needs more testing and should potentially be reverted if found more harmful than useful
color *= (lerp(5.0, 1.0, cocgB.z));
#endif
// Does this pixel receive SSGI?
uint stencilValue = GetStencilValue(LOAD_TEXTURE2D_X(_StencilTexture, currentCoord));
if ((stencilValue & _SsrStencilBit) == 0)
cocgB.z = 0.0;
// Output the color as well as the blend factor
_IndirectDiffuseTexture0RW[COORD_TEXTURE2D_X(dispatchThreadId.xy)] = float4(color, cocgB.z);
}