111 lines
3.6 KiB
Plaintext
111 lines
3.6 KiB
Plaintext
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
|
|
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
|
|
|
|
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
|
|
|
|
#pragma kernel KMain
|
|
|
|
// Source CoC texture; KMain reads only its .x channel.
TEXTURE2D_X(_InputCoCTexture);
|
|
|
|
// Destination texture; VerticalPass writes one float per pixel.
RW_TEXTURE2D_X(float, _OutputCoCTexture);
|
|
|
|
CBUFFER_START(cb0)
|
|
// Only .xy is referenced in this file (through the Size macro below).
float4 _Params;
|
|
CBUFFER_END
|
|
|
|
// Upper clamp applied to the texel coordinates read in KMain.
// NOTE(review): the clamp is inclusive, so this presumably holds the last valid texel index
// (size - 1) rather than the texture size -- confirm against the C# caller.
#define Size uint2(_Params.xy)
|
|
|
|
// Group-shared cache covering a 16x16 pixel tile whose 8x8 center is written out. When the tile
|
|
// is loaded, each uint holds two horizontally adjacent CoC values packed as 16-bit halves
// (16 rows x 8 slots = 128). HorizontalPass then overwrites slots with single 32-bit floats.
|
|
groupshared uint gs_cache[128];
|
|
|
|
// Packs two values into one group-shared slot as 16-bit halves:
// pixel1 goes into the low 16 bits, pixel2 into the high 16 bits.
void Store2Pixels(uint index, float pixel1, float pixel2)
{
    uint lowHalf  = f32tof16(pixel1);
    uint highHalf = f32tof16(pixel2) << 16;
    gs_cache[index] = lowHalf | highHalf;
}
|
|
|
|
// Unpacks the two 16-bit halves stored in a group-shared slot by Store2Pixels:
// pixel1 comes from the low 16 bits, pixel2 from the high 16 bits.
void Load2Pixels(uint index, out float pixel1, out float pixel2)
{
    uint packedPair = gs_cache[index];
    uint highBits   = packedPair >> 16;

    pixel1 = f16tof32(packedPair);
    pixel2 = f16tof32(highBits);
}
|
|
|
|
// Writes a single value into a group-shared slot, keeping its full 32-bit float bit pattern.
void Store1Pixel(uint index, float pixel)
{
    uint rawBits = asuint(pixel);
    gs_cache[index] = rawBits;
}
|
|
|
|
// Reads back a group-shared slot written by Store1Pixel, reinterpreting its bits as a float.
void Load1Pixel(uint index, out float pixel)
{
    uint rawBits = gs_cache[index];
    pixel = asfloat(rawBits);
}
|
|
|
|
// Horizontal max over the group-shared cache, producing two outputs per call to cut down on LDS
// reads and unpacking. Five packed slots starting at leftMostIndex are expanded into ten
// consecutive values; the max of values 0..8 is stored at outIndex and the max of values 1..9 at
// outIndex + 1, each as a full 32-bit float.
//
// With the indices KMain passes, the output slots overlap the slots read here, so every load is
// issued before either store.
// NOTE(review): slots written by one thread are also read by neighbouring threads of the same row
// and there is no barrier inside this pass -- presumably this relies on those threads executing
// in lockstep; confirm for the targeted platforms.
void HorizontalPass(uint outIndex, uint leftMostIndex)
{
    float c[10];

    Load2Pixels(leftMostIndex + 0, c[0], c[1]);
    Load2Pixels(leftMostIndex + 1, c[2], c[3]);
    Load2Pixels(leftMostIndex + 2, c[4], c[5]);
    Load2Pixels(leftMostIndex + 3, c[6], c[7]);
    Load2Pixels(leftMostIndex + 4, c[8], c[9]);

    // Two 9-wide windows, offset by one value.
    float firstWindowMax  = Max3(Max3(c[0], c[1], c[2]), Max3(c[3], c[4], c[5]), Max3(c[6], c[7], c[8]));
    float secondWindowMax = Max3(Max3(c[1], c[2], c[3]), Max3(c[4], c[5], c[6]), Max3(c[7], c[8], c[9]));

    Store1Pixel(outIndex, firstWindowMax);
    Store1Pixel(outIndex + 1, secondWindowMax);
}
|
|
|
|
// Vertical max over the group-shared cache. Reads nine slots starting at topMostIndex and spaced
// 8 slots apart, takes their maximum and writes it to _OutputCoCTexture at pixelCoord.
void VerticalPass(uint2 pixelCoord, uint topMostIndex)
{
    float taps[9];

    [unroll]
    for (uint k = 0u; k < 9u; ++k)
    {
        Load1Pixel(topMostIndex + k * 8u, taps[k]);
    }

    float columnMax = Max3(Max3(taps[0], taps[1], taps[2]),
                           Max3(taps[3], taps[4], taps[5]),
                           Max3(taps[6], taps[7], taps[8]));

    // Write to the final target
    _OutputCoCTexture[COORD_TEXTURE2D_X(pixelCoord)] = columnMax;
}
|
|
|
|
#define GROUP_SIZE 8

// Kernel entry point: one 8x8 thread group produces an 8x8 block of output pixels.
//  1. Each thread fetches a 2x2 quad of the input (.x channel), so the group caches a 16x16 tile
//     that starts 4 pixels up-left of the group's first output pixel.
//  2. HorizontalPass reduces the tile along rows (9-wide max, two outputs per thread).
//  3. VerticalPass reduces along columns (9-tall max) and writes the result for this thread's
//     pixel (dispatchThreadId.xy).
[numthreads(GROUP_SIZE, GROUP_SIZE, 1)]
void KMain(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID, uint3 dispatchThreadId : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);

    // Upper-left texel of the quad fetched by this thread; may be negative along the top/left
    // edges, in which case it is clamped to 0.
    int2 quadOrigin = (groupThreadId << 1) + (groupId << 3) - 4;
    uint2 quadBase = uint2(max(0, quadOrigin));

    // Clamp each of the four texel coordinates against Size before reading.
    // NOTE(review): the clamp allows a coordinate equal to Size -- presumably _Params.xy is the
    // last valid texel index; confirm against the caller.
    uint2 coord00 = min(quadBase + uint2(0u, 0u), Size);
    uint2 coord10 = min(quadBase + uint2(1u, 0u), Size);
    uint2 coord01 = min(quadBase + uint2(0u, 1u), Size);
    uint2 coord11 = min(quadBase + uint2(1u, 1u), Size);

    float coc00 = _InputCoCTexture[COORD_TEXTURE2D_X(coord00)].x;
    float coc10 = _InputCoCTexture[COORD_TEXTURE2D_X(coord10)].x;
    float coc01 = _InputCoCTexture[COORD_TEXTURE2D_X(coord01)].x;
    float coc11 = _InputCoCTexture[COORD_TEXTURE2D_X(coord11)].x;

    // Store the four CoCs in LDS: each thread owns 16 slots' worth of row pair, with the top row
    // of its quad in the first 8 slots and the bottom row in the next 8.
    uint rowPairBase = groupThreadId.y << 4u;
    uint topRowSlot = groupThreadId.x + rowPairBase;
    Store2Pixels(topRowSlot, coc00, coc10);
    Store2Pixels(topRowSlot + 8u, coc01, coc11);

    GroupMemoryBarrierWithGroupSync();

    // Threads with x in 0..3 reduce the top row of the pair, threads with x in 4..7 the bottom
    // row: (x & 4) adds 4, which moves the read start to slot rowPairBase + 8 + (x - 4).
    uint horizontalOut = rowPairBase + (groupThreadId.x << 1u);
    uint horizontalIn = rowPairBase + groupThreadId.x + (groupThreadId.x & 4u);
    HorizontalPass(horizontalOut, horizontalIn);

    GroupMemoryBarrierWithGroupSync();

    // After the horizontal pass each tile row occupies 8 slots, hence the row stride of 8.
    uint verticalTop = (groupThreadId.y << 3u) + groupThreadId.x;
    VerticalPass(dispatchThreadId.xy, verticalTop);
}
|