111 lines
4.2 KiB
Plaintext
111 lines
4.2 KiB
Plaintext
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
|
|
|
|
#pragma kernel MAIN KERNEL_NAME=MAIN NUM_SAMPLES=1 COARSE_STENCIL
|
|
#pragma kernel MAIN_MSAA_2 KERNEL_NAME=MAIN_MSAA_2 NUM_SAMPLES=2 MSAA COARSE_STENCIL
|
|
#pragma kernel MAIN_MSAA_4 KERNEL_NAME=MAIN_MSAA_4 NUM_SAMPLES=4 MSAA COARSE_STENCIL
|
|
#pragma kernel MAIN_MSAA_8 KERNEL_NAME=MAIN_MSAA_8 NUM_SAMPLES=8 MSAA COARSE_STENCIL
|
|
|
|
#pragma kernel MAIN_MSAA_2_RESOLVE KERNEL_NAME=MAIN_MSAA_2_RESOLVE NUM_SAMPLES=2 MSAA RESOLVE COARSE_STENCIL
|
|
#pragma kernel MAIN_MSAA_4_RESOLVE KERNEL_NAME=MAIN_MSAA_4_RESOLVE NUM_SAMPLES=4 MSAA RESOLVE COARSE_STENCIL
|
|
#pragma kernel MAIN_MSAA_8_RESOLVE KERNEL_NAME=MAIN_MSAA_8_RESOLVE NUM_SAMPLES=8 MSAA RESOLVE COARSE_STENCIL
|
|
|
|
#pragma kernel MAIN_MSAA_2_RESOLVE_ONLY KERNEL_NAME=MAIN_MSAA_2_RESOLVE_ONLY NUM_SAMPLES=2 MSAA RESOLVE
|
|
#pragma kernel MAIN_MSAA_4_RESOLVE_ONLY KERNEL_NAME=MAIN_MSAA_4_RESOLVE_ONLY NUM_SAMPLES=4 MSAA RESOLVE
|
|
#pragma kernel MAIN_MSAA_8_RESOLVE_ONLY KERNEL_NAME=MAIN_MSAA_8_RESOLVE_ONLY NUM_SAMPLES=8 MSAA RESOLVE
|
|
|
|
|
|
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
|
|
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
|
|
|
|
// Stencil source read by the kernel. The multisampled variants additionally declare the
// read/write texture that the kernel writes when RESOLVE is defined.
#ifndef MSAA
    TEXTURE2D_X_UINT2(_StencilTexture);
#else
    TEXTURE2D_X_MSAA(uint2, _StencilTexture);
    RW_TEXTURE2D_X(uint2, _OutputStencilBuffer);
#endif
|
|
|
|
// Coarse stencil storage: the kernel writes one uint entry per thread group.
// TODO: 3 bytes of every entry are wasted, but a buffer is still preferable to a texture because
// it can be scalar read. Sub-indexing the right byte inside the uint would cost extra ALU and
// would only save memory, not bandwidth. For now the extra memory cost is acceptable
// (3 bytes * 1/64th of a render target).
// Note: raw buffers seemed to cause problems, so a structured buffer is used for now; this is
// worth revisiting if the performance difference becomes a concern.
RWStructuredBuffer<uint> _CoarseStencilBuffer;

// Wave intrinsics are only used when the platform supports them and its lane count is 64.
#if defined(PLATFORM_SUPPORTS_WAVE_INTRINSICS)
    #define USE_INTRINSICS (PLATFORM_LANE_COUNT == 64)
#else
    #define USE_INTRINSICS 0
#endif

// Without wave intrinsics the per-group OR is accumulated in group shared memory.
#if !USE_INTRINSICS
groupshared uint coarseStencilValue;
#endif
|
|
|
|
// Stencil resolve kernel; KERNEL_NAME, NUM_SAMPLES, MSAA, RESOLVE and COARSE_STENCIL are supplied
// per variant by the "#pragma kernel" lines.
// Each thread handles the pixel at dispatchThreadID.xy (dispatchThreadID.z selects the view):
//  - the NUM_SAMPLES stencil samples of the pixel are OR-ed into one conservative value;
//  - RESOLVE: that value is stored in both channels of _OutputStencilBuffer;
//  - COARSE_STENCIL: the values of all threads of the 8x8 group are OR-ed together and the first
//    thread of the group stores the result in _CoarseStencilBuffer.
[numthreads(8, 8, 1)]
void KERNEL_NAME(uint3 groupId          : SV_GroupID,
                 uint3 groupThreadId    : SV_GroupThreadID,
                 uint3 dispatchThreadID : SV_DispatchThreadID)
{
    UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadID.z);

    // The best shot at resolving is being overly conservative, hence the OR operator. This is by nature inaccurate.
    uint resolvedStencil = 0;

    // Threads outside the screen keep a stencil of 0 so they do not affect the group-wide OR below.
    if (dispatchThreadID.x < (uint)_ScreenSize.x && dispatchThreadID.y < (uint)_ScreenSize.y)
    {
        UNITY_UNROLL
        for (uint i = 0; i < NUM_SAMPLES; i++)
        {
            uint2 sampledStencil;
#ifndef MSAA
            sampledStencil = LOAD_TEXTURE2D_X(_StencilTexture, dispatchThreadID.xy);
#else
            sampledStencil = LOAD_TEXTURE2D_X_MSAA(_StencilTexture, dispatchThreadID.xy, i);
#endif
            // Accumulate every sample; a plain assignment would keep only the last sample and
            // drop stencil bits that are set on the other samples of the pixel.
            resolvedStencil |= GetStencilValue(sampledStencil);
        }
    }

#ifdef RESOLVE
    _OutputStencilBuffer[COORD_TEXTURE2D_X(dispatchThreadID.xy)] = uint2(resolvedStencil, resolvedStencil);
#endif

#ifdef COARSE_STENCIL

#if USE_INTRINSICS

    // Need to workaround a warning incorrectly triggered when on Xbox One, so instead of using WaveIsFirstLane()
    // we check the groupThreadId as in the non intrinsic version.
    //bool isFirstThread = WaveIsFirstLane();
    bool isFirstThread = groupThreadId.x == 0 && groupThreadId.y == 0;
    // NOTE(review): this path is only compiled when PLATFORM_LANE_COUNT == 64, which matches the
    // 8x8 group size; presumably a single wave therefore covers the whole group - confirm.
    uint coarseStencilValue = WaveActiveBitOr(resolvedStencil);

#else

    bool isFirstThread = groupThreadId.x == 0 && groupThreadId.y == 0;
    if (isFirstThread)
    {
        coarseStencilValue = 0;
    }

    // Make the cleared accumulator visible to the whole group before any thread ORs into it.
    GroupMemoryBarrierWithGroupSync();

    InterlockedOr(coarseStencilValue, resolvedStencil);

    // Wait until every thread of the group has contributed before the value is read back.
    GroupMemoryBarrierWithGroupSync();

#endif

    //This temp is needed outside the if(isFirstThread) condition to workaround a DXC DXIL codegen
    // issue https://github.com/microsoft/DirectXShaderCompiler/issues/2743 until it's fixed
    uint perThreadCoarseStencilValue = coarseStencilValue;

    if (isFirstThread)
    {
        // One entry per thread group: groupId.xy is the tile coordinate, groupId.z the slice.
        uint addressIndex = Get1DAddressFromPixelCoord(groupId.xy, _CoarseStencilBufferSize.xy, groupId.z);
        _CoarseStencilBuffer[addressIndex] = perThreadCoarseStencilValue;
    }

#endif

}
|