2021-09-09 20:42:29 -04:00

90 lines
3.5 KiB
Plaintext

#pragma kernel MaterialFlagsGen
#pragma multi_compile _ USE_OR
// #pragma enable_d3d11_debug_symbols
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/ShaderBase.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Lighting/LightLoop/LightLoop.cs.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Material.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/RenderPipeline/HDStencilUsage.cs.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Material/Lit/Lit.hlsl"
#pragma only_renderers d3d11 playstation xboxone xboxseries vulkan metal switch
#define USE_MATERIAL_FEATURE_FLAGS
#ifdef PLATFORM_LANE_COUNT // We can infer the size of a wave. This is currently not possible on non-consoles, so we have to fallback to a sensible default in those cases.
#define NR_THREADS PLATFORM_LANE_COUNT
#else
#define NR_THREADS 64 // default to 64 threads per group on other platforms..
#endif
groupshared uint ldsFeatureFlags;
RWStructuredBuffer<uint> g_TileFeatureFlags;
TEXTURE2D_X_UINT2(_StencilTexture);
[numthreads(NR_THREADS, 1, 1)]
void MaterialFlagsGen(uint3 dispatchThreadId : SV_DispatchThreadID, uint threadID : SV_GroupIndex, uint3 u3GroupID : SV_GroupID)
{
UNITY_XR_ASSIGN_VIEW_INDEX(dispatchThreadId.z);
uint2 tileIDX = u3GroupID.xy;
uint iWidth = g_viDimensions.x;
uint iHeight = g_viDimensions.y;
uint nrTilesX = (iWidth + (TILE_SIZE_FPTL - 1)) / TILE_SIZE_FPTL;
uint nrTilesY = (iHeight + (TILE_SIZE_FPTL - 1)) / TILE_SIZE_FPTL;
// 16 * 4 = 64. We process data by group of 4 pixel
uint2 viTilLL = 16 * tileIDX;
float2 invScreenSize = float2(1.0 / iWidth, 1.0 / iHeight);
if (threadID == 0)
{
ldsFeatureFlags = 0;
}
GroupMemoryBarrierWithGroupSync();
uint materialFeatureFlags = g_BaseFeatureFlags; // Contain all lightFeatures or 0 (depends if we enable light classification or not)
UNITY_UNROLL
for (int i = 0; i < 4; i++)
{
int idx = i * NR_THREADS + threadID;
uint2 uCrd = min(uint2(viTilLL.x + (idx & 0xf), viTilLL.y + (idx >> 4)), uint2(iWidth - 1, iHeight - 1));
// Unlit object, sky/background and forward opaque tag don't tag the StencilUsage.RequiresDeferredLighting bit
uint stencilVal = GetStencilValue(LOAD_TEXTURE2D_X(_StencilTexture, uCrd));
if ((stencilVal & STENCILUSAGE_REQUIRES_DEFERRED_LIGHTING) > 0)
{
PositionInputs posInput = GetPositionInput(uCrd, invScreenSize);
materialFeatureFlags |= MATERIAL_FEATURE_FLAGS_FROM_GBUFFER(posInput.positionSS);
}
}
InterlockedOr(ldsFeatureFlags, materialFeatureFlags); //TODO: driver might optimize this or we might have to do a manual reduction
GroupMemoryBarrierWithGroupSync();
if (threadID == 0)
{
uint tileIndex = tileIDX.y * nrTilesX + tileIDX.x;
// TODO: shouldn't this always enabled?
#if defined(UNITY_STEREO_INSTANCING_ENABLED)
tileIndex += unity_StereoEyeIndex * nrTilesX * nrTilesY;
#endif
#ifdef USE_OR
g_TileFeatureFlags[tileIndex] |= ldsFeatureFlags;
#else // Use in case we have disabled light classification
g_TileFeatureFlags[tileIndex] = ldsFeatureFlags;
#endif
}
}