114 lines
4.3 KiB
Plaintext

#pragma kernel ClearBuffer
#pragma kernel TextureReduction
#pragma kernel BufferReduction
#pragma only_renderers d3d11 ps5
#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/TextureXR.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Debug/RayCountManager.cs.hlsl"
// Edge length of the square thread group used by the kernels in this file.
#define REDUCTION_GROUP_SIZE 32
// Thread count of one group (REDUCTION_GROUP_SIZE * REDUCTION_GROUP_SIZE); also the
// element count of the group-shared array used for the in-group sum.
#define REDUCTION_TOTAL_THREADS 1024
// Destination buffer written by ClearBuffer, TextureReduction and BufferReduction.
RWStructuredBuffer<uint> _OutputRayCountBuffer;
// Row stride applied to the y coordinate when indexing _OutputRayCountBuffer.
// NOTE(review): the kernels do not multiply this by RAYCOUNTVALUES_COUNT, so the caller
// presumably passes a stride that already accounts for the counter count - confirm.
uint _OutputBufferDimension;
// Writes zero into every counter slot of _OutputRayCountBuffer addressed by this thread.
// Each thread owns one (x, y) cell and clears the RAYCOUNTVALUES_COUNT consecutive
// entries that start at that cell's offset.
[numthreads(REDUCTION_GROUP_SIZE, REDUCTION_GROUP_SIZE, 1)]
void ClearBuffer(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Coordinate of this thread within the whole dispatch.
    const uint2 cell = groupThreadId.xy + groupId.xy * REDUCTION_GROUP_SIZE;

    // Offset of the first counter of this cell.
    // NOTE(review): the row stride carries no RAYCOUNTVALUES_COUNT factor; presumably
    // _OutputBufferDimension is already scaled by the caller - confirm.
    const uint firstSlot = cell.y * _OutputBufferDimension + cell.x * RAYCOUNTVALUES_COUNT;

    for (uint counter = 0; counter < RAYCOUNTVALUES_COUNT; ++counter)
    {
        _OutputRayCountBuffer[firstSlot + counter] = 0;
    }
}
// Source of the counters summed by TextureReduction; the slice is derived from the
// counter type through INDEX_TEXTURE2D_ARRAY_X.
Texture2DArray<uint> _InputRayCountTexture;
// Source of the counters summed by BufferReduction.
StructuredBuffer<uint> _InputRayCountBuffer;
// Row stride applied to the y coordinate when indexing _InputRayCountBuffer.
uint _InputBufferDimension;
// Group-shared scratch array, one slot per thread of the group, in which the
// reduction kernels accumulate their sum.
groupshared uint gs_rayCount[REDUCTION_TOTAL_THREADS];
// Sums, per counter type, the values of _InputRayCountTexture covered by one thread group
// (a REDUCTION_GROUP_SIZE x REDUCTION_GROUP_SIZE tile) and stores each total in
// _OutputRayCountBuffer at the slot addressed by the group id.
[numthreads(REDUCTION_GROUP_SIZE, REDUCTION_GROUP_SIZE, 1)]
void TextureReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Flattened index of this thread inside its group, and the texel it reads.
    const uint flatThreadId = groupThreadId.x + groupThreadId.y * REDUCTION_GROUP_SIZE;
    const uint2 pixel = groupThreadId.xy + groupId.xy * REDUCTION_GROUP_SIZE;

    // Offset of this group's first counter in the output buffer.
    const uint outputBase = groupId.y * _OutputBufferDimension + groupId.x * RAYCOUNTVALUES_COUNT;

    for (uint counter = 0; counter < RAYCOUNTVALUES_COUNT; ++counter)
    {
        // Read this thread's value for the current counter type.
        uint3 texel = uint3(pixel, INDEX_TEXTURE2D_ARRAY_X(counter));
        uint localCount = _InputRayCountTexture[texel];

#if defined(USE_TEXTURE2D_X_AS_ARRAY) && defined(UNITY_STEREO_INSTANCING_ENABLED)
        // Add the values of the remaining XR single-pass views, stored in the following slices.
        for (uint view = 1; view < _XRViewCount; ++view)
        {
            texel.z += 1;
            localCount += _InputRayCountTexture[texel];
        }
#endif

        // Publish the value to group-shared memory and wait until every thread has done so.
        gs_rayCount[flatThreadId] = localCount;
        GroupMemoryBarrierWithGroupSync();

        // Halve the number of active slots each step until slot 0 holds the group total.
        UNITY_UNROLL
        for (uint stride = REDUCTION_TOTAL_THREADS >> 1u; stride > 0u; stride >>= 1u)
        {
            if (flatThreadId < stride)
            {
                gs_rayCount[flatThreadId] += gs_rayCount[flatThreadId + stride];
            }
            // All threads reach this barrier, including those that did not add.
            GroupMemoryBarrierWithGroupSync();
        }

        // A single thread writes the group total for this counter type.
        if (flatThreadId == 0u)
        {
            _OutputRayCountBuffer[outputBase + counter] = gs_rayCount[0];
        }
    }
}
// Sums, per counter type, the entries of _InputRayCountBuffer covered by one thread group
// (REDUCTION_GROUP_SIZE x REDUCTION_GROUP_SIZE cells) and stores each total in
// _OutputRayCountBuffer at the slot addressed by the group id.
[numthreads(REDUCTION_GROUP_SIZE, REDUCTION_GROUP_SIZE, 1)]
void BufferReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Compute the flattened thread id and the global cell coordinate
    uint threadIdx = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;
    uint2 sampleIdx = groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy;

    // Offsets that do not depend on the counter type are computed once.
    uint inputBase = sampleIdx.y * _InputBufferDimension + sampleIdx.x * RAYCOUNTVALUES_COUNT;
    uint outputBase = groupId.y * _OutputBufferDimension + groupId.x * RAYCOUNTVALUES_COUNT;

    for (uint counterType = 0; counterType < RAYCOUNTVALUES_COUNT; counterType++)
    {
        // Copy this thread's value to group-shared memory.
        // The index stays unsigned: it is built from uint operands and used as a buffer index.
        uint counterIdx = inputBase + counterType;
        gs_rayCount[threadIdx] = _InputRayCountBuffer[counterIdx];

        // Make sure every thread has copied its value to group-shared memory
        GroupMemoryBarrierWithGroupSync();

        // Do the reduction in shared memory: each step folds the upper half onto the lower half
        UNITY_UNROLL
        for (uint s = REDUCTION_TOTAL_THREADS / 2u; s > 0u; s >>= 1u)
        {
            if (threadIdx < s)
            {
                gs_rayCount[threadIdx] += gs_rayCount[threadIdx + s];
            }
            // All threads reach this barrier, including those that did not add.
            GroupMemoryBarrierWithGroupSync();
        }

        // Write the result for this group to the output buffer
        if (threadIdx == 0u)
        {
            _OutputRayCountBuffer[outputBase + counterType] = gs_rayCount[0];
        }
    }
}