// Compute kernels used to clear and to sum (reduce) the ray counters.
//   ClearBuffer      : writes 0 to every counter slot addressed by the dispatch.
//   TextureReduction : sums each counter over a 32x32 texel tile of _InputRayCountTexture.
//   BufferReduction  : sums each counter over a 32x32 tile of entries of _InputRayCountBuffer.
#pragma kernel ClearBuffer
#pragma kernel TextureReduction
#pragma kernel BufferReduction

#pragma only_renderers d3d11 ps5

#include "Packages/com.unity.render-pipelines.core/ShaderLibrary/Common.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/ShaderVariables.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/ShaderLibrary/TextureXR.hlsl"
#include "Packages/com.unity.render-pipelines.high-definition/Runtime/Debug/RayCountManager.cs.hlsl"

// Edge length of a thread group: every group is REDUCTION_GROUP_SIZE x REDUCTION_GROUP_SIZE threads.
#define REDUCTION_GROUP_SIZE 32
// Threads per group (REDUCTION_GROUP_SIZE squared). The halving reduction below relies on
// this being a power of two.
#define REDUCTION_TOTAL_THREADS 1024

// Destination of all three kernels.
// NOTE(review): element type restored as uint, inferred from usage (cleared with 0 and
// written from the uint LDS array) - confirm against the C# buffer stride.
RWStructuredBuffer<uint> _OutputRayCountBuffer;
// Row stride (in uints) used when addressing _OutputRayCountBuffer.
// NOTE(review): the index formula does not multiply this by RAYCOUNTVALUES_COUNT, so the
// caller presumably passes a stride that already accounts for it - verify against the caller.
uint _OutputBufferDimension;

// Writes 0 into the RAYCOUNTVALUES_COUNT consecutive counters owned by each thread.
//   groupId       : index of the thread group inside the dispatch.
//   groupThreadId : index of the thread inside its group.
[numthreads(REDUCTION_GROUP_SIZE, REDUCTION_GROUP_SIZE, 1)]
void ClearBuffer(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    const uint2 sampleIdx = groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy;

    // Index of the first counter of this entry; the counters of one entry are contiguous.
    const uint baseIdx = sampleIdx.y * _OutputBufferDimension + sampleIdx.x * RAYCOUNTVALUES_COUNT;

    for (uint counterType = 0; counterType < RAYCOUNTVALUES_COUNT; counterType++)
    {
        _OutputRayCountBuffer[baseIdx + counterType] = 0;
    }
}

// Inputs of the reduction kernels.
// NOTE(review): element types restored as uint, inferred from the uint accumulation below - confirm.
Texture2DArray<uint> _InputRayCountTexture;
StructuredBuffer<uint> _InputRayCountBuffer;
// Row stride (in uints) used when addressing _InputRayCountBuffer.
uint _InputBufferDimension;

// Per-group scratch storage: one slot per thread of the group.
groupshared uint gs_rayCount[REDUCTION_TOTAL_THREADS];

// Sums the REDUCTION_TOTAL_THREADS values staged in gs_rayCount and has thread 0 store the
// total into the output slot of (groupId, counterType).
// Contains group-wide barriers, so every thread of the group must call it.
//   threadIdx   : flattened index of the calling thread inside its group.
//   groupId     : index of the thread group inside the dispatch.
//   counterType : which of the RAYCOUNTVALUES_COUNT counters is being reduced.
void ReduceGroupAndStoreRayCount(uint threadIdx, uint2 groupId, uint counterType)
{
    // Make sure every thread has copied its value to the LDS.
    GroupMemoryBarrierWithGroupSync();

    // Reduction in shared memory: each step folds the upper half of the live range onto the
    // lower half, so after the last step gs_rayCount[0] holds the sum of the whole group.
    UNITY_UNROLL
    for (uint s = REDUCTION_TOTAL_THREADS / 2u; s > 0u; s >>= 1u)
    {
        if (threadIdx < s)
        {
            gs_rayCount[threadIdx] += gs_rayCount[threadIdx + s];
        }

        // The partial sums of this step must be visible before the next step reads them.
        GroupMemoryBarrierWithGroupSync();
    }

    // Write the result for this group to global memory.
    if (threadIdx == 0u)
    {
        _OutputRayCountBuffer[groupId.y * _OutputBufferDimension + groupId.x * RAYCOUNTVALUES_COUNT + counterType] = gs_rayCount[0];
    }
}

// Sums every counter slice of _InputRayCountTexture over the tile covered by the group and
// writes one total per counter and per group into _OutputRayCountBuffer.
//   groupId       : index of the thread group inside the dispatch.
//   groupThreadId : index of the thread inside its group.
[numthreads(REDUCTION_GROUP_SIZE, REDUCTION_GROUP_SIZE, 1)]
void TextureReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Flattened thread index inside the group and texel coordinate handled by this thread.
    const uint threadIdx = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;
    const uint2 sampleIdx = groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy;

    for (uint counterType = 0; counterType < RAYCOUNTVALUES_COUNT; counterType++)
    {
        // Read this thread's texel for the current counter.
        uint3 counterIdx = uint3(sampleIdx, INDEX_TEXTURE2D_ARRAY_X(counterType));
        uint rayCount = _InputRayCountTexture[counterIdx];

#if defined(USE_TEXTURE2D_X_AS_ARRAY) && defined(UNITY_STEREO_INSTANCING_ENABLED)
        // Accumulate results from all remaining XR single-pass views, which are read from
        // the slices that follow the first one.
        for (uint viewIndex = 1; viewIndex < _XRViewCount; ++viewIndex)
        {
            ++counterIdx.z;
            rayCount += _InputRayCountTexture[counterIdx];
        }
#endif

        // Stage the per-thread value in the LDS, then reduce the group and store its total.
        gs_rayCount[threadIdx] = rayCount;
        ReduceGroupAndStoreRayCount(threadIdx, groupId, counterType);
    }
}

// Sums every counter of _InputRayCountBuffer over the tile of entries covered by the group
// and writes one total per counter and per group into _OutputRayCountBuffer.
//   groupId       : index of the thread group inside the dispatch.
//   groupThreadId : index of the thread inside its group.
[numthreads(REDUCTION_GROUP_SIZE, REDUCTION_GROUP_SIZE, 1)]
void BufferReduction(uint2 groupId : SV_GroupID, uint2 groupThreadId : SV_GroupThreadID)
{
    // Flattened thread index inside the group and input entry handled by this thread.
    const uint threadIdx = groupThreadId.y * REDUCTION_GROUP_SIZE + groupThreadId.x;
    const uint2 sampleIdx = groupId.xy * REDUCTION_GROUP_SIZE + groupThreadId.xy;

    for (uint counterType = 0; counterType < RAYCOUNTVALUES_COUNT; counterType++)
    {
        // Stage this thread's input value for the current counter in the LDS.
        const uint counterIdx = sampleIdx.y * _InputBufferDimension + sampleIdx.x * RAYCOUNTVALUES_COUNT + counterType;
        gs_rayCount[threadIdx] = _InputRayCountBuffer[counterIdx];

        // Reduce the group and store its total.
        ReduceGroupAndStoreRayCount(threadIdx, groupId, counterType);
    }
}