2021-09-09 20:42:29 -04:00

328 lines
15 KiB
HLSL

// Part of this code has been given by Christoph Peter in the Demo code of the "Improved Moment Shadow Maps for Translucent Occluders, Soft Shadows and Single Scattering" paper
//
// To the extent possible under law, the author(s) have dedicated all copyright and
// related and neighboring rights to this software to the public domain worldwide.
// This software is distributed without any warranty.
//
// You should have received a copy of the CC0 Public Domain Dedication along with
// this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
// Link: http://jcgt.org/published/0006/01/03/
// Given that the default texture2d type matches a float4, we beed to manually declare this texture for the improved moment shadow algorithm
Texture2D<uint4> _SummedAreaTableInputInt;
float2 ComputeFixedPrecision(float3x2 kernelSizeParameter, float2 shadowmapSize)
{
float2 maxKernelSize = ceil(1.0f+2.0f * kernelSizeParameter._21_22 * shadowmapSize);
float fixedPres = 4294967295.0f/(maxKernelSize.x*maxKernelSize.y);
return float2(fixedPres, 1.0 / fixedPres);
}
// Soft IMS
float4 ComputeIntegerRectangleAverage_uint4(float4 searchRegion, float2 shadowTextureSize, float invFixedPrecision)
{
// Pixel position of the range
int2 topPixel = int2(searchRegion.xy * shadowTextureSize.xy) - 1;
int2 bottomPixel = int2(searchRegion.zw * shadowTextureSize.xy) - 1;
uint2 reactangleSize = bottomPixel - topPixel;
// Evaluate the integral
uint4 integral = uint4(0,0,0,0);
integral += _SummedAreaTableInputInt.Load(int3(topPixel.x, topPixel.y,0));
integral += _SummedAreaTableInputInt.Load(int3(bottomPixel.x, bottomPixel.y,0));
integral -= _SummedAreaTableInputInt.Load(int3(topPixel.x, bottomPixel.y,0));
integral -= _SummedAreaTableInputInt.Load(int3(bottomPixel.x, topPixel.y,0));
return float4(integral) * invFixedPrecision / float(reactangleSize.x * reactangleSize.y);
}
float2 GetRoots(float3 Coefficients)
{
float Scaling=1.0f/Coefficients[2];
float p=Coefficients[1]*Scaling;
float q=Coefficients[0]*Scaling;
float D=p*p*0.25f-q;
float r=sqrt(D);
return float2(-0.5f*p-r,-0.5f*p+r);
}
void Compute4MomentAverageBlockerDepth(out float OutAverageBlockerDepth, out float OutBlockerSearchShadowIntensity, float depthBias, float4 BlockerSearchBiased4Moments,float FragmentDepth)
{
// Use short-hands for the many formulae to come
float4 b=BlockerSearchBiased4Moments;
float3 z;
z[0]=FragmentDepth - depthBias;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-
// trivial entries or related products
float L21D11=mad(-b[0],b[1],b[2]);
float D11=mad(-b[0],b[0], b[1]);
float SquaredDepthVariance=mad(-b[1],b[1], b[3]);
float D22D11=dot(float2(SquaredDepthVariance,-L21D11),float2(D11,L21D11));
float InvD11=1.0f/D11;
float L21=L21D11*InvD11;
// Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T
float3 c=float3(1.0f,z[0],z[0]*z[0]);
// Forward substitution to solve L*c1=bz
c[1]-=b.x;
c[2]-=b.y+L21*c[1];
// Scaling to solve D*c2=c1
c[1]*=InvD11;
c[2]*=D11/D22D11;
// Backward substitution to solve L^T*c3=c2
c[1]-=L21*c[2];
c[0]-=dot(c.yz,b.xy);
// Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions
// z[1] and z[2]
z.yz=GetRoots(c);
// Compute weights of the Dirac-deltas at the roots
float3 Weight;
Weight[0]=(z[1]*z[2]-b[0]*(z[1]+z[2])+b[1])/((z[0]-z[1])*(z[0]-z[2]));
Weight[1]=(z[0]*z[2]-b[0]*(z[0]+z[2])+b[1])/((z[2]-z[1])*(z[0]-z[1]));
Weight[2]=1.0f-Weight[0]-Weight[1];
// Compute the shadow intensity and the unnormalized average depth of occluders
float AverageBlockerDepthIntegral=((z[1]<z[0])?(Weight[1]*z[1]):0.0f)+((z[2]<z[0])?(Weight[2]*z[2]):0.0f);
float BlockerSearchShadowIntensity=((z[1]<z[0])?Weight[1]:0.0f)+((z[2]<z[0])?Weight[2]:0.0f);
const float FullyLitBias=1.0e-3f;
OutAverageBlockerDepth=clamp((FullyLitBias*z[0]+AverageBlockerDepthIntegral)/(FullyLitBias+BlockerSearchShadowIntensity),-1.0f,1.0f);
OutBlockerSearchShadowIntensity=(BlockerSearchShadowIntensity>0.99f)?1.0f:(-1.0f);
}
void EstimatePenumbraSize(out float2 OutKernelSize,out float OutDepthBias, float OccluderDepth, float FragmentDepth, float3x2 LightParameter, float3x2 KernelSizeParameter, float MaxDepthBias)
{
float2 Numerator=LightParameter._11_12*(FragmentDepth-OccluderDepth);
float2 Denominator=LightParameter._21_22*OccluderDepth+LightParameter._31_32;
OutKernelSize=max(KernelSizeParameter._11_12,min(KernelSizeParameter._21_22,Numerator/Denominator));
OutDepthBias=MaxDepthBias*clamp(OutKernelSize.x*KernelSizeParameter._31,KernelSizeParameter._32,1.0f);
}
float4 ComputeRectangleAverage_uint4(float2 LeftTop,float2 RightBottom, float4 TextureSize, float2 FixedPrecision)
{
// The summed area table is essentially off by one because the top left texel
// already holds the integral over the top left texel. Compensate for that.
LeftTop-=TextureSize.zw;
RightBottom-=TextureSize.zw;
// Round the texture coordinates to integer texels
float2 LeftTopPixel=LeftTop*TextureSize.xy;
float2 RightBottomPixel=RightBottom*TextureSize.xy;
float2 LeftTopPixelFloor=floor(LeftTopPixel);
float2 RightBottomPixelFloor=floor(RightBottomPixel);
int2 iLeftTop=int2(LeftTopPixelFloor);
int2 iRightBottom=int2(RightBottomPixelFloor);
float2 LeftTopFactor=float2(1.0f,1.0f)-(LeftTopPixel-LeftTopPixelFloor);
float2 RightBottomFactor=RightBottomPixel-RightBottomPixelFloor;
// Sample the summed area table at all relevant locations. The first two indices
// determine whether we are at the left top or right bottom of the rectangle,
// the latter two determine the pixel offset.
int x, y;
uint4 Samples[2][2][2][2];
[unroll] for(x=0;x!=2;++x){
int TexelX=(x==0)?iLeftTop.x:iRightBottom.x;
[unroll] for(y=0;y!=2;++y){
int TexelY=(y==0)?iLeftTop.y:iRightBottom.y;
[unroll] for(int z=0;z!=2;++z){
[unroll] for(int w=0;w!=2;++w){
Samples[x][y][z][w] = _SummedAreaTableInputInt.Load(int3(TexelX+z,TexelY+w,0));
}
}
}
}
// Compute integrals for various rectangles
float4 pCornerIntegral[2][2];
[unroll] for(x=0;x!=2;++x){
[unroll] for(y=0;y!=2;++y){
pCornerIntegral[x][y]=float4(Samples[x][y][0][0]+Samples[x][y][1][1]-Samples[x][y][1][0]-Samples[x][y][0][1]);
}
}
float4 pEdgeIntegral[4]={
// Right edge
float4(Samples[1][0][0][1]+Samples[1][1][1][0]-Samples[1][0][1][1]-Samples[1][1][0][0]),
// Top edge
float4(Samples[0][0][1][0]+Samples[1][0][0][1]-Samples[0][0][1][1]-Samples[1][0][0][0]),
// Left edge
float4(Samples[0][0][0][1]+Samples[0][1][1][0]-Samples[0][0][1][1]-Samples[0][1][0][0]),
// Bottom edge
float4(Samples[0][1][1][0]+Samples[1][1][0][1]-Samples[0][1][1][1]-Samples[1][1][0][0])
};
float4 CenterIntegral=float4(Samples[0][0][1][1]+Samples[1][1][0][0]-Samples[0][1][1][0]-Samples[1][0][0][1]);
// Compute the integral over the given rectangle
float4 Integral=CenterIntegral;
Integral+=pCornerIntegral[0][0]*(LeftTopFactor.x*LeftTopFactor.y);
Integral+=pCornerIntegral[0][1]*(LeftTopFactor.x*RightBottomFactor.y);
Integral+=pCornerIntegral[1][0]*(RightBottomFactor.x*LeftTopFactor.y);
Integral+=pCornerIntegral[1][1]*(RightBottomFactor.x*RightBottomFactor.y);
Integral+=pEdgeIntegral[0]*RightBottomFactor.x;
Integral+=pEdgeIntegral[1]*LeftTopFactor.y;
Integral+=pEdgeIntegral[2]*LeftTopFactor.x;
Integral+=pEdgeIntegral[3]*RightBottomFactor.y;
// Get from a non-normalized integral to moments
float2 Size=RightBottomPixel-LeftTopPixel;
return Integral*FixedPrecision.y/(Size.x*Size.y);
}
void Convert4MomentOptimizedToCanonical(out float4 OutBiased4Moments,float4 OptimizedMoments0,float MomentBias=6.0e-5f){
OutBiased4Moments.xz=mul(OptimizedMoments0.xz-0.5f,float2x2(-1.0f/3.0f,-0.75f,sqrt(3.0f),0.75f*sqrt(3.0f)));
OutBiased4Moments.yw=mul(OptimizedMoments0.yw,float2x2(0.125f,-0.125f,1.0f,1.0f));
OutBiased4Moments=lerp(OutBiased4Moments,float4(0.0f,0.628f,0.0f,0.628f),MomentBias);
}
void Compute4MomentUnboundedShadowIntensity(out float OutShadowIntensity,
float4 Biased4Moments,float FragmentDepth,float DepthBias)
{
// Use short-hands for the many formulae to come
float4 b=Biased4Moments;
float3 z;
z[0]=FragmentDepth-DepthBias;
// Compute a Cholesky factorization of the Hankel matrix B storing only non-
// trivial entries or related products
float L21D11=mad(-b[0],b[1],b[2]);
float D11=mad(-b[0],b[0], b[1]);
float SquaredDepthVariance=mad(-b[1],b[1], b[3]);
float D22D11=dot(float2(SquaredDepthVariance,-L21D11),float2(D11,L21D11));
float InvD11=1.0f/D11;
float L21=L21D11*InvD11;
float D22=D22D11*InvD11;
float InvD22=1.0f/D22;
// Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T
float3 c=float3(1.0f,z[0],z[0]*z[0]);
// Forward substitution to solve L*c1=bz
c[1]-=b.x;
c[2]-=b.y+L21*c[1];
// Scaling to solve D*c2=c1
c[1]*=InvD11;
c[2]*=InvD22;
// Backward substitution to solve L^T*c3=c2
c[1]-=L21*c[2];
c[0]-=dot(c.yz,b.xy);
// Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions
// z[1] and z[2]
float InvC2=1.0f/c[2];
float p=c[1]*InvC2;
float q=c[0]*InvC2;
float D=(p*p*0.25f)-q;
float r=sqrt(D);
z[1]=-p*0.5f-r;
z[2]=-p*0.5f+r;
// Compute the shadow intensity by summing the appropriate weights
float4 Switch=
(z[2]<z[0])?float4(z[1],z[0],1.0f,1.0f):(
(z[1]<z[0])?float4(z[0],z[1],0.0f,1.0f):
float4(0.0f,0.0f,0.0f,0.0f));
float Quotient=(Switch[0]*z[2]-b[0]*(Switch[0]+z[2])+b[1])/((z[2]-Switch[1])*(z[0]-z[1]));
OutShadowIntensity=Switch[2]+Switch[3]*Quotient;
OutShadowIntensity=saturate(OutShadowIntensity);
}
float3x2 ComputeDirectionalLightSoftShadowParameters(float4x4 ViewToProjectionSpace, float LightSourceAngle)
{
// Compute the bounding box dimensions of the view frustum of the shadow map
float3 FrustumExtents=float3(
2.0f*(ViewToProjectionSpace._11*ViewToProjectionSpace._44-ViewToProjectionSpace._14*ViewToProjectionSpace._41)/
(ViewToProjectionSpace._11*ViewToProjectionSpace._11-ViewToProjectionSpace._41*ViewToProjectionSpace._41),
2.0f*(ViewToProjectionSpace._22*ViewToProjectionSpace._44-ViewToProjectionSpace._24*ViewToProjectionSpace._42)/
(ViewToProjectionSpace._22*ViewToProjectionSpace._22-ViewToProjectionSpace._42*ViewToProjectionSpace._42),
dot(float4(ViewToProjectionSpace._43,-ViewToProjectionSpace._43,-ViewToProjectionSpace._33,ViewToProjectionSpace._33),float4(ViewToProjectionSpace._33,ViewToProjectionSpace._43,ViewToProjectionSpace._34,ViewToProjectionSpace._44))/
(ViewToProjectionSpace._33*(ViewToProjectionSpace._33-ViewToProjectionSpace._43))
);
// Compute a factor that turns a depth difference into a kernel size as texture
// coordinate
float3x2 OutLightParameter;
OutLightParameter._11=0.5f*tan(0.5f*LightSourceAngle)*FrustumExtents.z/FrustumExtents.x;
OutLightParameter._12=0.5f*tan(0.5f*LightSourceAngle)*FrustumExtents.z/FrustumExtents.y;
// The denominator is constant one
OutLightParameter._21_22=float2(0.0f,0.0f);
OutLightParameter._31_32=float2(1.0f,1.0f);
return OutLightParameter;
}
// Minimal kernel size for our system
#define MIN_KERNEL_SIZE 1.0
float3x2 ComputeKernelSizePameter(float kernelSize, float2 texResolution)
{
float3x2 result;
result._11 = MIN_KERNEL_SIZE/ texResolution.x;
result._12 = MIN_KERNEL_SIZE/ texResolution.y;
result._21 = kernelSize * 0.5f / texResolution.x;
result._22 = kernelSize * 0.5f / texResolution.y;
result._31 = texResolution.x / max(0.5f * kernelSize - 0.5f, 0.5f);
result._32 = 0.1f;
return result;
}
float4x4 GetProjectionMatrix(HDShadowData sd)
{
float4x4 proj;
proj = 0.0;
proj._m00 = sd.proj[0];
proj._m11 = sd.proj[1];
proj._m22 = sd.proj[2];
proj._m23 = sd.proj[3];
proj._m33 = 1.0;
return proj;
}
#define MOMENT_BIAS 6e-7
float SampleShadow_IMS(HDShadowData sd, float3 tcs, float depthBias, float kernelSize, float lightAngle, float maxDepthBias)
{
// Depth value in the [-1, 1]
float pixelShadowmapDepth = (1.0 - tcs.z) * 2.0f - 1.0f;
// Compute the kernel size parameters (this should be done on the scripting side and injected, but for the moment we do not have
// enough constant buffer params). The code for this was retro-engineered from the demo that the paper gives.
float3x2 kernelSizeParameter = ComputeKernelSizePameter(kernelSize, _CascadeShadowAtlasSize.xy);
// Compute the inverse fixed precision value (that allows us to move from uint precision to float)
float2 fixedPrecision = ComputeFixedPrecision(kernelSizeParameter, _CascadeShadowAtlasSize.xy);
// The blocker search rectangle is in texCoord space [minShadowmapUV, maxShadowmapUV]
// XY matches the top left kernel position in texCoords
// ZW matches the bottom right kernel position in texCoords
float4 searchRegion = float4(0.0, 0.0, 0.0, 0.0);
searchRegion.xy = saturate(tcs.xy - kernelSizeParameter._21_22 - 0.5f * _CascadeShadowAtlasSize.zw);
searchRegion.zw = saturate(tcs.xy + kernelSizeParameter._21_22 + 0.5f * _CascadeShadowAtlasSize.zw);
// Compute the integral in the target rectangle
float4 averageOptimizedMoment = ComputeIntegerRectangleAverage_uint4(searchRegion, _CascadeShadowAtlasSize.xy, fixedPrecision.y);
// Convert the moment to canonical
Convert4MomentOptimizedToCanonical(averageOptimizedMoment, averageOptimizedMoment, MOMENT_BIAS);
// Compute the average depth and the shadow intensity
float averageBlockerDepth = 0.0f;
float blockerSearchShadowIntensity = 0.0f;
Compute4MomentAverageBlockerDepth(averageBlockerDepth, blockerSearchShadowIntensity, depthBias, averageOptimizedMoment, pixelShadowmapDepth);
// Compute the view projection matrix
float4x4 viewToProjection = GetProjectionMatrix(sd);
// Let's compute the light parameters
float3x2 lightParameters = ComputeDirectionalLightSoftShadowParameters(viewToProjection, lightAngle);
// Estimate the size of the penumbra for this pixel
float2 penumbraKernelSize = float2(0.0, 0.0);
float estimatedDepthBias = 0.0f;
EstimatePenumbraSize(penumbraKernelSize, estimatedDepthBias, averageBlockerDepth, pixelShadowmapDepth, lightParameters, kernelSizeParameter, maxDepthBias);
// Estimate the region of the filter with the penumbra estimation
float4 filterRegion = float4(0.0, 0.0, 0.0, 0.0);
filterRegion.xy = saturate(tcs.xy - penumbraKernelSize);
filterRegion.zw = saturate(tcs.xy + penumbraKernelSize);
float4 optimizedMoment = ComputeRectangleAverage_uint4(filterRegion.xy, filterRegion.zw, _CascadeShadowAtlasSize, fixedPrecision.y);
// Optimized to regular moments
float4 biasedMoment = float4(0.0, 0.0, 0.0, 0.0);
Convert4MomentOptimizedToCanonical(biasedMoment, optimizedMoment, MOMENT_BIAS);
// Compute the shadow intensity
float shadowIntensity = 0.0f;
Compute4MomentUnboundedShadowIntensity(shadowIntensity, biasedMoment, pixelShadowmapDepth, estimatedDepthBias);
// Inverse it and we are done
return 1.0 - shadowIntensity;
}