// Part of this code has been given by Christoph Peter in the Demo code of the "Improved Moment Shadow Maps for Translucent Occluders, Soft Shadows and Single Scattering" paper // // To the extent possible under law, the author(s) have dedicated all copyright and // related and neighboring rights to this software to the public domain worldwide. // This software is distributed without any warranty. // // You should have received a copy of the CC0 Public Domain Dedication along with // this software. If not, see . // Link: http://jcgt.org/published/0006/01/03/ // Given that the default texture2d type matches a float4, we beed to manually declare this texture for the improved moment shadow algorithm Texture2D _SummedAreaTableInputInt; float2 ComputeFixedPrecision(float3x2 kernelSizeParameter, float2 shadowmapSize) { float2 maxKernelSize = ceil(1.0f+2.0f * kernelSizeParameter._21_22 * shadowmapSize); float fixedPres = 4294967295.0f/(maxKernelSize.x*maxKernelSize.y); return float2(fixedPres, 1.0 / fixedPres); } // Soft IMS float4 ComputeIntegerRectangleAverage_uint4(float4 searchRegion, float2 shadowTextureSize, float invFixedPrecision) { // Pixel position of the range int2 topPixel = int2(searchRegion.xy * shadowTextureSize.xy) - 1; int2 bottomPixel = int2(searchRegion.zw * shadowTextureSize.xy) - 1; uint2 reactangleSize = bottomPixel - topPixel; // Evaluate the integral uint4 integral = uint4(0,0,0,0); integral += _SummedAreaTableInputInt.Load(int3(topPixel.x, topPixel.y,0)); integral += _SummedAreaTableInputInt.Load(int3(bottomPixel.x, bottomPixel.y,0)); integral -= _SummedAreaTableInputInt.Load(int3(topPixel.x, bottomPixel.y,0)); integral -= _SummedAreaTableInputInt.Load(int3(bottomPixel.x, topPixel.y,0)); return float4(integral) * invFixedPrecision / float(reactangleSize.x * reactangleSize.y); } float2 GetRoots(float3 Coefficients) { float Scaling=1.0f/Coefficients[2]; float p=Coefficients[1]*Scaling; float q=Coefficients[0]*Scaling; float D=p*p*0.25f-q; float r=sqrt(D); return float2(-0.5f*p-r,-0.5f*p+r); } void Compute4MomentAverageBlockerDepth(out float OutAverageBlockerDepth, out float OutBlockerSearchShadowIntensity, float depthBias, float4 BlockerSearchBiased4Moments,float FragmentDepth) { // Use short-hands for the many formulae to come float4 b=BlockerSearchBiased4Moments; float3 z; z[0]=FragmentDepth - depthBias; // Compute a Cholesky factorization of the Hankel matrix B storing only non- // trivial entries or related products float L21D11=mad(-b[0],b[1],b[2]); float D11=mad(-b[0],b[0], b[1]); float SquaredDepthVariance=mad(-b[1],b[1], b[3]); float D22D11=dot(float2(SquaredDepthVariance,-L21D11),float2(D11,L21D11)); float InvD11=1.0f/D11; float L21=L21D11*InvD11; // Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T float3 c=float3(1.0f,z[0],z[0]*z[0]); // Forward substitution to solve L*c1=bz c[1]-=b.x; c[2]-=b.y+L21*c[1]; // Scaling to solve D*c2=c1 c[1]*=InvD11; c[2]*=D11/D22D11; // Backward substitution to solve L^T*c3=c2 c[1]-=L21*c[2]; c[0]-=dot(c.yz,b.xy); // Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions // z[1] and z[2] z.yz=GetRoots(c); // Compute weights of the Dirac-deltas at the roots float3 Weight; Weight[0]=(z[1]*z[2]-b[0]*(z[1]+z[2])+b[1])/((z[0]-z[1])*(z[0]-z[2])); Weight[1]=(z[0]*z[2]-b[0]*(z[0]+z[2])+b[1])/((z[2]-z[1])*(z[0]-z[1])); Weight[2]=1.0f-Weight[0]-Weight[1]; // Compute the shadow intensity and the unnormalized average depth of occluders float AverageBlockerDepthIntegral=((z[1]0.99f)?1.0f:(-1.0f); } void EstimatePenumbraSize(out float2 OutKernelSize,out float OutDepthBias, float OccluderDepth, float FragmentDepth, float3x2 LightParameter, float3x2 KernelSizeParameter, float MaxDepthBias) { float2 Numerator=LightParameter._11_12*(FragmentDepth-OccluderDepth); float2 Denominator=LightParameter._21_22*OccluderDepth+LightParameter._31_32; OutKernelSize=max(KernelSizeParameter._11_12,min(KernelSizeParameter._21_22,Numerator/Denominator)); OutDepthBias=MaxDepthBias*clamp(OutKernelSize.x*KernelSizeParameter._31,KernelSizeParameter._32,1.0f); } float4 ComputeRectangleAverage_uint4(float2 LeftTop,float2 RightBottom, float4 TextureSize, float2 FixedPrecision) { // The summed area table is essentially off by one because the top left texel // already holds the integral over the top left texel. Compensate for that. LeftTop-=TextureSize.zw; RightBottom-=TextureSize.zw; // Round the texture coordinates to integer texels float2 LeftTopPixel=LeftTop*TextureSize.xy; float2 RightBottomPixel=RightBottom*TextureSize.xy; float2 LeftTopPixelFloor=floor(LeftTopPixel); float2 RightBottomPixelFloor=floor(RightBottomPixel); int2 iLeftTop=int2(LeftTopPixelFloor); int2 iRightBottom=int2(RightBottomPixelFloor); float2 LeftTopFactor=float2(1.0f,1.0f)-(LeftTopPixel-LeftTopPixelFloor); float2 RightBottomFactor=RightBottomPixel-RightBottomPixelFloor; // Sample the summed area table at all relevant locations. The first two indices // determine whether we are at the left top or right bottom of the rectangle, // the latter two determine the pixel offset. int x, y; uint4 Samples[2][2][2][2]; [unroll] for(x=0;x!=2;++x){ int TexelX=(x==0)?iLeftTop.x:iRightBottom.x; [unroll] for(y=0;y!=2;++y){ int TexelY=(y==0)?iLeftTop.y:iRightBottom.y; [unroll] for(int z=0;z!=2;++z){ [unroll] for(int w=0;w!=2;++w){ Samples[x][y][z][w] = _SummedAreaTableInputInt.Load(int3(TexelX+z,TexelY+w,0)); } } } } // Compute integrals for various rectangles float4 pCornerIntegral[2][2]; [unroll] for(x=0;x!=2;++x){ [unroll] for(y=0;y!=2;++y){ pCornerIntegral[x][y]=float4(Samples[x][y][0][0]+Samples[x][y][1][1]-Samples[x][y][1][0]-Samples[x][y][0][1]); } } float4 pEdgeIntegral[4]={ // Right edge float4(Samples[1][0][0][1]+Samples[1][1][1][0]-Samples[1][0][1][1]-Samples[1][1][0][0]), // Top edge float4(Samples[0][0][1][0]+Samples[1][0][0][1]-Samples[0][0][1][1]-Samples[1][0][0][0]), // Left edge float4(Samples[0][0][0][1]+Samples[0][1][1][0]-Samples[0][0][1][1]-Samples[0][1][0][0]), // Bottom edge float4(Samples[0][1][1][0]+Samples[1][1][0][1]-Samples[0][1][1][1]-Samples[1][1][0][0]) }; float4 CenterIntegral=float4(Samples[0][0][1][1]+Samples[1][1][0][0]-Samples[0][1][1][0]-Samples[1][0][0][1]); // Compute the integral over the given rectangle float4 Integral=CenterIntegral; Integral+=pCornerIntegral[0][0]*(LeftTopFactor.x*LeftTopFactor.y); Integral+=pCornerIntegral[0][1]*(LeftTopFactor.x*RightBottomFactor.y); Integral+=pCornerIntegral[1][0]*(RightBottomFactor.x*LeftTopFactor.y); Integral+=pCornerIntegral[1][1]*(RightBottomFactor.x*RightBottomFactor.y); Integral+=pEdgeIntegral[0]*RightBottomFactor.x; Integral+=pEdgeIntegral[1]*LeftTopFactor.y; Integral+=pEdgeIntegral[2]*LeftTopFactor.x; Integral+=pEdgeIntegral[3]*RightBottomFactor.y; // Get from a non-normalized integral to moments float2 Size=RightBottomPixel-LeftTopPixel; return Integral*FixedPrecision.y/(Size.x*Size.y); } void Convert4MomentOptimizedToCanonical(out float4 OutBiased4Moments,float4 OptimizedMoments0,float MomentBias=6.0e-5f){ OutBiased4Moments.xz=mul(OptimizedMoments0.xz-0.5f,float2x2(-1.0f/3.0f,-0.75f,sqrt(3.0f),0.75f*sqrt(3.0f))); OutBiased4Moments.yw=mul(OptimizedMoments0.yw,float2x2(0.125f,-0.125f,1.0f,1.0f)); OutBiased4Moments=lerp(OutBiased4Moments,float4(0.0f,0.628f,0.0f,0.628f),MomentBias); } void Compute4MomentUnboundedShadowIntensity(out float OutShadowIntensity, float4 Biased4Moments,float FragmentDepth,float DepthBias) { // Use short-hands for the many formulae to come float4 b=Biased4Moments; float3 z; z[0]=FragmentDepth-DepthBias; // Compute a Cholesky factorization of the Hankel matrix B storing only non- // trivial entries or related products float L21D11=mad(-b[0],b[1],b[2]); float D11=mad(-b[0],b[0], b[1]); float SquaredDepthVariance=mad(-b[1],b[1], b[3]); float D22D11=dot(float2(SquaredDepthVariance,-L21D11),float2(D11,L21D11)); float InvD11=1.0f/D11; float L21=L21D11*InvD11; float D22=D22D11*InvD11; float InvD22=1.0f/D22; // Obtain a scaled inverse image of bz=(1,z[0],z[0]*z[0])^T float3 c=float3(1.0f,z[0],z[0]*z[0]); // Forward substitution to solve L*c1=bz c[1]-=b.x; c[2]-=b.y+L21*c[1]; // Scaling to solve D*c2=c1 c[1]*=InvD11; c[2]*=InvD22; // Backward substitution to solve L^T*c3=c2 c[1]-=L21*c[2]; c[0]-=dot(c.yz,b.xy); // Solve the quadratic equation c[0]+c[1]*z+c[2]*z^2 to obtain solutions // z[1] and z[2] float InvC2=1.0f/c[2]; float p=c[1]*InvC2; float q=c[0]*InvC2; float D=(p*p*0.25f)-q; float r=sqrt(D); z[1]=-p*0.5f-r; z[2]=-p*0.5f+r; // Compute the shadow intensity by summing the appropriate weights float4 Switch= (z[2]