Skip to content

Commit

Permalink
NRD updated to v4.11.0:
Browse files Browse the repository at this point in the history
- PT: added 1-bit mask for glass and hair
- PT: fixed dark tails, left by moving emissive objects
- PT: ray offsetting tweaks
- UI: adjustments
- NRI updated to v1.154
- eliminated awkwardness around "materialID"
- dropped 3D MV
- improved TAA, also fixed potential ghosting and pixelation, while keeping IQ on glass and hair
- added "USE_CAMERA_ATTACHED_REFLECTION_TEST"
  • Loading branch information
dzhdanNV committed Nov 5, 2024
1 parent 2573b8b commit f6f2478
Show file tree
Hide file tree
Showing 13 changed files with 214 additions and 137 deletions.
2 changes: 1 addition & 1 deletion External/NRD
Submodule NRD updated from b68073 to dc0ba4
2 changes: 1 addition & 1 deletion External/NRIFramework
8 changes: 4 additions & 4 deletions Shaders/Composition.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ void main( int2 pixelPos : SV_DispatchThreadId )
float3 Lemi = gIn_DirectEmission[ pixelPos ];

// Normal, roughness and material ID
float normMaterialID;
float4 normalAndRoughness = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ pixelPos ], normMaterialID );
float materialID;
float4 normalAndRoughness = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ pixelPos ], materialID );
float3 N = normalAndRoughness.xyz;
float roughness = normalAndRoughness.w;

Expand Down Expand Up @@ -202,7 +202,7 @@ void main( int2 pixelPos : SV_DispatchThreadId )
float3 diffDemod = ( 1.0 - Fenv ) * albedo * 0.99 + 0.01;
float3 specDemod = Fenv * 0.99 + 0.01;

if( normMaterialID == MATERIAL_ID_HAIR / MATERIAL_NORM )
if( materialID == MATERIAL_ID_HAIR )
{
diffDemod = 1.0;
specDemod = 1.0;
Expand Down Expand Up @@ -245,7 +245,7 @@ void main( int2 pixelPos : SV_DispatchThreadId )
else if( gOnScreen == SHOW_METALNESS )
Ldiff = baseColorMetalness.w;
else if( gOnScreen == SHOW_MATERIAL_ID )
Ldiff = normMaterialID;
Ldiff = materialID / 3.0;
else if( gOnScreen == SHOW_PSR_THROUGHPUT )
Ldiff = psrThroughput;
else if( gOnScreen == SHOW_WORLD_UNITS )
Expand Down
12 changes: 1 addition & 11 deletions Shaders/DlssBefore.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#include "Include/Shared.hlsli"

NRI_RESOURCE( RWTexture2D<float>, gInOut_ViewZ, u, 0, 1 );
NRI_RESOURCE( RWTexture2D<float3>, gInOut_Mv, u, 1, 1 );

[numthreads( 16, 16, 1 )]
void main( uint2 pixelPos : SV_DispatchThreadId )
Expand All @@ -29,16 +28,7 @@ void main( uint2 pixelPos : SV_DispatchThreadId )
if( gSR )
{
float4 clipPos = Geometry::ProjectiveTransform( gViewToClip, Xv );
gInOut_ViewZ[ pixelPos ] = clipPos.z / clipPos.w;
}

// Patch MV, because 2D MVs needed
if( gIsWorldSpaceMotionEnabled )
{
float3 mv = gInOut_Mv[ pixelPos ];
float3 Xprev = Geometry::AffineTransform( gViewToWorld, Xv ) + mv;
float2 pixelUvPrev = Geometry::GetScreenUv( gWorldToClipPrev, Xprev );
mv.xy = ( pixelUvPrev - pixelUv ) * gRenderSize;
gInOut_Mv[ pixelPos ] = mv;
gInOut_ViewZ[ pixelPos ] = clipPos.z / clipPos.w;
}
}
5 changes: 5 additions & 0 deletions Shaders/Final.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ void main( uint2 pixelPos : SV_DispatchThreadId )
result = lerp( result, validation.xyz, validation.w );
}

// Debug
#if( USE_TAA_DEBUG == 1 )
result = gIn_PostAA[ pixelPos ].w;
#endif

// Output
gOut_Final[ pixelPos ] = result;
}
2 changes: 1 addition & 1 deletion Shaders/Include/RaytracingShared.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ struct GeometryProps
float viewZ = Geometry::AffineTransform( gWorldToView, X ).z;
amount *= gUnproject * lerp( abs( viewZ ), 1.0, abs( gOrthoMode ) );

return X + offsetDir * amount;
return X + offsetDir * max( amount, 0.00001 );
}

bool Has( uint flag )
Expand Down
38 changes: 19 additions & 19 deletions Shaders/Include/Shared.hlsli
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#define USE_TRANSLUCENCY 1 // translucent foliage
#define USE_NIS 1 // NIS filter (debug only)
#define USE_SHARC_V_DEPENDENT 1 // needed to get a full match with prev frame data // TODO: improve multi-bounce low-roughness case
#define USE_MOVING_EMISSION_FIX 1 // fixes a dark tail, left by an animated emissive object

// Default = 0
#define USE_SANITIZATION 0 // NRD sample is NAN/INF free
#define USE_SIMULATED_MATERIAL_ID_TEST 0 // for "material ID" support debugging
#define USE_SIMULATED_MATERIAL_ID_TEST 0 // "material ID" debugging
#define USE_SIMULATED_FIREFLY_TEST 0 // "anti-firefly" debugging
#define USE_CAMERA_ATTACHED_REFLECTION_TEST 0 // test special treatment for reflections of objects attached to the camera
#define USE_RUSSIAN_ROULETTE 0 // bad practice for real-time denoising
#define USE_DRS_STRESS_TEST 0 // test for verifying that NRD doesn't touch data outside of DRS rectangle
#define USE_INF_STRESS_TEST 0 // test for verifying that NRD doesn't touch data outside of denoising range
Expand All @@ -44,6 +46,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#define USE_RANDOMIZED_ROUGHNESS 0 // randomize roughness ( a common case in games )
#define USE_LOAD 0 // Load vs SampleLevel
#define USE_SHARC_DEBUG 0 // 1 - show cache, 2 - show grid
#define USE_TAA_DEBUG 0 // 1 - show weight

//=============================================================================================
// CONSTANTS
Expand Down Expand Up @@ -90,10 +93,10 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#define MATERIAL_COBALT 2

// Material ID
#define MATERIAL_ID_DEFAULT 0
#define MATERIAL_ID_METAL 1
#define MATERIAL_ID_HAIR 2
#define MATERIAL_NORM 3.0
#define MATERIAL_ID_DEFAULT 0.0
#define MATERIAL_ID_METAL 1.0
#define MATERIAL_ID_HAIR 2.0
#define MATERIAL_ID_SELF_REFLECTION 3.0

// Mip mode
#define MIP_VISIBILITY 0 // for visibility: emission, shadow and alpha mask
Expand All @@ -119,10 +122,11 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.
#define SPEC_LOBE_ENERGY 0.95 // trimmed to 95%
#define LEAF_TRANSLUCENCY 0.25
#define LEAF_THICKNESS 0.001 // TODO: viewZ dependent?
#define STRAND_THICKNESS 80e-6
#define STRAND_THICKNESS 80e-6f
#define TAA_HISTORY_SHARPNESS 0.5
#define BOUNCE_RAY_OFFSET 0.05 // pixels
#define SHADOW_RAY_OFFSET 1.0 // pixels
#define BOUNCE_RAY_OFFSET 0.25 // pixels
#define GLASS_RAY_OFFSET 0.05 // pixels

#define SHARC_CAPACITY ( 1 << 22 )
#define SHARC_SCENE_SCALE 50.0
Expand Down Expand Up @@ -290,7 +294,6 @@ NRI_RESOURCE( cbuffer, GlobalConstants, b, 0, SET_GLOBAL )
uint32_t gFrameIndex;
uint32_t gForcedMaterial;
uint32_t gUseNormalMap;
uint32_t gIsWorldSpaceMotionEnabled;
uint32_t gTracingMode;
uint32_t gSampleNum;
uint32_t gBounceNum;
Expand Down Expand Up @@ -395,20 +398,17 @@ float3 GetMotion( float3 X, float3 Xprev )
{
float3 motion = Xprev - X;

if( !gIsWorldSpaceMotionEnabled )
{
float viewZ = Geometry::AffineTransform( gWorldToView, X ).z;
float2 sampleUv = Geometry::GetScreenUv( gWorldToClip, X );
float viewZ = Geometry::AffineTransform( gWorldToView, X ).z;
float2 sampleUv = Geometry::GetScreenUv( gWorldToClip, X );

float viewZprev = Geometry::AffineTransform( gWorldToViewPrev, Xprev ).z;
float2 sampleUvPrev = Geometry::GetScreenUv( gWorldToClipPrev, Xprev );
float viewZprev = Geometry::AffineTransform( gWorldToViewPrev, Xprev ).z;
float2 sampleUvPrev = Geometry::GetScreenUv( gWorldToClipPrev, Xprev );

// IMPORTANT: scaling to "pixel" unit significantly improves utilization of FP16
motion.xy = ( sampleUvPrev - sampleUv ) * gRectSize;
// IMPORTANT: scaling to "pixel" unit significantly improves utilization of FP16
motion.xy = ( sampleUvPrev - sampleUv ) * gRectSize;

// IMPORTANT: 2.5D motion is preferred over 3D motion due to imprecision issues caused by FP16 rounding negative effects
motion.z = viewZprev - viewZ;
}
// IMPORTANT: 2.5D motion is preferred over 3D motion due to imprecision issues caused by FP16 rounding negative effects
motion.z = viewZprev - viewZ;

return motion;
}
Expand Down
78 changes: 43 additions & 35 deletions Shaders/Taa.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited.

#include "Include/Shared.hlsli"

NRI_RESOURCE( Texture2D<float>, gIn_ViewZ, t, 0, 1 );
NRI_RESOURCE( Texture2D<float3>, gIn_Mv, t, 1, 1 );
NRI_RESOURCE( Texture2D<float3>, gIn_Composed, t, 2, 1 );
NRI_RESOURCE( Texture2D<float4>, gIn_History, t, 3, 1 );
NRI_RESOURCE( Texture2D<float4>, gIn_Mv, t, 0, 1 );
NRI_RESOURCE( Texture2D<float3>, gIn_Composed, t, 1, 1 );
NRI_RESOURCE( Texture2D<float4>, gIn_History, t, 2, 1 );

NRI_RESOURCE( RWTexture2D<float4>, gOut_Result, u, 0, 1 );

Expand All @@ -36,17 +35,15 @@ NRI_RESOURCE( RWTexture2D<float4>, gOut_Result, u, 0, 1 );
} \
GroupMemoryBarrierWithGroupSync( )

groupshared float4 s_Data[ BUFFER_Y ][ BUFFER_X ];
groupshared float3 s_Color[ BUFFER_Y ][ BUFFER_X ];
groupshared float3 s_Mv[ BUFFER_Y ][ BUFFER_X ];

void Preload( uint2 sharedPos, int2 globalPos )
{
globalPos = clamp( globalPos, 0, gRectSize - 1.0 );

float4 color_viewZ;
color_viewZ.xyz = ApplyTonemap( gIn_Composed[ globalPos ] );
color_viewZ.w = gIn_ViewZ[ globalPos ];

s_Data[ sharedPos.y ][ sharedPos.x ] = color_viewZ;
s_Color[ sharedPos.y ][ sharedPos.x ] = ApplyTonemap( gIn_Composed[ globalPos ] );
s_Mv[ sharedPos.y ][ sharedPos.x ] = gIn_Mv[ globalPos ].xyw; // dZ is not needed
}

// TODO: move to ml?
Expand Down Expand Up @@ -76,52 +73,59 @@ void main( int2 threadPos : SV_GroupThreadId, int2 pixelPos : SV_DispatchThreadI
return;

// Neighborhood
float sum = 0;
float3 m1 = 0;
float3 m2 = 0;
float3 input = 0;

float viewZ = s_Data[ threadPos.y + BORDER ][threadPos.x + BORDER ].w;
float viewZnearest = viewZ;
float3 centerMv = s_Mv[ threadPos.y + BORDER ][threadPos.x + BORDER ];
float mvLengthSqMax = Math::LengthSquared( centerMv.xy );
int2 offseti = int2( BORDER, BORDER );

bool want5x5 = centerMv.z < 0.0; // 5x5 is needed for hair ( super thin ) and glass ( noisy ), also it's safe to use it for sky to get better edges

[unroll]
for( int dy = 0; dy <= BORDER * 2; dy++ )
{
[unroll]
for( int dx = 0; dx <= BORDER * 2; dx++ )
{
int2 t = int2( dx, dy );
int2 smemPos = threadPos + t;
float4 data = s_Data[ smemPos.y ][ smemPos.x ];
if( !want5x5 && ( dx == 0 || dx == BORDER * 2 || dy == 0 || dy == BORDER * 2 ) )
continue;

int2 offset = int2( dx, dy );
int2 smemPos = threadPos + offset;

float3 c = s_Color[ smemPos.y ][ smemPos.x ];
float2 mv = s_Mv[ smemPos.y ][ smemPos.x ].xy;
float mvLengthSq = Math::LengthSquared( mv.xy );

if( dx == BORDER && dy == BORDER )
input = data.xyz;
else if( abs( data.w ) < abs( viewZnearest ) )
input = c;
else if( mvLengthSq > mvLengthSqMax )
{
viewZnearest = data.w;
offseti = t;
mvLengthSqMax = mvLengthSq;
offseti = offset;
}

m1 += data.xyz;
m2 += data.xyz * data.xyz;
float r2 = Math::LengthSquared( offset / BORDER - 1.0 );
float w = exp( -r2 );

m1 += c * w;
m2 += c * c * w;
sum += w;
}
}

float invSum = 1.0 / ( ( BORDER * 2 + 1 ) * ( BORDER * 2 + 1 ) );
m1 *= invSum;
m2 *= invSum;
m1 /= sum;
m2 /= sum;

float3 sigma = sqrt( abs( m2 - m1 * m1 ) ); // TODO: increase sigma for hair and glass?

// Previous pixel position
float3 Xv = Geometry::ReconstructViewPosition( pixelUv, gCameraFrustum, viewZnearest, gOrthoMode );
float3 X = Geometry::AffineTransform( gViewToWorld, Xv );
float3 mv = gIn_Mv[ pixelPos + offseti - BORDER ] * ( gIsWorldSpaceMotionEnabled ? 1.0 : gInvRectSize.xyy );
float3 mv = s_Mv[ threadPos.y + offseti.y ][ threadPos.x + offseti.x ].xyz * float3( gInvRectSize.xy, 1.0 );
float2 pixelUvPrev = pixelUv + mv.xy;

if( gIsWorldSpaceMotionEnabled )
pixelUvPrev = Geometry::GetScreenUv( gWorldToClipPrev, X + mv );

// History
float2 pixelPosPrev = saturate( pixelUvPrev ) * gRectSizePrev;
float4 history = BicubicFilterNoCorners( gIn_History, gLinearSampler, pixelPosPrev, gInvRenderSize, TAA_HISTORY_SHARPNESS );
Expand All @@ -142,21 +146,25 @@ void main( int2 threadPos : SV_GroupThreadId, int2 pixelPos : SV_DispatchThreadI

// Disocclusion #2
float3 clampedHistory = Color::ClampAabb( m1, sigma, history.xyz );
#if 1 // good enough
mixRate += length( clampedHistory - history.xyz ) * 0.75;
#if 0 // good enough?
float diff = length( clampedHistory - history.xyz );
diff = Math::Pow01( diff, 1.2 );
#else
float3 a = XyzToLab( Color::RgbToXyz( clampedHistory ) );
float3 b = XyzToLab( Color::RgbToXyz( history.xyz ) );

const float JND = 2.3; // just noticeable difference
mixRate += length( a - b ) / ( JND * 3.0 );
float diff = length( a - b ) / ( JND * 3.0 );
#endif
mixRate += diff;

// Clamp mix rate
mixRate = clamp( mixRate, gTAA, 1.0 );
mixRate = saturate( mixRate );

// TODO: anti-flickering, compatible with "mixRate"?

// Final mix
float3 result = lerp( clampedHistory, input, mixRate );
float3 result = lerp( clampedHistory, input, max( mixRate, gTAA ) );

// Apply transfer
if( gIsSrgb )
Expand Down
23 changes: 21 additions & 2 deletions Shaders/TraceOpaque.cs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ TraceOpaqueResult TraceOpaque( inout TraceOpaqueDesc desc )
float3 XvirtualPrev = Xvirtual + geometryProps.Xprev - geometryProps.X;
float3 motion = GetMotion( Xvirtual, XvirtualPrev );

gOut_Mv[ desc.pixelPos ] = float4( motion, viewZ * FP16_VIEWZ_SCALE ); // keep viewZ before PSR ( needed for glass )
gOut_Mv[ desc.pixelPos ].xyz = motion; // IMPORTANT: keep viewZ before PSR ( needed for glass )

// PSR - Update viewZ
viewZ = Geometry::AffineTransform( gWorldToView, Xvirtual ).z;
Expand Down Expand Up @@ -651,6 +651,16 @@ TraceOpaqueResult TraceOpaque( inout TraceOpaqueDesc desc )
accumulatedHitDist += ApplyThinLensEquation( geometryProps.hitT, accumulatedCurvature ) * Math::SmoothStep( 0.2, 0.0, accumulatedDiffuseLikeMotion );
accumulatedDiffuseLikeMotion += 1.0 - importance * ( 1.0 - diffuseLikeMotion );
accumulatedCurvature += materialProps.curvature; // yes, after hit

#if( USE_CAMERA_ATTACHED_REFLECTION_TEST == 1 && NRD_NORMAL_ENCODING == NRD_NORMAL_ENCODING_R10G10B10A2_UNORM )
// IMPORTANT: lazy ( no checkerboard support ) implementation of reflections masking for objects attached to the camera
// TODO: better find a generic solution for tracking of reflections for objects attached to the camera
if( bounce == 1 && !isDiffuse && desc.materialProps.roughness < 0.01 )
{
if( !geometryProps.IsSky( ) && !geometryProps.Has( FLAG_STATIC ) )
gOut_Normal_Roughness[ desc.pixelPos ].w = MATERIAL_ID_SELF_REFLECTION;
}
#endif
}
}

Expand Down Expand Up @@ -819,7 +829,10 @@ void main( uint2 pixelPos : SV_DispatchThreadId )
// Motion
float3 motion = GetMotion( geometryProps0.X, geometryProps0.Xprev );

gOut_Mv[ pixelPos ] = float4( motion, viewZ * FP16_VIEWZ_SCALE );
float viewZAndTaaMask = abs( viewZ ) * FP16_VIEWZ_SCALE;
viewZAndTaaMask *= ( geometryProps0.Has( FLAG_HAIR ) || geometryProps0.IsSky( ) ) ? -1.0 : 1.0;

gOut_Mv[ pixelPos ] = float4( motion, viewZAndTaaMask );

// Early out - sky
if( geometryProps0.IsSky( ) )
Expand Down Expand Up @@ -902,6 +915,12 @@ void main( uint2 pixelPos : SV_DispatchThreadId )

TraceOpaqueResult result = TraceOpaque( desc );

#if( USE_MOVING_EMISSION_FIX == 1 )
// Or emissives ( not having lighting in diffuse and specular ) can use a different material ID
result.diffRadiance += desc.materialProps.Lemi / Math::Pi( 2.0 );
result.specRadiance += desc.materialProps.Lemi / Math::Pi( 2.0 );
#endif

#if( USE_SIMULATED_MATERIAL_ID_TEST == 1 )
if( frac( geometryProps0.X ).x < 0.05 )
result.diffRadiance = float3( 0, 10, 0 ) * Color::Luminance( result.diffRadiance );
Expand Down
Loading

0 comments on commit f6f2478

Please sign in to comment.