Skip to content
Open

RWMC #218

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 33 additions & 9 deletions 31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl>
#include <nbl/builtin/hlsl/math/functions.hlsl>
#include <nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl>
#include <nbl/builtin/hlsl/vector_utils/vector_traits.hlsl>
#include <nbl/builtin/hlsl/concepts.hlsl>

#include "rand_gen.hlsl"
#include "ray_gen.hlsl"
Expand Down Expand Up @@ -40,10 +42,32 @@ struct PathTracerCreationParams
BxDFCreation dielectricParams;
};

template<class RandGen, class RayGen, class Intersector, class MaterialSystem, /* class PathGuider, */ class NextEventEstimator>
// Accumulator that keeps a single running average of the samples fed to it.
// `OutputTypeVec` is the vector type used to store the averaged result.
template<typename OutputTypeVec NBL_PRIMARY_REQUIRES(concepts::FloatingPointVector<OutputTypeVec>)
struct DefaultAccumulator
{
    // This accumulator needs no configuration; the empty struct only exists so
    // that every accumulator exposes an `initialization_data` type.
    struct DefaultAccumulatorInitializationSettings {};

    using output_storage_type = OutputTypeVec;
    using initialization_data = DefaultAccumulatorInitializationSettings;

    // Current value of the running average.
    output_storage_type accumulation;

    // Resets the running average to zero. `initializationData` is not read.
    void initialize(in initialization_data initializationData)
    {
        accumulation = (output_storage_type)0.0f;
    }

    // Folds `sample` into the running average.
    // `sampleIndex` is the zero-based index of the sample; when samples are
    // added with consecutive indices starting at 0, `accumulation` equals the
    // arithmetic mean of all samples added so far.
    void addSample(uint32_t sampleIndex, float32_t3 sample)
    {
        using ScalarType = typename vector_traits<OutputTypeVec>::scalar_type;

        // weight of the newest sample: 1 / (number of samples including this one)
        ScalarType newestSampleWeight = 1.0 / (sampleIndex + 1);

        // incremental mean update: mean += (x - mean) / count
        accumulation += (sample - accumulation) * newestSampleWeight;
    }
};

template<class RandGen, class RayGen, class Intersector, class MaterialSystem, /* class PathGuider, */ class NextEventEstimator, class Accumulator>
struct Unidirectional
{
using this_t = Unidirectional<RandGen, RayGen, Intersector, MaterialSystem, NextEventEstimator>;
using this_t = Unidirectional<RandGen, RayGen, Intersector, MaterialSystem, NextEventEstimator, Accumulator>;
using randgen_type = RandGen;
using raygen_type = RayGen;
using intersector_type = Intersector;
Expand All @@ -53,6 +77,7 @@ struct Unidirectional
using scalar_type = typename MaterialSystem::scalar_type;
using vector3_type = vector<scalar_type, 3>;
using measure_type = typename MaterialSystem::measure_type;
using output_storage_type = typename Accumulator::output_storage_type;
using sample_type = typename NextEventEstimator::sample_type;
using ray_dir_info_type = typename sample_type::ray_dir_info_type;
using ray_type = typename RayGen::ray_type;
Expand Down Expand Up @@ -266,10 +291,11 @@ struct Unidirectional
}

// Li
measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene)
output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(typename Accumulator::initialization_data) accumulatorInitData)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

one suggestion to refactor, take sampleIndex (the i in the loop) and do the loop outside the path tracer (initialize the accumulator outside as well)

Also, I know it's not your code, but it would be better to rename depth to maxDepth.

{
measure_type Li = (measure_type)0.0;
scalar_type meanLumaSq = 0.0;
Accumulator accumulator;
accumulator.initialize(accumulatorInitData);
//scalar_type meanLumaSq = 0.0;
Comment on lines +296 to +298

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Take the accumulator by reference from the outside; otherwise static polymorphism gets harder with stateful accumulators.

Consider this scenario: I already have an accumulator, and I just want to add a few samples.

for (uint32_t i = 0; i < numSamples; i++)
{
vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf?
Expand All @@ -290,16 +316,14 @@ struct Unidirectional
if (!hit)
missProgram(ray);

measure_type accumulation = ray.payload.accumulation;
scalar_type rcpSampleSize = 1.0 / (i + 1);
Li += (accumulation - Li) * rcpSampleSize;
accumulator.addSample(i, ray.payload.accumulation);

// TODO: visualize high variance

// TODO: russian roulette early exit?
}

return Li;
return accumulator.accumulation;
Comment on lines -302 to +326

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

take accumulator by reference and make the function void

}

NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u;
Expand Down
58 changes: 53 additions & 5 deletions 31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,32 @@
#define BXDF_COUNT 7

#include "render_common.hlsl"
#include "rwmc_global_settings_common.hlsl"

#ifdef RWMC_ENABLED
#include <nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl>
#include "render_rwmc_common.hlsl"
#endif
Comment on lines +41 to +44

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you should be able to always include stuff like this without a penalty


#ifdef RWMC_ENABLED
[[vk::push_constant]] RenderRWMCPushConstants pc;
#else
[[vk::push_constant]] RenderPushConstants pc;
#endif

[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] Texture2D<float3> envMap; // unused
[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] SamplerState envSampler;

[[vk::binding(1, 2)]] Buffer<uint3> sampleSequence;

[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D<uint2> scramblebuf; // unused
[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler;

#ifdef RWMC_ENABLED
[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray<float32_t4> cascade;
#endif
[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage;
Comment on lines +60 to +63

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

always use an array view (without RWMC you can just make the array 1 layer), and use the same binding

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you're only writing the image (not loading) you can skip declaring the format, because we use the Unformatted Storage extension

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

after taking another look at the code, i figured it's best to keep cascade and outImage in two separate descriptor sets. this way descriptor set 0 (the one with outImage only) can be reused across different shaders.


#include "pathtracer.hlsl"

using namespace nbl;
Expand Down Expand Up @@ -96,7 +122,14 @@ using raygen_type = ext::RayGen::Basic<ray_type>;
using intersector_type = ext::Intersector::Comprehensive<ray_type, light_type, bxdfnode_type>;
using material_system_type = ext::MaterialSystem::System<diffuse_bxdf_type, conductor_bxdf_type, dielectric_bxdf_type>;
using nee_type = ext::NextEventEstimator::Estimator<scene_type, ray_type, sample_t, aniso_interaction, ext::IntersectMode::IM_PROCEDURAL, LIGHT_TYPE, POLYGON_METHOD>;
using pathtracer_type = ext::PathTracer::Unidirectional<randgen_type, raygen_type, intersector_type, material_system_type, nee_type>;

#ifdef RWMC_ENABLED
using accumulator_type = rwmc::CascadeAccumulator<float32_t3, CascadeSize>;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

naming nitpick, CascadeCount

#else
using accumulator_type = ext::PathTracer::DefaultAccumulator<float32_t3>;
#endif

using pathtracer_type = ext::PathTracer::Unidirectional<randgen_type, raygen_type, intersector_type, material_system_type, nee_type, accumulator_type>;

static const ext::Shape<ext::PST_SPHERE> spheres[SPHERE_COUNT] = {
ext::Shape<ext::PST_SPHERE>::create(float3(0.0, -100.5, -1.0), 100.0, 0u, light_type::INVALID_ID),
Expand Down Expand Up @@ -129,7 +162,7 @@ static const ext::Shape<ext::PST_RECTANGLE> rectangles[1];
#endif

static const light_type lights[LIGHT_COUNT] = {
light_type::create(spectral_t(30.0,25.0,15.0),
light_type::create(LightEminence,
#ifdef SPHERE_LIGHT
8u,
#else
Expand Down Expand Up @@ -217,9 +250,24 @@ void main(uint32_t3 threadID : SV_DispatchThreadID)

pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams);

float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene);
float32_t4 pixCol = float32_t4(color, 1.0);
outImage[coords] = pixCol;
#ifdef RWMC_ENABLED
accumulator_type::initialization_data accumulatorInitData;
accumulatorInitData.size = CascadeSize;
accumulatorInitData.start = pc.start;
accumulatorInitData.base = pc.base;
accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData);
for (uint32_t i = 0; i < CascadeSize; ++i)
{
float32_t4 cascadeLayerEntry = float32_t4(cascadeEntry.data[i], 1.0f);
cascade[uint3(coords.x, coords.y, i)] = cascadeLayerEntry;
}
#else
accumulator_type::initialization_data accumulatorInitData;
float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData);
outImage[coords] = float32_t4(color, 1.0);
#endif



#ifdef PERSISTENT_WORKGROUPS
}
Expand Down
23 changes: 11 additions & 12 deletions 31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl
Original file line number Diff line number Diff line change
@@ -1,23 +1,22 @@
#ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_
#define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

struct SPushConstants
#ifndef __HLSL_VERSION
#include "matrix4SIMD.h"
#endif

struct RenderPushConstants
{
#ifdef __HLSL_VERSION
float32_t4x4 invMVP;
#else
nbl::core::matrix4SIMD invMVP;
#endif
Comment on lines -4 to +15

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no we use hlsl::float32_t4x4 in C++ and float32_t4x4 in HLSL only!

int sampleCount;
int depth;
};

[[vk::push_constant]] SPushConstants pc;

[[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D<float3> envMap; // unused
[[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler;

[[vk::binding(1, 2)]] Buffer<uint3> sampleSequence;

[[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D<uint2> scramblebuf; // unused
[[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler;

[[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage;
NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f);

#endif
23 changes: 23 additions & 0 deletions 31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_
#define _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

#ifndef __HLSL_VERSION
#include "matrix4SIMD.h"
#endif

struct RenderRWMCPushConstants
{
#ifdef __HLSL_VERSION
float32_t4x4 invMVP;
#else
nbl::core::matrix4SIMD invMVP;
#endif
int sampleCount;
int depth;
Comment on lines +9 to +17

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make one push constant struct in terms of the other, preferably through composition, so:

struct RenderRWMCPushConstants
{
   RenderPushConstants base;
   rwmc::SplattingParameters rwmc;
};

float start;
float base;
float kappa;
Comment on lines +18 to +20

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't use the same push constants for resolve and for rendering & splatting.

I really want you to pack up start and base into its own rwmc::SplattingParameters struct

};

#endif
48 changes: 48 additions & 0 deletions 31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#include <nbl/builtin/hlsl/rwmc/rwmc.hlsl>
#include "resolve_common.hlsl"
#include "rwmc_global_settings_common.hlsl"
#ifdef PERSISTENT_WORKGROUPS
#include "nbl/builtin/hlsl/math/morton.hlsl"
#endif

[[vk::push_constant]] ResolvePushConstants pc;
[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage;
[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray<float32_t4> cascade;

using namespace nbl;
using namespace hlsl;

NBL_CONSTEXPR uint32_t WorkgroupSize = 512;
NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4;
NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10;
Comment on lines +16 to +17

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need those here, I think.


// Converts the 1D global invocation index of this thread into 2D pixel
// coordinates, walking `outImage` row by row (x = index % width, y = index / width).
int32_t2 getCoordinates()
{
    // `height` is only queried because GetDimensions reports both extents.
    uint32_t width, height;
    outImage.GetDimensions(width, height);

    const uint32_t linearIndex = glsl::gl_GlobalInvocationID().x;
    const uint32_t column = linearIndex % width;
    const uint32_t row = linearIndex / width;
    return int32_t2(column, row);
}
Comment on lines +19 to +24

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use a 2D dispatch


[numthreads(WorkgroupSize, 1, 1)]
void main(uint32_t3 threadID : SV_DispatchThreadID)
{
#ifdef PERSISTENT_WORKGROUPS
uint32_t virtualThreadIndex;
[loop]
for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920 * 1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't hardcode rendertarget resolution

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can query it from the cascade image; HLSL has intrinsics for that, and you can add SPIR-V intrinsics for the GLSL functions textureSize and imageSize.

{
virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x;
const int32_t2 coords = (int32_t2)math::Morton<uint32_t>::decode2d(virtualThreadIndex);
#else
const int32_t2 coords = getCoordinates();
#endif

rwmc::ReweightingParameters reweightingParameters = rwmc::computeReweightingParameters(pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa, CascadeSize);
Copy link
Member

@devshgraphicsprogramming devshgraphicsprogramming Oct 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be done on the CPU, and the reweightingParameters should be in your ResolvePushConstants push constants here.

float32_t3 color = rwmc::reweight(reweightingParameters, cascade, coords);

outImage[coords] = float32_t4(color, 1.0f);

#ifdef PERSISTENT_WORKGROUPS
}
#endif
Comment on lines +29 to +47

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Resolve doesn't need to use persistent WG, you can use a Regular 2D dispatch

}
13 changes: 13 additions & 0 deletions 31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#ifndef _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_
#define _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

// Push-constant block consumed by the resolve compute shader.
// The shader forwards `base`, `sampleCount`, `minReliableLuma` and `kappa`
// (together with CascadeSize) to rwmc::computeReweightingParameters.
// NOTE(review): the exact meaning and valid range of each field is defined by
// that function — confirm there before changing values on the host side.
// Member order is part of the layout of this block; do not reorder.
struct ResolvePushConstants
{
uint32_t sampleCount;
float base;
float minReliableLuma;
float kappa;
};

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#ifndef _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_
#define _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_
#include "nbl/builtin/hlsl/cpp_compat.hlsl"

// Number of cascade entries used by RWMC: it is the size template argument of
// rwmc::CascadeAccumulator, the number of layers the render shader writes into
// the `cascade` image array, and an input to rwmc::computeReweightingParameters.
NBL_CONSTEXPR uint32_t CascadeSize = 6u;

#endif
Loading