-
Notifications
You must be signed in to change notification settings - Fork 14
RWMC #218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: hlsl_path_tracer
Are you sure you want to change the base?
RWMC #218
Changes from all commits
3d206fd
2107be7
57a6a0f
389248c
ca8c232
04296d9
6168e14
8ecc60f
bbc8ab8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,8 @@ | |
| #include <nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl> | ||
| #include <nbl/builtin/hlsl/math/functions.hlsl> | ||
| #include <nbl/builtin/hlsl/bxdf/bxdf_traits.hlsl> | ||
| #include <nbl/builtin/hlsl/vector_utils/vector_traits.hlsl> | ||
| #include <nbl/builtin/hlsl/concepts.hlsl> | ||
|
|
||
| #include "rand_gen.hlsl" | ||
| #include "ray_gen.hlsl" | ||
|
|
@@ -40,10 +42,32 @@ struct PathTracerCreationParams | |
| BxDFCreation dielectricParams; | ||
| }; | ||
|
|
||
| template<class RandGen, class RayGen, class Intersector, class MaterialSystem, /* class PathGuider, */ class NextEventEstimator> | ||
| template<typename OutputTypeVec NBL_PRIMARY_REQUIRES(concepts::FloatingPointVector<OutputTypeVec>) | ||
| struct DefaultAccumulator | ||
| { | ||
| struct DefaultAccumulatorInitializationSettings {}; | ||
|
|
||
| using output_storage_type = OutputTypeVec; | ||
| using initialization_data = DefaultAccumulatorInitializationSettings; | ||
| output_storage_type accumulation; | ||
|
|
||
| void initialize(in initialization_data initializationData) | ||
| { | ||
| accumulation = (output_storage_type)0.0f; | ||
| } | ||
|
|
||
| void addSample(uint32_t sampleIndex, float32_t3 sample) | ||
| { | ||
| using ScalarType = typename vector_traits<OutputTypeVec>::scalar_type; | ||
| ScalarType rcpSampleSize = 1.0 / (sampleIndex + 1); | ||
| accumulation += (sample - accumulation) * rcpSampleSize; | ||
| } | ||
| }; | ||
|
|
||
| template<class RandGen, class RayGen, class Intersector, class MaterialSystem, /* class PathGuider, */ class NextEventEstimator, class Accumulator> | ||
| struct Unidirectional | ||
| { | ||
| using this_t = Unidirectional<RandGen, RayGen, Intersector, MaterialSystem, NextEventEstimator>; | ||
| using this_t = Unidirectional<RandGen, RayGen, Intersector, MaterialSystem, NextEventEstimator, Accumulator>; | ||
| using randgen_type = RandGen; | ||
| using raygen_type = RayGen; | ||
| using intersector_type = Intersector; | ||
|
|
@@ -53,6 +77,7 @@ struct Unidirectional | |
| using scalar_type = typename MaterialSystem::scalar_type; | ||
| using vector3_type = vector<scalar_type, 3>; | ||
| using measure_type = typename MaterialSystem::measure_type; | ||
| using output_storage_type = typename Accumulator::output_storage_type; | ||
| using sample_type = typename NextEventEstimator::sample_type; | ||
| using ray_dir_info_type = typename sample_type::ray_dir_info_type; | ||
| using ray_type = typename RayGen::ray_type; | ||
|
|
@@ -266,10 +291,11 @@ struct Unidirectional | |
| } | ||
|
|
||
| // Li | ||
| measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) | ||
| output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(typename Accumulator::initialization_data) accumulatorInitData) | ||
| { | ||
| measure_type Li = (measure_type)0.0; | ||
| scalar_type meanLumaSq = 0.0; | ||
| Accumulator accumulator; | ||
| accumulator.initialize(accumulatorInitData); | ||
| //scalar_type meanLumaSq = 0.0; | ||
|
Comment on lines
+296
to
+298
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. take accumulator by reference from the outside, otherwise static polymorphism gets harder with stateful accumulators. Consider this scenario: I already have an accumulator, and I just want to add a few samples |
||
| for (uint32_t i = 0; i < numSamples; i++) | ||
| { | ||
| vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? | ||
|
|
@@ -290,16 +316,14 @@ struct Unidirectional | |
| if (!hit) | ||
| missProgram(ray); | ||
|
|
||
| measure_type accumulation = ray.payload.accumulation; | ||
| scalar_type rcpSampleSize = 1.0 / (i + 1); | ||
| Li += (accumulation - Li) * rcpSampleSize; | ||
| accumulator.addSample(i, ray.payload.accumulation); | ||
|
|
||
| // TODO: visualize high variance | ||
|
|
||
| // TODO: russian roulette early exit? | ||
| } | ||
|
|
||
| return Li; | ||
| return accumulator.accumulation; | ||
|
Comment on lines
-302
to
+326
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. take accumulator by reference and make the function |
||
| } | ||
|
|
||
| NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -36,6 +36,32 @@ | |
| #define BXDF_COUNT 7 | ||
|
|
||
| #include "render_common.hlsl" | ||
| #include "rwmc_global_settings_common.hlsl" | ||
|
|
||
| #ifdef RWMC_ENABLED | ||
| #include <nbl/builtin/hlsl/rwmc/CascadeAccumulator.hlsl> | ||
| #include "render_rwmc_common.hlsl" | ||
| #endif | ||
|
Comment on lines
+41
to
+44
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you should be able to always include stuff like this without a penalty |
||
|
|
||
| #ifdef RWMC_ENABLED | ||
| [[vk::push_constant]] RenderRWMCPushConstants pc; | ||
| #else | ||
| [[vk::push_constant]] RenderPushConstants pc; | ||
| #endif | ||
|
|
||
| [[vk::combinedImageSampler]] [[vk::binding(0, 2)]] Texture2D<float3> envMap; // unused | ||
| [[vk::combinedImageSampler]] [[vk::binding(0, 2)]] SamplerState envSampler; | ||
|
|
||
| [[vk::binding(1, 2)]] Buffer<uint3> sampleSequence; | ||
|
|
||
| [[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D<uint2> scramblebuf; // unused | ||
| [[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler; | ||
|
|
||
| #ifdef RWMC_ENABLED | ||
| [[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray<float32_t4> cascade; | ||
| #endif | ||
| [[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage; | ||
|
Comment on lines
+60
to
+63
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. always use an array view (without RWMC you can just make the array 1 layer), and use the same binding
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if you're only writing the image (not loading) you can skip declaring the format, because we use the Unformatted Storage extension
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. after taking another look at the code, i figured it's best to keep cascade and outImage in two separate descriptor sets. this way descriptor set 0 (the one with outImage only) can be reused across different shaders. |
||
|
|
||
| #include "pathtracer.hlsl" | ||
|
|
||
| using namespace nbl; | ||
|
|
@@ -96,7 +122,14 @@ using raygen_type = ext::RayGen::Basic<ray_type>; | |
| using intersector_type = ext::Intersector::Comprehensive<ray_type, light_type, bxdfnode_type>; | ||
| using material_system_type = ext::MaterialSystem::System<diffuse_bxdf_type, conductor_bxdf_type, dielectric_bxdf_type>; | ||
| using nee_type = ext::NextEventEstimator::Estimator<scene_type, ray_type, sample_t, aniso_interaction, ext::IntersectMode::IM_PROCEDURAL, LIGHT_TYPE, POLYGON_METHOD>; | ||
| using pathtracer_type = ext::PathTracer::Unidirectional<randgen_type, raygen_type, intersector_type, material_system_type, nee_type>; | ||
|
|
||
| #ifdef RWMC_ENABLED | ||
| using accumulator_type = rwmc::CascadeAccumulator<float32_t3, CascadeSize>; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. naming nitpick, |
||
| #else | ||
| using accumulator_type = ext::PathTracer::DefaultAccumulator<float32_t3>; | ||
| #endif | ||
|
|
||
| using pathtracer_type = ext::PathTracer::Unidirectional<randgen_type, raygen_type, intersector_type, material_system_type, nee_type, accumulator_type>; | ||
|
|
||
| static const ext::Shape<ext::PST_SPHERE> spheres[SPHERE_COUNT] = { | ||
| ext::Shape<ext::PST_SPHERE>::create(float3(0.0, -100.5, -1.0), 100.0, 0u, light_type::INVALID_ID), | ||
|
|
@@ -129,7 +162,7 @@ static const ext::Shape<ext::PST_RECTANGLE> rectangles[1]; | |
| #endif | ||
|
|
||
| static const light_type lights[LIGHT_COUNT] = { | ||
| light_type::create(spectral_t(30.0,25.0,15.0), | ||
| light_type::create(LightEminence, | ||
| #ifdef SPHERE_LIGHT | ||
| 8u, | ||
| #else | ||
|
|
@@ -217,9 +250,24 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) | |
|
|
||
| pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); | ||
|
|
||
| float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); | ||
| float32_t4 pixCol = float32_t4(color, 1.0); | ||
| outImage[coords] = pixCol; | ||
| #ifdef RWMC_ENABLED | ||
| accumulator_type::initialization_data accumulatorInitData; | ||
| accumulatorInitData.size = CascadeSize; | ||
| accumulatorInitData.start = pc.start; | ||
| accumulatorInitData.base = pc.base; | ||
| accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData); | ||
| for (uint32_t i = 0; i < CascadeSize; ++i) | ||
| { | ||
| float32_t4 cascadeLayerEntry = float32_t4(cascadeEntry.data[i], 1.0f); | ||
| cascade[uint3(coords.x, coords.y, i)] = cascadeLayerEntry; | ||
| } | ||
| #else | ||
| accumulator_type::initialization_data accumulatorInitData; | ||
| float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData); | ||
| outImage[coords] = float32_t4(color, 1.0); | ||
| #endif | ||
|
|
||
|
|
||
|
|
||
| #ifdef PERSISTENT_WORKGROUPS | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,23 +1,22 @@ | ||
| #ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ | ||
| #define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ | ||
| #include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
|
|
||
| struct SPushConstants | ||
| #ifndef __HLSL_VERSION | ||
| #include "matrix4SIMD.h" | ||
| #endif | ||
|
|
||
| struct RenderPushConstants | ||
| { | ||
| #ifdef __HLSL_VERSION | ||
| float32_t4x4 invMVP; | ||
| #else | ||
| nbl::core::matrix4SIMD invMVP; | ||
| #endif | ||
|
Comment on lines
-4
to
+15
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no we use |
||
| int sampleCount; | ||
| int depth; | ||
| }; | ||
|
|
||
| [[vk::push_constant]] SPushConstants pc; | ||
|
|
||
| [[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D<float3> envMap; // unused | ||
| [[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler; | ||
|
|
||
| [[vk::binding(1, 2)]] Buffer<uint3> sampleSequence; | ||
|
|
||
| [[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D<uint2> scramblebuf; // unused | ||
| [[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; | ||
|
|
||
| [[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage; | ||
| NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f); | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| #ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ | ||
| #define _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ | ||
| #include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
|
|
||
| #ifndef __HLSL_VERSION | ||
| #include "matrix4SIMD.h" | ||
| #endif | ||
|
|
||
| struct RenderRWMCPushConstants | ||
| { | ||
| #ifdef __HLSL_VERSION | ||
| float32_t4x4 invMVP; | ||
| #else | ||
| nbl::core::matrix4SIMD invMVP; | ||
| #endif | ||
| int sampleCount; | ||
| int depth; | ||
|
Comment on lines
+9
to
+17
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make one push constant struct in terms of the other, preferably through composition, so struct RenderRWMCPushConstants
{
RenderPushConstants base;
rwmc::SplattingParameters rwmc;
}; |
||
| float start; | ||
| float base; | ||
| float kappa; | ||
|
Comment on lines
+18
to
+20
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't use the same push constants for resolve and rendering & splatting. I really want you to pack up |
||
| }; | ||
|
|
||
| #endif | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| #include <nbl/builtin/hlsl/rwmc/rwmc.hlsl> | ||
| #include "resolve_common.hlsl" | ||
| #include "rwmc_global_settings_common.hlsl" | ||
| #ifdef PERSISTENT_WORKGROUPS | ||
| #include "nbl/builtin/hlsl/math/morton.hlsl" | ||
| #endif | ||
|
|
||
| [[vk::push_constant]] ResolvePushConstants pc; | ||
| [[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D<float32_t4> outImage; | ||
| [[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray<float32_t4> cascade; | ||
|
|
||
| using namespace nbl; | ||
| using namespace hlsl; | ||
|
|
||
| NBL_CONSTEXPR uint32_t WorkgroupSize = 512; | ||
| NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; | ||
| NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; | ||
|
Comment on lines
+16
to
+17
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need those here, I think |
||
|
|
||
| int32_t2 getCoordinates() | ||
| { | ||
| uint32_t width, height; | ||
| outImage.GetDimensions(width, height); | ||
| return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); | ||
| } | ||
|
Comment on lines
+19
to
+24
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use a 2D dispatch |
||
|
|
||
| [numthreads(WorkgroupSize, 1, 1)] | ||
| void main(uint32_t3 threadID : SV_DispatchThreadID) | ||
| { | ||
| #ifdef PERSISTENT_WORKGROUPS | ||
| uint32_t virtualThreadIndex; | ||
| [loop] | ||
| for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920 * 1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. don't hardcode rendertarget resolution
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you can query it from the cascade image. HLSL has intrinsics for that; you can add SPIR-V intrinsics for GLSL functions |
||
| { | ||
| virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; | ||
| const int32_t2 coords = (int32_t2)math::Morton<uint32_t>::decode2d(virtualThreadIndex); | ||
| #else | ||
| const int32_t2 coords = getCoordinates(); | ||
| #endif | ||
|
|
||
| rwmc::ReweightingParameters reweightingParameters = rwmc::computeReweightingParameters(pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa, CascadeSize); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be done on CPU and the |
||
| float32_t3 color = rwmc::reweight(reweightingParameters, cascade, coords); | ||
|
|
||
| outImage[coords] = float32_t4(color, 1.0f); | ||
|
|
||
| #ifdef PERSISTENT_WORKGROUPS | ||
| } | ||
| #endif | ||
|
Comment on lines
+29
to
+47
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Resolve doesn't need to use persistent WG, you can use a Regular 2D dispatch |
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| #ifndef _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ | ||
| #define _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ | ||
| #include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
|
|
||
| struct ResolvePushConstants | ||
| { | ||
| uint32_t sampleCount; | ||
| float base; | ||
| float minReliableLuma; | ||
| float kappa; | ||
| }; | ||
|
|
||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| #ifndef _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ | ||
| #define _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ | ||
| #include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
|
|
||
| NBL_CONSTEXPR uint32_t CascadeSize = 6u; | ||
|
|
||
| #endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
one suggestion to refactor, take
sampleIndex (the i in the loop) and do the loop outside the path tracer (initialize the accumulator outside as well). Also, I know it's not your code, but better rename
depth to maxDepth