diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl index f5d5206dc..9fe4ff5e7 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "rand_gen.hlsl" #include "ray_gen.hlsl" @@ -40,10 +42,32 @@ struct PathTracerCreationParams BxDFCreation dielectricParams; }; -template +template) +struct DefaultAccumulator +{ + struct DefaultAccumulatorInitializationSettings {}; + + using output_storage_type = OutputTypeVec; + using initialization_data = DefaultAccumulatorInitializationSettings; + output_storage_type accumulation; + + void initialize(in initialization_data initializationData) + { + accumulation = (output_storage_type)0.0f; + } + + void addSample(uint32_t sampleIndex, float32_t3 sample) + { + using ScalarType = typename vector_traits::scalar_type; + ScalarType rcpSampleSize = 1.0 / (sampleIndex + 1); + accumulation += (sample - accumulation) * rcpSampleSize; + } +}; + +template struct Unidirectional { - using this_t = Unidirectional; + using this_t = Unidirectional; using randgen_type = RandGen; using raygen_type = RayGen; using intersector_type = Intersector; @@ -53,6 +77,7 @@ struct Unidirectional using scalar_type = typename MaterialSystem::scalar_type; using vector3_type = vector; using measure_type = typename MaterialSystem::measure_type; + using output_storage_type = typename Accumulator::output_storage_type; using sample_type = typename NextEventEstimator::sample_type; using ray_dir_info_type = typename sample_type::ray_dir_info_type; using ray_type = typename RayGen::ray_type; @@ -266,10 +291,11 @@ struct Unidirectional } // Li - measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + output_storage_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene, NBL_REF_ARG(typename Accumulator::initialization_data) accumulatorInitData) { - measure_type Li = (measure_type)0.0; - scalar_type meanLumaSq = 0.0; + Accumulator accumulator; + accumulator.initialize(accumulatorInitData); + //scalar_type meanLumaSq = 0.0; for (uint32_t i = 0; i < numSamples; i++) { vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? @@ -290,16 +316,14 @@ struct Unidirectional if (!hit) missProgram(ray); - measure_type accumulation = ray.payload.accumulation; - scalar_type rcpSampleSize = 1.0 / (i + 1); - Li += (accumulation - Li) * rcpSampleSize; + accumulator.addSample(i, ray.payload.accumulation); // TODO: visualize high variance // TODO: russian roulette early exit? } - return Li; + return accumulator.accumulation; } NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl index 81736f508..c17c18dfd 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -36,6 +36,32 @@ #define BXDF_COUNT 7 #include "render_common.hlsl" +#include "rwmc_global_settings_common.hlsl" + +#ifdef RWMC_ENABLED +#include +#include "render_rwmc_common.hlsl" +#endif + +#ifdef RWMC_ENABLED +[[vk::push_constant]] RenderRWMCPushConstants pc; +#else +[[vk::push_constant]] RenderPushConstants pc; +#endif + +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] Texture2D envMap; // unused +[[vk::combinedImageSampler]] [[vk::binding(0, 2)]] SamplerState envSampler; + +[[vk::binding(1, 2)]] Buffer sampleSequence; + +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] Texture2D scramblebuf; // unused +[[vk::combinedImageSampler]] [[vk::binding(2, 2)]] SamplerState scrambleSampler; + +#ifdef RWMC_ENABLED +[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray cascade; +#endif +[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; + #include "pathtracer.hlsl" using namespace nbl; @@ -96,7 +122,14 @@ using raygen_type = ext::RayGen::Basic; using intersector_type = ext::Intersector::Comprehensive; using material_system_type = ext::MaterialSystem::System; using nee_type = ext::NextEventEstimator::Estimator; -using pathtracer_type = ext::PathTracer::Unidirectional; + +#ifdef RWMC_ENABLED +using accumulator_type = rwmc::CascadeAccumulator; +#else +using accumulator_type = ext::PathTracer::DefaultAccumulator; +#endif + +using pathtracer_type = ext::PathTracer::Unidirectional; static const ext::Shape spheres[SPHERE_COUNT] = { ext::Shape::create(float3(0.0, -100.5, -1.0), 100.0, 0u, light_type::INVALID_ID), @@ -129,7 +162,7 @@ static const ext::Shape rectangles[1]; #endif static const light_type lights[LIGHT_COUNT] = { - light_type::create(spectral_t(30.0,25.0,15.0), + light_type::create(LightEminence, #ifdef SPHERE_LIGHT 8u, #else @@ -217,9 +250,24 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); - float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); - float32_t4 pixCol = float32_t4(color, 1.0); - outImage[coords] = pixCol; +#ifdef RWMC_ENABLED + accumulator_type::initialization_data accumulatorInitData; + accumulatorInitData.size = CascadeSize; + accumulatorInitData.start = pc.start; + accumulatorInitData.base = pc.base; + accumulator_type::output_storage_type cascadeEntry = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData); + for (uint32_t i = 0; i < CascadeSize; ++i) + { + float32_t4 cascadeLayerEntry = float32_t4(cascadeEntry.data[i], 1.0f); + cascade[uint3(coords.x, coords.y, i)] = cascadeLayerEntry; + } +#else + accumulator_type::initialization_data accumulatorInitData; + float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene, accumulatorInitData); + outImage[coords] = float32_t4(color, 1.0); +#endif + + #ifdef PERSISTENT_WORKGROUPS } diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl index 5e5cf89da..6c3f63ee4 100644 --- a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -1,23 +1,22 @@ #ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ #define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" -struct SPushConstants +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + +struct RenderPushConstants { +#ifdef __HLSL_VERSION float32_t4x4 invMVP; +#else + nbl::core::matrix4SIMD invMVP; +#endif int sampleCount; int depth; }; -[[vk::push_constant]] SPushConstants pc; - -[[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D envMap; // unused -[[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler; - -[[vk::binding(1, 2)]] Buffer sampleSequence; - -[[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D scramblebuf; // unused -[[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; - -[[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D outImage; +NBL_CONSTEXPR nbl::hlsl::float32_t3 LightEminence = nbl::hlsl::float32_t3(30.0f, 25.0f, 15.0f); #endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl new file mode 100644 index 000000000..7a0674869 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_rwmc_common.hlsl @@ -0,0 +1,23 @@ +#ifndef _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RENDER_RWMC_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +#ifndef __HLSL_VERSION +#include "matrix4SIMD.h" +#endif + +struct RenderRWMCPushConstants +{ +#ifdef __HLSL_VERSION + float32_t4x4 invMVP; +#else + nbl::core::matrix4SIMD invMVP; +#endif + int sampleCount; + int depth; + float start; + float base; + float kappa; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl new file mode 100644 index 000000000..e4aa95923 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve.comp.hlsl @@ -0,0 +1,48 @@ +#include +#include "resolve_common.hlsl" +#include "rwmc_global_settings_common.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/math/morton.hlsl" +#endif + +[[vk::push_constant]] ResolvePushConstants pc; +[[vk::image_format("rgba16f")]] [[vk::binding(0, 0)]] RWTexture2D outImage; +[[vk::image_format("rgba16f")]] [[vk::binding(0, 1)]] RWTexture2DArray cascade; + +using namespace nbl; +using namespace hlsl; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 512; +NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; +NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; + +int32_t2 getCoordinates() +{ + uint32_t width, height; + outImage.GetDimensions(width, height); + return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); +} + +[numthreads(WorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ +#ifdef PERSISTENT_WORKGROUPS + uint32_t virtualThreadIndex; + [loop] + for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920 * 1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) + { + virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; + const int32_t2 coords = (int32_t2)math::Morton::decode2d(virtualThreadIndex); +#else + const int32_t2 coords = getCoordinates(); +#endif + + rwmc::ReweightingParameters reweightingParameters = rwmc::computeReweightingParameters(pc.base, pc.sampleCount, pc.minReliableLuma, pc.kappa, CascadeSize); + float32_t3 color = rwmc::reweight(reweightingParameters, cascade, coords); + + outImage[coords] = float32_t4(color, 1.0f); + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} diff --git a/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl new file mode 100644 index 000000000..5937c42e2 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/resolve_common.hlsl @@ -0,0 +1,13 @@ +#ifndef _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RESOLVE_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +struct ResolvePushConstants +{ + uint32_t sampleCount; + float base; + float minReliableLuma; + float kappa; +}; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl new file mode 100644 index 000000000..6a5d1b9d4 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rwmc_global_settings_common.hlsl @@ -0,0 +1,7 @@ +#ifndef _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RWMC_GLOBAL_SETTINGS_COMMON_INCLUDED_ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + +NBL_CONSTEXPR uint32_t CascadeSize = 6u; + +#endif diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp index 0dc5fc053..cf9e58764 100644 --- a/31_HLSLPathTracer/main.cpp +++ b/31_HLSLPathTracer/main.cpp @@ -6,6 +6,11 @@ #include "nbl/asset/interchange/IImageAssetHandlerBase.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/surface_transform.h" +#include "nbl/builtin/hlsl/colorspace/encodeCIEXYZ.hlsl" +#include "app_resources/hlsl/render_common.hlsl" +#include "app_resources/hlsl/render_rwmc_common.hlsl" +#include "app_resources/hlsl/resolve_common.hlsl" +#include "app_resources/hlsl/rwmc_global_settings_common.hlsl" using namespace nbl; using namespace core; @@ -15,12 +20,6 @@ using namespace asset; using namespace ui; using namespace video; -struct PTPushConstant { - matrix4SIMD invMVP; - int sampleCount; - int depth; -}; - // TODO: Add a QueryPool for timestamping once its ready // TODO: Do buffer creation using assConv class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication @@ -60,6 +59,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; + static inline std::string ResolveShaderPath = "app_resources/hlsl/resolve.comp.hlsl"; static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { @@ -257,6 +257,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; std::array descriptorSet0Bindings = {}; + std::array descriptorSet1Bindings = {}; std::array descriptorSet3Bindings = {}; std::array presentDescriptorSetBindings; @@ -268,6 +269,16 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + + descriptorSet1Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -292,6 +303,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .count = 1u, .immutableSamplers = nullptr }; + presentDescriptorSetBindings[0] = { .binding = 0u, .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, @@ -302,16 +314,20 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); + auto cpuDescriptorSetLayout1 = make_smart_refctd_ptr(descriptorSet1Bindings); auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); + auto gpuDescriptorSetLayout1 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout1); auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); + auto cpuDescriptorSet1 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout1)); auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); + m_descriptorSet1 = convertDSCPU2GPU(cpuDescriptorSet1); m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); smart_refctd_ptr presentDSPool; @@ -371,7 +387,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return shader; }; - auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false) -> smart_refctd_ptr + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false, bool rwmc = false) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.workingDirectory = localInputCWD; @@ -402,11 +418,16 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, options.preprocessorOptions.logger = m_logger.get(); options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - const IShaderCompiler::SMacroDefinition defines[2] = { {defineMacro, ""}, { "PERSISTENT_WORKGROUPS", "1" } }; - if (!defineMacro.empty() && persistentWorkGroups) - options.preprocessorOptions.extraDefines = { defines, defines + 2 }; - else if (!defineMacro.empty() && !persistentWorkGroups) - options.preprocessorOptions.extraDefines = { defines, defines + 1 }; + core::vector defines; + defines.reserve(3); + if (!defineMacro.empty()) + defines.push_back({ defineMacro, "" }); + if(persistentWorkGroups) + defines.push_back({ "PERSISTENT_WORKGROUPS", "1" }); + if(rwmc) + defines.push_back({ "RWMC_ENABLED", "" }); + + options.preprocessorOptions.extraDefines = defines; source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); @@ -420,13 +441,27 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return shader; }; + auto getComputePipelineCreationParams = [](IGPUShader* shader, IGPUPipelineLayout* pipelineLayout) -> IGPUComputePipeline::SCreationParams + { + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout; + params.shader.shader = shader; + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.shader.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + + return params; + }; + // Create compute pipelines { - for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) + { const nbl::asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, - .size = sizeof(PTPushConstant) + .size = sizeof(RenderPushConstants) }; auto ptPipelineLayout = m_device->createPipelineLayout( { &pcRange, 1 }, @@ -435,33 +470,35 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, core::smart_refctd_ptr(gpuDescriptorSetLayout2), nullptr ); - if (!ptPipelineLayout) { + if (!ptPipelineLayout) return logFail("Failed to create Pathtracing pipeline layout"); - } + + const nbl::asset::SPushConstantRange rwmcPcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(RenderRWMCPushConstants) + }; + auto rwmcPtPipelineLayout = m_device->createPipelineLayout( + { &rwmcPcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + core::smart_refctd_ptr(gpuDescriptorSetLayout1), + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!rwmcPtPipelineLayout) + return logFail("Failed to create RWMC Pathtracing pipeline layout"); { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index]); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPipelines.data() + index)) return logFail("Failed to create GLSL compute pipeline!\n"); } { auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) return logFail("Failed to create HLSL compute pipeline!\n"); } @@ -469,31 +506,70 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // persistent wg pipelines { auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index], true); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPersistentWGPipelines.data() + index)) return logFail("Failed to create GLSL PersistentWG compute pipeline!\n"); } { auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); - - IGPUComputePipeline::SCreationParams params = {}; - params.layout = ptPipelineLayout.get(); - params.shader.shader = ptShader.get(); - params.shader.entryPoint = "main"; - params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + auto params = getComputePipelineCreationParams(ptShader.get(), ptPipelineLayout.get()); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); } + + // rwmc pipelines + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], false, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true, true); + auto params = getComputePipelineCreationParams(ptShader.get(), rwmcPtPipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelinesRWMC.data() + index)) + return logFail("Failed to create HLSL RWMC PersistentWG compute pipeline!\n"); + } + } + } + + // Create resolve pipelines + { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(ResolvePushConstants) + }; + + auto pipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + core::smart_refctd_ptr(gpuDescriptorSetLayout1) + ); + + if (!pipelineLayout) { + return logFail("Failed to create resolve pipeline layout"); + } + + { + auto shader = loadAndCompileHLSLShader(ResolveShaderPath); + auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePipeline)) + return logFail("Failed to create HLSL resolve compute pipeline!\n"); } + { + auto shader = loadAndCompileHLSLShader(ResolveShaderPath, "", true); + auto params = getComputePipelineCreationParams(shader.get(), pipelineLayout.get()); + + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &m_resolvePersistentWGPipeline)) + return logFail("Failed to create HLSL resolve compute pipeline!\n"); + } + } // Create graphics pipeline @@ -676,7 +752,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, // create views for textures { - auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { IGPUImage::SCreationParams imgInfo; imgInfo.format = colorFormat; imgInfo.type = IGPUImage::ET_2D; @@ -684,10 +760,19 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, imgInfo.extent.height = height; imgInfo.extent.depth = 1u; imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; imgInfo.samples = IGPUImage::ESCF_1_BIT; imgInfo.flags = static_cast(0u); - imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + if (!useCascadeCreationParameters) + { + imgInfo.arrayLayers = 1u; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + } + else + { + imgInfo.arrayLayers = CascadeSize; + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT; + } auto image = m_device->createImage(std::move(imgInfo)); auto imageMemReqs = image->getMemoryReqs(); @@ -696,35 +781,54 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, return image; }; - auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr + auto createHDRIImageView = [this](smart_refctd_ptr img, const bool useCascadeCreationParameters = false) -> smart_refctd_ptr { auto format = img->getCreationParameters().format; IGPUImageView::SCreationParams imgViewInfo; imgViewInfo.image = std::move(img); imgViewInfo.format = format; - imgViewInfo.viewType = IGPUImageView::ET_2D; imgViewInfo.flags = static_cast(0u); imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; imgViewInfo.subresourceRange.baseArrayLayer = 0u; imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; imgViewInfo.subresourceRange.levelCount = 1u; + if (!useCascadeCreationParameters) + { + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.viewType = IGPUImageView::ET_2D; + } + else + { + imgViewInfo.subresourceRange.layerCount = CascadeSize; + imgViewInfo.viewType = IGPUImageView::ET_2D_ARRAY; + } + return m_device->createImageView(std::move(imgViewInfo)); }; auto params = envMap->getCreationParameters(); auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); m_envMapView = createHDRIImageView(envMap); m_envMapView->setObjectDebugName("Env Map View"); + scrambleMap->setObjectDebugName("Scramble Map"); m_scrambleView = createHDRIImageView(scrambleMap); m_scrambleView->setObjectDebugName("Scramble Map View"); + auto outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); outImg->setObjectDebugName("Output Image"); m_outImgView = createHDRIImageView(outImg); m_outImgView->setObjectDebugName("Output Image View"); + + auto cascade = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y, true); + cascade->setObjectDebugName("Cascade"); + m_cascadeView = createHDRIImageView(cascade, true); + m_cascadeView->setObjectDebugName("Cascade View"); + + // TODO: change cascade layout to general } // create sequence buffer view @@ -855,22 +959,24 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; auto sampler1 = m_device->createSampler(samplerParams1); - std::array writeDSInfos = {}; + std::array writeDSInfos = {}; writeDSInfos[0].desc = m_outImgView; writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; - writeDSInfos[1].desc = m_envMapView; + writeDSInfos[1].desc = m_cascadeView; + writeDSInfos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[2].desc = m_envMapView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; - writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[2].desc = m_sequenceBufferView; - writeDSInfos[3].desc = m_scrambleView; + writeDSInfos[2].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[2].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[3].desc = m_sequenceBufferView; + writeDSInfos[4].desc = m_scrambleView; // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; - writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; - writeDSInfos[4].desc = m_outImgView; - writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[4].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[5].desc = m_outImgView; + writeDSInfos[5].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - std::array writeDescriptorSets = {}; + std::array writeDescriptorSets = {}; writeDescriptorSets[0] = { .dstSet = m_descriptorSet0.get(), .binding = 0, @@ -879,7 +985,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, .info = &writeDSInfos[0] }; writeDescriptorSets[1] = { - .dstSet = m_descriptorSet2.get(), + .dstSet = m_descriptorSet1.get(), .binding = 0, .arrayElement = 0u, .count = 1u, @@ -887,24 +993,31 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, }; writeDescriptorSets[2] = { .dstSet = m_descriptorSet2.get(), - .binding = 1, + .binding = 0, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[2] }; writeDescriptorSets[3] = { .dstSet = m_descriptorSet2.get(), - .binding = 2, + .binding = 1, .arrayElement = 0u, .count = 1u, .info = &writeDSInfos[3] }; writeDescriptorSets[4] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + writeDescriptorSets[5] = { .dstSet = m_presentDescriptorSet.get(), .binding = 0, .arrayElement = 0u, .count = 1u, - .info = &writeDSInfos[4] + .info = &writeDSInfos[5] }; m_device->updateDescriptorSets(writeDescriptorSets, {}); @@ -1000,6 +1113,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + ImGui::Text("\nRWMC settings:"); + ImGui::Checkbox("Enable RWMC", &useRWMC); + ImGui::SliderFloat("start", &rwmcCascadeStart, 1.0f, 32.0f); + ImGui::SliderFloat("base", &rwmcCascadeBase, 1.0f, 32.0f); + ImGui::SliderFloat("minReliableLuma", &rwmcMinReliableLuma, 0.1f, 32.0f); + ImGui::SliderFloat("kappa", &rwmcKappa, 0.1f, 1024.0f); + ImGui::End(); } ); @@ -1022,6 +1142,13 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_oracle.reportBeginFrameRecord(); m_camera.mapKeysToWASD(); + // set initial rwmc settings + + rwmcCascadeStart = hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], LightEminence); + rwmcCascadeBase = 8.0f; + rwmcMinReliableLuma = 1.0f; + rwmcKappa = 5.0f; + return true; } @@ -1067,7 +1194,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, } const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - m_api->startCapture(); + //m_api->startCapture(); // CPU events update(); @@ -1078,97 +1205,12 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, if (!keepRunning()) return; - // render whole scene to offline frame buffer & submit - { - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); - // disregard surface/swapchain transformation for now - const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); - PTPushConstant pc; - viewProjectionMatrix.getInverseTransform(pc.invMVP); - pc.sampleCount = spp; - pc.depth = depth; - - // safe to proceed - // upload buffer data - cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - - // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::GENERAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } - - // cube envmap handle - { - IGPUComputePipeline* pipeline; - if (usePersistentWorkGroups) - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); - else - pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); - cmdbuf->bindComputePipeline(pipeline); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); - cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); - if (usePersistentWorkGroups) - { - uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize); - cmdbuf->dispatch(dispatchSize, 1u, 1u); - } - else - cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); - } - - // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) - { - const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS - } - }, - .image = m_outImgView->getCreationParameters().image.get(), - .subresourceRange = { - .aspectMask = IImage::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = 1u, - .baseArrayLayer = 0u, - .layerCount = 1u - }, - .oldLayout = IImage::LAYOUT::GENERAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); - } + if (useRWMC) + beginCommandBufferAndDispatchPathracerPipelineUseRWMC(cmdbuf); + else + beginCommandBufferAndDispatchPathracerPipeline(cmdbuf); - // TODO: tone mapping and stuff - } + // TODO: tone mapping and stuff asset::SViewport viewport; { @@ -1262,7 +1304,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); m_surface->present(m_currentImageAcquire.imageIndex, rendered); } - m_api->endCapture(); + //m_api->endCapture(); } inline bool keepRunning() override @@ -1360,6 +1402,260 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, m_ui.manager->update(params); } + + private: + void beginCommandBufferAndDispatchPathracerPipeline(IGPUCommandBuffer* cmdbuf) + { + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.sampleCount = spp; + pc.depth = depth; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // cube envmap handle + { + IGPUComputePipeline* pipeline; + if (usePersistentWorkGroups) + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderPushConstants), &pc); + + // TODO: shouldn't it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = usePersistentWorkGroups ? + m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; + + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + + } + + void beginCommandBufferAndDispatchPathracerPipelineUseRWMC(IGPUCommandBuffer* cmdbuf) + { + if (renderMode != E_RENDER_MODE::ERM_HLSL) + { + m_logger->log("Only HLSL render mode is supported.", ILogger::ELL_ERROR); + std::exit(-1); + } + + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + viewProjectionMatrix.getInverseTransform(rwmcPushConstants.invMVP); + + rwmcPushConstants.start = rwmcCascadeStart; + rwmcPushConstants.depth = depth; + rwmcPushConstants.sampleCount = resolvePushConstants.sampleCount = spp; + rwmcPushConstants.base = resolvePushConstants.base = rwmcCascadeBase; + resolvePushConstants.minReliableLuma = rwmcMinReliableLuma; + rwmcPushConstants.kappa = resolvePushConstants.kappa = rwmcKappa; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // transit m_cascadeView layout to GENERAL, block until previous shader is done with reading from cascade + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::NONE + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeSize + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + // TODO: shouldn't it be computed only at initialization stage and on window resize? + const uint32_t dispatchSize = usePersistentWorkGroups ? + m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize) : + 1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize; + + { + IGPUComputePipeline* pipeline = usePersistentWorkGroups ? m_PTHLSLPersistentWGPipelinesRWMC[PTPipeline].get() : m_PTHLSLPipelinesRWMC[PTPipeline].get(); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(RenderRWMCPushConstants), &rwmcPushConstants); + + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + + // m_cascadeView synchronization - wait for previous compute shader to write into the cascade + // TODO: create this and every other barrier once outside of the loop? + { + const IGPUCommandBuffer::SImageMemoryBarrier cascadeBarrier[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_cascadeView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = CascadeSize + } + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = cascadeBarrier }); + } + + // reweighting + { + IGPUComputePipeline* pipeline = usePersistentWorkGroups ? m_resolvePersistentWGPipeline.get() : m_resolvePipeline.get(); + + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 1u, 1u, &m_descriptorSet1.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(ResolvePushConstants), &resolvePushConstants); + + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + } private: smart_refctd_ptr m_window; @@ -1371,11 +1667,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelinesRWMC; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelinesRWMC; + smart_refctd_ptr m_resolvePipeline; + smart_refctd_ptr m_resolvePersistentWGPipeline; smart_refctd_ptr m_presentPipeline; uint64_t m_realFrameIx = 0; std::array, MaxFramesInFlight> m_cmdBufs; ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; - smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet1, m_descriptorSet2, m_presentDescriptorSet; core::smart_refctd_ptr m_guiDescriptorSetPool; @@ -1388,6 +1688,7 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, smart_refctd_ptr m_envMapView, m_scrambleView; smart_refctd_ptr m_sequenceBufferView; smart_refctd_ptr m_outImgView; + smart_refctd_ptr m_cascadeView; // sync smart_refctd_ptr m_semaphore; @@ -1422,7 +1723,15 @@ class HLSLComputePathtracer final : public examples::SimpleWindowedApplication, int renderMode = E_RENDER_MODE::ERM_HLSL; int spp = 32; int depth = 3; + float rwmcCascadeStart; + float rwmcCascadeBase; + float rwmcMinReliableLuma; + float rwmcKappa; bool usePersistentWorkGroups = false; + bool useRWMC = false; + RenderRWMCPushConstants rwmcPushConstants; + RenderPushConstants pc; + ResolvePushConstants resolvePushConstants; bool m_firstFrame = true; IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} };