include/boost/random/multivariate_normal_distribution.hpp (new file, 117 additions)
/*
* Copyright Nick Thompson, 2024
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*/
#ifndef BOOST_RANDOM_MULTIVARIATE_NORMAL_HPP
#define BOOST_RANDOM_MULTIVARIATE_NORMAL_HPP
#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <random>
#if __has_include(<Eigen/Dense>)
#include <Eigen/Dense>
#else
#error "The Eigen library is required for the operation of this class"
#endif

namespace boost::random {

// This is super useful functionality, but nonetheless it must be shunted off into a dark corner of the library
// because even today there is no standard matrix class and no standard way to do a Cholesky decomposition.
// Hence a more public place in the library just puts users in dependency hell.
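//
// Sampling strategy: the covariance matrix C is factored once as C = P^T L D L^T P using
// Eigen's pivoted (robust) LDLT decomposition. A sample is then x = m + P^T L D^{1/2} z,
// where z is a vector of i.i.d. standard normals. Since D may contain zeros, positive
// semi-definite covariance matrices are supported, which a plain LLT/Cholesky would reject.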
template<class RandomAccessContainer>
class multivariate_normal_distribution {
public:
using Real = typename RandomAccessContainer::value_type;
multivariate_normal_distribution(RandomAccessContainer const & mean, Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic> const & covariance_matrix) : m_{mean} {
using std::sqrt;
if (covariance_matrix.rows() != covariance_matrix.cols()) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
oss << ": The covariance matrix must be square, but received a (" << covariance_matrix.rows() << ", " << covariance_matrix.cols() << ") matrix.";
throw std::domain_error(oss.str());
}
if (mean.size() != covariance_matrix.cols()) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
oss << ": The mean has dimension " << mean.size() << " but the covariance matrix has " << covariance_matrix.cols() << " columns.";
throw std::domain_error(oss.str());
}
ldlt_.compute(covariance_matrix);
if(ldlt_.info() != Eigen::Success) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
if (ldlt_.info() == Eigen::NumericalIssue) {
oss << ": The covariance matrix is not positive definite. We probably need to use Eigen::LDLT instead.";
throw std::domain_error(oss.str());
} else if (ldlt_.info() == Eigen::InvalidInput) {
oss << ": Invalid input detected from Eigen.";
throw std::domain_error(oss.str());
} else if (ldlt_.info() == Eigen::NoConvergence) {
oss << ": Iterative procedure did not converge.";
throw std::domain_error(oss.str());
}
}
if (!ldlt_.isPositive()) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
oss << ": The covariance matrix provided is not positive semi-definite.";
throw std::domain_error(oss.str());
}
}

template<class URNG>
RandomAccessContainer operator()(URNG& g) const {
RandomAccessContainer x;
if constexpr (detail::has_resize_v<RandomAccessContainer>) {
x.resize(m_.size());
}
(*this)(x, g);
return x;
}

template<class URNG>
void operator()(RandomAccessContainer& x, URNG& g) const {
using std::normal_distribution;
using std::sqrt;
if (x.size() != m_.size()) {
std::ostringstream oss;
oss << __FILE__ << ":" << __LINE__ << ":" << __func__;
oss << ": Must provide a vector of the same length as the mean.";
throw std::domain_error(oss.str());
}

auto dis = normal_distribution<Real>(0, 1);
// First generate a standard normal random vector:
Eigen::Vector<Real, Eigen::Dynamic> u;
u.resize(x.size());
for (size_t i = 0; i < x.size(); ++i) {
u[i] = dis(g);
}
// Transform it with the LDLT decomposition.
// Eigen factors the covariance as C = P^T L D L^T P, so we map u -> P^T L D^{1/2} u:
auto const & D = ldlt_.vectorD();
for (size_t i = 0; i < u.size(); ++i) {
u[i] = sqrt(D[i])*u[i];
}
// Now apply L:
u = ldlt_.matrixL()*u;
// And undo the pivoting permutation; transpositionsP() applies P, so take its transpose to get P^T:
u = ldlt_.transpositionsP().transpose()*u;
for (size_t i = 0; i < x.size(); ++i) {
x[i] = u[i] + m_[i];
}
}

private:
RandomAccessContainer m_;
Eigen::LDLT<Eigen::Matrix<Real, Eigen::Dynamic, Eigen::Dynamic> > ldlt_;
};

} // namespace boost::random
#endif
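
A minimal usage sketch for reviewers, not part of the diff: it assumes only this header, Eigen, and the standard library; the container type, dimensions, and seed are arbitrary choices. The allocating overload mvn(gen) additionally relies on the detail::has_resize_v trait referenced above.

#include <boost/random/multivariate_normal_distribution.hpp>
#include <Eigen/Dense>
#include <iostream>
#include <random>
#include <vector>

int main() {
    // A 2x2 positive definite covariance matrix and a mean vector of matching dimension.
    Eigen::MatrixXd C(2, 2);
    C << 2.0, 0.5,
         0.5, 1.0;
    std::vector<double> mean{1.0, -1.0};

    boost::random::multivariate_normal_distribution<std::vector<double>> mvn(mean, C);

    std::mt19937_64 gen(42);
    std::vector<double> x(mean.size());
    mvn(x, gen); // fill a preallocated vector with one draw
    std::cout << x[0] << ", " << x[1] << "\n";
}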
test/multivariate_normal_test.cpp (new file, 145 additions)
/*
* Copyright Nick Thompson, 2024
* Use, modification and distribution are subject to the
* Boost Software License, Version 1.0. (See accompanying file
* LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
*/

#include "math_unit_test.hpp"
#include <boost/random/multivariate_normal_distribution.hpp>
#include <array>
#include <algorithm>
#include <cmath>
#include <random>
#include <limits>

using std::abs;
using boost::random::multivariate_normal_distribution;

template <class Real> void test_multivariate_normal() {
using Eigen::Matrix;
using Eigen::Dynamic;
constexpr size_t n = 7;
Matrix<Real, Dynamic, Dynamic> C = Matrix<Real, Dynamic, Dynamic>::Identity(n, n);
std::mt19937_64 mt(12345);
std::array<Real, n> mean;
std::uniform_real_distribution<Real> dis(-1, 1);
std::generate(mean.begin(), mean.end(), [&]() { return dis(mt); });
auto mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
std::array<Real, n> x;
std::array<Real, n> empirical_means;
empirical_means.fill(0);

size_t i = 0;
size_t samples = 2048;
do {
x = mvn(mt);
for (size_t j = 0; j < n; ++j) {
empirical_means[j] += x[j];
}
} while(i++ < samples);

for (size_t j = 0; j < n; ++j) {
empirical_means[j] /= samples;
CHECK_ABSOLUTE_ERROR(mean[j], empirical_means[j], 0.05);
}

// Exhibits why we need to use the LDL^T decomposition:
C = Matrix<Real, Dynamic, Dynamic>::Zero(n, n);
mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
i = 0;
do {
x = mvn(mt);
for (size_t j = 0; j < n; ++j) {
CHECK_EQUAL(mean[j], x[j]);
}
} while(i++ < 10);
// Test that we're applying the permutation matrix correctly:
C = Matrix<Real, Dynamic, Dynamic>::Zero(n, n);
C(0,0) = 1;
mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
i = 0;
do {
x = mvn(mt);
for (size_t j = 1; j < mean.size(); ++j) {
CHECK_EQUAL(mean[j], x[j]);
}
} while(i++ < 3);

C(0,0) = 0;
C(n-1,n-1) = 1;
mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
i = 0;
do {
x = mvn(mt);
// All but the last entry must be identical to the mean:
for (size_t j = 0; j < mean.size() - 1; ++j) {
CHECK_EQUAL(mean[j], x[j]);
}
} while(i++ < 3);

C(0,0) = 0;
C(1,1) = 1;
C(n-1,n-1) = 0;
mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
i = 0;
do {
x = mvn(mt);
for (size_t j = 0; j < mean.size() - 1; ++j) {
if (j != 1) {
CHECK_EQUAL(mean[j], x[j]);
}
}
} while(i++ < 10);

C(1,1) = 0;
C(n-2,n-2) = 1;
mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
i = 0;
do {
x = mvn(mt);
for (size_t j = 0; j < mean.size() - 1; ++j) {
if (j != n-2) {
CHECK_EQUAL(mean[j], x[j]);
}
}
} while(i++ < 3);

// Scaling test: If C->kC for some constant k, then A->sqrt(k)A.
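// (Samples are x = m + A u with C = A A^T and u standard normal, so Cov(x) = A A^T = C;
// replacing C by kC replaces A by sqrt(k) A, and with m = 0 every sample scales by sqrt(k).)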
// First we build a random positive semidefinite matrix:
Matrix<Real, Dynamic, Dynamic> C1 = Matrix<Real, Dynamic, Dynamic>::Random(n, n);
C = C1.transpose()*C1;
// Set the mean to 0:
for (auto & m : mean) {
m = 0;
}
samples = 1;
std::vector<std::array<Real, n>> x1(samples);
mt.seed(12859);
mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
for (size_t i = 0; i < x1.size(); ++i) {
x1[i] = mvn(mt);
}
// Now scale C:
C *= 16;
// Set the seed back to the original:
mt.seed(12859);
std::vector<std::array<Real, n>> x2(samples);
mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
for (size_t i = 0; i < x2.size(); ++i) {
x2[i] = mvn(mt);
}
// Now x2 = 4*x1 is expected:
for (size_t i = 0; i < x1.size(); ++i) {
for (size_t j = 0; j < n; ++j) {
CHECK_ULP_CLOSE(4*x1[i][j], x2[i][j], 2);
}
}

}

int main() {
test_multivariate_normal<float>();
test_multivariate_normal<double>();
return boost::math::test::report_errors();
}
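
One further check a reviewer might want, sketched here rather than proposed as part of the diff: verify that the empirical covariance of the draws approaches C. The helper name test_empirical_covariance, the sample count, and the tolerance are arbitrary choices; it reuses CHECK_ABSOLUTE_ERROR from math_unit_test.hpp.

// Sketch only: empirical covariance of zero-mean draws should approach C.
template <class Real> void test_empirical_covariance() {
    using Eigen::Matrix;
    using Eigen::Dynamic;
    constexpr size_t n = 3;
    // Build a random positive semidefinite covariance matrix, as in the scaling test above.
    Matrix<Real, Dynamic, Dynamic> C1 = Matrix<Real, Dynamic, Dynamic>::Random(n, n);
    Matrix<Real, Dynamic, Dynamic> C = C1.transpose() * C1;
    std::array<Real, n> mean;
    mean.fill(0);
    auto mvn = multivariate_normal_distribution<decltype(mean)>(mean, C);
    std::mt19937_64 mt(54321);

    size_t samples = 1 << 16;
    Matrix<Real, Dynamic, Dynamic> S = Matrix<Real, Dynamic, Dynamic>::Zero(n, n);
    std::array<Real, n> x;
    for (size_t k = 0; k < samples; ++k) {
        x = mvn(mt);
        // With zero mean, E[x x^T] = C, so accumulate the outer product:
        for (size_t i = 0; i < n; ++i) {
            for (size_t j = 0; j < n; ++j) {
                S(i, j) += x[i] * x[j];
            }
        }
    }
    S /= Real(samples);
    for (size_t i = 0; i < n; ++i) {
        for (size_t j = 0; j < n; ++j) {
            CHECK_ABSOLUTE_ERROR(C(i, j), S(i, j), Real(0.05) * (1 + abs(C(i, j))));
        }
    }
}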