Probability Distributions and Markov Chain Monte Carlo

O2scl

MCMC Contents

Probability distributions

Probability distributions are provided by the C++ standard library, but some useful functionality is not included and multidimensional and conditional distributions are not provided. For the time being, some experimental probability distributions are included in O₂scl.

The one-dimensional probability distributions are children of prob_dens_func and are modeled similar to the C++ standard distributions:

Multi-dimensional distributions are children of prob_dens_mdim, including

Conditional probability distributions are children of prob_cond_mdim. These classes can be used as proposal distributions for the O₂scl MCMC classes.

Markov chain Monte Carlo (MCMC)

The class mcmc_para_base performs generic MCMC simulations and the child class mcmc_para_table performs MCMC simulations and stores the results in a table_units object. These classes contain support for OpenMP and MPI (or both). The class mcmc_para_cli is a specialized class which automatically handles the command-line interface when using MCMC.

MCMC example

The first example shows an MCMC of a bimodal distribution,

f(x)=exp(x2)[sin(x7/5)+1]

with a naive random walk step. It uses that MCMC to compute the value

x2=(55x2f(x))(55f(x))1

and compares that value with the exact result obtained by quadrature.

/* Example: ex_mcmc.cpp
   ───────────────────────────────────────────────────────────────────
   An example which demonstrates the generation of an arbitrary
   distribution through Markov chain Monte Carlo. See "License 
   Information" section of the documentation for license information.
*/
#include <o2scl/mcmc_para.h>
#include <o2scl/vec_stats.h>
#include <o2scl/test_mgr.h>
#include <o2scl/hdf_io.h>
#include <o2scl/inte_qag_gsl.h>

using namespace std;
using namespace o2scl;
using namespace o2scl_hdf;

/// Convenient typedefs

typedef boost::numeric::ublas::vector<double> ubvector;

typedef boost::numeric::ublas::matrix<double> ubmatrix;

typedef std::function<int(size_t,const ubvector &,double &,
			  std::vector<double> &)> point_funct;

typedef std::function<int(const ubvector &,double,std::vector<double> &,
			  std::vector<double> &)> fill_funct;
/// The MCMC object
mcmc_para_table<point_funct,fill_funct,std::vector<double>,ubvector> mct;

/** \brief A demonstration class for the MCMC example. This example
    could have been written with global functions, but we put them
    in a class to show how it would work in that case.
 */
class exc {

public:
  
  /** \brief A one-dimensional bimodal distribution

      Here, the variable 'log_weight' stores the natural logarithm of
      the objective function based on the parameters stored in \c pars.
      The object 'dat' stores any auxillary quantities which can be
      computed at every point in parameter space.
  */
  int bimodal(size_t nv, const ubvector &pars, double &log_weight,
	      std::vector<double> &dat) {
    
    double x=pars[0];
    log_weight=log(exp(-x*x)*(sin(x-1.4)+1.0));
    dat[0]=x*x;
    return 0;
  }

  /** \brief Add auxillary quantities to 'line' so they can be
      stored in the table
  */
  int fill_line(const ubvector &pars, double log_weight, 
		std::vector<double> &line, std::vector<double> &dat) {
    line.push_back(dat[0]);
    return 0;
  }

  exc() {
  }
  
};

int main(int argc, char *argv[]) {
  
  cout.setf(ios::scientific);
  
  exc e;
  
  test_mgr tm;
  tm.set_output_level(1);

  // Parameter limits and initial points

  ubvector low_bimodal(1), high_bimodal(1);
  low_bimodal[0]=-5.0;
  high_bimodal[0]=5.0;

  // Function objects for the MCMC object
  point_funct bimodal_func=std::bind
    (std::mem_fn<int(size_t,const ubvector &,double &,
		     std::vector<double> &)>(&exc::bimodal),&e,
     std::placeholders::_1,std::placeholders::_2,std::placeholders::_3,
     std::placeholders::_4);
  fill_funct fill_func=std::bind
    (std::mem_fn<int(const ubvector &,double,std::vector<double> &,
		     std::vector<double> &)>(&exc::fill_line),&e,
     std::placeholders::_1,std::placeholders::_2,std::placeholders::_3,
     std::placeholders::_4);

  // Create function object vectors
  vector<point_funct> bimodal_vec;
  bimodal_vec.push_back(bimodal_func);
  vector<fill_funct> fill_vec;
  fill_vec.push_back(fill_func);

  // Create and allocate data objects
  vector<std::vector<double> > data_vec(2);
  data_vec[0].resize(1);
  data_vec[1].resize(1);

  // Compute exact value of <x^2>. The function format is a bit
  // different so we use lambda expressions to construct the
  // functions for the integrators. 
  inte_qag_gsl<> iqg;
  funct f=[e,data_vec](double x) mutable -> double {
    ubvector u(1); double lw; u[0]=x; 
    e.bimodal(1,u,lw,data_vec[0]); return exp(lw); };
  funct fx2=[e,data_vec](double x) mutable -> double {
    ubvector u(1); double lw; u[0]=x; 
    e.bimodal(1,u,lw,data_vec[0]); return data_vec[0][0]*exp(lw); };
  cout << iqg.integ(fx2,low_bimodal[0],high_bimodal[0]) << " "
       << iqg.integ(f,low_bimodal[0],high_bimodal[0]) << endl;
  double exact=iqg.integ(fx2,low_bimodal[0],high_bimodal[0])/
    iqg.integ(f,low_bimodal[0],high_bimodal[0]);
  cout << "exact: " << exact << endl;
  
  cout << "──────────────────────────────────────────────────────────"
       << endl;
  cout << "Plain MCMC example with a bimodal distribution:" << endl;
    
  // Set parameter names and units
  vector<string> pnames={"x","x2"};
  vector<string> punits={"",""};
  mct.set_names_units(pnames,punits);

  // Set MCMC parameters
  mct.def_stepper->step_fac.resize(1);
  mct.def_stepper->step_fac[0]=2.0;
  mct.max_iters=20000;
  mct.prefix="ex_mcmc";
  mct.table_prealloc=mct.max_iters/3;
  
  // Perform MCMC
  mct.mcmc_fill(1,low_bimodal,high_bimodal,bimodal_vec,fill_vec,
                data_vec);

  // Output acceptance and rejection rate
  cout << "n_accept, n_reject: " << mct.n_accept[0] << " "
       << mct.n_reject[0] << endl;

  // Get the MCMC results
  shared_ptr<table_units<> > t=mct.get_table();

  // Remove empty rows from table
  t->delete_rows_func("mult<0.5");
  
  // Compute the autocorrelation length
  std::vector<double> ac, ftom;
  o2scl::vector_autocorr_vector_mult(t->get_nlines(),
				     (*t)["x2"],(*t)["mult"],ac);
  size_t ac_len=o2scl::vector_autocorr_tau(ac,ftom);

  // Create a separate table of statistically independent samples
  table_units<> indep;
  copy_table_thin_mcmc(ac_len,*t,indep,"mult");
  
  cout << "Autocorrelation length, effective sample size: "
       << ac_len << " " << indep.get_nlines() << endl;

  // Write the data to a file
  hdf_file hf;
  hf.open_or_create("ex_mcmc.o2");
  hdf_output(hf,*t,"mcmc");
  hdf_output(hf,indep,"indep");
  hf.close();

  // Use the independent samples to compute the final integral and
  // compare to the exact result. Note that we must specify the
  // number of elements in the vector, indep["x2"], because the
  // table_units object often has space at the end to add extra rows.
  
  double avg=vector_mean(indep.get_nlines(),indep["x2"]);
  double std=vector_stddev(indep.get_nlines(),indep["x2"]);
  tm.test_rel(avg,exact,10.0*std/sqrt(indep.get_nlines()),"ex_mcmc");
  cout << "avg. from MCMC, exact avg., diff., std. from MCMC, "
       << "unc. in mean times 10:\n  "
       << avg << " " << exact << " " << fabs(avg-exact) << " "
       << std << " " << 10.0*std/sqrt(indep.get_nlines()) << endl;
  
  tm.report();
  
  return 0;
}

The image plots the posterior distribution from this first MCMC as well as a second MCMC using a proposal distribution generated from a kernel density estimation (from the kde_python class) of the data generated from the first MCMC. This simulation is about five times more efficient, and thus gives a more accurate result for the integral.

MCMC simulations of exp(-x*x)*(sin(x-1.4)+1.0)

Below is the code for the second MCMC, which uses O₂scl C++ interface to SciPy’s KDE algorithm.

/* Example: ex_mcmc_kde.cpp
   ───────────────────────────────────────────────────────────────────
   An example which demonstrates the generation of an arbitrary
   distribution through Markov chain Monte Carlo using a conditional
   probability distribution built using a KDE applied to previous
   data. See "License Information" section of the documentation for
   license information.
*/
#include <o2scl/mcmc_para.h>
#include <o2scl/vec_stats.h>
#include <o2scl/test_mgr.h>
#include <o2scl/hdf_io.h>
#include <o2scl/inte_qag_gsl.h>
#include <o2scl/set_openmp.h>
#include <o2scl/kde_python.h>

using namespace std;
using namespace o2scl;
using namespace o2scl_hdf;

/// Convenient typedefs

typedef boost::numeric::ublas::vector<double> ubvector;

typedef boost::numeric::ublas::matrix<double> ubmatrix;

typedef std::function<int(size_t,const ubvector &,double &,
			  std::vector<double> &)> point_funct;

typedef std::function<int(const ubvector &,double,std::vector<double> &,
			  std::vector<double> &)> fill_funct;

class mcmc_stepper_mh_record :
  public mcmc_stepper_mh<point_funct,std::vector<double>,ubvector,
                         ubmatrix,prob_cond_mdim_indep<>> {
  
  public:

  std::vector<double> vw_next, vq_next;

  virtual ~mcmc_stepper_mh_record() {
  }
  
  virtual void step(size_t i_thread, size_t n_params, point_funct &f,
                    ubvector &current, ubvector &next, double w_current,
                    double &w_next, ubvector &low, ubvector &high,
                    int &func_ret, bool &accept, std::vector<double> &dat,
                    rng<> &r, int verbose) {
    
    // Use proposal distribution and compute associated weight
    double q_prop=proposal[i_thread % proposal.size()].log_metrop_hast
      (current,next);
    
    accept=false;
    
    func_ret=success;
    this->check_bounds(i_thread,n_params,next,low,high,
                       func_ret,verbose);
    if (func_ret!=this->mcmc_skip) {
      func_ret=f(n_params,next,w_next,dat);
    } 

    vw_next.push_back(w_next);
    double q_next=proposal[i_thread %
                           proposal.size()].log_pdf(current,next);
    vq_next.push_back(q_next);
    
    if (func_ret==success) {
      double rand=r.random();
      
      // Metropolis-Hastings algorithm
      if (rand<exp(w_next-w_current+q_prop)) {
        accept=true;
      }
    }
    
    return;
  }
  
};

/// The MCMC object
mcmc_para_table<point_funct,fill_funct,std::vector<double>,ubvector> mct;

/** \brief A demonstration class for the MCMC example. This example
    could have been written with global functions, but we put them
    in a class to show how it would work in that case.
 */
class exc {

public:
  
  /** \brief A one-dimensional bimodal distribution

      Here, the variable 'log_weight' stores the natural logarithm of
      the objective function based on the parameters stored in \c pars.
      The object 'dat' stores any auxillary quantities which can be
      computed at every point in parameter space.
  */
  int bimodal(size_t nv, const ubvector &pars, double &log_weight,
	      std::vector<double> &dat) {
    
    double x=pars[0];
    log_weight=log(exp(-x*x)*(sin(x-1.4)+1.0));
    dat[0]=x*x;
    return 0;
  }

  /** \brief Add auxillary quantities to 'line' so they can be
      stored in the table
  */
  int fill_line(const ubvector &pars, double log_weight, 
		std::vector<double> &line, std::vector<double> &dat) {
    line.push_back(dat[0]);
    return 0;
  }

  exc() {
  }
  
};

int main(int argc, char *argv[]) {
  
  cout.setf(ios::scientific);
  
  exc e;
  
  test_mgr tm;
  tm.set_output_level(1);

  // Parameter limits and initial points

  ubvector low_bimodal(1), high_bimodal(1);
  low_bimodal[0]=-5.0;
  high_bimodal[0]=5.0;

  // Function objects for the MCMC object
  point_funct bimodal_func=std::bind
    (std::mem_fn<int(size_t,const ubvector &,double &,
		     std::vector<double> &)>(&exc::bimodal),&e,
     std::placeholders::_1,std::placeholders::_2,std::placeholders::_3,
     std::placeholders::_4);
  fill_funct fill_func=std::bind
    (std::mem_fn<int(const ubvector &,double,std::vector<double> &,
		     std::vector<double> &)>(&exc::fill_line),&e,
     std::placeholders::_1,std::placeholders::_2,std::placeholders::_3,
     std::placeholders::_4);

  // Create function object vectors
  vector<point_funct> bimodal_vec;
  bimodal_vec.push_back(bimodal_func);
  vector<fill_funct> fill_vec;
  fill_vec.push_back(fill_func);

  // Create and allocate data objects
  vector<std::vector<double> > data_vec(2);
  data_vec[0].resize(1);
  data_vec[1].resize(1);

  // Compute exact value of <x^2>. The function format is a bit
  // different so we use lambda expressions to construct the
  // functions for the integrators. 
  inte_qag_gsl<> iqg;
  funct f=[e,data_vec](double x) mutable -> double {
    ubvector u(1); double lw; u[0]=x; 
    e.bimodal(1,u,lw,data_vec[0]); return exp(lw); };
  funct fx2=[e,data_vec](double x) mutable -> double {
    ubvector u(1); double lw; u[0]=x; 
    e.bimodal(1,u,lw,data_vec[0]); return data_vec[0][0]*exp(lw); };
  double exact=iqg.integ(fx2,low_bimodal[0],high_bimodal[0])/
    iqg.integ(f,low_bimodal[0],high_bimodal[0]);
  cout << "exact: " << exact << endl;
  
  cout << "──────────────────────────────────────────────────────────"
       << endl;
  cout << "Plain MCMC example with a bimodal distribution:" << endl;
    
  // Set parameter names and units
  vector<string> pnames={"x","x2"};
  vector<string> punits={"",""};
  mct.set_names_units(pnames,punits);

  // Read the preliminary data from a file
  hdf_file hf;
  hf.open("ex_mcmc.o2");
  table_units<> tab_in;
  hdf_input(hf,tab_in,"indep");
  hf.close();
  
  // Copy the table data to a tensor for use in kde_python.
  // We need a copy for each thread because kde_python takes
  // over the tensor data.
  tensor<> ten_in;
  vector<size_t> in_size={tab_in.get_nlines(),1};
  ten_in.resize(2,in_size);
  
  for(size_t i=0;i<tab_in.get_nlines();i++) {
    vector<size_t> ix;
    ix={i,0};
    ten_in.get(ix)=tab_in.get("x",i);
  }

  // Train the KDE
  vector<double> weights;
  uniform_grid_log_end<double> ug(1.0e-3,1.0e3,99);
  vector<double> bw_array;
  ug.vector(bw_array);
  std::shared_ptr<kde_python<ubvector>> kp(new kde_python<ubvector>);
  kp->set_function("o2sclpy",ten_in,
                   bw_array,"verbose=0","kde_sklearn");
  
  // Setting the KDE as the base distribution for the independent
  // conditional probability.
  shared_ptr<mcmc_stepper_mh_record> local_stepper
    (new mcmc_stepper_mh_record);
  mct.stepper=local_stepper;
  local_stepper->proposal.resize(1);
  local_stepper->proposal[0].set_base(kp);
  
  // Set the MCMC parameters
  mct.max_iters=20000;
  mct.prefix="ex_mcmc_kde";
  mct.n_threads=1;
  mct.verbose=3;
  
  // Perform MCMC
  mct.mcmc_fill(1,low_bimodal,high_bimodal,bimodal_vec,fill_vec,
                data_vec);

  // Output acceptance and rejection rate
  cout << "n_accept, n_reject: " << mct.n_accept[0] << " "
       << mct.n_reject[0] << endl;

  // Get results
  shared_ptr<table_units<> > t=mct.get_table();

  // Remove empty rows from table.
  t->delete_rows_func("mult<0.5");

  // Compute the autocorrelation length
  std::vector<double> ac, ftom;
  o2scl::vector_autocorr_vector_mult(t->get_nlines(),
                                     (*t)["x2"],(*t)["mult"],ac);
  size_t ac_len=o2scl::vector_autocorr_tau(ac,ftom);
  
  // Create a separate table of statistically independent samples
  table_units<> indep;
  copy_table_thin_mcmc(ac_len,*t,indep,"mult");
  
  cout << "Autocorrelation length, effective sample size: "
       << ac_len << " " << indep.get_nlines() << endl;
  
  // Write these samples to a file
  hf.open_or_create("ex_mcmc_kde.o2");
  hdf_output(hf,*t,"mcmc");
  hdf_output(hf,indep,"indep");
  hf.setd_vec("q_next",local_stepper->vq_next);
  hf.setd_vec("w_next",local_stepper->vw_next);
  hf.close();

  // Compute the average of the correlated samples for comparison
  double avg2=vector_mean(t->get_nlines(),(*t)["x2"]);
  cout << "Average of correlated samples: " << avg2 << endl;
  
  // Use the independent samples to compute the final integral and
  // compare to the exact result. Note that we must specify the
  // number of elements in the vector, indep["x2"], because the
  // table_units object often has space at the end to add extra rows.
  
  double avg=vector_mean(indep.get_nlines(),indep["x2"]);
  double std=vector_stddev(indep.get_nlines(),indep["x2"]);
  cout << "Average and std. dev. of uncorrelated samples: "
       << avg << " " << std << endl;
  cout << "Absolute difference: " << fabs(avg-exact) << endl;
  cout << "Uncertainty in the average: "
       << std/sqrt(indep.get_nlines()) << endl;
  tm.test_rel(avg,exact,10.0*std/sqrt(indep.get_nlines()),"ex_mcmc");
  
  tm.report();
  
  return 0;
}