CosmoBolognaLib
Free Software C++/Python libraries for cosmological calculations
Posterior.cpp
Go to the documentation of this file.
1 /********************************************************************
2  * Copyright (C) 2014 by Federico Marulli and Alfonso Veropalumbo *
3  * federico.marulli3@unibo.it *
4  * *
5  * This program is free software; you can redistribute it and/or *
6  * modify it under the terms of the GNU General Public License as *
7  * published by the Free Software Foundation; either version 2 of *
8  * the License, or (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public *
16  * License along with this program; if not, write to the Free *
17  * Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ********************************************************************/
20 
33 #include "FITSwrapper.h"
34 #include "PosteriorParameters.h"
35 #include "Sampler.h"
36 #include "Posterior.h"
37 #include "ChainMesh.h"
38 
39 using namespace std;
40 
41 using namespace cbl;
42 
43 
44 // ============================================================================================
45 
46 
48 {
49  m_seed = seed;
50  m_seed_generator = make_shared<random::UniformRandomNumbers_Int>(random::UniformRandomNumbers_Int(0, std::numeric_limits<int>::max(), m_seed));
51  m_model_parameters->set_prior_distribution_seed(m_seed_generator);
52 }
53 
54 
55 // ============================================================================================
56 
57 
58 cbl::statistics::Posterior::Posterior (const std::vector<std::shared_ptr<PriorDistribution>> prior_distributions, const std::shared_ptr<data::Data> data, const std::shared_ptr<Model> model, const LikelihoodType likelihood_type, const std::vector<size_t> x_index, const int w_index, const int seed)
59 {
60  set(prior_distributions, data, model, likelihood_type, x_index, w_index, seed);
61 }
62 
63 
64 // ============================================================================================
65 
66 
67 cbl::statistics::Posterior::Posterior (const std::vector<std::shared_ptr<PriorDistribution>> prior_distributions, const Likelihood &likelihood, const int seed) : Likelihood(likelihood)
68 {
69  m_model_parameters = make_shared<PosteriorParameters>(PosteriorParameters(m_model->parameters()->nparameters(), prior_distributions, m_model->parameters()->type(), m_model->parameters()->name()));
70 
71  m_model->set_parameters(m_model_parameters);
72 
73  m_prior = m_model_parameters->prior();
74 
75  m_likelihood_inputs = make_shared<STR_likelihood_inputs>(STR_likelihood_inputs(m_data, m_model, m_x_index, m_w_index));
76 
77  m_set_seed(seed);
78 }
79 
80 
81 // ============================================================================================
82 
83 
84 cbl::statistics::Posterior::Posterior (const std::string file_name, const std::string path_file, const std::vector<int> usecols, const std::vector<std::string> parameter_names, const int skip_header)
85 {
86  std::vector<std::vector<double>> chain = cbl::read_file(file_name, path_file, usecols, skip_header);
87 
88  m_Nparameters = usecols.size()-1;
89  m_log_posterior = chain[m_Nparameters];
90  m_parameters.resize(m_Nparameters);
91 
92  for (int N=0; N<m_Nparameters; N++)
93  m_parameters[N] = chain[N];
94 
95  m_seed = -1;
96 
97  m_model_parameters = make_shared<ModelParameters> (ModelParameters(m_Nparameters, parameter_names));
98 }
99 
100 
101 // ============================================================================================
102 
103 
104 
105 double cbl::statistics::Posterior::operator () (std::vector<double> &pp) const
106 {
107  pp = m_model_parameters->full_parameter(pp);
108  double prior = m_prior->operator()(pp);
109 
110  if (prior>0)
111  return (m_use_grid) ? m_likelihood_function_grid(pp, m_likelihood_inputs)*prior : m_likelihood_function(pp, m_likelihood_inputs)*prior;
112 
113  return 0.;
114 }
115 
116 
117 // ============================================================================================
118 
119 
120 vector<double> cbl::statistics::Posterior::weight (const int start, const int thin) const
121 {
122  vector<double> ww;
123  const int chain_size = m_model_parameters->chain_size();
124  const int n_walkers = m_model_parameters->chain_nwalkers();
125 
126  for (int j=start; j<chain_size; j+=thin)
127  for (int i=0; i<n_walkers; i++)
128  ww.push_back(m_weight[j*n_walkers+i]);
129 
130  return ww;
131 }
132 
133 
134 // ============================================================================================
135 
136 
137 double cbl::statistics::Posterior::log (std::vector<double> &pp) const
138 {
139  pp = m_model_parameters->full_parameter(pp);
140  const double logprior = m_prior->log(pp);
141 
142  double val;
143  if (logprior>par::defaultDouble)
144  val = (m_use_grid) ? m_log_likelihood_function_grid(pp, m_likelihood_inputs)+logprior : m_log_likelihood_function(pp, m_likelihood_inputs)+logprior;
145  else
146  val = par::defaultDouble;
147 
148  return val;
149 }
150 
151 
152 // ============================================================================================
153 
154 
155 double cbl::statistics::Posterior::chi2 (const std::vector<double> parameter) const
156 {
157  vector<double> pp = (parameter.size()>0) ? parameter : m_model_parameters->bestfit_value();
158 
159  return (m_use_grid) ? -2.*m_log_likelihood_function_grid(pp, m_likelihood_inputs) : -2.*m_log_likelihood_function(pp, m_likelihood_inputs);
160 }
161 
162 
163 // ============================================================================================
164 
165 
166 void cbl::statistics::Posterior::set_model (const std::shared_ptr<Model> model, const std::shared_ptr<ModelParameters> model_parameters)
167 {
168  switch (model->dimension()) {
169  case Dim::_1D_:
170  m_model = make_shared<Model1D>(*static_pointer_cast<Model1D>(model));
171  break;
172  case Dim::_2D_:
173  m_model = make_shared<Model2D>(*static_pointer_cast<Model2D>(model));
174  break;
175  default:
176  ErrorCBL("the model dimension must be Dim::_1D_ or Dim::_2D_ !", "set_model", "Posterior.cpp");
177  }
178 
179  if (model_parameters!=NULL)
180  m_model->set_parameters(model_parameters);
181  else
182  m_model->set_parameters(m_model_parameters);
183 
184 }
185 
186 
187 // ============================================================================================
188 
189 
190 void cbl::statistics::Posterior::set (const std::vector<std::shared_ptr<PriorDistribution>> prior_distributions, const std::shared_ptr<data::Data> data, const std::shared_ptr<Model> model, const LikelihoodType likelihood_type, const std::vector<size_t> x_index, const int w_index, const int seed)
191 {
192  set_data(data);
193  set_model(model);
194 
195  m_model_parameters = make_shared<PosteriorParameters>(PosteriorParameters(m_model->parameters()->nparameters(), prior_distributions, m_model->parameters()->type(), m_model->parameters()->name()));
196 
197  m_model->set_parameters(m_model_parameters);
198  m_model_parameters->set_prior_distribution(prior_distributions);
199 
200  m_prior = m_model_parameters->prior();
201 
202  set_function(likelihood_type, x_index, w_index);
203  m_likelihood_inputs = make_shared<STR_likelihood_inputs>(STR_likelihood_inputs(m_data, m_model, m_x_index, m_w_index));
204 
205  m_set_seed(seed);
206 }
207 
208 
209 // ============================================================================================
210 
211 
212 void cbl::statistics::Posterior::maximize (const std::vector<double> start, const unsigned int max_iter, const double tol, const double epsilon)
213 {
214  vector<double> starting_par;
215 
216  const unsigned int npar_free = m_model->parameters()->nparameters_free();
217  const unsigned int npar = m_model->parameters()->nparameters();
218 
219  if (start.size()==npar_free)
220  starting_par = start;
221  else if (start.size()==npar)
222  for (size_t i=0; i<npar_free; i++)
223  starting_par.push_back(start[m_model->parameters()->free_parameter()[i]]);
224  else
225  ErrorCBL("check your inputs: start.size()="+conv(start.size(), par::fINT)+" must be equal to either npar_free="+conv(npar_free, par::fINT)+" or npar="+conv(npar, par::fINT)+"!", "maximize", "Posterior.cpp");
226 
227  function<double(vector<double> &)> post = [this] (vector<double> &pp) { return -this->log(pp); };
228 
229 
230  // extra check on epsilon
231  function<bool(vector<double> &)> checkWrong = [&] (vector<double> &pp) {
232  bool ch = true;
233  if (post(pp)<-par::defaultDouble) ch = false;
234  return ch;
235  };
236 
237  vector<double> par = starting_par;
238  if (checkWrong(par))
239  ErrorCBL("The starting position is outside the prior range: the first input parameter must be changed!", "maximize", "Posterior.cpp");
240 
241  // loop on simplex side
242  for (size_t i=0; i<npar_free; i++) {
243  par = starting_par;
244  par[i] += epsilon;
245  if (checkWrong(par))
246  ErrorCBL("The simplex side is outside prior range: the epsilon parameter or the starting position must be changed.", "maximize", "Posterior.cpp");
247  }
248 
249  // everything is fine up to here... let's go
250  coutCBL << "Maximizing the posterior..." << endl;
251  vector<double> result = cbl::wrapper::gsl::GSL_minimize_nD(post, starting_par, {}, max_iter, tol, epsilon);
252 
253  // check if the result is inside the prior ranges
254  if (m_prior->log(result)>par::defaultDouble) {
255  coutCBL << "Done!" << endl << endl;
256  m_model_parameters->set_bestfit_values(result);
257  m_model_parameters->write_bestfit_info();
258  coutCBL << "-log(posterior) = " << post(result) << endl << endl;
259  }
260  else
261  ErrorCBL("the maximization ended with parameter values out of the priors: check your inputs or change the epsilon value!", "maximize", "Posterior.cpp");
262 }
263 
264 
265 // ============================================================================================
266 
267 
268 void cbl::statistics::Posterior::sample_stretch_move (const double aa, const bool parallel, const string outputFile, const int start, const int thin, const int nbins)
269 {
270  if (parallel && outputFile!=cbl::par::defaultString)
271  WarningMsgCBL("no run-time output available for parallel stretch-move algorithm: this option will be ignored!", "sample_stretch_move", "Posterior.cpp");
272 
273  coutCBL << "Sampling the posterior..." << endl;
274 
275  const int seed = m_generate_seed();
276  const int nparameters = m_model_parameters->nparameters();
277  const int nparameters_free = m_model_parameters->nparameters_free();
278  const int chain_size = m_model_parameters->chain_size();
279  const int n_walkers = m_model_parameters->chain_nwalkers();
280 
281  vector<vector<double>> Start(n_walkers, vector<double>(nparameters, 0));
282 
283  for (int i=0; i<nparameters; i++)
284  for (int j=0; j<n_walkers; j++)
285  Start[j][i] = m_model_parameters->chain_value(i, 0, j);
286 
287  auto posterior = [this] (vector<double> &pp) { return log(pp); };
288 
289  cbl::statistics::Sampler sampler(nparameters, nparameters_free, posterior);
290  if (parallel)
291  sampler.sample_stretch_move_parallel(chain_size, n_walkers, Start, seed, aa);
292  else
293  sampler.sample_stretch_move(chain_size, n_walkers, Start, seed, aa, outputFile);
294 
295  vector<vector<double>> chain_values;
296 
297  sampler.get_chain_function_acceptance(chain_values, m_log_posterior, m_acceptance);
298 
299  m_model_parameters->set_chain_values(chain_values, n_walkers);
300 
301  // set the best-fit parameters to the meadian values of the MCMC chain
302  m_model_parameters->set_bestfit_values(start, thin, nbins, m_generate_seed());
303 
304  m_weight.erase(m_weight.begin(), m_weight.end());
305  m_weight.resize(m_log_posterior.size(), 1.);
306 }
307 
308 
309 // ============================================================================================
310 
311 
312 void cbl::statistics::Posterior::importance_sampling (const std::string input_dir, const std::string input_file, const vector<size_t> column, const int header_lines_to_skip, const bool is_FITS_format, const bool apply_to_likelihood, const int n_walkers)
313 {
314  // import the chain
315  read_chain(input_dir, input_file, n_walkers, column, header_lines_to_skip, is_FITS_format);
316 
317  const int n_parameters = m_model_parameters->nparameters();
318  const int chain_size = m_model_parameters->chain_size();
319 
320  vector<double> log_dist(m_log_likelihood.size(), 1.), input_log_dist(m_log_likelihood.size(), 1.);
321 
322  // loop on the chain
323  for (int chain_position=0; chain_position<chain_size; ++chain_position) {
324 
325  // loop to parallelize the computation
326 #pragma omp parallel for schedule(dynamic)
327  for (int walker_index=0; walker_index<n_walkers; ++walker_index) {
328 
329  vector<double> pp(n_parameters);
330  for (int parameter=0; parameter<n_parameters; ++parameter)
331  pp[parameter] = m_model_parameters->chain_value(parameter, chain_position, walker_index);
332 
333  // compute the logarithm of the posterior, of likelihood, at the parameters of the input chain
334  log_dist[chain_position*n_walkers+walker_index] = (apply_to_likelihood) ? Likelihood::log(pp) : log(pp);
335 
336  // value of the logarithm of the posterior, or likelihood, of the input chain
337  input_log_dist[chain_position*n_walkers+walker_index] =
338  (apply_to_likelihood) ? m_log_likelihood[chain_position*n_walkers+walker_index]
339  : m_log_posterior[chain_position*n_walkers+walker_index];
340  }
341 
342  coutCBL << int(double(chain_position)/(chain_size)*100.) << "% \r"; cout.flush();
343  }
344 
345 
346  // compute the weights as posterior, or likelihood, normalized ratios
347 
348  const double shift = Max(input_log_dist)-Max(log_dist);
349 
350  for (int chain_position=0; chain_position<chain_size; ++chain_position)
351 #pragma omp parallel for schedule(dynamic)
352  for (int walker_index=0; walker_index<n_walkers; ++walker_index)
353  m_weight[chain_position*n_walkers+walker_index] = exp(log_dist[chain_position*n_walkers+walker_index]-input_log_dist[chain_position*n_walkers+walker_index]+shift);
354 
355 }
356 
357 
358 // ============================================================================================
359 
360 
361 void cbl::statistics::Posterior::read_chain (const string input_dir, const string input_file, const int n_walkers, const vector<size_t> column, const int header_lines_to_skip, const bool is_FITS_format)
362 {
363  if (is_FITS_format)
364  read_chain_fits(input_dir, input_file, n_walkers, column);
365  else
366  read_chain_ascii(input_dir, input_file, n_walkers, column, header_lines_to_skip);
367 }
368 
369 
370 // ============================================================================================
371 
372 
373 void cbl::statistics::Posterior::read_chain_ascii (const string input_dir, const string input_file, const int n_walkers, const vector<size_t> column, const int skip_header)
374 {
375  const string file = input_dir+input_file;
376  coutCBL << "Reading the chain file " << file << endl;
377 
378  const int nparameters = m_model_parameters->nparameters();
379 
380  std::function<vector<double>(const vector<double> params)> assign;
381 
382  if (column.size()==0) {
383  assign = [&] (const vector<double> params) {
384  vector<double> pp(nparameters);
385  for (int i=0; i<nparameters; i++)
386  pp[i] = params[i+1];
387  return pp;
388  };
389  }
390  else {
391  checkDim(column, nparameters, "columns");
392  assign = [&] (const vector<double> params) {
393  vector<double> pp(nparameters);
394  for (int i=0; i<nparameters; i++)
395  pp[i] = params[column[i]+1];
396  return pp;
397  };
398  }
399 
400  ifstream fin(file.c_str()); checkIO(fin, file);
401 
402  string line;
403  for (int i=0; i<skip_header; i++)
404  getline(fin, line);
405 
406  vector<vector<double>> chain_value;
407 
408  m_log_likelihood.erase(m_log_likelihood.begin(), m_log_likelihood.end());
409  m_log_posterior.erase(m_log_posterior.begin(), m_log_posterior.end());
410  m_weight.erase(m_weight.begin(), m_weight.end());
411 
412  while (getline(fin, line)) {
413  stringstream ss(line);
414  double NUM;
415  vector<double> ll;
416 
417  while (ss>>NUM) ll.push_back(NUM);
418 
419  vector<double> params = assign(ll);
420 
421  m_log_likelihood.push_back(ll[ll.size()-4]);
422  m_log_posterior.push_back(ll[ll.size()-2]);
423  m_weight.push_back(ll[ll.size()-1]);
424 
425  chain_value.push_back(params);
426  }
427  const int chain_size = chain_value.size()/n_walkers;
428 
429  fin.clear(); fin.close();
430 
431  checkDim(chain_value, n_walkers*chain_size, m_model_parameters->nparameters(), "chain_value");
432  checkDim(m_log_posterior, n_walkers*chain_size, "m_log_posterior");
433  checkDim(m_weight, n_walkers*chain_size, "m_weight");
434 
435  chain_value = cbl::transpose(chain_value);
436 
437  m_model_parameters->set_chain_values(chain_value, n_walkers);
438 
439  coutCBL << "Done!" << endl << endl;
440 }
441 
442 
443 // ============================================================================================
444 
445 
446 void cbl::statistics::Posterior::read_chain_fits (const string input_dir, const string input_file, const int n_walkers, const vector<size_t> column)
447 {
448  (void)column;
449 
450  const int nparameters = m_model_parameters->nparameters();
451 
452  string file = input_dir+input_file;
453  coutCBL << "Reading the chain file " << file << endl;
454 
455  vector<string> names = m_model_parameters->name();
456  names.push_back("Log(Likelihood)");
457  names.push_back("Log(Posterior)");
458  names.push_back("Weight");
459 
460  vector<vector<double>> chain_value(nparameters);
461  m_log_likelihood.erase(m_log_likelihood.begin(), m_log_likelihood.end());
462  m_log_posterior.erase(m_log_posterior.begin(), m_log_posterior.end());
463  m_weight.erase(m_weight.begin(), m_weight.end());
464 
465  vector<vector<double>> values = cbl::wrapper::ccfits::read_table_fits(file, names);
466 
467  for (int i=0; i<nparameters; i++)
468  chain_value[i] = values[i];
469 
470  m_log_likelihood = values[nparameters-2];
471  m_log_posterior = values[nparameters];
472  m_weight = values[nparameters+1];
473 
474  int chain_size = m_log_posterior.size()/n_walkers;
475 
476  checkDim(chain_value, nparameters, n_walkers*chain_size, "chain_value");
477  checkDim(m_log_likelihood, n_walkers*chain_size, "m_log_likelihood");
478  checkDim(m_log_posterior, n_walkers*chain_size, "m_log_posterior");
479  checkDim(m_weight, n_walkers*chain_size, "m_weight");
480 
481  m_model_parameters->set_chain_values(chain_value, n_walkers);
482 
483  coutCBL << "Done!" << endl << endl;
484 }
485 
486 
487 // ============================================================================================
488 
489 
490 void cbl::statistics::Posterior::write_chain (const string output_dir, const string output_file, const int start, const int thin, const bool is_FITS_format, const int prec, const int ww)
491 {
492  if (is_FITS_format)
493  write_chain_fits(output_dir, output_file, start, thin);
494  else
495  write_chain_ascii(output_dir, output_file, start, thin, prec, ww);
496 }
497 
498 
499 // ============================================================================================
500 
501 
502 void cbl::statistics::Posterior::write_chain_ascii (const string output_dir, const string output_file, const int start, const int thin, const int prec, const int ww)
503 {
504  const int nparameters = m_model_parameters->nparameters();
505  const int chain_size = m_model_parameters->chain_size();
506  const int n_walkers = m_model_parameters->chain_nwalkers();
507 
508  checkDim(m_log_posterior, n_walkers*chain_size, "m_log_posterior", false);
509 
510  string MK = "mkdir -p "+output_dir;
511  if (system(MK.c_str())) {}
512 
513  string file = output_dir+output_file;
514 
515  ofstream fout(file.c_str()); checkIO(fout, file);
516  fout.precision(10);
517 
518  fout << "# step ";
519  for (int k=0; k<nparameters; k++)
520  fout << m_model_parameters->name(k) << " ";
521  fout << " log(Likelihood) log(Prior) log(Posterior) Weight" << endl;
522 
523  int nn = 0;
524 
525  for (int j=start; j<chain_size; j+=thin) {
526  for (int i=0; i<n_walkers; i++) {
527  fout << setw(6) << nn++ << " ";
528 
529  vector<double> pp(nparameters);
530 
531  for (int k=0; k<nparameters; k++) {
532  pp[k] = m_model_parameters->chain_value(k, j, i);
533  cbl::Print(m_model_parameters->chain_value(k, j, i), prec, ww, "", " ", false, fout);
534  }
535 
536  if (m_log_likelihood.size()>size_t(j*n_walkers+i)) {
537  cbl::Print(m_log_likelihood[j*n_walkers+i], prec, ww, "", " ", false, fout);
538  cbl::Print(m_log_posterior[j*n_walkers+i]-m_log_likelihood[j*n_walkers+i], prec, ww, "", " ", false, fout);
539  }
540  else {
541  const double pr = m_prior->log(pp);
542  cbl::Print(m_log_posterior[j*n_walkers+i]-pr, prec, ww, "", " ", false, fout);
543  cbl::Print(pr, prec, ww, "", " ", false, fout);
544  }
545 
546  cbl::Print(m_log_posterior[j*n_walkers+i], prec, ww, "", " ", false, fout);
547  cbl::Print(m_weight[j*n_walkers+i], prec, ww, "", "\n", false, fout);
548  }
549  }
550 
551  fout.clear(); fout.close();
552 
553  coutCBL << "I wrote the file: " << file << endl;
554 }
555 
556 
557 // ============================================================================================
558 
559 
560 void cbl::statistics::Posterior::write_chain_fits (const string output_dir, const string output_file, const int start, const int thin)
561 {
562  const int nparameters = m_model_parameters->nparameters();
563  const int chain_size = m_model_parameters->chain_size();
564  const int n_walkers = m_model_parameters->chain_nwalkers();
565 
566  checkDim(m_log_posterior, n_walkers*chain_size, "m_log_posterior", false);
567 
568  string MK = "mkdir -p "+output_dir;
569  if (system(MK.c_str())) {}
570 
571  vector<string> names;
572  names.push_back("Step");
573  for (int k=0; k<nparameters; k++)
574  names.emplace_back(m_model_parameters->name(k));
575 
576  names.push_back("Log(Likelihood)");
577  names.push_back("Log(Prior)");
578  names.push_back("Log(Posterior)");
579  names.push_back("Weight");
580 
581  vector<vector<double>> value(nparameters+5);
582 
583  int n = 0;
584  for (int j=start; j<chain_size; j+=thin)
585  for (int i=0; i<n_walkers; i++) {
586  value[0].emplace_back(n);
587  vector<double> pp(nparameters);
588  for (int k=0; k<nparameters; k++) {
589  value[k+1].emplace_back(m_model_parameters->chain_value(k, j, i));
590  pp[k] = m_model_parameters->chain_value(k, j, i);
591  }
592 
593  if (m_log_likelihood.size()>size_t(j*n_walkers+i)) {
594  value[nparameters+1].emplace_back(m_log_likelihood[j*n_walkers+i]);
595  value[nparameters+2].emplace_back(m_log_posterior[j*n_walkers+i]-m_log_likelihood[j*n_walkers+i]);
596  }
597  else {
598  const double lpr = m_prior->log(pp);
599  value[nparameters+1].emplace_back(m_log_posterior[j*n_walkers+i]-lpr);
600  value[nparameters+2].emplace_back(lpr);
601  }
602 
603  value[nparameters+3].emplace_back(m_log_posterior[j*n_walkers+i]);
604  value[nparameters+4].emplace_back(m_weight[j*n_walkers+i]);
605  n ++;
606  }
607 
608  cbl::wrapper::ccfits::write_table_fits(output_dir, output_file, names, value);
609 }
610 
611 
612 // ============================================================================================
613 
614 
615 void cbl::statistics::Posterior::initialize_chains (const int chain_size, const int n_walkers)
616 {
617  m_model_parameters->set_chain(chain_size, n_walkers);
618  m_model_parameters->initialize_chain_from_prior();
619 }
620 
621 
622 // ============================================================================================
623 
624 
625 void cbl::statistics::Posterior::initialize_chains (const int chain_size, const int n_walkers, const double radius, const std::vector<double> start, const unsigned int max_iter, const double tol, const double epsilon)
626 {
627  maximize(start, max_iter, tol, epsilon);
628 
629  m_model_parameters->set_chain(chain_size, n_walkers);
630  m_model_parameters->initialize_chain_ball_bestfit(radius, m_generate_seed());
631 }
632 
633 
634 // ============================================================================================
635 
636 
637 void cbl::statistics::Posterior::initialize_chains (const int chain_size, const int n_walkers, std::vector<double> &value, const double radius)
638 {
639  m_model_parameters->set_chain(chain_size, n_walkers);
640  m_model_parameters->initialize_chain_ball(value, radius, m_generate_seed());
641 }
642 
643 
644 // ============================================================================================
645 
646 
647 void cbl::statistics::Posterior::initialize_chains (const int chain_size, const std::vector<std::vector<double>> chain_value)
648 {
649  const int n_walkers = chain_value[0].size();
650  m_model_parameters->set_chain(chain_size, n_walkers);
651 
652  for (size_t pp=0; pp<m_model_parameters->nparameters(); pp++)
653  for (int ww=0; ww<n_walkers; ww++)
654  m_model_parameters->set_chain_value(pp, 0, ww, chain_value[pp][ww]);
655 }
656 
657 
658 // ============================================================================================
659 
660 
661 void cbl::statistics::Posterior::initialize_chains (const int chain_size, const int n_walkers, const std::string input_dir, const std::string input_file)
662 {
663  string last_step_file = input_dir+input_file+"_LastStep";
664  string get_last_step = "tail -n "+conv(n_walkers, par::fINT)+" "+input_dir+input_file+" > "+last_step_file;
665  if (system(get_last_step.c_str())) {}
666 
667  ifstream fin(last_step_file);
668  string line;
669 
670  vector<vector<double>> chain_value;
671 
672  while (getline(fin, line))
673  {
674  stringstream ss(line);
675  double NUM;
676  vector<double> ll, params;
677 
678  while (ss>>NUM) ll.push_back(NUM);
679  for (size_t i=1; i<ll.size()-3; i++)
680  params.push_back(ll[i]);
681 
682  chain_value.push_back(params);
683  }
684  fin.clear(); fin.close();
685 
686  string rm_last_step = "rm -r "+last_step_file;
687  if (system(rm_last_step.c_str())) {}
688 
689  checkDim(chain_value, n_walkers, m_model_parameters->nparameters(), "chain_from_LastStep_file");
690  chain_value = cbl::transpose(chain_value);
691 
692  initialize_chains(chain_size, chain_value);
693 }
694 
695 
696 // ============================================================================================
697 
698 
699 void cbl::statistics::Posterior::write_maximization_results (const string dir_output, const string file)
700 {
701  coutCBL << "Writing results of posterior maximization on " << dir_output+file << endl;
702  vector<double> bestFitValues = m_model_parameters->bestfit_value();
703  string name = LikelihoodTypeNames ()[static_cast<int>(m_likelihood_type)];
704  double posteriorValue = this->log(bestFitValues);
705 
706  string mkdir = "mkdir -p "+dir_output;
707  if (system(mkdir.c_str())) {}
708 
709  ofstream fout(dir_output+file);
710 
711  fout << "#Parameters information" << endl;
712  fout << "number of parameters = " << bestFitValues.size() << endl;
713 
714  for (size_t i=0; i<bestFitValues.size(); i++) {
715  fout << "par" << i+1 << "_name = " << m_model_parameters->name(i) << endl;
716  fout << "par" << i+1 << "_status = " << m_model_parameters->status(i) << endl;
717  fout << "par" << i+1 << "_bestfit_value = " << bestFitValues[i] << endl;
718  }
719 
720  fout << "#Likelihood information" << endl;
721  fout << "likelihoodType = " << name << endl;
722  fout << "logPosteriorValue = " << posteriorValue << endl;
723 
724  fout.clear(); fout.close();
725  coutCBL << "I wrote the file " << dir_output+file << endl;
726 }
727 
728 
729 // ============================================================================================
730 
731 
732 void cbl::statistics::Posterior::show_results (const int start, const int thin, const int nbins, const bool show_mode, const int ns, const int nb)
733 {
734  m_model_parameters->show_results(start, thin, nbins, m_generate_seed(), show_mode, ns, nb, weight(start, thin));
735 }
736 
737 
738 // ============================================================================================
739 
740 
741 void cbl::statistics::Posterior::write_results (const string output_dir, const string root_file, const int start, const int thin, const int nbins, const bool fits, const bool compute_mode, const int ns, const int nb)
742 {
743  const string extension = (fits) ? "_chain.fits" : "_chain.dat";
744  write_chain(output_dir, root_file+extension, start, thin, fits);
745 
746  m_model_parameters->write_results(output_dir, root_file, start, thin, nbins, m_generate_seed(), compute_mode, ns, nb, weight(start, thin));
747 }
748 
749 
750 // ============================================================================================
751 
752 
753 void cbl::statistics::Posterior::write_model_from_chain (const std::string output_dir, const std::string output_file, const std::vector<double> xx, const std::vector<double> yy, const int start, const int thin)
754 {
755  switch (m_model->dimension()) {
756 
757  case Dim::_1D_:
758  {
759  vector<double> xvec = xx;
760  if (xx.size()==0)
761  xvec = m_data->xx();
762 
763  m_model->write_from_chains(output_dir, output_file, xvec, start, thin);
764  }
765 
766  break;
767  case Dim::_2D_:
768  {
769  vector<double> xvec = xx, yvec = yy;
770  if (xx.size()==0)
771  xvec = m_data->xx();
772  if (yy.size()==0)
773  yvec = m_data->yy();
774 
775  m_model->write_from_chains(output_dir, output_file, xvec, yvec, start, thin);
776  }
777 
778  break;
779  default:
780  ErrorCBL("the input dimension must be Dim::_1D_ or Dim::_2D_ !", "write_model_from_chain", "Posterior.cpp");
781  }
782 }
Implementation of the chain-mesh data structure.
class FITSwrapper that wrap CCfits routines to manage FITS files
#define coutCBL
CBL print message.
Definition: Kernel.h:734
The class PosteriorParameters.
The class Posterior.
The class Sampler.
The class UniformRandomNumbers_Int.
The class Likelihood.
Definition: Likelihood.h:56
std::shared_ptr< void > m_likelihood_inputs
likelihood inputs
Definition: Likelihood.h:67
double log(std::vector< double > &parameter) const
evaluate the logarithm of the likelihood for the input parameters
Definition: Likelihood.cpp:177
std::shared_ptr< Model > m_model
model to test
Definition: Likelihood.h:64
int m_w_index
the index in extra info where the bin weight is stored
Definition: Likelihood.h:92
std::shared_ptr< ModelParameters > m_model_parameters
likelihood parameters
Definition: Likelihood.h:70
std::vector< size_t > m_x_index
Definition: Likelihood.h:89
std::shared_ptr< data::Data > m_data
data containers
Definition: Likelihood.h:61
The class ModelParameters.
The class PosteriorParameters.
void set_model(std::shared_ptr< Model > model=NULL, const std::shared_ptr< ModelParameters > model_parameters=NULL)
set the model for the posterior analysis
Definition: Posterior.cpp:166
void maximize(const std::vector< double > start, const std::vector< std::vector< double >> parameter_limits, const unsigned int max_iter=10000, const double tol=1.e-6, const double epsilon=1.e-3)
function that maximizes the posterior, finds the best-fit parameters and store them in the model
Definition: Posterior.h:428
void read_chain_ascii(const std::string input_dir, const std::string input_file, const int n_walkers, const std::vector< size_t > column={}, const int header_lines_to_skip=1)
read the chains from an ascii file
Definition: Posterior.cpp:373
double operator()(std::vector< double > &pp) const
the un-normalized posterior
Definition: Posterior.cpp:105
void initialize_chains(const int chain_size, const int n_walkers)
initialize the chains by drawing from the prior distributions
Definition: Posterior.cpp:615
std::vector< double > weight(const int start=0, const int thin=1) const
return the internal member m_weights
Definition: Posterior.cpp:120
std::shared_ptr< cbl::statistics::Prior > m_prior
the prior distribution
Definition: Posterior.h:60
void read_chain(const std::string input_dir, const std::string input_file, const int n_walkers, const std::vector< size_t > column={}, const int header_lines_to_skip=1, const bool is_FITS_format=false)
read the chains
Definition: Posterior.cpp:361
double log(std::vector< double > &pp) const
the logarithm of the un-normalized posterior
Definition: Posterior.cpp:137
void importance_sampling(const std::string input_dir, const std::string input_file, const std::vector< size_t > column={}, const int header_lines_to_skip=1, const bool is_FITS_format=false, const bool apply_to_likelihood=false, const int n_walkers=100)
perform importance sampling
Definition: Posterior.cpp:312
void m_set_seed(const int seed)
set the internal attribute m_seed and related quantities
Definition: Posterior.cpp:47
double chi2(const std::vector< double > parameter={}) const
the
Definition: Posterior.cpp:155
void write_model_from_chain(const std::string output_dir, const std::string output_file, const std::vector< double > xx={}, const std::vector< double > yy={}, const int start=0, const int thin=1)
write the model at xx, yy computing 16th, 50th and 84th percentiles from the MCMC chains
Definition: Posterior.cpp:753
void show_results(const int start, const int thin, const int nbins=50, const bool show_mode=false, const int ns=-1, const int nb=-1)
show the results of the MCMC sampling on screen
Definition: Posterior.cpp:732
void write_chain_ascii(const std::string output_dir, const std::string output_file, const int start=0, const int thin=1, const int prec=5, const int ww=14)
write the chains obtained after the MCMC sampling on an ascii file
Definition: Posterior.cpp:502
void write_results(const std::string output_dir, const std::string root_file, const int start=0, const int thin=1, const int nbins=50, const bool fits=false, const bool compute_mode=false, const int ns=-1, const int nb=-1)
store the results of the MCMC sampling to file
Definition: Posterior.cpp:741
void write_chain_fits(const std::string output_dir, const std::string output_file, const int start=0, const int thin=1)
write the chains obtained after the MCMC sampling on a FITS file
Definition: Posterior.cpp:560
void write_chain(const std::string output_dir, const std::string output_file, const int start=0, const int thin=1, const bool is_FITS_format=false, const int prec=5, const int ww=14)
write the chains obtained after the MCMC sampling
Definition: Posterior.cpp:490
void set(const std::vector< std::shared_ptr< PriorDistribution >> prior_distributions, const std::shared_ptr< data::Data > data, const std::shared_ptr< Model > model, const LikelihoodType likelihood_type, const std::vector< size_t > x_index, const int w_index, const int seed)
set the posterior type using the LikelihoodType object
Definition: Posterior.cpp:190
void sample_stretch_move(const double aa=2, const bool parallel=true, const std::string outputFile=par::defaultString, const int start=0, const int thin=1, const int nbins=50)
sample the posterior using the stretch-move sampler (Foreman-Mackey et al. 2012)
Definition: Posterior.cpp:268
void write_maximization_results(const std::string output_dir, const std::string root_file)
write maximization results on a file
Definition: Posterior.cpp:699
void read_chain_fits(const std::string input_dir, const std::string input_file, const int n_walkers, const std::vector< size_t > column)
read the chains from an ascii file
Definition: Posterior.cpp:446
Posterior()=default
default constructor
The class Sampler.
Definition: Sampler.h:115
void sample_stretch_move(const int chain_size, const int nwalkers, const std::vector< std::vector< double >> start, const int seed=4241, const double aa=2, const std::string outputFile=cbl::par::defaultString)
sample the input function using the stretch-move algorithm on n-dimensional parameter space
Definition: Sampler.cpp:133
void get_chain_function_acceptance(std::vector< std::vector< double >> &chains, std::vector< double > &function, std::vector< double > &acceptance, const int start=0, const int thin=1)
get the chain values and the function
Definition: Sampler.cpp:411
void sample_stretch_move_parallel(const int chain_size, const int nwalkers, const std::vector< std::vector< double >> start, const int seed=4241, const double aa=2)
sample the input function using the stretch-move algorithm on n-dimensional parameter space - paralle...
Definition: Sampler.cpp:396
static const char fINT[]
conversion symbol for: int -> std::string
Definition: Constants.h:121
static const std::string defaultString
default std::string value
Definition: Constants.h:336
static const double defaultDouble
default double value
Definition: Constants.h:348
LikelihoodType
the type of likelihood function
std::vector< std::string > LikelihoodTypeNames()
return a vector containing the LikelihoodType names
std::vector< std::vector< double > > read_table_fits(const std::string input_fits, const std::vector< std::string > column_names, const int next=1, const double fill_value=cbl::par::defaultDouble)
function to read a table from a fits file
Definition: FITSwrapper.cpp:45
void write_table_fits(const std::string output_dir, const std::string file_fits, const std::vector< std::string > column_names, const std::vector< std::vector< double >> table, const std::vector< std::string > column_units={})
function that writes a table on a fits file
Definition: FITSwrapper.cpp:88
std::vector< double > GSL_minimize_nD(FunctionDoubleVector func, const std::vector< double > start, const std::vector< std::vector< double >> ranges, const unsigned int max_iter=1000, const double tol=1.e-6, const double epsilon=0.1)
minimize the provided function using GSL procedure
Definition: GSLwrapper.cpp:497
The global namespace of the CosmoBolognaLib
Definition: CAMB.h:38
std::string conv(const T val, const char *fact)
convert a number to a std::string
Definition: Kernel.h:903
void Print(const T value, const int prec, const int ww, const std::string header="", const std::string end="\n", const bool use_coutCBL=true, std::ostream &stream=std::cout, const std::string colour=cbl::par::col_default)
function to print values with a proper homegenised format
Definition: Kernel.h:1142
std::vector< std::vector< double > > read_file(const std::string file_name, const std::string path_name, const std::vector< int > column_data, const int skip_nlines=0)
read a data from a file ASCII
Definition: Kernel.cpp:410
void checkDim(const std::vector< T > vect, const int val, const std::string vector, bool equal=true)
check if the dimension of a std::vector is equal/lower than an input value
Definition: Kernel.h:1532
void checkIO(const std::ifstream &fin, const std::string file="NULL")
check if an input file can be opened
Definition: Kernel.cpp:160
int ErrorCBL(const std::string msg, const std::string functionCBL, const std::string fileCBL, const cbl::glob::ExitCode exitCode=cbl::glob::ExitCode::_error_)
throw an exception: it is used for handling exceptions inside the CosmoBolognaLib
Definition: Kernel.h:780
T Max(const std::vector< T > vect)
maximum element of a std::vector
Definition: Kernel.h:1336
std::vector< std::vector< T > > transpose(std::vector< std::vector< T >> matrix)
transpose a matrix
Definition: Kernel.h:1729
void WarningMsgCBL(const std::string msg, const std::string functionCBL, const std::string fileCBL)
internal CBL warning message
Definition: Kernel.h:747
@ _2D_
2D pair, used e.g. for 2D pairs, in Cartesian or polar coordinates
@ _1D_
1D, used e.g. for 1D pairs, in angular or comoving separations
the struct STR_likelihood_inputs