CosmoBolognaLib
Free Software C++/Python libraries for cosmological calculations
Sampler.cpp
Go to the documentation of this file.
1 /********************************************************************
2  * Copyright (C) 2014 by Federico Marulli and Alfonso Veropalumbo *
3  * federico.marulli3@unibo.it *
4  * *
5  * This program is free software; you can redistribute it and/or *
6  * modify it under the terms of the GNU General Public License as *
7  * published by the Free Software Foundation; either version 2 of *
8  * the License, or (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public *
16  * License along with this program; if not, write to the Free *
17  * Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ********************************************************************/
20 
33 #include "Sampler.h"
34 
35 using namespace std;
36 
37 using namespace cbl;
38 
39 
40 // ============================================================================================
41 
42 
43 shared_ptr<random::DistributionRandomNumbers> cbl::statistics::Sampler::m_set_gz (const int seed, const double aa)
44 {
45  if (aa<=1)
46  ErrorCBL("the stretch move parameter must be >1!", "m_set_gz", "Sampler.cpp");
47 
48  double zmin = 1./aa;
49  double zmax = aa;
50  vector<double> zz = linear_bin_vector(1000,zmin,zmax), gzz;
51  for (auto &&zzz : zz)
52  gzz.push_back(1./sqrt(zzz));
53 
54  return make_shared<random::DistributionRandomNumbers>(random::DistributionRandomNumbers(zz, gzz, "Spline", seed));
55 }
56 
57 
58 // ============================================================================================
59 
60 
61 
62 void cbl::statistics::Sampler::m_initialize_chains (const vector< vector<double> > start)
63 {
64  for (int i=0; i<m_nwalkers; i++) {
65  vector<double> pp = start[i];
66  m_function_chain[0][i] = m_function(pp);
67  m_chains[0][i] = pp;
68  }
69 }
70 
71 
72 // ============================================================================================
73 
74 
75 
76 void cbl::statistics::Sampler::m_initialize_chains_parallel (const vector< vector<double> > start)
77 {
78 #pragma omp parallel for schedule(dynamic)
79  for (int i=0; i<m_nwalkers; i++) {
80  vector<double> pp = start[i];
81  m_function_chain[0][i] = m_function(pp);
82  m_chains[0][i] = pp;
83  }
84 }
85 
86 
87 // ============================================================================================
88 
89 
90 void cbl::statistics::Sampler::set_chain (const int npar, const int npar_free, const int chain_size, const int nwalkers)
91 {
92  m_npar = npar;
93  m_npar_free = npar_free;
94 
95  m_nwalkers = nwalkers;
96  m_chain_size = chain_size;
97 
98  m_chains.erase(m_chains.begin(), m_chains.end());
99  m_chains.resize(m_chain_size, vector<vector<double>>(m_nwalkers, vector<double>(m_npar,0)));
100 
101  m_acceptance.erase(m_acceptance.begin(), m_acceptance.end());
102  m_acceptance.resize(m_nwalkers, 0.);
103 
104  m_function_chain.erase(m_function_chain.begin(), m_function_chain.end());
105  m_function_chain.resize(m_chain_size, vector<double>(m_nwalkers, 0.));
106 }
107 
108 
109 // ============================================================================================
110 
111 
112 void cbl::statistics::Sampler::set_function (const function<double(vector<double> &)> function)
113 {
114  m_function = function;
115  m_use_python = false;
116 }
117 
118 
119 // ============================================================================================
120 
121 /*
122 void cbl::statistics::Sampler::set_function (PyObject *pp)
123 {
124  PyCallback pc(pp);
125  m_function = bind(&PyCallback::V2D, pc, placeholders::_1);
126  m_use_python = true;
127 }
128 */
129 
130 // ============================================================================================
131 
132 
133 void cbl::statistics::Sampler::sample_stretch_move (const int chain_size, const int nwalkers, const vector<vector<double>> start, const int seed, const double aa, const string outputFile)
134 {
135  set_chain(m_npar, m_npar_free, chain_size, nwalkers);
136 
137  function<void(int, int, const vector<double>, const double)> write_func;
138  ofstream fout;
139 
140  write_func = [&] (const int nc, const int nw, const vector<double> parameters, const double func)
141  {(void)nc; (void)nw; (void)parameters; (void)func;};
142 
143  if (outputFile!=par::defaultString) {
144  fout.open(outputFile.c_str());
145  write_func = [&] (const int nc, const int nw, const vector<double> parameters, const double func)
146  {
147  fout << nc << " " << nw << " ";
148  for (int p=0; p<m_npar; p++)
149  fout << parameters[p] << " ";
150  fout << func << endl;
151  };
152  }
153 
154  // set seed for random_numbers
155  random::UniformRandomNumbers_Int seeds(0, 23412432, seed);
156 
157  const int chain_seed = seeds();
158  const int MH_seed = seeds();
159  const int gz_seed = seeds();
160 
161  coutCBL << "Starting seed = " << seed << endl;
162  coutCBL << "Seed for random walker choice = " << chain_seed << endl;
163  coutCBL << "Seed for Metropolis-Hastings algorithm = " << MH_seed << endl;
164  coutCBL << "Seed for random extraction from g(z) = " << gz_seed << endl;
165 
166  random::UniformRandomNumbers_Int chains(0, m_nwalkers-1, chain_seed);
167  random::UniformRandomNumbers MH_random(0., 1., MH_seed);
168  auto gz = m_set_gz(gz_seed, aa);
169 
170  // initialize chains
171  m_initialize_chains(start);
172 
173  for (int n=1; n<m_chain_size; n++) {
174  for (int i=0; i<m_nwalkers; i++)
175  {
176  int kk = i;
177  while (kk==i)
178  kk = chains();
179 
180  vector<double> parameters_i;
181  vector<double> parameters_k;
182 
183  parameters_i = m_chains[n-1][i];
184  parameters_k = m_chains[n-1][kk];
185 
186  vector<double> parameters(m_npar, 0);
187  double proposed_function_chains = par::defaultDouble;
188 
189  double gen_z = gz->operator()();
190 
191  gen_z = gz->operator()();
192  for (int p=0; p<m_npar; p++)
193  parameters[p] = parameters_k[p] + gen_z*(parameters_i[p]-parameters_k[p]);
194  proposed_function_chains = m_function(parameters);
195 
196  parameters_i = parameters;
197 
198  double ratio = min(1.,pow(gen_z, m_npar_free-1)*exp(proposed_function_chains-m_function_chain[n-1][i]));
199 
200  if (MH_random() <ratio) {
201  m_function_chain[n][i] = proposed_function_chains;
202  m_chains[n][i] = parameters_i;
203  m_acceptance[i] += 1./m_chain_size;
204  }
205  else {
206  m_function_chain[n][i] = m_function_chain[n-1][i];
207  m_chains[n][i] = m_chains[n-1][i];
208  }
209 
210  write_func(n, i, m_chains[n][i], m_function_chain[n][i]);
211  }
212 
213  const int progress = int(double((n+1)*m_nwalkers)/(m_nwalkers*m_chain_size)*100);
214  coutCBL << progress << "% \r"; cout.flush();
215  }
216 
217  cout << endl;
218  coutCBL << "Done!" << endl;
219 }
220 
221 
222 // ============================================================================================
223 
224 
225 void cbl::statistics::Sampler::m_sample_stretch_move_parallel_cpp (const int chain_size, const int nwalkers, const vector<vector<double>> start, const int seed, const double aa)
226 {
227  set_chain(m_npar, m_npar_free, chain_size, nwalkers);
228 
229  // set seeds and random_numbers
230  random::UniformRandomNumbers_Int seeds(0, 23412432, seed);
231 
232  vector<double> chains_index = linear_bin_vector(m_nwalkers, 0., m_nwalkers-1.);
233  vector<double> chains_weights(m_nwalkers,1);
234 
235  const int chain_seed1 = seeds();
236  const int chain_seed2 = seeds();
237  const int MH_seed = seeds();
238  const int gz_seed = seeds();
239 
240  coutCBL << "Starting seed = " << seed << endl;
241  coutCBL << "Seed for random walker choice, first half = " << chain_seed1 << endl;
242  coutCBL << "Seed for random walker choice, second half = " << chain_seed2 << endl;
243  coutCBL << "Seed for Metropolis-Hastings algorithm = " << MH_seed << endl;
244  coutCBL << "Seed for random extraction from g(z) = " << gz_seed << endl;
245 
246  int half = m_nwalkers/2;
247  random::UniformRandomNumbers_Int chains_first_half(0, half-1, chain_seed1);
248  random::UniformRandomNumbers_Int chains_second_half(half, m_nwalkers-1, chain_seed2);
249 
250  vector<random::UniformRandomNumbers_Int> chains_sample = {chains_second_half, chains_first_half};
251 
252  random::UniformRandomNumbers MH_random(0., 1., MH_seed);
253 
254  auto gz = m_set_gz(gz_seed, aa);
255 
256  // initialise the chains
257  m_initialize_chains_parallel(start);
258 
259  // stretch-move
260 
261  for (int n=1; n<m_chain_size; n++) {
262  for (int ss=0; ss<2; ss++) {
263 #pragma omp parallel for schedule(dynamic)
264  for (int ii=0; ii<half; ii++)
265  {
266  int nn = n-1+ss;
267  int i = half*ss+ii;
268  int kk = int(chains_sample[ss]());;
269 
270  vector<double> parameters_i= m_chains[n-1][i];
271  vector<double> parameters_k = m_chains[nn][kk];;
272 
273  vector<double> parameters(m_npar, 0);
274  double proposed_function_chains = par::defaultDouble;
275 
276  double gen_z = gz->operator()();
277 
278  gen_z = gz->operator()();
279  for (int p=0; p<m_npar; p++)
280  parameters[p] = parameters_k[p] + gen_z*(parameters_i[p]-parameters_k[p]);
281  proposed_function_chains = m_function(parameters);
282 
283  parameters_i = parameters;
284 
285  double ratio = min(1., pow(gen_z, m_npar_free-1)*exp(proposed_function_chains-m_function_chain[n-1][i]));
286 
287  if (MH_random()<ratio) {
288  m_function_chain[n][i] = proposed_function_chains;
289  m_chains[n][i] = parameters_i;
290  m_acceptance[i] += 1./m_chain_size;
291  }
292  else {
293  m_function_chain[n][i] = m_function_chain[n-1][i];
294  m_chains[n][i] = m_chains[n-1][i];
295  }
296 
297  }
298  }
299 
300  const int progress = int(double((n+1)*m_nwalkers)/(m_nwalkers*m_chain_size)*100);
301  coutCBL << progress << "% \r"; cout.flush();
302  }
303 
304  cout << endl;
305  coutCBL << "Done!" << endl;
306 }
307 
308 
309 // ============================================================================================
310 
311 
312 void cbl::statistics::Sampler::m_sample_stretch_move_parallel_py (const int chain_size, const int nwalkers, const vector<vector<double>> start, const int seed, const double aa)
313 {
314  (void)chain_size; (void)nwalkers; (void)start; (void)seed; (void)aa;
315  cbl::ErrorCBL("", "m_sample_stretch_move_parallel_py", "Sampler.cpp", glob::ExitCode::_workInProgress_);
316 
317 
318  /*
319  set_chain(m_npar, m_npar_free, chain_size, nwalkers);
320 
321  // set seeds and random_numbers
322  random::UniformRandomNumbers seeds(0, 23412432, seed);
323 
324  vector<double> chains_index = linear_bin_vector(m_nwalkers, 0., m_nwalkers-1.);
325  vector<double> chains_weights(m_nwalkers,1);
326 
327  int half = m_nwalkers/2;
328  random::DiscreteRandomNumbers chains_first_half(chains_index, chains_weights, int(seeds()), 0, half-1);
329  random::DiscreteRandomNumbers chains_second_half(chains_index, chains_weights, int(seeds()), half, m_nwalkers-1);
330 
331  vector<random::DiscreteRandomNumbers> chains_sample = {chains_second_half, chains_first_half};
332 
333  random::UniformRandomNumbers MH_random(0., 1., int(seeds()));
334 
335  auto gz = m_set_gz(int(seeds()), aa);
336 
337  // initialize chains
338  m_initialize_chains(start);
339 
340  // stretch-move
341 
342  Py_BEGIN_ALLOW_THREADS
343 
344  for (int n=1; n<m_chain_size; n++) {
345  for (int ss=0; ss<2; ss++) {
346 #pragma omp parallel for schedule(dynamic)
347  for (int ii=0; ii<half; ii++)
348  {
349  PyGILState_STATE gstate = PyGILState_Ensure();
350 
351  int nn = n-1+ss;
352  int i = half*ss+ii;
353  int kk = int(chains_sample[ss]());;
354 
355  vector<double> parameters_i= m_chains[n-1][i];
356  vector<double> parameters_k = m_chains[nn][kk];;
357 
358  double gen_z = gz->operator()();
359 
360  for (int p=0; p<m_npar; p++)
361  parameters_i[p] = parameters_k[p] + gen_z*(parameters_i[p]-parameters_k[p]);
362 
363  double proposed_function_chains = m_function(parameters_i);
364 
365  double ratio = min(1.,pow(gen_z, m_npar-1)*exp(proposed_function_chains-m_function_chain[n-1][i]));
366 
367  if (MH_random() <ratio) {
368  m_function_chain[n][i] = proposed_function_chains;
369  m_chains[n][i] = parameters_i;
370  m_acceptance[i] += 1./m_chain_size;
371  }
372  else{
373  m_function_chain[n][i] = m_function_chain[n-1][i];
374  m_chains[n][i] = m_chains[n-1][i];
375  }
376 
377  PyGILState_Release(gstate);
378 
379  }
380  }
381 
382  const int progress = int(double((n+1)*m_nwalkers)/(m_nwalkers*m_chain_size)*100);
383  coutCBL << progress << "% \r"; cout.flush();
384  }
385 
386  cout << endl;
387 
388  Py_END_ALLOW_THREADS
389  */
390 }
391 
392 
393 // ============================================================================================
394 
395 
396 void cbl::statistics::Sampler::sample_stretch_move_parallel (const int chain_size, const int nwalkers, const vector<vector<double>> start, const int seed, const double aa)
397 {
398  if (nwalkers%2!=0)
399  ErrorCBL("the number of walkers must be an even integer!", "sample_stretch_move_parallel", "Sampler.cpp");
400 
401  if (m_use_python)
402  m_sample_stretch_move_parallel_py(chain_size, nwalkers, start, seed, aa);
403  else
404  m_sample_stretch_move_parallel_cpp(chain_size, nwalkers, start, seed, aa);
405 }
406 
407 
408 // ============================================================================================
409 
410 
411 void cbl::statistics::Sampler::get_chain_function_acceptance (vector<vector<double>> &chains, vector<double> &function, vector<double> &acceptance, const int start, const int thin)
412 {
413  chains.resize(m_npar);
414  function.erase(function.begin(), function.end());
415  acceptance.erase(acceptance.begin(), acceptance.end());
416 
417  for (int i=start; i<m_chain_size; i+=thin) {
418  for (int j=0; j<m_nwalkers; j++) {
419  function.push_back(m_function_chain[i][j]);
420  acceptance.push_back(m_acceptance[j]);
421  for (int k=0; k<m_npar; k++)
422  chains[k].push_back(m_chains[i][j][k]);
423  }
424  }
425 }
426 
427 
428 // ============================================================================================
429 
430 
431 void cbl::statistics::Sampler::write_chain (const string dir_output, const string file, const int start, const int thin)
432 {
433  string file_out = dir_output+file;
434  ofstream fout(file_out.c_str());
435  fout.precision(10);
436 
437  for (int i=start;i<m_chain_size; i+=thin) {
438  for (int j=0; j<m_nwalkers; j++) {
439  fout << i*m_nwalkers+j << " ";
440  for (int k=0; k<m_npar; k++)
441  fout << m_chains[i][j][k] << " ";
442  fout << m_function_chain[i][j] << " " << m_acceptance[j] << endl;
443  }
444  }
445  fout.clear(); fout.close();
446 
447 }
#define coutCBL
CBL print message.
Definition: Kernel.h:734
The class Sampler.
The class DistributionRandomNumbers.
The class UniformRandomNumbers_Int.
The class UniformRandomNumbers.
void write_chain(const std::string dir_output, const std::string file, const int start, const int thin)
write the chains in an output file
Definition: Sampler.cpp:431
void sample_stretch_move(const int chain_size, const int nwalkers, const std::vector< std::vector< double >> start, const int seed=4241, const double aa=2, const std::string outputFile=cbl::par::defaultString)
sample the input function using the stretch-move algorithm on n-dimensional parameter space
Definition: Sampler.cpp:133
void m_sample_stretch_move_parallel_cpp(const int chain_size, const int nwalkers, const std::vector< std::vector< double >> start, const int seed=4241, const double aa=2)
sample the input function using the stretch-move algorithm on n-dimensional parameter space....
Definition: Sampler.cpp:225
void get_chain_function_acceptance(std::vector< std::vector< double >> &chains, std::vector< double > &function, std::vector< double > &acceptance, const int start=0, const int thin=1)
get the chain values and the function
Definition: Sampler.cpp:411
void set_chain(const int npar, const int npar_free, const int chain_size, const int nwalkers)
function to set the chains
Definition: Sampler.cpp:90
void set_function(const std::function< double(std::vector< double > &)> function)
set the function
Definition: Sampler.cpp:112
void sample_stretch_move_parallel(const int chain_size, const int nwalkers, const std::vector< std::vector< double >> start, const int seed=4241, const double aa=2)
sample the input function using the stretch-move algorithm on n-dimensional parameter space - paralle...
Definition: Sampler.cpp:396
std::shared_ptr< random::DistributionRandomNumbers > m_set_gz(const int seed, const double aa=2)
return the random generator for the stretch-move
Definition: Sampler.cpp:43
void m_initialize_chains_parallel(const std::vector< std::vector< double >> start)
initialize chains
Definition: Sampler.cpp:76
void m_initialize_chains(const std::vector< std::vector< double >> start)
initialize chains
Definition: Sampler.cpp:62
void m_sample_stretch_move_parallel_py(const int chain_size, const int nwalkers, const std::vector< std::vector< double >> start, const int seed=4241, const double aa=2)
sample the input function using the stretch-move algorithm on n-dimensional parameter space - paralle...
Definition: Sampler.cpp:312
static const std::string defaultString
default std::string value
Definition: Constants.h:336
static const double defaultDouble
default double value
Definition: Constants.h:348
The global namespace of the CosmoBolognaLib
Definition: CAMB.h:38
std::vector< T > linear_bin_vector(const size_t nn, const T min, const T max)
fill a std::vector with linearly spaced values
Definition: Kernel.h:1604
int ErrorCBL(const std::string msg, const std::string functionCBL, const std::string fileCBL, const cbl::glob::ExitCode exitCode=cbl::glob::ExitCode::_error_)
throw an exception: it is used for handling exceptions inside the CosmoBolognaLib
Definition: Kernel.h:780