CosmoBolognaLib
Free Software C++/Python libraries for cosmological calculations
Likelihood.cpp
Go to the documentation of this file.
1 /********************************************************************
2  * Copyright (C) 2014 by Federico Marulli and Alfonso Veropalumbo *
3  * federico.marulli3@unibo.it *
4  * *
5  * This program is free software; you can redistribute it and/or *
6  * modify it under the terms of the GNU General Public License as *
7  * published by the Free Software Foundation; either version 2 of *
8  * the License, or (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public *
16  * License along with this program; if not, write to the Free *
17  * Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ********************************************************************/
20 
33 #include "LikelihoodParameters.h"
34 #include "Likelihood.h"
35 
36 using namespace std;
37 
38 using namespace cbl;
39 
40 // ============================================================================================
41 
42 
43 void cbl::statistics::Likelihood::m_set_grid_likelihood_1D (const int npoints, const std::vector<std::vector<double>> parameter_limits, const std::string output_file)
44 {
45  vector<double> pp(npoints), log_ll(npoints);
46  double binSize = (parameter_limits[0][1]-parameter_limits[0][0])/(npoints-1);
47  for (int i=0; i<npoints; i++) {
48  pp[i] = parameter_limits[0][0]+binSize*i;
49  vector<double> par = {pp[i]};
50  log_ll[i] = this->log(par);
51  }
52 
53  shared_ptr<statistics::STR_likelihood_inputs> likelihood_inputs = static_pointer_cast<statistics::STR_likelihood_inputs>(m_likelihood_inputs);
54  likelihood_inputs->interp_function1D = make_shared<glob::FuncGrid>(glob::FuncGrid(pp, log_ll, "Spline"));
55 
56  m_log_likelihood_function_grid = LogLikelihood_1D_interpolated;
57  m_likelihood_function_grid = [&] (vector<double> &par, const shared_ptr<void> input) { return exp(m_log_likelihood_function_grid(par, input));};
58 
59  if (output_file!= par::defaultString) {
60  ofstream fout(output_file); checkIO(fout, output_file);
61  for (int i=0; i<npoints; i++)
62  fout << pp[i] << " " << log_ll[i] << endl;
63 
64  fout.close();
65  }
66 }
67 
68 
69 // ============================================================================================
70 
71 
72 void cbl::statistics::Likelihood::m_set_grid_likelihood_2D (const int npoints, const std::vector<std::vector<double>> parameter_limits, const std::string output_file)
73 {
74  vector<double> pp1(npoints), pp2(npoints);
75  vector<vector<double>> log_ll(npoints, vector<double>(npoints, 0));
76  double binSize1 = (parameter_limits[0][1]-parameter_limits[0][0])/(npoints-1);
77  double binSize2 = (parameter_limits[1][1]-parameter_limits[1][0])/(npoints-1);
78  for (int i=0; i<npoints; i++) {
79  pp1[i] = parameter_limits[0][0]+binSize1*i;
80  for (int j=0; j<npoints; j++) {
81  pp2[j] = parameter_limits[1][0]+binSize2*j;
82  vector<double> par = {pp1[i], pp2[j]};
83  log_ll[i][j] = this->log(par);
84  }
85  }
86 
87  shared_ptr<statistics::STR_likelihood_inputs> likelihood_inputs = static_pointer_cast<statistics::STR_likelihood_inputs>(m_likelihood_inputs);
88  likelihood_inputs->interp_function2D = make_shared<glob::FuncGrid2D>(glob::FuncGrid2D(pp1, pp2, log_ll, "Spline"));
89 
90  m_log_likelihood_function_grid = LogLikelihood_2D_interpolated;
91  m_likelihood_function_grid = [&] (vector<double> &par, const shared_ptr<void> input) { return exp(m_log_likelihood_function_grid(par, input));};
92 
93  if (output_file!= par::defaultString) {
94  ofstream fout(output_file); checkIO(fout, output_file);
95  for (int i=0; i<npoints; i++) {
96  for (int j=0; j<npoints; j++)
97  fout << pp1[i] << " " << pp2[j] << " " << log_ll[i][j] << endl;
98  fout << endl;
99  }
100 
101  fout.close();
102  }
103 
104 }
105 
106 
107 // ============================================================================================
108 
109 
110 void cbl::statistics::Likelihood::m_set_grid_likelihood_1D (const std::string input_file)
111 {
112  vector<double> pp, log_ll;
113 
114  read_vector(input_file, pp, log_ll);
115 
116  shared_ptr<statistics::STR_likelihood_inputs> likelihood_inputs = static_pointer_cast<statistics::STR_likelihood_inputs>(m_likelihood_inputs);
117  likelihood_inputs->interp_function1D = make_shared<glob::FuncGrid>(glob::FuncGrid(pp, log_ll, "Spline"));
118 
119  m_log_likelihood_function_grid = LogLikelihood_1D_interpolated;
120  m_likelihood_function_grid = [&] (vector<double> &par, const shared_ptr<void> input) { return exp(m_log_likelihood_function_grid(par, input));};
121 
122 }
123 
124 
125 // ============================================================================================
126 
127 
128 void cbl::statistics::Likelihood::m_set_grid_likelihood_2D (const std::string input_file)
129 {
130  vector<double> pp1, pp2;
131  vector<vector<double>> log_ll;
132 
133  read_matrix(input_file, pp1, pp2, log_ll);
134 
135  shared_ptr<statistics::STR_likelihood_inputs> likelihood_inputs = static_pointer_cast<statistics::STR_likelihood_inputs>(m_likelihood_inputs);
136  likelihood_inputs->interp_function2D = make_shared<glob::FuncGrid2D>(glob::FuncGrid2D(pp1, pp2, log_ll, "Spline"));
137 
138  m_log_likelihood_function_grid = LogLikelihood_2D_interpolated;
139  m_likelihood_function_grid = [&] (vector<double> &par, const shared_ptr<void> input) { return exp(m_log_likelihood_function_grid(par, input));};
140 }
141 
142 
143 // ============================================================================================
144 
145 
146 cbl::statistics::Likelihood::Likelihood (const std::shared_ptr<data::Data> data, const std::shared_ptr<Model> model, const LikelihoodType likelihood_type, const std::vector<size_t> x_index, const int w_index, const std::shared_ptr<ModelParameters> model_parameters, const double prec, const int Nres)
147 {
148  set_data(data);
149  set_model(model, model_parameters);
150  set_function(likelihood_type, x_index, w_index, prec, Nres);
151 }
152 
153 
154 // ============================================================================================
155 
156 
157 cbl::statistics::Likelihood::Likelihood (const std::shared_ptr<data::Data> data, const std::shared_ptr<Model> model, const Likelihood_function log_likelihood_function, const std::shared_ptr<ModelParameters> model_parameters)
158 {
159  set_data(data);
160  set_model(model, model_parameters);
161  set_log_function(log_likelihood_function);
162 }
163 
164 
165 // ============================================================================================
166 
167 
168 double cbl::statistics::Likelihood::operator () (std::vector<double> &pp) const
169 {
170  return exp(this->log(pp));
171 }
172 
173 
174 // ============================================================================================
175 
176 
177 double cbl::statistics::Likelihood::log (std::vector<double> &parameter) const
178 {
179  return (m_use_grid) ? m_log_likelihood_function_grid(parameter, m_likelihood_inputs) : m_log_likelihood_function(parameter, m_likelihood_inputs);
180 }
181 
182 
183 // ============================================================================================
184 
185 
186 void cbl::statistics::Likelihood::set_data (const shared_ptr<data::Data> data)
187 {
188  m_data = data;
189 }
190 
191 
192 // ============================================================================================
193 
194 
195 void cbl::statistics::Likelihood::set_model (const shared_ptr<Model> model, const shared_ptr<ModelParameters> model_parameters)
196 {
197  switch (model->dimension()) {
198  case Dim::_1D_:
199  m_model = make_shared<Model1D>(*static_pointer_cast<Model1D>(model));
200  break;
201  case Dim::_2D_:
202  m_model = make_shared<Model2D>(*static_pointer_cast<Model2D>(model));
203  break;
204  default:
205  ErrorCBL("the model dimension must be Dim::_1D_ or Dim::_2D_ !", "set_model", "Likelihood.cpp");
206  }
207 
208  m_model_parameters = (model_parameters == NULL) ? make_shared<LikelihoodParameters>(LikelihoodParameters(m_model->parameters()->nparameters(), m_model->parameters()->type(), m_model->parameters()->name())) : model_parameters;
209  m_model->set_parameters(m_model_parameters);
210 }
211 
212 
213 // ============================================================================================
214 
215 
217 {
218  m_use_grid = false;
219 }
220 
221 
222 // ============================================================================================
223 
224 
225 void cbl::statistics::Likelihood::set_grid (const int npoints, const std::vector<std::vector<double>> parameter_limits, const string file, const bool read)
226 {
227  if (m_likelihood_type==statistics::LikelihoodType::_NotSet_)
228  ErrorCBL("the Likelihood function is not set!", "set_grid", "Likelihood.cpp");
229 
230  unsigned int npar = m_model->parameters()->nparameters_free();
231 
232  if (npar==0)
233  ErrorCBL("there is no parameter free to vary!", "set_grid", "Likelihood.cpp");
234  if (npar>2)
235  ErrorCBL("wrong size for the vector of starting parameters!", "set_grid", "Likelihood.cpp");
236  if (parameter_limits.size()!=npar)
237  ErrorCBL("wrong size for the vector of parameter limits!", "set_grid", "Likelihood.cpp");
238 
239  coutCBL << "Computing tabulated likelihood!" << endl;
240 
241  if (npar==1) {
242  if (read)
243  m_set_grid_likelihood_1D(file);
244  else
245  m_set_grid_likelihood_1D(npoints, parameter_limits, file);
246  }
247  else if (npar==2) {
248  if (read)
249  m_set_grid_likelihood_2D(file);
250  else
251  m_set_grid_likelihood_2D(npoints, parameter_limits, file);
252  }
253 
254  m_use_grid = true;
255  coutCBL << "Done!" << endl;
256 }
257 
258 
259 // ============================================================================================
260 
261 
262 void cbl::statistics::Likelihood::set_function (const LikelihoodType likelihood_type, const std::vector<size_t> x_index, const int w_index, const double prec, const int Nres)
263 {
264  m_x_index = x_index;
265  m_w_index = w_index;
266  m_likelihood_type = likelihood_type;
267 
268  if (m_data->dataType()==data::DataType::_1D_ || m_data->dataType()==data::DataType::_1D_extra_) {
269 
270  switch (m_likelihood_type)
271  {
272 
273  case (LikelihoodType::_Gaussian_Error_):
274  for (int i=0; i<m_data->ndata(); i++) {
275  if (std::isinf(pow(1./m_data->error(i), 2)))
276  ErrorCBL("error("+conv(i, par::fINT)+") is too small, and 1/error("+conv(i, par::fINT)+")^2 = inf!", "set_function", "Likelihood.cpp");
277  else
278  continue;
279  }
280 
281  m_log_likelihood_function = &LogLikelihood_Gaussian_1D_error;
282  break;
283 
284  case (LikelihoodType::_Gaussian_Covariance_):
285  m_data->invert_covariance(prec, Nres);
286  m_log_likelihood_function = &LogLikelihood_Gaussian_1D_covariance;
287  break;
288 
289  case (LikelihoodType::_Poissonian_):
290  m_log_likelihood_function = &LogLikelihood_Poissonian_1D_;
291  break;
292 
293  default:
294  ErrorCBL("type of likelihood not recognized or not yet implemented!", "set_function", "Likelihood.cpp");
295  break;
296  }
297  }
298 
299  else if (m_data->dataType()==data::DataType::_2D_ || m_data->dataType()==data::DataType::_2D_extra_) {
300  switch (m_likelihood_type)
301  {
302 
303  case (LikelihoodType::_Gaussian_Error_):
304  for (int i=0; i<m_data->xsize(); i++) {
305  for (int j=0; j<m_data->ysize(); j++) {
306  if (std::isinf(pow(1./m_data->error(i, j), 2)))
307  ErrorCBL("error("+conv(i, par::fINT)+", "+conv(j, par::fINT)+") is too small, and 1/error("+conv(i, par::fINT)+", "+conv(j, par::fINT)+")^2 = inf!", "set_function", "Likelihood.cpp");
308  else
309  continue;
310  }
311  }
312  m_log_likelihood_function = &LogLikelihood_Gaussian_2D_error;
313  break;
314 
315  case (LikelihoodType::_Poissonian_):
316  m_log_likelihood_function = &LogLikelihood_Poissonian_2D_;
317  break;
318 
319  default:
320  ErrorCBL("type of likelihood not recognized or not yet implemented!", "set_function", "Likelihood.cpp");
321  break;
322  }
323  }
324  else
325  ErrorCBL("data type not recognized or not yet implemented!", "set_function", "Likelihood.cpp");
326 
327  m_likelihood_function = [&] (vector<double> &par, const shared_ptr<void> input) { return exp(m_log_likelihood_function(par, input)); };
328 }
329 
330 
331 // ============================================================================================
332 
333 
335 {
336  m_likelihood_type = LikelihoodType::_UserDefined_;
337  m_likelihood_function = likelihood_function;
338  m_log_likelihood_function = [&] (vector<double> &par, const shared_ptr<void> input) { return std::log(m_likelihood_function(par, input)); };
339 }
340 
341 
342 // ============================================================================================
343 
344 
346 {
347  m_likelihood_type = LikelihoodType::_UserDefined_;
348  m_log_likelihood_function = log_likelihood_function;
349  m_likelihood_function = [&] (vector<double> &par, const shared_ptr<void> input) { return exp(m_log_likelihood_function(par, input)); };
350 }
351 
352 
353 // ============================================================================================
354 
355 
356 void cbl::statistics::Likelihood::maximize (const std::vector<double> start, const std::vector<std::vector<double>> parameter_limits, const unsigned int max_iter, const double tol, const double epsilon)
357 {
358  if (m_likelihood_type==statistics::LikelihoodType::_NotSet_)
359  ErrorCBL("the Likelihood function is not set!", "maximize", "Likelihood.cpp");
360 
361  vector<double> starting_par;
362  vector<vector<double>> limits_par;
363 
364  unsigned int npar_free = m_model->parameters()->nparameters_free();
365  unsigned int npar = m_model->parameters()->nparameters();
366 
367  if (start.size()==npar_free && parameter_limits.size()==npar_free)
368  {
369  starting_par = start;
370  limits_par = parameter_limits;
371  }
372  else if (start.size()==npar && parameter_limits.size()==npar)
373  {
374  for (size_t i=0; i<npar_free; i++) {
375  starting_par.push_back(start[m_model->parameters()->free_parameter()[i]]);
376  limits_par.push_back(parameter_limits[m_model->parameters()->free_parameter()[i]]);
377  }
378  }
379  else
380  ErrorCBL("check your inputs!", "maximize", "Likelihood.cpp");
381 
382  m_likelihood_inputs = make_shared<STR_likelihood_inputs>(STR_likelihood_inputs(m_data, m_model, m_x_index, m_w_index));
383 
384  function<double(vector<double> &)> ll = [this] (vector<double> &pp)
385  {
386  return -m_log_likelihood_function(pp, m_likelihood_inputs);
387  };
388 
389  coutCBL << "Maximizing the likelihood..." << endl;
390  vector<double> result = cbl::wrapper::gsl::GSL_minimize_nD(ll, starting_par, limits_par, max_iter, tol, epsilon);
391  coutCBL << "Done!" << endl << endl;
392 
393  m_model_parameters->set_bestfit_values(result);
394  m_model_parameters->write_bestfit_info();
395  coutCBL << "log(Likelihood) = " << this->log(result) << endl << endl;
396 }
397 
398 
399 // ============================================================================================
400 
401 
402 void cbl::statistics::Likelihood::write_results (const string dir_output, const string file)
403 {
404  coutCBL << "Writing results of Likelihood minimization on " << dir_output+file << endl;
405  vector<double> bestFitValue = m_model_parameters->bestfit_value();
406  string name = LikelihoodTypeNames ()[static_cast<int>(m_likelihood_type)];
407  double likelihoodValue = this->log(bestFitValue);
408 
409  string mkdir = "mkdir -p "+dir_output;
410  if (system(mkdir.c_str())) {}
411 
412  ofstream fout(dir_output+file);
413 
414  fout << "#Parameters information" << endl;
415  fout << "nParameters = " << bestFitValue.size() << endl;
416 
417  for (size_t i=0; i<bestFitValue.size(); i++) {
418  fout << "par" << i+1 << "_name = " << m_model_parameters->name(i) << endl;
419  fout << "par" << i+1 << "_status = " << m_model_parameters->status(i) << endl;
420  fout << "par" << i+1 << "_bestfit_value = " << bestFitValue[i] << endl;
421  }
422 
423  fout << "#Likelihood information" << endl;
424  fout << "likelihoodType = " << name << endl;
425  fout << "logLikelihoodValue = " << likelihoodValue << endl;
426 
427  fout.clear(); fout.close();
428  coutCBL << "I wrote the file " << dir_output+file << endl;
429 }
430 
431 
432 // ============================================================================================
433 
434 
435 void cbl::statistics::Likelihood::write_model (const string output_dir, const string output_file, const std::vector<double> parameters, const std::vector<double> xx, const std::vector<double> yy)
436 {
437  switch (m_model->dimension()) {
438 
439  case Dim::_1D_:
440  {
441  vector<double> xvec = xx;
442  if (xx.size()==0)
443  xvec = m_data->xx();
444 
445  m_model->write(output_dir, output_file, xvec, parameters);
446  }
447  break;
448 
449  case Dim::_2D_:
450  {
451  vector<double> xvec = xx, yvec = yy;
452  if (xx.size()==0)
453  xvec = m_data->xx();
454  if (yy.size()==0)
455  yvec = m_data->yy();
456 
457  m_model->write(output_dir, output_file, xvec, yvec, parameters);
458  }
459  break;
460 
461  default:
462  ErrorCBL("the input dimension must be Dim::_1D_ or Dim::_2D_!", "write_model", "Likelihood.cpp");
463  }
464 }
#define coutCBL
CBL print message.
Definition: Kernel.h:734
The class LikelihoodParameters.
The class Likelihood.
The class FuncGrid2D.
Definition: FuncGrid.h:302
The class FuncGrid.
Definition: FuncGrid.h:55
The class LikelihoodParameters.
void write_model(const std::string output_dir, const std::string output_file, const std::vector< double > parameters, const std::vector< double > xx={}, const std::vector< double > yy={})
write the model at xx at bestfit
Definition: Likelihood.cpp:435
void write_results(const std::string dir_output, const std::string file)
write best-fit results on a file
Definition: Likelihood.cpp:402
void set_model(std::shared_ptr< Model > model=NULL, const std::shared_ptr< ModelParameters > model_parameters=NULL)
set the model for the likelihood analysis
Definition: Likelihood.cpp:195
void maximize(const std::vector< double > start, const std::vector< std::vector< double >> parameter_limits, const unsigned int max_iter=10000, const double tol=1.e-6, const double epsilon=1.e-3)
function that maximizes the likelihood, finds the best-fit parameters and stores them in the model
Definition: Likelihood.cpp:356
void m_set_grid_likelihood_1D(const int npoints, const std::vector< std::vector< double >> parameter_limits, const std::string output_file)
set the likelihood grid and write the grid on a file
Definition: Likelihood.cpp:43
double log(std::vector< double > &parameter) const
evaluate the logarithm of the likelihood for the input parameters
Definition: Likelihood.cpp:177
void set_grid(const int npoints, const std::vector< std::vector< double >> parameter_limits, const std::string file, const bool read=false)
set the likelihood function as a grid, to speed up computation: this only works for one or two free p...
Definition: Likelihood.cpp:225
double operator()(std::vector< double > &pp) const
evaluate the likelihood
Definition: Likelihood.cpp:168
void unset_grid()
set the likelihood function with internal values of LikelihoodType
Definition: Likelihood.cpp:216
void set_log_function(const LogLikelihood_function loglikelihood_function)
set the natural logarithm of the likelihood function
Definition: Likelihood.cpp:345
void m_set_grid_likelihood_2D(const int npoints, const std::vector< std::vector< double >> parameter_limits, const std::string output_file)
set the likelihood grid and write the grid on a file
Definition: Likelihood.cpp:72
void set_function(const LikelihoodType likelihood_type, const std::vector< size_t > x_index={0, 2}, const int w_index=-1, const double prec=1.e-10, const int Nres=-1)
set the likelihood type using the LikelihoodType object
Definition: Likelihood.cpp:262
void set_data(std::shared_ptr< data::Data > data)
set the data for the likelihood analysis
Definition: Likelihood.cpp:186
Likelihood()
default constructor
Definition: Likelihood.h:160
static const char fINT[]
conversion symbol for: int -> std::string
Definition: Constants.h:121
static const std::string defaultString
default std::string value
Definition: Constants.h:336
double LogLikelihood_Gaussian_1D_covariance(std::vector< double > &likelihood_parameter, const std::shared_ptr< void > input)
function to compute the gaussian loglikelihood
double LogLikelihood_Gaussian_1D_error(std::vector< double > &likelihood_parameter, const std::shared_ptr< void > input)
function to compute the Gaussian log-likelihood
double LogLikelihood_Poissonian_1D_(std::vector< double > &likelihood_parameter, const std::shared_ptr< void > input)
function to compute the poissonian loglikelihood
LikelihoodType
the type of likelihood function
std::vector< std::string > LikelihoodTypeNames()
return a vector containing the LikelihoodType names
double LogLikelihood_1D_interpolated(std::vector< double > &likelihood_parameter, const std::shared_ptr< void > input)
function to compute the loglikelihood on a grid
std::function< double(std::vector< double > &, const std::shared_ptr< void >)> Likelihood_function
definition of a function for computation of the Likelihood
double LogLikelihood_Poissonian_2D_(std::vector< double > &likelihood_parameter, const std::shared_ptr< void > input)
function to compute the poissonian loglikelihood
double LogLikelihood_2D_interpolated(std::vector< double > &likelihood_parameter, const std::shared_ptr< void > input)
function to compute the loglikelihood on a grid
double LogLikelihood_Gaussian_2D_error(std::vector< double > &likelihood_parameter, const std::shared_ptr< void > input)
function to compute the gaussian loglikelihood model with one parameter
std::vector< double > GSL_minimize_nD(FunctionDoubleVector func, const std::vector< double > start, const std::vector< std::vector< double >> ranges, const unsigned int max_iter=1000, const double tol=1.e-6, const double epsilon=0.1)
minimize the provided function using GSL procedure
Definition: GSLwrapper.cpp:497
The global namespace of the CosmoBolognaLib
Definition: CAMB.h:38
std::string conv(const T val, const char *fact)
convert a number to a std::string
Definition: Kernel.h:903
void read_matrix(const std::string file_matrix, std::vector< double > &xx, std::vector< double > &yy, std::vector< std::vector< double >> &matrix, const std::vector< int > col={})
read a matrix from file
Definition: Func.cpp:612
void checkIO(const std::ifstream &fin, const std::string file="NULL")
check if an input file can be opened
Definition: Kernel.cpp:160
int ErrorCBL(const std::string msg, const std::string functionCBL, const std::string fileCBL, const cbl::glob::ExitCode exitCode=cbl::glob::ExitCode::_error_)
throw an exception: it is used for handling exceptions inside the CosmoBolognaLib
Definition: Kernel.h:780
void read_vector(const std::string file_vector, std::vector< double > &xx, std::vector< double > &vector, const std::vector< int > col={})
read a vector from file
Definition: Func.cpp:582
the struct STR_likelihood_inputs