CosmoBolognaLib
Free Software C++/Python libraries for cosmological calculations
NumberCounts1D.cpp
Go to the documentation of this file.
1 /********************************************************************
2  * Copyright (C) 2015 by Federico Marulli *
3  * federico.marulli3@unibo.it *
4  * *
5  * This program is free software; you can redistribute it and/or *
6  * modify it under the terms of the GNU General Public License as *
7  * published by the Free Software Foundation; either version 2 of *
8  * the License, or (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public *
16  * License along with this program; if not, write to the Free *
17  * Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ********************************************************************/
20 
34 #include "NumberCounts1D.h"
35 #include "Data1D.h"
36 
37 using namespace std;
38 
39 using namespace cbl;
40 using namespace catalogue;
41 using namespace measure::numbercounts;
42 
43 
44 // ============================================================================
45 
46 
47 cbl::measure::numbercounts::NumberCounts1D::NumberCounts1D(const catalogue::Var var, const BinType bin_type, const catalogue::Catalogue data, const size_t nbins, const double minVar, const double maxVar, const double shift, const glob::HistogramType hist_type, const double fact)
48 {
49  if (bin_type!=cbl::BinType::_linear_ && bin_type!=cbl::BinType::_logarithmic_)
50  cbl::WarningMsgCBL("This constructor allows only a linear or logarithmic binning", "NumberCounts1D", "NumberCounts1D.cpp");
51 
52  m_Var = var;
53 
54  m_HistogramType = hist_type;
55  m_fact = fact;
56 
57  set_data(data);
58 
59  m_histogram = make_shared<glob::Histogram1D> (glob::Histogram1D ());
60 
61  double _minVar = (minVar>par::defaultDouble) ? minVar : m_data->Min(m_Var)*0.999;
62  double _maxVar = (maxVar>par::defaultDouble) ? maxVar : m_data->Max(m_Var)*1.001;
63 
64  m_histogram->set(nbins, _minVar, _maxVar, shift, bin_type);
65 }
66 
67 
68 
69 // ============================================================================
70 
71 
72 cbl::measure::numbercounts::NumberCounts1D::NumberCounts1D (const catalogue::Var var, const std::vector<double> vec_edges, const catalogue::Catalogue data, const glob::HistogramType hist_type, const double fact)
73 {
74  m_Var = var;
75 
76  m_HistogramType = hist_type;
77  m_fact = fact;
78 
79  set_data(data);
80 
81  m_histogram = make_shared<glob::Histogram1D> (glob::Histogram1D ());
82 
83  double _minVar = (vec_edges[0]>par::defaultDouble) ? vec_edges[0] : m_data->Min(m_Var)*0.999;
84  double _maxVar = (vec_edges[vec_edges.size()-1]>par::defaultDouble) ? vec_edges[vec_edges.size()-1] : m_data->Max(m_Var)*1.001;
85 
86  const size_t nbins = vec_edges.size()-1;
87  const double shift = 0.5;
88 
89  m_histogram->set(nbins, _minVar, _maxVar, shift, cbl::BinType::_custom_, vec_edges);
90 }
91 
92 
93 // ============================================================================
94 
95 
97 {
98  auto histogram = make_shared<glob::Histogram1D> (glob::Histogram1D ());
99  histogram->set(m_histogram->nbins(), m_histogram->minVar(), m_histogram->maxVar(), m_histogram->shift(), m_histogram->bin_type(), m_histogram->edges());
100 
101  histogram->put(m_data->var(m_Var), m_data->var(catalogue::Var::_Weight_));
102 
103  m_histogram = histogram;
104 
105  vector<double> bins = m_histogram->bins();
106  vector<double> edges = m_histogram->edges();
107  vector<double> hist = m_histogram->operator()(m_HistogramType, m_fact);
108  vector<double> error = m_histogram->error(m_HistogramType, m_fact);
109  vector<double> counts;
110  vector<double> counts_error;
111  vector<double> normalization;
112 
113  for (size_t i=0; i<m_histogram->nbins(); i++) {
114  counts.push_back(m_histogram->unweighted_counts(i));
115  counts_error.push_back(sqrt(static_cast<double>(counts[i])));
116  normalization.push_back(m_histogram->normalization(i, m_HistogramType, m_fact));
117  }
118 
119  vector<vector<double>> extra_info (7);
120  extra_info[0] = slice(edges, 0, m_histogram->nbins());
121  extra_info[1] = slice(edges, 1, m_histogram->nbins()+1);
122  extra_info[2] = counts;
123  extra_info[3] = counts_error;
124  extra_info[4] = normalization;
125  extra_info[5] = m_histogram->averaged_bins();
126  extra_info[6] = m_histogram->error_bins();
127 
128  std::vector<double> edges_in_mem_ = extra_info[0]; edges_in_mem_.emplace_back(extra_info[1][extra_info[1].size()-1]);
129  const std::vector<double> edges_in_mem = edges_in_mem_;
130  auto dataset = make_shared<data::Data1D_extra> (data::Data1D_extra(bins, hist, error, extra_info, edges_in_mem));
131 
132  return dataset;
133 }
134 
135 
136 // ============================================================================
137 
138 
139 shared_ptr<data::Data> cbl::measure::numbercounts::NumberCounts1D::m_measureJackknife (const string dir_output_resample)
140 {
141  m_dataset = m_measurePoisson();
142 
143  const int nRegions = m_data->nRegions();
144  vector<shared_ptr<glob::Histogram>> histo_JK(nRegions);
145 
146  for (int i=0; i<nRegions; i++){
147  histo_JK[i] = make_shared<glob::Histogram1D>(glob::Histogram1D());
148  histo_JK[i]->set(m_histogram->nbins(), m_histogram->minVar(), m_histogram->maxVar(), m_histogram->shift(), m_histogram->bin_type(), m_histogram->edges());
149  }
150 
151  vector<double> regions = m_data->var(catalogue::Var::_Region_);
152  vector<double> var = m_data->var(m_Var);
153  vector<double> weight = m_data->var(catalogue::Var::_Weight_);
154 
155  for (size_t i=0; i<var.size(); i++) {
156  int bin = m_histogram->digitize(var[i]);
157  for (int j=0; j<nRegions; j++)
158  if (j!=regions[i] and bin>-1)
159  histo_JK[j]->put(bin, weight[i], var[i]);
160  }
161 
162  //Write jackknife outputs
163 
164  if (dir_output_resample!=par::defaultString)
165  for (int i=0; i<nRegions; i++)
166  histo_JK[i]->write(dir_output_resample, "Jackknife_"+conv(i+1, par::fINT)+".dat", m_HistogramType, m_fact);
167 
168  vector<vector<double>> measures(histo_JK.size(), vector<double>(histo_JK[0]->nbins()));
169  vector<vector<double>> covariance;
170 
171  for (size_t i=0; i<histo_JK.size(); i++)
172  for (size_t j=0; j<histo_JK[i]->nbins(); j++)
173  measures[i][j] = histo_JK[i]->operator()(j, m_HistogramType, m_fact);
174 
175  covariance_matrix (measures, covariance, true);
176 
177  m_dataset->set_covariance(covariance);
178 
179  coutCBL << "Done!"<<endl;
180 
181  return m_dataset;
182 }
183 
184 // ============================================================================
185 
186 
187 shared_ptr<data::Data> cbl::measure::numbercounts::NumberCounts1D::m_measureBootstrap (const string dir_output_resample, const int nResamplings, const int seed)
188 {
189  m_dataset = m_measurePoisson();
190 
191  const int nRegions = m_data->nRegions();
192  vector<shared_ptr<glob::Histogram>> histo_BS(nResamplings);
193 
194  random::UniformRandomNumbers_Int ran(0., nRegions-1, seed);
195  vector<vector<int>> region_weights (nResamplings, vector<int> (nRegions, 0));
196 
197  for (int i=0; i<nResamplings; i++) {
198  histo_BS[i] = make_shared<glob::Histogram1D>(glob::Histogram1D());
199  histo_BS[i]->set(m_histogram->nbins(), m_histogram->minVar(), m_histogram->maxVar(), m_histogram->shift(), m_histogram->bin_type(), m_histogram->edges());
200  for (int n=0; n<nRegions; n++)
201  region_weights[i][ran()] ++;
202  }
203 
204  vector<double> regions = m_data->var(catalogue::Var::_Region_);
205  vector<double> var = m_data->var(m_Var);
206  vector<double> weight = m_data->var(catalogue::Var::_Weight_);
207 
208  for (size_t i=0; i<m_data->nObjects(); i++) {
209  int bin = m_histogram->digitize(var[i]);
210  if (bin>-1)
211  for (int j=0; j<nResamplings; j++)
212  histo_BS[j]->put(bin, weight[i]*region_weights[j][regions[i]], var[i]);
213  }
214 
215  vector<vector<double>> measures(histo_BS.size(), vector<double>(histo_BS[0]->nbins()));
216  vector<vector<double>> covariance;
217 
218  for (size_t i=0; i<histo_BS.size(); i++)
219  for (size_t j=0; j<histo_BS[i]->nbins(); j++)
220  measures[i][j] = histo_BS[i]->operator()(j, m_HistogramType, m_fact);
221 
222  covariance_matrix (measures, covariance, false);
223 
224  m_dataset->set_covariance(covariance);
225 
226  //Write bootstrap outputs
227 
228  if (dir_output_resample!=par::defaultString)
229  for (int i=0; i<nResamplings; i++)
230  histo_BS[i]->write(dir_output_resample, "Bootstrap_"+conv(i+1, par::fINT)+".dat", m_HistogramType, m_fact);
231 
232  return m_dataset;
233 }
234 
235 
236 // ============================================================================
237 
238 
239 void cbl::measure::numbercounts::NumberCounts1D::measure (const ErrorType errorType, const string dir_output_resample, const int nResamplings, const int seed, const bool conv, const double sigma)
240 {
241  switch (errorType) {
242  case (ErrorType::_Poisson_) :
243  m_dataset = m_measurePoisson();
244  break;
245  case (ErrorType::_Jackknife_) :
246  m_dataset = m_measureJackknife(dir_output_resample);
247  break;
248  case (ErrorType::_Bootstrap_) :
249  m_dataset = m_measureBootstrap(dir_output_resample, nResamplings, seed);
250  break;
251  default:
252  ErrorCBL("The input ErrorType is not allowed!", "measure", "NumberCounts1D.cpp");
253  }
254 
255  if (conv) m_dataset = Gaussian_smoothing(sigma);
256 }
257 
258 // ============================================================================
259 
260 
261 void cbl::measure::numbercounts::NumberCounts1D::compute_covariance (const vector<shared_ptr<glob::Histogram>> histo, const bool JK)
262 {
263  vector<vector<double>> measures(histo.size(), vector<double>(histo[0]->nbins()));
264  vector<vector<double>> cov;
265 
266  for (size_t i=0; i<histo.size(); i++)
267  for (size_t j=0; j<histo[i]->nbins(); j++)
268  measures[i][j] = histo[i]->operator()(j, m_HistogramType, m_fact);
269 
270  covariance_matrix (measures, cov, JK);
271  m_dataset->set_covariance(cov);
272 }
273 
274 
275 // ============================================================================
276 
277 
278 void cbl::measure::numbercounts::NumberCounts1D::write (const string dir, const string file, const int rank) const
279 {
280  (void)rank;
281 
282  string mkdir = "mkdir -p "+dir;
283  if (system(mkdir.c_str())) {}
284 
285  string header = "bin_center histogram error lower_edge upper_edge unweighted_counts unweighted_counts_error hist_normalization ave_bin err_bin";
286  m_dataset->write(dir, file, header, 4, 8, rank);
287 }
288 
289 
290 // ============================================================================
291 
292 
293 void cbl::measure::numbercounts::NumberCounts1D::write_covariance (const string dir, const string file) const
294 {
295  string mkdir = "mkdir -p "+dir;
296  if (system(mkdir.c_str())) {}
297 
298  m_dataset->write_covariance(dir, file, 8);
299 }
300 
301 
302 // ============================================================================
303 
304 
305 shared_ptr<data::Data> cbl::measure::numbercounts::NumberCounts1D::Gaussian_smoothing (const double sigma)
306 {
307  coutCBL << "The distribution is smoothed with a Gaussian filter" << endl;
308  double *func;
309  fftw_complex *func_tr;
310 
311  if (m_histogram->bin_type()!=BinType::_linear_) ErrorCBL("", "Gaussian_smoothing", "NumberCounts1D.cpp", glob::ExitCode::_workInProgress_);
312 
313  auto histogram = make_shared<glob::Histogram1D> (glob::Histogram1D ());
314  histogram->set(m_histogram->nbins(), m_histogram->minVar(), m_histogram->maxVar(), m_histogram->shift(), m_histogram->bin_type());
315 
316  int nbin = m_histogram->nbins();
317  int nbinN = 2*nbin;
318  int i1 = nbin*0.5, i2 = 1.5*nbin;
319 
320  int nbinK = 0.5*nbinN+1;
321 
322  func = fftw_alloc_real(nbinN);
323  func_tr = fftw_alloc_complex(nbinK);
324 
325  for (int i=0; i<nbinN; i++)
326  func[i] = 0;
327 
328  for (int i=i1; i<i2; i++)
329  func[i] = m_histogram->operator()(m_HistogramType, m_fact)[i-i1];
330 
331  for (int i=0; i<nbinK; i++) {
332  func_tr[i][0] = 0;
333  func_tr[i][1] = 0;
334  }
335 
336  fftw_plan real2complex;
337  real2complex = fftw_plan_dft_r2c_1d(nbinN, func, func_tr, FFTW_ESTIMATE);
338  fftw_execute(real2complex);
339  fftw_destroy_plan(real2complex);
340 
341  double delta = (m_histogram->maxVar()-m_histogram->minVar())/nbin;
342  double SS = pow(sigma,2);
343 
344  double fact = 2*par::pi/(nbinN*delta);
345  for (int i=0; i<nbinK; i++) {
346  double kk = i*fact;
347  func_tr[i][0] = func_tr[i][0]*exp(-0.5*kk*kk*SS);
348  func_tr[i][1] = func_tr[i][1]*exp(-0.5*kk*kk*SS);
349  }
350 
351  fftw_plan complex2real;
352  complex2real = fftw_plan_dft_c2r_1d(nbinN, func_tr, func, FFTW_ESTIMATE);
353  fftw_execute(complex2real);
354  fftw_destroy_plan(complex2real);
355 
356  vector<double> new_hist;
357 
358  for (int i=0; i<i2-i1; i++) new_hist.push_back(func[i+i1]/nbinN);
359 
360  auto dataset = make_shared<data::Data1D_extra> (data::Data1D_extra(m_histogram->bins(), new_hist, m_dataset->error(), m_dataset->extra_info()));
361 
362  //gsl_histogram_free(m_histogram);
363  fftw_cleanup();
364 
365  return dataset;
366 
367 }
The class Data1D.
#define coutCBL
CBL print message.
Definition: Kernel.h:734
The class NumberCounts1D.
The class Catalogue.
Definition: Catalogue.h:654
The class Data1D_extra.
Definition: Data1D_extra.h:52
void write(const std::string dir=par::defaultString, const std::string file=par::defaultString, const int rank=0) const override
write the measured number counts
std::shared_ptr< data::Data > m_measureJackknife(const std::string dir_output_resample=par::defaultString) override
measure the number counts with Jackknife covariance matrix
std::shared_ptr< data::Data > Gaussian_smoothing(const double sigma) override
apply a Gaussian filter to the distribution
void measure(const ErrorType errorType=ErrorType::_Poisson_, const std::string dir_output_resample=par::defaultString, const int nResamplings=0, const int seed=3213, const bool conv=false, const double sigma=0.) override
measure the number counts
void write_covariance(const std::string dir, const std::string file) const override
write the measured covariance matrix
void compute_covariance(const std::vector< std::shared_ptr< glob::Histogram >> hist, const bool JK) override
compute the covariance matrix
std::shared_ptr< data::Data > m_measureBootstrap(const std::string dir_output_resample=par::defaultString, const int nResamplings=0, const int seed=3213) override
measure the number counts with Bootstrap covariance matrix
std::shared_ptr< data::Data > m_measurePoisson() override
measure the number counts with Poisson errors
The class UniformRandomNumbers_Int.
static const char fINT[]
conversion symbol for: int -> std::string
Definition: Constants.h:121
static const std::string defaultString
default std::string value
Definition: Constants.h:336
static const double defaultDouble
default double value
Definition: Constants.h:348
static const double pi
: the ratio of a circle's circumference to its diameter
Definition: Constants.h:199
Var
the catalogue variables
Definition: Catalogue.h:70
HistogramType
the histogram type
Definition: Histogram.h:49
ErrorType
the two-point correlation function error type
Definition: Measure.h:59
The global namespace of the CosmoBolognaLib
Definition: CAMB.h:38
std::string conv(const T val, const char *fact)
convert a number to a std::string
Definition: Kernel.h:903
std::vector< T > slice(const std::vector< T > v, const int start=0, const int end=-1)
slice a std::vector from start to stop
Definition: Kernel.h:1947
int ErrorCBL(const std::string msg, const std::string functionCBL, const std::string fileCBL, const cbl::glob::ExitCode exitCode=cbl::glob::ExitCode::_error_)
throw an exception: it is used for handling exceptions inside the CosmoBolognaLib
Definition: Kernel.h:780
void covariance_matrix(const std::vector< std::vector< double >> mat, std::vector< std::vector< double >> &cov, const bool JK=false)
compute the covariance matrix from an input dataset
Definition: Func.cpp:761
BinType
the binning type
Definition: Kernel.h:505
@ _logarithmic_
logarithmic binning
@ _custom_
custom binning
@ _linear_
linear binning
void WarningMsgCBL(const std::string msg, const std::string functionCBL, const std::string fileCBL)
internal CBL warning message
Definition: Kernel.h:747