CosmoBolognaLib
Free Software C++/Python libraries for cosmological calculations
NumberCounts2D.cpp
Go to the documentation of this file.
1 /********************************************************************
2  * Copyright (C) 2015 by Federico Marulli *
3  * federico.marulli3@unibo.it *
4  * *
5  * This program is free software; you can redistribute it and/or *
6  * modify it under the terms of the GNU General Public License as *
7  * published by the Free Software Foundation; either version 2 of *
8  * the License, or (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public *
16  * License along with this program; if not, write to the Free *
17  * Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ********************************************************************/
20 
34 #include "NumberCounts2D.h"
35 #include "Data2D_extra.h"
36 
37 using namespace std;
38 
39 using namespace cbl;
40 using namespace catalogue;
41 using namespace measure::numbercounts;
42 
43 
44 // ============================================================================
45 
46 
47 cbl::measure::numbercounts::NumberCounts2D::NumberCounts2D (const catalogue::Var var1, const BinType bin_type1, const catalogue::Var var2, const BinType bin_type2, const catalogue::Catalogue data, const size_t nbins1, const size_t nbins2, const double minVar1, const double maxVar1, const double minVar2, const double maxVar2, const double shift1, const double shift2, const glob::HistogramType hist_type, const double fact)
48 {
49  m_Var1 = var1;
50  m_Var2 = var2;
51 
52  m_HistogramType = hist_type;
53  m_fact = fact;
54 
55  set_data(data);
56 
57  m_histogram = make_shared<glob::Histogram2D> (glob::Histogram2D());
58 
59  double _minVar1 = (minVar1>par::defaultDouble) ? minVar1 : m_data->Min(m_Var1)*0.999;
60  double _maxVar1 = (maxVar1>par::defaultDouble) ? maxVar1 : m_data->Max(m_Var1)*1.001;
61  double _minVar2 = (minVar2>par::defaultDouble) ? minVar2 : m_data->Min(m_Var2)*0.999;
62  double _maxVar2 = (maxVar2>par::defaultDouble) ? maxVar2 : m_data->Max(m_Var2)*1.001;
63 
64  m_histogram->set(nbins1, nbins2, _minVar1, _maxVar1, _minVar2, _maxVar2, shift1, shift2, bin_type1, bin_type2);
65 }
66 
67 
68 // ============================================================================
69 
70 
71 cbl::measure::numbercounts::NumberCounts2D::NumberCounts2D (const catalogue::Var var1, const catalogue::Var var2, const std::vector<double> vec_edges1, const std::vector<double> vec_edges2, const catalogue::Catalogue data, const glob::HistogramType hist_type, const double fact)
72 {
73  m_Var1 = var1;
74  m_Var2 = var2;
75 
76  m_HistogramType = hist_type;
77  m_fact = fact;
78 
79  set_data(data);
80 
81  m_histogram = make_shared<glob::Histogram2D> (glob::Histogram2D());
82 
83  double _minVar1 = (vec_edges1[0]>par::defaultDouble) ? vec_edges1[0] : m_data->Min(m_Var1)*0.999;
84  double _maxVar1 = (vec_edges1[vec_edges1.size()-1]>par::defaultDouble) ? vec_edges1[vec_edges1.size()-1] : m_data->Max(m_Var1)*1.001;
85  double _minVar2 = (vec_edges2[0]>par::defaultDouble) ? vec_edges2[0] : m_data->Min(m_Var2)*0.999;
86  double _maxVar2 = (vec_edges2[vec_edges2.size()-1]>par::defaultDouble) ? vec_edges2[vec_edges2.size()-1] : m_data->Max(m_Var2)*1.001;
87 
88  const size_t nbins1 = vec_edges1.size()-1, nbins2 = vec_edges2.size()-1;
89  const double shift1 = 0.5, shift2 = 0.5;
90 
91  m_histogram->set(nbins1, nbins2, _minVar1, _maxVar1, _minVar2, _maxVar2, shift1, shift2, cbl::BinType::_custom_, cbl::BinType::_custom_, vec_edges1, vec_edges2);
92 }
93 
94 
95 // ============================================================================
96 
97 
99 {
100  auto histogram = make_shared<glob::Histogram2D> (glob::Histogram2D ());
101  histogram->set(m_histogram->nbins1(), m_histogram->nbins2(), m_histogram->minVar1(), m_histogram->maxVar1(), m_histogram->minVar2(), m_histogram->maxVar2(), m_histogram->shift1(), m_histogram->shift2(), m_histogram->bin_type1(), m_histogram->bin_type2(), m_histogram->edges1(), m_histogram->edges2());
102 
103  histogram->put(m_data->var(m_Var1), m_data->var(m_Var2), m_data->var(catalogue::Var::_Weight_));
104 
105  m_histogram = histogram;
106 
107  vector<double> bins1 = m_histogram->bins1();
108  vector<double> edges1 = m_histogram->edges1();
109 
110  vector<double> bins2 = m_histogram->bins2();
111  vector<double> edges2 = m_histogram->edges2();
112 
113  vector<double> hist(m_histogram->nbins1()*m_histogram->nbins2());
114  vector<double> error(m_histogram->nbins1()*m_histogram->nbins2());
115 
116  vector<vector<double>> ave_var1 = m_histogram->averaged_bins1();
117  vector<vector<double>> ave_var2 = m_histogram->averaged_bins2();
118  vector<vector<double>> err_var1 = m_histogram->error_bins1();
119  vector<vector<double>> err_var2 = m_histogram->error_bins2();
120 
121  vector<vector<double>> extra_info(11);
122 
123  for (size_t i=0; i<m_histogram->nbins1(); i++)
124  for (size_t j=0; j<m_histogram->nbins2(); j++) {
125  hist[i*m_histogram->nbins2()+j] = m_histogram->operator()(i, j, m_HistogramType, m_fact);
126  error[i*m_histogram->nbins2()+j] = m_histogram->error(i, j, m_HistogramType, m_fact);
127  extra_info[0].push_back(edges1[i]);
128  extra_info[1].push_back(edges1[i+1]);
129  extra_info[2].push_back(edges2[j]);
130  extra_info[3].push_back(edges2[j+1]);
131  extra_info[4].push_back(m_histogram->unweighted_counts(i, j));
132  extra_info[5].push_back(sqrt(m_histogram->unweighted_counts(i, j)));
133  extra_info[6].push_back(m_histogram->normalization(i, j, m_HistogramType, m_fact));
134  extra_info[7].push_back(ave_var1[i][j]);
135  extra_info[8].push_back(err_var1[i][j]);
136  extra_info[9].push_back(ave_var2[i][j]);
137  extra_info[10].push_back(err_var2[i][j]);
138  }
139 
140  const vector<double> x_edges = edges1;
141  const vector<double> y_edges = edges2;
142  auto dataset = make_shared<data::Data2D_extra> (data::Data2D_extra(bins1, bins2, hist, error, extra_info, x_edges, y_edges));
143 
144  return dataset;
145 }
146 
147 
148 // ============================================================================
149 
150 
151 shared_ptr<data::Data> cbl::measure::numbercounts::NumberCounts2D::m_measureJackknife (const string dir_output_resample)
152 {
153  m_dataset = m_measurePoisson();
154 
155  const int nRegions = m_data->nRegions();
156  vector<shared_ptr<glob::Histogram>> histo_JK(nRegions);
157 
158  for (int i=0; i<nRegions; i++) {
159  histo_JK[i] = make_shared<glob::Histogram2D>(glob::Histogram2D());
160  histo_JK[i]->set(m_histogram->nbins1(), m_histogram->nbins2(), m_histogram->minVar1(), m_histogram->maxVar1(), m_histogram->minVar2(), m_histogram->maxVar2(), m_histogram->shift1(), m_histogram->shift2(), m_histogram->bin_type1(), m_histogram->bin_type2(), m_histogram->edges1(), m_histogram->edges2());
161  }
162 
163  vector<double> regions = m_data->var(catalogue::Var::_Region_);
164  vector<double> var1 = m_data->var(m_Var1);
165  vector<double> var2 = m_data->var(m_Var2);
166  vector<double> weight = m_data->var(catalogue::Var::_Weight_);
167 
168  for (size_t i=0; i<m_data->nObjects(); i++) {
169  vector<int> bin = m_histogram->digitize(var1[i], var2[i]);
170  for (int j=0; j<nRegions; j++)
171  if (j!=regions[i] and bin[0]>-1 and bin[1]>-1)
172  histo_JK[regions[i]]->put(bin[0], bin[1], weight[i], var1[i], var2[i]);
173  }
174 
175  compute_covariance(histo_JK, true);
176 
177  // write jackknife outputs
178  if (dir_output_resample!=par::defaultString)
179  for (int i=0; i<nRegions; i++)
180  histo_JK[i]->write(dir_output_resample, "Jackknife_"+conv(i+1, par::fINT)+".dat", m_HistogramType, m_fact);
181 
182  return m_dataset;
183 }
184 
185 
186 // ============================================================================
187 
188 
189 shared_ptr<data::Data> cbl::measure::numbercounts::NumberCounts2D::m_measureBootstrap (const string dir_output_resample, const int nResamplings, const int seed)
190 {
191  m_dataset = m_measurePoisson();
192 
193  const int nRegions = m_data->nRegions();
194  vector<shared_ptr<glob::Histogram>> histo_BS(nResamplings);
195 
196  random::UniformRandomNumbers_Int ran(0., nRegions-1, seed);
197  vector<vector<int>> region_weights (nResamplings, vector<int> (nRegions, 0));
198 
199  for (int i=0; i<nResamplings; i++) {
200  histo_BS[i] = make_shared<glob::Histogram2D>(glob::Histogram2D());
201  histo_BS[i]->set(m_histogram->nbins1(), m_histogram->nbins2(), m_histogram->minVar1(), m_histogram->maxVar1(), m_histogram->minVar2(), m_histogram->maxVar2(), m_histogram->shift1(), m_histogram->shift2(), m_histogram->bin_type1(), m_histogram->bin_type2(), m_histogram->edges1(), m_histogram->edges2());
202  for (int n=0; n<nRegions; n++)
203  region_weights[i][ran()] ++;
204  }
205 
206  vector<double> regions = m_data->var(catalogue::Var::_Region_);
207  vector<double> var1 = m_data->var(m_Var1);
208  vector<double> var2 = m_data->var(m_Var2);
209  vector<double> weight = m_data->var(catalogue::Var::_Weight_);
210 
211  for (size_t i=0; i<m_data->nObjects(); i++) {
212  vector<int> bin = m_histogram->digitize(var1[i], var2[i]);
213  if (bin[0]>-1 and bin[1]>-1)
214  for (int j=0; j<nResamplings; j++)
215  histo_BS[j]->put (bin[0], bin[1], weight[i]*region_weights[j][regions[i]], var1[i], var2[i]);
216  }
217 
218  compute_covariance(histo_BS, false);
219 
220  // write bootstrap outputs
221  if (dir_output_resample!=par::defaultString)
222  for (int i=0; i<nResamplings; i++)
223  histo_BS[i]->write(dir_output_resample, "Bootstraps_"+conv(i+1, par::fINT)+".dat", m_HistogramType, m_fact);
224 
225  return m_dataset;
226 }
227 
228 
229 // ============================================================================
230 
231 void cbl::measure::numbercounts::NumberCounts2D::measure (const ErrorType errorType, const string dir_output_resample, const int nResamplings, const int seed, const bool conv, const double sigma)
232 {
233  switch (errorType) {
234  case (ErrorType::_Poisson_) :
235  m_dataset = m_measurePoisson();
236  break;
237  case (ErrorType::_Jackknife_) :
238  m_dataset = m_measureJackknife(dir_output_resample);
239  break;
240  case (ErrorType::_Bootstrap_) :
241  m_dataset = m_measureBootstrap(dir_output_resample, nResamplings, seed);
242  break;
243  default:
244  ErrorCBL("the input ErrorType is not allowed!", "measure", "NumberCounts2D.cpp");
245  }
246 
247  if (conv) m_dataset = Gaussian_smoothing(sigma);
248 
249 }
250 
251 // ============================================================================
252 
253 
254 void cbl::measure::numbercounts::NumberCounts2D::compute_covariance (const vector<shared_ptr<glob::Histogram>> histo, const bool JK)
255 {
256  vector<vector<double>> measures(histo.size(), vector<double>(histo[0]->nbins1()*histo[0]->nbins2(),0));
257  vector<vector<double>> cov;
258 
259  for (size_t i=0; i<histo.size(); i++)
260  for (size_t j=0; j<histo[i]->nbins1(); j++)
261  for (size_t k=0; k<histo[i]->nbins2(); k++)
262  measures[i][j*histo[0]->nbins2()+k] = histo[i]->operator()(j, k, m_HistogramType, m_fact);
263 
264  covariance_matrix(measures, cov, JK);
265  m_dataset->set_covariance(cov);
266 }
267 
268 
269 // ============================================================================
270 
271 
272 void cbl::measure::numbercounts::NumberCounts2D::write (const string dir, const string file, const int rank) const
273 {
274  string mkdir = "mkdir -p "+dir;
275  if (system(mkdir.c_str())) {}
276 
277  string header = "# bin1_center bin2_center histogram error bin1_low bin1_up bin2_low bin2_up unweighted_counts unweighted_counts_error hist_normalization bin1_mean bin1_std bin2_mean bin2_std";
278 
279  m_dataset->write(dir, file, header, false, 4, 8, rank);
280 }
281 
282 
283 // ============================================================================
284 
285 
286 void cbl::measure::numbercounts::NumberCounts2D::write_covariance (const string dir, const string file) const
287 {
288  string mkdir = "mkdir -p "+dir;
289  if (system(mkdir.c_str())) {}
290  m_dataset->write_covariance(dir, file, 8);
291 }
292 
293 // ============================================================================
294 
295 shared_ptr<data::Data> cbl::measure::numbercounts::NumberCounts2D::Gaussian_smoothing (const double sigma)
296 {
297  (void)sigma;
298  ErrorCBL("Gaussian smoothing is not implemented yet in 2D...", "Gaussian_smoothing", "NumberCounts2D.cpp", glob::ExitCode::_workInProgress_);
299 
300  return m_dataset;
301 }
The class Data2D_extra.
The class NumberCounts2D.
The class Catalogue.
Definition: Catalogue.h:654
The class Data2D_extra.
Definition: Data2D_extra.h:52
The class Histogram2D.
Definition: Histogram.h:1210
void compute_covariance(const std::vector< std::shared_ptr< glob::Histogram >> histo, const bool JK) override
compute the covariance matrix
void write_covariance(const std::string dir, const std::string file) const override
write measured covariance matrix
void write(const std::string dir=par::defaultString, const std::string file=par::defaultString, const int rank=0) const override
write the measured number counts
std::shared_ptr< data::Data > Gaussian_smoothing(const double sigma) override
apply a Gaussian filter to the distribution
void measure(const ErrorType errorType=ErrorType::_Poisson_, const std::string dir_output_resample=par::defaultString, const int nResamplings=0, const int seed=3213, const bool conv=false, const double sigma=0.) override
measure the number counts
std::shared_ptr< data::Data > m_measurePoisson() override
measure the number counts with Poisson errors
std::shared_ptr< data::Data > m_measureBootstrap(const std::string dir_output_resample=par::defaultString, const int nResamplings=0, const int seed=3213) override
measure the number counts with Bootstrap covariance matrix
std::shared_ptr< data::Data > m_measureJackknife(const std::string dir_output_resample=par::defaultString) override
measure the number counts with Jackknife covariance matrix
The class UniformRandomNumbers_Int.
static const char fINT[]
conversion symbol for: int -> std::string
Definition: Constants.h:121
static const std::string defaultString
default std::string value
Definition: Constants.h:336
static const double defaultDouble
default double value
Definition: Constants.h:348
Var
the catalogue variables
Definition: Catalogue.h:70
HistogramType
the histogram type
Definition: Histogram.h:49
ErrorType
the two-point correlation function error type
Definition: Measure.h:59
The global namespace of the CosmoBolognaLib
Definition: CAMB.h:38
std::string conv(const T val, const char *fact)
convert a number to a std::string
Definition: Kernel.h:903
int ErrorCBL(const std::string msg, const std::string functionCBL, const std::string fileCBL, const cbl::glob::ExitCode exitCode=cbl::glob::ExitCode::_error_)
throw an exception: it is used for handling exceptions inside the CosmoBolognaLib
Definition: Kernel.h:780
void covariance_matrix(const std::vector< std::vector< double >> mat, std::vector< std::vector< double >> &cov, const bool JK=false)
compute the covariance matrix from an input dataset
Definition: Func.cpp:761
BinType
the binning type
Definition: Kernel.h:505
@ _custom_
custom binning