TRIQS/TRIQS 4.0.0
Researching Interacting Quantum Systems
Loading...
Searching...
No Matches
log_binning.hpp
Go to the documentation of this file.
1// Copyright (c) 2018 Commissariat à l'énergie atomique et aux énergies alternatives (CEA)
2// Copyright (c) 2018 Centre national de la recherche scientifique (CNRS)
3// Copyright (c) 2018-2022 Simons Foundation
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU General Public License for more details.
14//
15// You may obtain a copy of the License at
16// https://www.gnu.org/licenses/gpl-3.0.txt
17//
18// Authors: Philipp Dumitrescu, Olivier Parcollet, Nils Wentzell
19
24
25#pragma once
26
27#include "./concepts.hpp"
28#include "./mean_error.hpp"
29#include "./utils.hpp"
30
31#include <h5/h5.hpp>
32#include <mpi/mpi.hpp>
33#include <nda/h5.hpp>
34#include <nda/mpi.hpp>
35#include <nda/nda.hpp>
36
37#include <functional>
38#include <iterator>
39#include <stdexcept>
40#include <string>
41#include <tuple>
42#include <vector>
43
44namespace triqs::stat {
45
78 template <AccCompatible T> class log_binning {
79 public:
81 using value_t = T;
82
85
92 log_binning() = default;
93
100 log_binning(T const &sample, int max_n_bins) : max_n_bins_(max_n_bins) {
101 if (max_n_bins != -1 && max_n_bins < 1) throw std::runtime_error("log_binning: max_n_bins must be -1 (unbounded) or >= 1.");
102
103 if (max_n_bins > 0) {
104 mean_bins_.reserve(max_n_bins);
105 var_bins_.reserve(max_n_bins);
106 // bare bins start at block size 2; the size-1 bin is folded into mean_bins_/var_bins_ directly
107 bare_bins_.reserve(max_n_bins - 1);
108 bare_counts_.reserve(max_n_bins - 1);
109 }
110
111 mean_bins_.emplace_back(zeroed_sample(sample));
112 var_bins_.emplace_back(make_real(zeroed_sample(sample)));
113 if (max_n_bins != 1) {
114 bare_bins_.emplace_back(zeroed_sample(sample));
115 bare_counts_.push_back(0);
116 }
117 }
118
120 [[nodiscard]] long max_n_bins() const { return max_n_bins_; }
121
123 [[nodiscard]] bool is_unbounded() const { return max_n_bins_ == -1; }
124
126 [[nodiscard]] long n_bins() const { return var_bins_.size(); }
127
129 [[nodiscard]] long count() const { return count_; }
130
132 [[nodiscard]] auto effective_counts() const {
133 std::vector<long> res(n_bins());
134 for (int i = 0; i < n_bins(); ++i) res[i] = count_ >> i;
135 return res;
136 }
137
139 [[nodiscard]] auto const &mean_bins() const { return mean_bins_; }
140
142 [[nodiscard]] auto const &var_bins() const { return var_bins_; }
143
145 [[nodiscard]] auto const &bare_bins() const { return bare_bins_; }
146
148 [[nodiscard]] auto const &bare_counts() const { return bare_counts_; }
149
195 template <typename U> log_binning<T> &operator<<(U const &x) {
196 ++count_;
197
198 // seed shape from the first sample for default-constructed accumulators
199 if (mean_bins_.empty()) {
200 T s = x;
201 mean_bins_.emplace_back(zeroed_sample(s));
202 var_bins_.emplace_back(make_real(zeroed_sample(s)));
203 if (max_n_bins_ != 1) {
204 bare_bins_.emplace_back(zeroed_sample(s));
205 bare_counts_.push_back(0);
206 }
207 }
208
209 T x_m = (x - mean_bins_[0]);
210 var_bins_[0] += abs_square(x_m) * (static_cast<double>(count_ - 1) / count_);
211 mean_bins_[0] += x_m / count_;
212
213 if (max_n_bins_ == 1) return *this;
214
215 if (count_ == (1L << bare_bins_.size()) && (is_unbounded() || n_bins() < max_n_bins_)) {
216 mean_bins_.emplace_back(zeroed_sample(mean_bins_[0]));
217 var_bins_.emplace_back(zeroed_sample(var_bins_[0]));
218
219 // n_bins() has just been incremented; re-check whether another mean bin could still follow
220 if (is_unbounded() || n_bins() < max_n_bins_) {
221 bare_bins_.emplace_back(zeroed_sample(bare_bins_[0]));
222 bare_counts_.push_back(0);
223 }
224 }
225
226 bare_bins_[0] += x;
227 ++bare_counts_[0];
228
229 for (int i = 0; i < bare_bins_.size() && bare_counts_[i] == 2; ++i) {
230 if (i + 1 < bare_bins_.size()) {
231 bare_bins_[i + 1] += bare_bins_[i];
232 ++bare_counts_[i + 1];
233 }
234
235 auto const bc = static_cast<double>(1ul << (i + 1));
236 auto const k = count_ / bc;
237 x_m = (bare_bins_[i] / bc - mean_bins_[i + 1]);
238 var_bins_[i + 1] += abs_square(x_m) * ((k - 1) / k);
239 mean_bins_[i + 1] += x_m / k;
240
241 bare_counts_[i] = 0;
242 bare_bins_[i] = 0;
243 }
244
245 return *this;
246 }
247
274 [[nodiscard]] auto mean_errors_and_taus(int min_samples = 2) const {
275 return calculate_mean_errors_and_taus(mean_bins_, var_bins_, effective_counts(), min_samples);
276 }
277
297 [[nodiscard]] auto mean_errors_and_taus(mpi::communicator c, int min_samples = 2) const {
298 auto [mean_red, var_red, nsamples_red] = mpi_all_reduce(c);
299 return calculate_mean_errors_and_taus(mean_red, var_red, nsamples_red, min_samples);
300 }
301
314 [[nodiscard]] auto mpi_all_reduce(mpi::communicator c) const {
315 auto mean_red = mean_bins_;
316 auto var_red = var_bins_;
317 auto nsamples_red = effective_counts();
318
319 // make sure all ranks have the same number of bins
320 auto const nbins = mpi::all_reduce(n_bins(), c, MPI_MAX);
321 mean_red.resize(nbins, zeroed_sample(mean_bins_[0]));
322 var_red.resize(nbins, zeroed_sample(var_bins_[0]));
323 nsamples_red.resize(nbins, 0);
324
325 for (int i = 0; i < nbins; ++i) {
326 auto const ns = nsamples_red[i];
327 mpi::all_reduce_in_place(nsamples_red[i], c);
328
329 // skip the per-rank reweighting (and its 0/0) if no rank has samples at this bin level
330 if (nsamples_red[i] > 0) mean_red[i] *= static_cast<double>(ns) / static_cast<double>(nsamples_red[i]);
331 mpi::all_reduce_in_place(mean_red[i], c);
332
333 if (i < mean_bins_.size()) var_red[i] += ns * abs_square(mean_bins_[i] - mean_red[i]);
334 mpi::all_reduce_in_place(var_red[i], c);
335 }
336
337 return std::make_tuple(mean_red, var_red, nsamples_red);
338 }
339
341 [[nodiscard]] static std::string hdf5_format() { return "log_binning"; }
342
350 friend void h5_write(h5::group g, std::string const &name, log_binning const &acc) {
351 auto gr = g.create_group(name);
352 h5::write_hdf5_format(gr, acc);
353 h5::write(gr, "max_n_bins", acc.max_n_bins_);
354 h5::write(gr, "count", acc.count_);
355 h5::write(gr, "mean_bins", acc.mean_bins_);
356 h5::write(gr, "var_bins", acc.var_bins_);
357 h5::write(gr, "bare_bins", acc.bare_bins_);
358 h5::write(gr, "bare_counts", acc.bare_counts_);
359 }
360
368 friend void h5_read(h5::group g, std::string const &name, log_binning &acc) {
369 auto gr = g.open_group(name);
370 h5::assert_hdf5_format(gr, acc);
371 h5::read(gr, "max_n_bins", acc.max_n_bins_);
372 h5::read(gr, "count", acc.count_);
373 h5::read(gr, "mean_bins", acc.mean_bins_);
374 h5::read(gr, "var_bins", acc.var_bins_);
375 h5::read(gr, "bare_bins", acc.bare_bins_);
376 h5::read(gr, "bare_counts", acc.bare_counts_);
377
378 if (acc.max_n_bins_ != -1 && acc.max_n_bins_ < 1)
379 throw std::runtime_error("h5_read: invalid log_binning state (max_n_bins must be -1 or >= 1).");
380 }
381
382 private:
383 // Get the overall mean as well as the standard error, integrated autocorrelation time and effective number of
384 // samples for each bin with at least a given minimum number of samples.
385 [[nodiscard]] auto calculate_mean_errors_and_taus(std::vector<value_t> const &mk, std::vector<real_t> const &qk,
386 std::vector<long> const &nsamples, int min_samples) const {
387 // early return for empty vectors
388 if (mk.empty()) return std::make_tuple(value_t{}, std::vector<real_t>{}, std::vector<real_t>{}, std::vector<long>{});
389
390 // only consider bins with at least min_samples effective samples
391 auto const size = std::distance(nsamples.begin(), std::ranges::upper_bound(nsamples, min_samples, std::greater{}));
392 auto effs = std::vector(nsamples.begin(), nsamples.begin() + size);
393 auto errs = std::vector(qk.begin(), qk.begin() + size);
394 auto taus = std::vector<real_t>(size);
395
396 // calculate errors and taus
397 if (size > 0) {
398 real_t var0 = qk[0] / (static_cast<double>(nsamples[0]) * static_cast<double>(nsamples[0] - 1));
399 for (int i = 0; i < size; ++i) {
400 real_t var = errs[i] / (static_cast<double>(effs[i]) * static_cast<double>(effs[i] - 1));
401 errs[i] = nda::sqrt(var);
402 taus[i] = tau_estimate_from_vars(var, var0);
403 }
404 }
405 return std::make_tuple(mk[0], errs, taus, effs);
406 }
407
408 long max_n_bins_{-1};
409 long count_{0};
410 std::vector<value_t> mean_bins_{};
411 std::vector<real_t> var_bins_{};
412 std::vector<value_t> bare_bins_{};
413 std::vector<int> bare_counts_{};
414 };
415
416} // namespace triqs::stat
int size() const
Get the total number of blocks.
Logarithmic binning accumulator.
auto const & var_bins() const
Get the bins that store the sum of the squared deviations from the mean.
friend void h5_write(h5::group g, std::string const &name, log_binning const &acc)
Write a triqs::stat::log_binning accumulator to HDF5.
long max_n_bins() const
Get the maximum number of bins.
auto const & bare_counts() const
Get the number of samples currently in the bare bins.
get_real_t< T > real_t
Real type of the accumulated data type.
auto const & bare_bins() const
Get the bins that are used for accumulating the bare samples.
long n_bins() const
Get the current number of bins.
log_binning(T const &sample, int max_n_bins)
Construct a logarithmic binning accumulator with a dummy sample and the maximum number of bins.
auto effective_counts() const
Get the number of effective samples accumulated in each bin.
bool is_unbounded() const
True if the accumulator was constructed with an unbounded number of bins.
T value_t
Type of the accumulated data.
auto mean_errors_and_taus(int min_samples=2) const
Get the overall mean of the data as well as the estimated standard error, integrated autocorrelation ...
friend void h5_read(h5::group g, std::string const &name, log_binning &acc)
Read a triqs::stat::log_binning accumulator from HDF5.
log_binning< T > & operator<<(U const &x)
Accumulate a new sample.
long count() const
Get the total number of accumulated samples.
static std::string hdf5_format()
Get the HDF5 format string.
auto mpi_all_reduce(mpi::communicator c) const
Allreduce the accumulated data over multiple MPI processes.
auto const & mean_bins() const
Get the bins that store the mean data.
auto mean_errors_and_taus(mpi::communicator c, int min_samples=2) const
Get the overall mean of the data as well as the estimated standard error, integrated autocorrelation ...
log_binning()=default
Default construct a logarithmic binning accumulator.
auto tau_estimate_from_vars(auto const &var, auto const &var0)
Compute an estimate for the integrated auto-correlation time from variances.
std::remove_cvref_t< decltype(make_real(std::declval< T >()))> get_real_t
Type trait to get the type that would be returned by triqs::stat::make_real.
Definition utils.hpp:54
auto zeroed_sample(T const &sample)
Get a sample with all elements set to zero.
Definition utils.hpp:66
auto abs_square(auto const &x)
Calculate the (elementwise) absolute square of an array/view/scalar.
Definition utils.hpp:107
auto make_real(T &&t)
Make a given object real and regular.
Definition utils.hpp:51
Provides functions to calculate the arithmetic mean and standard error of a range of values.
Provides various concepts for the Utilities.
Provides various utilities for the Utilities.