TRIQS/TRIQS 4.0.0
Researching Interacting Quantum Systems
Loading...
Searching...
No Matches
lin_binning.hpp
Go to the documentation of this file.
1// Copyright (c) 2018 Commissariat à l'énergie atomique et aux énergies alternatives (CEA)
2// Copyright (c) 2018 Centre national de la recherche scientifique (CNRS)
3// Copyright (c) 2018-2022 Simons Foundation
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU General Public License for more details.
14//
15// You may obtain a copy of the License at
16// https://www.gnu.org/licenses/gpl-3.0.txt
17//
18// Authors: Philipp Dumitrescu, Olivier Parcollet, Nils Wentzell
19
24
25#pragma once
26
27#include "./concepts.hpp"
28#include "./mean_error.hpp"
29#include "./utils.hpp"
30
31#include <h5/h5.hpp>
32#include <nda/h5.hpp>
33#include <nda/nda.hpp>
34
35#include <functional>
36#include <iterator>
37#include <stdexcept>
38#include <string>
39#include <tuple>
40#include <utility>
41#include <vector>
42
43namespace triqs::stat {
44
60 template <AccCompatible T> auto compress_bins(std::vector<T> const &bins, int fac) {
61 // early return if the compression factor < 2
62 if (fac < 2) return bins;
63
64 // prepare the compressed bins
65 int const nbins = bins.size() / fac;
66 std::vector<T> res(nbins);
67
68 // compress bins
69 for (int i = 0; i < nbins; ++i) {
70 res[i] = bins[fac * i];
71 for (int j = 1; j < fac; ++j) { res[i] += bins[fac * i + j]; }
72 res[i] /= fac;
73 }
74 return res;
75 }
76
108 template <AccCompatible T> class lin_binning {
109 public:
111 using value_t = T;
112
115
117 using callback_t = std::function<void(lin_binning const &)>;
118
125 lin_binning() = default;
126
138 lin_binning(T const &sample, long max_n_bins, long bin_capacity, callback_t callback = {})
139 : max_n_bins_(max_n_bins), callback_(std::move(callback)) {
140 if (max_n_bins != -1 && max_n_bins < 2) throw std::runtime_error("lin_binning: max_n_bins must be -1 (unbounded) or >= 2.");
141 if (bin_capacity < 1) throw std::runtime_error("lin_binning: bin_capacity must be >= 1.");
142 bin_capacity_ = bin_capacity;
143
144 if (max_n_bins > 0) mean_bins_.reserve(max_n_bins);
145 mean_bins_.emplace_back(zeroed_sample(sample));
146 mean_ = zeroed_sample(sample);
147 var_ = make_real(zeroed_sample(sample));
148 }
149
151 [[nodiscard]] long max_n_bins() const { return max_n_bins_; }
152
154 [[nodiscard]] bool is_unbounded() const { return max_n_bins_ == -1; }
155
157 [[nodiscard]] long n_bins() const { return mean_bins_.size(); }
158
160 [[nodiscard]] long n_full_bins() const { return (last_bin_count_ == bin_capacity_ ? n_bins() : n_bins() - 1); }
161
163 [[nodiscard]] auto const &bins() const { return mean_bins_; }
164
166 [[nodiscard]] std::vector<T> full_bins() const {
167 return (last_bin_count_ == bin_capacity_ ? mean_bins_ : std::vector<T>(mean_bins_.begin(), std::prev(mean_bins_.end())));
168 }
169
171 [[nodiscard]] long bin_capacity() const { return bin_capacity_; }
172
174 [[nodiscard]] long last_bin_count() const { return last_bin_count_; }
175
177 [[nodiscard]] long count() const { return count_; }
178
180 [[nodiscard]] auto const &mean() const { return mean_; }
181
183 [[nodiscard]] auto const &var_data() const { return var_; }
184
186 [[nodiscard]] auto const &callback() const { return callback_; }
187
214 template <typename U> auto &operator<<(U &&x) {
215 ++count_;
216
217 // seed shape from the first sample for default-constructed accumulators
218 if (mean_bins_.empty()) {
219 T s = x;
220 mean_bins_.emplace_back(zeroed_sample(s));
221 mean_ = zeroed_sample(s);
222 var_ = make_real(zeroed_sample(s));
223 }
224
225 if (!is_unbounded() && n_full_bins() == max_n_bins_) {
226 if (callback_) callback_(*this);
227 compress(2);
228 }
229
230 var_ += abs_square(x - mean_) * (static_cast<double>(count_ - 1) / count_);
231 mean_ += (x - mean_) / count_;
232
233 if (last_bin_count_ == bin_capacity_) {
234 mean_bins_.emplace_back(std::forward<U>(x));
235 last_bin_count_ = 1;
236 } else {
237 ++last_bin_count_;
238 mean_bins_.back() += (x - mean_bins_.back()) / last_bin_count_;
239 }
240
241 return *this;
242 }
243
258 void compress(int fac) {
259 // early return if there is only 1 bin or if the compression factor < 2
260 if (n_bins() == 1 || fac < 2) return;
261
262 // compress full bins into new full bins
263 int nbins = n_full_bins() / fac;
264 for (int i = 0; i < nbins; ++i) {
265 mean_bins_[i] = mean_bins_[fac * i];
266 for (int j = 1; j < fac; ++j) { mean_bins_[i] += mean_bins_[fac * i + j]; }
267 mean_bins_[i] /= fac;
268 }
269
270 // handle any left over bins
271 int const left_over = n_bins() - fac * nbins;
272 if (left_over != 0) {
273 // currently active bin might not be full
274 auto const bc = last_bin_count_ + (left_over - 1) * bin_capacity_;
275 value_t tmp_bin = mean_bins_.back() * (static_cast<double>(last_bin_count_) / bc);
276 for (int j = 0; j < left_over - 1; ++j) { tmp_bin += mean_bins_[fac * nbins + j] * (static_cast<double>(bin_capacity_) / bc); }
277 mean_bins_[nbins] = tmp_bin;
278 last_bin_count_ = bc;
279 ++nbins;
280 } else {
281 // no left overs, currently active bin is full w.r.t. the new bin capacity
282 last_bin_count_ = bin_capacity_ * fac;
283 }
284
285 // resize bin vector and update bin capacity
286 mean_bins_.resize(nbins);
287 bin_capacity_ *= fac;
288 }
289
299 [[nodiscard]] auto mean_error_and_tau() const {
300 auto [err, tau] = calculate_error_and_tau(mean_bins_, var_, count_);
301 return std::make_tuple(mean_, err, tau);
302 }
303
320 [[nodiscard]] auto mean_error_and_tau(mpi::communicator c) const {
321 auto [mean_red, var_red, count_red] = mpi_all_reduce(c);
322 auto bins_gathered = mpi_all_gather(c, true);
323 auto [err, tau] = calculate_error_and_tau(bins_gathered, var_red, count_red);
324 return std::make_tuple(mean_red, err, tau);
325 }
326
336 [[nodiscard]] auto mpi_all_reduce(mpi::communicator c) const {
337 auto count_red = mpi::all_reduce(count_, c);
338
339 // skip the per-rank reweighting (and its 0/0) if no rank has samples
340 if (count_red == 0) return std::make_tuple(zeroed_sample(mean_), make_real(zeroed_sample(mean_)), count_red);
341
342 value_t mean_red = mean_ * (static_cast<double>(count_) / static_cast<double>(count_red));
343 mpi::all_reduce_in_place(mean_red, c);
344
345 real_t var_red = var_ + count_ * abs_square(mean_ - mean_red);
346 mpi::all_reduce_in_place(var_red, c);
347
348 return std::make_tuple(mean_red, var_red, count_red);
349 }
350
361 [[nodiscard]] auto mpi_all_gather(mpi::communicator c, bool same_capacity = true) const {
362 auto fbins = full_bins();
363
364 if (same_capacity) {
365 auto bc_max = mpi::all_reduce(bin_capacity_, c, MPI_MAX);
366 // ranks whose capacity does not divide the max cannot align their bins and contribute nothing
367 if (bc_max % bin_capacity_ == 0)
368 fbins = compress_bins(fbins, bc_max / bin_capacity_);
369 else
370 fbins.clear();
371 }
372
373 auto nbins = mpi::all_gather(fbins.size(), c);
374 auto bins_gathered = std::vector<value_t>(std::accumulate(nbins.begin(), nbins.end(), 0L));
375 long start = 0;
376 for (int i = 0; i < c.size(); ++i) {
377 auto const end = start + nbins[i];
378 if (c.rank() == i) std::copy(fbins.begin(), fbins.end(), bins_gathered.begin() + start);
379 mpi::broadcast_range(std::span{bins_gathered.begin() + start, bins_gathered.begin() + end}, c, i);
380 start = end;
381 }
382
383 return bins_gathered;
384 }
385
387 [[nodiscard]] static std::string hdf5_format() { return "lin_binning"; }
388
396 friend void h5_write(h5::group g, std::string const &name, lin_binning const &acc) {
397 auto gr = g.create_group(name);
398 h5::write_hdf5_format(gr, acc);
399 h5::write(gr, "max_n_bins", acc.max_n_bins_);
400 h5::write(gr, "bin_capacity", acc.bin_capacity_);
401 h5::write(gr, "last_bin_count", acc.last_bin_count_);
402 h5::write(gr, "count", acc.count_);
403 h5::write(gr, "mean_bins", acc.mean_bins_);
404 h5::write(gr, "mean", acc.mean_);
405 h5::write(gr, "var", acc.var_);
406 }
407
415 friend void h5_read(h5::group g, std::string const &name, lin_binning &acc) {
416 auto gr = g.open_group(name);
417 h5::assert_hdf5_format(gr, acc);
418 h5::read(gr, "max_n_bins", acc.max_n_bins_);
419 h5::read(gr, "bin_capacity", acc.bin_capacity_);
420 h5::read(gr, "last_bin_count", acc.last_bin_count_);
421 h5::read(gr, "count", acc.count_);
422 h5::read(gr, "mean_bins", acc.mean_bins_);
423 h5::read(gr, "mean", acc.mean_);
424 h5::read(gr, "var", acc.var_);
425
426 if (acc.max_n_bins_ != -1 && (acc.max_n_bins_ < 2 || acc.bin_capacity_ < 1))
427 throw std::runtime_error("h5_read: invalid lin_binning state (max_n_bins must be -1 or >= 2, bin_capacity must be >= 1).");
428 }
429
430 private:
431 // Get the mean, its standard error and an estimate for the integrated autocorrelation time.
432 [[nodiscard]] auto calculate_error_and_tau(std::vector<value_t> const &bins, real_t const &q0, long ct) const {
433 // need at least 2 samples and 2 bins to estimate the error and tau
434 if (ct < 2 || bins.size() < 2) {
435 auto nan = nan_sample(q0);
436 return std::make_pair(nan, nan);
437 }
438
439 // unbinned estimate of the error of the mean
440 real_t var0 = q0 / (static_cast<double>(ct) * static_cast<double>(ct - 1));
441
442 // binned estimate of the error of the mean
443 auto err = mean_and_err(bins).second;
444
445 // estimate of the integrated autocorrelation time (NaN where var0 == 0)
446 real_t tau = tau_estimate_from_vars(abs_square(err), var0);
447
448 return std::make_pair(err, tau);
449 }
450
451 long max_n_bins_{-1};
452 long bin_capacity_{1};
453 long last_bin_count_{0};
454 long count_{0};
455 std::vector<value_t> mean_bins_{};
456 value_t mean_{};
457 real_t var_{};
458 callback_t callback_{};
459 };
460
461} // namespace triqs::stat
iterator end()
Get an iterator past the last block.
auto mpi_all_reduce(mpi::communicator c) const
Allreduce the overall mean and sum of squared deviations from the mean over multiple MPI processes.
auto const & mean() const
Get the overall mean.
auto const & var_data() const
Get the overall sum of the squared deviations from the mean.
get_real_t< T > real_t
Real type of the accumulated data type.
std::vector< T > full_bins() const
Get only the full bins, i.e. bins that are filled up to the bin capacity.
long max_n_bins() const
Get the maximum number of bins.
friend void h5_read(h5::group g, std::string const &name, lin_binning &acc)
Read a triqs::stat::lin_binning accumulator from HDF5.
static std::string hdf5_format()
Get the HDF5 format string.
auto mean_error_and_tau(mpi::communicator c) const
Get the mean, its standard error and an estimate for the integrated autocorrelation time from accumul...
auto mean_error_and_tau() const
Get the mean, its standard error and an estimate for the integrated autocorrelation time.
friend void h5_write(h5::group g, std::string const &name, lin_binning const &acc)
Write a triqs::stat::lin_binning accumulator to HDF5.
auto & operator<<(U &&x)
Accumulate a new sample.
long n_bins() const
Get the number of bins containing data including the currently active bin.
bool is_unbounded() const
True if the accumulator was constructed with an unbounded number of bins.
long bin_capacity() const
Get the current bin capacity.
long count() const
Get the total number of accumulated samples.
lin_binning()=default
Default construct a linear binning accumulator.
long last_bin_count() const
Get the number of samples in the currently active bin.
long n_full_bins() const
Get the number of full bins, i.e. bins that are filled up to the bin capacity.
std::function< void(lin_binning const &)> callback_t
Type of the callback function that is called when bins are compressed.
auto mpi_all_gather(mpi::communicator c, bool same_capacity=true) const
Allgather full bins from multiple MPI processes.
T value_t
Type of the accumulated data.
auto const & bins() const
Get all the bins containing data including the currently active bin.
auto const & callback() const
Get the callback function.
lin_binning(T const &sample, long max_n_bins, long bin_capacity, callback_t callback={})
Construct a linear binning accumulator with a dummy sample, the maximum number of bins,...
void compress(int fac)
Compress bins and increase bin capacity.
auto compress_bins(std::vector< T > const &bins, int fac)
Compress a given number of adjacent bins.
auto mean_and_err(R &&rg)
Calculate the arithmetic mean or the simple sum as well as a corresponding error estimate of some ran...
auto tau_estimate_from_vars(auto const &var, auto const &var0)
Compute an estimate for the integrated auto-correlation time from variances.
std::remove_cvref_t< decltype(make_real(std::declval< T >()))> get_real_t
Type trait to get the type that would be returned by triqs::stat::make_real.
Definition utils.hpp:54
auto nan_sample(T const &sample)
Get a sample with all elements set to NaN.
Definition utils.hpp:86
auto zeroed_sample(T const &sample)
Get a sample with all elements set to zero.
Definition utils.hpp:66
auto abs_square(auto const &x)
Calculate the (elementwise) absolute square of an array/view/scalar.
Definition utils.hpp:107
auto make_real(T &&t)
Make a given object real and regular.
Definition utils.hpp:51
Provides functions to calculate the arithmetic mean and standard error of a range of values.
Provides various concepts for the Utilities.
Provides various utilities for the Utilities.