31#include <fmt/format.h>
32#include <fmt/ranges.h>
34#include <mpi/monitor.hpp>
35#include <mpi/vector.hpp>
36#include <nda/macros.hpp>
46namespace triqs::mc_tools {
// NOTE(review): only part of the enclosing function is visible in this excerpt;
// the comments below describe the visible statements only.
// Clear the statistics held by the move set.
57 moves_.clear_statistics();
// The exception monitor stays null unless exception propagation is requested
// and mpi::has_env is true.
67 std::unique_ptr<mpi::monitor> exception_monitor;
68 if (params.
propagate_exception and mpi::has_env) exception_monitor = std::make_unique<mpi::monitor>(params.
comm);
// Declared null here; NOTE(review): its creation is not visible in this excerpt.
69 std::unique_ptr<mpi::monitor> cycle_monitor;
73 auto const rank = params.
comm.rank();
// ncycles == 0 sets the stop flag up front, so the loop below is not entered.
74 bool stop_flag = params.
ncycles == 0;
// The cycle counter starts at 1.
75 std::int64_t cycle_counter = 1;
// Runtime threshold that must be exceeded before progress info is printed.
76 double next_print_info = 0.1;
79 percentage_done_ = stop_flag ? 100 : 0;
// Main loop: runs until stop_flag is set, incrementing the counter after each pass.
83 for (; !stop_flag; ++cycle_counter) {
// "Overtime" means a positive ncycles has already been exceeded; the
// after-cycle duties are skipped in that case.
94 bool const in_overtime = params.
ncycles > 0 and cycle_counter > params.
ncycles;
95 if (not in_overtime) after_cycle_duties(params);
// Per its text, this stderr message reports a caught signal.
99 std::cerr << fmt::format(
"[Rank {}] Signal caught in mc_generic::run: Stopping the simulation.\n", rank);
100 }
catch (std::exception
const &err) {
102 std::cerr << fmt::format(
"[Rank {}] Error int mc_generic::run: Exception occured:\n{}\n", rank, err.what());
// With a monitor available the exception is reported as a local event.
// NOTE(review): the abort(2) below is presumably the branch taken without a
// monitor; the lines in between are not visible in this excerpt.
103 if (exception_monitor) {
104 exception_monitor->report_local_event();
107 params.
comm.abort(2);
// Progress in percent, relative to params.ncycles.
112 percentage_done_ =
static_cast<double>(cycle_counter) * 100.0 / params.
ncycles;
// Read run_timer_ as a double.
113 double runtime = run_timer_;
// Print progress once the runtime passes the threshold, then push the
// threshold out to 1.25 * runtime + 2.0.
116 if (runtime > next_print_info) {
118 next_print_info = 1.25 * runtime + 2.0;
119 print_sim_info(params, cycle_counter);
// Poll the exception monitor only after the runtime passes next_check_except
// (declared outside this excerpt); its any-rank result is OR-ed into stop_flag.
123 if (exception_monitor && runtime > next_check_except) {
126 stop_flag |= exception_monitor->event_on_any_rank();
// At 100 percent or more, report a local event to the cycle monitor.
130 if (percentage_done_ >= 100) {
133 cycle_monitor->report_local_event();
// After the runtime passes next_check_cycles (declared outside this excerpt),
// the monitor's all-ranks result is OR-ed into stop_flag.
135 if (runtime > next_check_cycles) {
138 stop_flag |= cycle_monitor->event_on_all_ranks();
// Store run_timer_ in the timer that matches the current phase.
154 switch (params.
phase) {
155 case mc_phase::warmup: warmup_timer_ = run_timer_;
break;
156 case mc_phase::accumulation: acc_timer_ = run_timer_;
break;
// Add this run's counter to the running total.
161 ncycles_done_ += cycle_counter;
164 print_sim_info(params, cycle_counter);
// Finalize the exception monitor's communications, then throw if it reports
// an event on any rank.
171 if (exception_monitor) {
172 exception_monitor->finalize_communications();
173 if (exception_monitor->event_on_any_rank())
174 throw std::runtime_error(fmt::format(
"[Rank {}] MC simulation stopped because an exception occurred on one of the MPI ranks", rank));
176 if (cycle_monitor) cycle_monitor->finalize_communications();
// Report why the run stopped: per the messages, status 1 is the stop callback
// and status 2 is a received signal. `status` is set outside this excerpt.
179 if (status == 1) report_ << fmt::format(
"[Rank {}] MC simulation stopped because stop_callback() returned true\n", rank);
180 if (status == 2) report_ << fmt::format(
"[Rank {}] MC simulation stopped because a signal has been received\n", rank);
// Announce the warmup phase for this rank via report_(3).
186 report_(3) << fmt::format(
"[Rank {}] Performing warmup phase...\n", params.
comm.rank());
// Mark the parameter object `p` (declared outside this excerpt) as a warmup run.
189 p.phase = mc_phase::warmup;
// Announce the accumulation phase for this rank via report_(3).
194 report_(3) << fmt::format(
"[Rank {}] Performing accumulation phase...\n", params.
comm.rank());
// Calibration is switched off and the phase is set to accumulation on the
// parameter object `p` (declared outside this excerpt).
197 p.enable_calibration =
false;
198 p.phase = mc_phase::accumulation;
// Overload taking individual arguments: forwards them to run() as a
// designated-initializer parameter object.
// NOTE(review): the first signature line is not visible in this excerpt.
202 template <DoubleOrComplex MCSignType>
204 mpi::communicator c,
bool enable_calibration) {
205 return run({.ncycles = ncycles,
206 .cycle_length = cycle_length,
207 .stop_callback = stop_callback,
209 .enable_measures = enable_measures,
210 .enable_calibration = enable_calibration,
// The phase follows enable_measures: accumulation when true, warmup otherwise.
211 .phase = enable_measures ? mc_phase::accumulation : mc_phase::warmup});
// Overload that forwards its arguments, including initial_sign, to warmup()
// as a parameter object. NOTE(review): the first signature line is not visible here.
214 template <DoubleOrComplex MCSignType>
216 mpi::communicator c) {
217 return warmup({.ncycles = ncycles, .cycle_length = cycle_length, .stop_callback = stop_callback, .initial_sign = initial_sign, .comm = c});
// Overload that forwards to warmup() without setting initial_sign.
// NOTE(review): the signature line is not visible in this excerpt.
220 template <DoubleOrComplex MCSignType>
222 return warmup({.ncycles = ncycles, .cycle_length = cycle_length, .stop_callback = stop_callback, .comm = c});
// Overload that forwards its arguments to accumulate() as a parameter object.
// NOTE(review): the signature line is not visible in this excerpt.
225 template <DoubleOrComplex MCSignType>
227 return accumulate({.ncycles = ncycles, .cycle_length = cycle_length, .stop_callback = stop_callback, .comm = c});
// Runs a warmup and then an accumulation with the same cycle length,
// stop callback and communicator.
// NOTE(review): the first signature line and the declaration of `status` are
// not visible in this excerpt.
230 template <DoubleOrComplex MCSignType>
232 std::function<
bool()> stop_callback, MCSignType initial_sign, mpi::communicator c) {
// Only the warmup receives initial_sign.
234 warmup({.ncycles = ncycles_warmup, .cycle_length = cycle_length, .stop_callback = stop_callback, .initial_sign = initial_sign, .comm = c});
// Accumulate only if status is 0 at this point.
235 if (status == 0) status =
accumulate({.ncycles = ncycles_acc, .cycle_length = cycle_length, .stop_callback = stop_callback, .comm = c});
// Convenience overload: delegates to the overload that takes an initial sign,
// passing default_initial_sign.
// NOTE(review): the first signature line is not visible in this excerpt.
239 template <DoubleOrComplex MCSignType>
241 std::function<
bool()> stop_callback, mpi::communicator c) {
242 return warmup_and_accumulate(ncycles_warmup, ncycles_acc, cycle_length, stop_callback, default_initial_sign, c);
// NOTE(review): the signature and the declarations of `info` and `tot_duration`
// are not visible in this excerpt.
246 report_(3) << fmt::format(
"[Rank {}] Collect results: Waiting for all MPI processes to finish accumulating...\n", c.rank());
// Collect measurement results and move statistics over the communicator.
249 measures_.collect_results(c);
250 moves_.collect_statistics(c);
// Reduce the per-rank measurement count over the communicator.
251 auto tot_nmeasures = mpi::reduce(nmeasures_done_, c);
// The per-rank details below are assembled only when verbosity_lvl_ is at least 3.
257 if (verbosity_lvl_ >= 3) {
261 info += fmt::format(
"[Rank {}] Number of measures: {}\n", c.rank(), nmeasures_done_);
// Rate = measures done on this rank divided by the accumulation time.
262 info += fmt::format(
"[Rank {}] Cycles (measures) / second: {:.2e}\n", c.rank(), nmeasures_done_ /
get_accumulation_time());
// The rank prefix is passed to get_timings/get_statistics as a string argument.
263 info += fmt::format(
"[Rank {}] Measurement durations (total = {:.4f}):\n{}", c.rank(), measures_.total_duration(),
264 measures_.get_timings(fmt::format(
"[Rank {}] ", c.rank())));
265 info += fmt::format(
"[Rank {}] Move durations (total = {:.4f}):\n{}", c.rank(), moves_.total_duration(),
266 moves_.get_timings(fmt::format(
"[Rank {}] ", c.rank())));
267 info += fmt::format(
"[Rank {}] Move statistics:\n{}", c.rank(), moves_.get_statistics(fmt::format(
"[Rank {}] ", c.rank())));
// Gather the per-rank strings and report them via report_(3).
271 auto all_infos = mpi::gather(info, c);
273 report_(3) << all_infos;
// Summary totals are reported via report_(2).
274 std::string more_info{
"\n"};
275 more_info += fmt::format(
"Total number of measures: {}\n", tot_nmeasures);
276 more_info += fmt::format(
"Total cycles (measures) / second: {:.2e}\n", tot_nmeasures / tot_duration);
277 report_(2) << more_info;
// Writes a progress line for this rank via report_(3), followed by the
// measures' report when measures are enabled.
// NOTE(review): the `else` and closing lines are not visible in this excerpt.
281 template <DoubleOrComplex MCSignType>
void mc_generic<MCSignType>::print_sim_info(run_param_t
const ¶ms, std::int64_t cycle_counter) {
283 auto const rank = params.comm.rank();
284 double const runtime = run_timer_;
// Cycles per unit of run_timer_ time.
285 auto const cycles_per_sec =
static_cast<double>(cycle_counter) / runtime;
// A negative percentage selects the short format without percentage or ETA.
// NOTE(review): presumably this is the case of a negative params.ncycles; confirm.
288 if (percentage_done_ < 0) {
289 report_(3) << fmt::format(
"[Rank {}] {} cycle {}, {:.2e} cycles/sec\n", rank,
utility::timestamp(), cycle_counter, cycles_per_sec);
// Long format with percentage, ETA and total cycle count.
291 report_(3) << fmt::format(
"[Rank {}] {} {:>6.2f}% done, ETA {}, cycle {} of {}, {:.2e} cycles/sec\n", rank,
utility::timestamp(),
293 params.ncycles, cycles_per_sec);
295 if (params.enable_measures) report_(3) << measures_.report();
// Attempts one move and accepts it when a draw from rng_() is below min(1, r),
// where r is the value returned by moves_.attempt().
// NOTE(review): the rejection path and closing lines are not visible in this excerpt.
298 template <DoubleOrComplex MCSignType>
void mc_generic<MCSignType>::metropolis_step() {
299 double r = moves_.attempt();
300 if (rng_() < std::min(1.0, r)) {
// On acceptance the sign is multiplied by the value returned by accept().
301 sign_ *= moves_.accept();
// Runs the caller-supplied after-cycle duty, then calibrates the moves over
// params.comm when calibration is enabled.
// NOTE(review): the closing line is not visible in this excerpt.
307 template <DoubleOrComplex MCSignType>
void mc_generic<MCSignType>::after_cycle_duties(run_param_t
const ¶ms) {
308 params.after_cycle_duty();
309 if (params.enable_calibration) moves_.calibrate(params.comm);
// Calls every callable in measures_aux_, then accumulates the measures with
// the current sign.
// NOTE(review): one original line between the signature and the loop, and the
// closing line, are not visible in this excerpt.
312 template <DoubleOrComplex MCSignType>
void mc_generic<MCSignType>::do_measurements() {
314 for (
auto &m : measures_aux_) m();
315 measures_.accumulate(sign_);
Empty exception type that callers may use to signal a graceful shutdown.
std::string estimate_time_left(int N, int n, timer &t)
Linear extrapolation of the remaining time of a loop, formatted as HH:MM:SS.
std::string timestamp()
Current local time formatted as HH:MM:SS.
Provides a generic class to run Monte Carlo simulations.
void start()
Install the TRIQS signal handler.
void stop()
Restore the previous signal disposition.
bool received(bool pop_)
Whether at least one signal has been queued since the last reset.
Provides a signal handler for the TRIQS library.
Small helpers that format wall-clock timestamps and durations for human-readable logs.