TRIQS/mpi 2.0.0
C++ interface to MPI
Loading...
Searching...
No Matches
monitor.hpp
Go to the documentation of this file.
1// Copyright (c) 2020-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Philipp Dumitrescu, Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
21
22#pragma once
23
24#include "./communicator.hpp"
25#include "./macros.hpp"
26#include "./utils.hpp"
27
28#include <mpi.h>
29
30#include <vector>
31#include <unistd.h>
32
33namespace mpi {
34
53 class monitor {
54 // Future struct for non-blocking MPI communication.
55 struct future {
56 // MPI request of the non-blocking MPI call.
57 MPI_Request request{};
58
59 // 0 means that no event has occurred, 1 means that an event has occurred.
60 int event = 0;
61 };
62
63 public:
77 monitor(mpi::communicator c) : comm_(c.duplicate()) {
78 if (comm_.rank() == 0) {
79 root_futures_.resize(c.size() - 1);
80 for (int rank = 1; rank < c.size(); ++rank) {
81 check_mpi_call(MPI_Irecv(&(root_futures_[rank - 1].event), 1, MPI_INT, rank, rank, comm_.get(), &(root_futures_[rank - 1].request)),
82 "MPI_Irecv");
83 }
84 } else {
85 check_mpi_call(MPI_Ibcast(&any_event_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_any_), "MPI_Ibcast");
86 check_mpi_call(MPI_Ibcast(&all_events_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_all_), "MPI_Ibcast");
87 }
88 }
89
91 monitor(monitor const &) = delete;
92
94 monitor &operator=(monitor const &) = delete;
95
98
111 // prevent sending multiple signals
112 if (local_event_ or finalized_) { return; }
113
114 // a local event has occurred
115 local_event_ = 1;
116 if (comm_.rank() == 0) {
117 // on root process, check all other nodes and perform necessary broadcasts
118 root_check_nodes_and_bcast();
119 } else {
120 // on non-root processes, let the root process know about the local event
121 check_mpi_call(MPI_Isend(&local_event_, 1, MPI_INT, 0, comm_.rank(), comm_.get(), &req_isent_), "MPI_Isend");
122 }
123 }
124
140 [[nodiscard]] bool event_on_any_rank() {
141 // if final_communications() has already been called, any_event == 0 if no event has occurred, otherwise it is 1
142 if (finalized_) return any_event_;
143
144 // if a local event has occurred, we return true
145 if (local_event_) return true;
146
147 // on the root process, we first check the status of all non-root processes, perform the necessary broadcasts and
148 // return true if an event has occurred
149 if (comm_.rank() == 0) {
150 root_check_nodes_and_bcast();
151 return any_event_;
152 }
153
154 // on non-root processes, we check the status of the corresponding broadcast and return true if an event has
155 // occurred
156 MPI_Status status;
157 int has_received = 0;
158 check_mpi_call(MPI_Test(&req_ibcast_any_, &has_received, &status), "MPI_Test");
159 return has_received and any_event_;
160 }
161
175 [[nodiscard]] bool event_on_all_ranks() {
176 // if final_communications() has already been called, all_events == 0 if an event has not occurred on every
177 // process, otherwise it is 1
178 if (finalized_) return all_events_;
179
180 // on the root process, we first check the status of all non-root processes, perform the necessary broadcasts and
181 // return true if an event has occurred on all of them
182 if (comm_.rank() == 0) {
183 root_check_nodes_and_bcast();
184 return all_events_;
185 }
186
187 // on non-root processes, we check the status of the broadcast and return true if an event has occurred on all
188 // processes
189 MPI_Status status;
190 int has_received = 0;
191 check_mpi_call(MPI_Test(&req_ibcast_all_, &has_received, &status), "MPI_Test");
192 return has_received and all_events_;
193 }
194
204 // prevent multiple calls
205 if (finalized_) return;
206
207 if (comm_.rank() == 0) {
208 // on root process, wait for all non-root processes to finish their MPI_Isend calls
209 while (root_check_nodes_and_bcast()) {
210 usleep(100); // 100 us (micro seconds)
211 }
212 // and perform broadcasts in case they have not been done yet
213 if (not any_event_) { check_mpi_call(MPI_Ibcast(&any_event_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_any_), "MPI_Ibcast"); }
214 if (not all_events_) { check_mpi_call(MPI_Ibcast(&all_events_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_all_), "MPI_Ibcast"); }
215 } else {
216 // on non-root processes, perform MPI_Isend call in case it has not been done yet
217 if (not local_event_) { check_mpi_call(MPI_Isend(&local_event_, 1, MPI_INT, 0, comm_.rank(), comm_.get(), &req_isent_), "MPI_Isend"); }
218 }
219
220 // all nodes wait for the broadcasts to be completed
221 MPI_Status status_any, status_all;
222 check_mpi_call(MPI_Wait(&req_ibcast_any_, &status_any), "MPI_Wait");
223 check_mpi_call(MPI_Wait(&req_ibcast_all_, &status_all), "MPI_Wait");
224
225 // free the communicator
226 comm_.free();
227 finalized_ = true;
228 }
229
230 private:
231 // Root process checks the status of all non-root processes, performs necessary broadcasts and returns a boolean
232 // that is true if at least one non-root process has not performed its MPI_Isend call yet.
233 bool root_check_nodes_and_bcast() {
234 EXPECTS(!finalized_);
235 EXPECTS(comm_.rank() == 0);
236 bool any = false;
237 bool all = true;
238 bool finished = true;
239 for (auto &[request, rank_event] : root_futures_) {
240 MPI_Status status;
241 int rank_received = 0;
242 check_mpi_call(MPI_Test(&request, &rank_received, &status), "MPI_Test");
243 any |= (rank_received and rank_event);
244 all &= (rank_received and rank_event);
245 finished &= rank_received;
246 }
247 if (not any_event_ and (any or local_event_)) {
248 any_event_ = 1;
249 check_mpi_call(MPI_Ibcast(&any_event_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_any_), "MPI_Ibcast");
250 }
251 if (not all_events_ and all and local_event_) {
252 all_events_ = 1;
253 check_mpi_call(MPI_Ibcast(&all_events_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_all_), "MPI_Ibcast");
254 }
255 return not finished;
256 }
257
258 private:
259 // MPI communicator.
260 mpi::communicator comm_;
261
262 // Future objects stored on the root process for local events on non-root processes.
263 std::vector<future> root_futures_;
264
265 // MPI request for the broadcasting done on the root process in case an event has occurred on any rank.
266 MPI_Request req_ibcast_any_{};
267
268 // MPI request for the broadcasting done on the root process in case an event has occurred on all ranks.
269 MPI_Request req_ibcast_all_{};
270
271 // MPI request for the sending done on non-root processes.
272 MPI_Request req_isent_{};
273
274 // Set to 1, if a local event has occurred on this process.
275 int local_event_ = 0;
276
277 // Set to 1, if an event has occurred on any process.
278 int any_event_ = 0;
279
280 // Set to 1, if an event has occurred on all processes.
281 int all_events_ = 0;
282
283 // Set to true, if finalize_communications() has been called.
284 bool finalized_ = false;
285 };
286
287} // namespace mpi
C++ wrapper around MPI_Comm providing various convenience functions.
MPI_Comm get() const noexcept
Get the wrapped MPI_Comm object.
void finalize_communications()
Finalize all pending communications.
Definition monitor.hpp:203
monitor(mpi::communicator c)
Construct a monitor on top of a given mpi::communicator.
Definition monitor.hpp:77
bool event_on_all_ranks()
Check if an event has occurred on all processes.
Definition monitor.hpp:175
~monitor()
Destructor calls finalize_communications().
Definition monitor.hpp:97
monitor & operator=(monitor const &)=delete
Deleted copy assignment operator.
void report_local_event()
Report a local event to the root process (rank == 0).
Definition monitor.hpp:110
monitor(monitor const &)=delete
Deleted copy constructor.
bool event_on_any_rank()
Check if an event has occurred on any process.
Definition monitor.hpp:140
Provides a C++ wrapper class for an MPI_Comm object.
void check_mpi_call(int errcode, const std::string &mpi_routine)
Check the success of an MPI call.
Definition utils.hpp:48
Macros used in the mpi library.
Provides general utilities related to MPI.