TRIQS/mpi 1.3.0
C++ interface to MPI
Loading...
Searching...
No Matches
monitor.hpp
Go to the documentation of this file.
1// Copyright (c) 2020-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Philipp Dumitrescu, Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
22#pragma once
23
24#include "./communicator.hpp"
25#include "./macros.hpp"
26#include "./utils.hpp"
27
28#include <mpi.h>
29
30#include <vector>
31#include <unistd.h>
32
33namespace mpi {
34
53 class monitor {
54 // Future struct for non-blocking MPI communication.
55 struct future {
56 // MPI request of the non-blocking MPI call.
57 MPI_Request request{};
58
59 // 0 means that no event has occurred, 1 means that an event has occurred.
60 int event = 0;
61 };
62
63 // MPI communicator.
65
66 // Future objects stored on the root process for local events on non-root processes.
67 std::vector<future> root_futures;
68
69 // MPI request for the broadcasting done on the root process in case an event has occurred on any rank.
70 MPI_Request req_ibcast_any{};
71
72 // MPI request for the broadcasting done on the root process in case an event has occurred on all ranks.
73 MPI_Request req_ibcast_all{};
74
75 // MPI request for the sending done on non-root processes.
76 MPI_Request req_isent{};
77
78 // Set to 1, if a local event has occurred on this process.
79 int local_event = 0;
80
81 // Set to 1, if an event has occurred on any process.
82 int any_event = 0;
83
84 // Set to 1, if an event has occurred on all processes.
85 int all_events = 0;
86
87 // Set to true, if finialize_communications() has been called.
88 bool finalized = false;
89
90 public:
104 monitor(mpi::communicator c) : comm(c.duplicate()) {
105 if (comm.rank() == 0) {
106 root_futures.resize(c.size() - 1);
107 for (int rank = 1; rank < c.size(); ++rank) {
108 check_mpi_call(MPI_Irecv(&(root_futures[rank - 1].event), 1, MPI_INT, rank, rank, comm.get(), &(root_futures[rank - 1].request)),
109 "MPI_Irecv");
110 }
111 } else {
112 check_mpi_call(MPI_Ibcast(&any_event, 1, MPI_INT, 0, comm.get(), &req_ibcast_any), "MPI_Ibcast");
113 check_mpi_call(MPI_Ibcast(&all_events, 1, MPI_INT, 0, comm.get(), &req_ibcast_all), "MPI_Ibcast");
114 }
115 }
116
118 monitor(monitor const &) = delete;
119
121 monitor &operator=(monitor const &) = delete;
122
124 ~monitor() { finalize_communications(); }
125
138 // prevent sending multiple signals
139 if (local_event or finalized) { return; }
140
141 // a local event has occurred
142 local_event = 1;
143 if (comm.rank() == 0) {
144 // on root process, check all other nodes and perform necessary broadcasts
145 root_check_nodes_and_bcast();
146 } else {
147 // on non-root processes, let the root process know about the local event
148 check_mpi_call(MPI_Isend(&local_event, 1, MPI_INT, 0, comm.rank(), comm.get(), &req_isent), "MPI_Isend");
149 }
150 }
151
167 [[nodiscard]] bool event_on_any_rank() {
168 // if final_communications() has already been called, any_event == 0 if no event has occurred, otherwise it is 1
169 if (finalized) return any_event;
170
171 // if a local event has occurred, we return true
172 if (local_event) return true;
173
174 // on the root process, we first check the status of all non-root processes, perform the necessary broadcasts and
175 // return true if an event has occurred
176 if (comm.rank() == 0) {
177 root_check_nodes_and_bcast();
178 return any_event;
179 }
180
181 // on non-root processes, we check the status of the corresponding broadcast and return true if an event has
182 // occurred
183 MPI_Status status;
184 int has_received = 0;
185 check_mpi_call(MPI_Test(&req_ibcast_any, &has_received, &status), "MPI_Test");
186 return has_received and any_event;
187 }
188
202 [[nodiscard]] bool event_on_all_ranks() {
203 // if final_communications() has already been called, all_events == 0 if an event has not occurred on every
204 // process, otherwise it is 1
205 if (finalized) return all_events;
206
207 // on the root process, we first check the status of all non-root processes, perform the necessary broadcasts and
208 // return true if an event has occurred on all of them
209 if (comm.rank() == 0) {
210 root_check_nodes_and_bcast();
211 return all_events;
212 }
213
214 // on non-root processes, we check the status of the broadcast and return true if an event has occurred on all
215 // processes
216 MPI_Status status;
217 int has_received = 0;
218 check_mpi_call(MPI_Test(&req_ibcast_all, &has_received, &status), "MPI_Test");
219 return has_received and all_events;
220 }
221
231 // prevent multiple calls
232 if (finalized) return;
233
234 if (comm.rank() == 0) {
235 // on root process, wait for all non-root processes to finish their MPI_Isend calls
236 while (root_check_nodes_and_bcast()) {
237 usleep(100); // 100 us (micro seconds)
238 }
239 // and perform broadcasts in case they have not been done yet
240 if (not any_event) { check_mpi_call(MPI_Ibcast(&any_event, 1, MPI_INT, 0, comm.get(), &req_ibcast_any), "MPI_Ibcast"); }
241 if (not all_events) { check_mpi_call(MPI_Ibcast(&all_events, 1, MPI_INT, 0, comm.get(), &req_ibcast_all), "MPI_Ibcast"); }
242 } else {
243 // on non-root processes, perform MPI_Isend call in case it has not been done yet
244 if (not local_event) { check_mpi_call(MPI_Isend(&local_event, 1, MPI_INT, 0, comm.rank(), comm.get(), &req_isent), "MPI_Isend"); }
245 }
246
247 // all nodes wait for the broadcasts to be completed
248 MPI_Status status_any, status_all;
249 check_mpi_call(MPI_Wait(&req_ibcast_any, &status_any), "MPI_Wait");
250 check_mpi_call(MPI_Wait(&req_ibcast_all, &status_all), "MPI_Wait");
251
252 // free the communicator
253 comm.free();
254 finalized = true;
255 }
256
257 private:
258 // Root process checks the status of all non-root processes, performs necessary broadcasts and returns a boolean
259 // that is true if at least one non-root process has not performed its MPI_Isend call yet.
260 bool root_check_nodes_and_bcast() {
261 EXPECTS(!finalized);
262 EXPECTS(comm.rank() == 0);
263 bool any = false;
264 bool all = true;
265 bool finished = true;
266 for (auto &[request, rank_event] : root_futures) {
267 MPI_Status status;
268 int rank_received = 0;
269 check_mpi_call(MPI_Test(&request, &rank_received, &status), "MPI_Test");
270 any |= (rank_received and rank_event);
271 all &= (rank_received and rank_event);
272 finished &= rank_received;
273 }
274 if (not any_event and (any or local_event)) {
275 any_event = 1;
276 check_mpi_call(MPI_Ibcast(&any_event, 1, MPI_INT, 0, comm.get(), &req_ibcast_any), "MPI_Ibcast");
277 }
278 if (not all_events and all and local_event) {
279 all_events = 1;
280 check_mpi_call(MPI_Ibcast(&all_events, 1, MPI_INT, 0, comm.get(), &req_ibcast_all), "MPI_Ibcast");
281 }
282 return not finished;
283 }
284 };
285
286} // namespace mpi
C++ wrapper around MPI_Comm providing various convenience functions.
int rank() const
Get the rank of the calling process in the communicator.
void free()
Free the communicator.
MPI_Comm get() const noexcept
Get the wrapped MPI_Comm object.
Constructed on top of an MPI communicator, this class helps to monitor and communicate events across ...
Definition monitor.hpp:53
void finalize_communications()
Finalize all pending communications.
Definition monitor.hpp:230
monitor(mpi::communicator c)
Construct a monitor on top of a given mpi::communicator.
Definition monitor.hpp:104
bool event_on_all_ranks()
Check if an event has occurred on all processes.
Definition monitor.hpp:202
~monitor()
Destructor calls finalize_communications().
Definition monitor.hpp:124
monitor & operator=(monitor const &)=delete
Deleted copy assignment operator.
void report_local_event()
Report a local event to the root process (rank == 0).
Definition monitor.hpp:137
monitor(monitor const &)=delete
Deleted copy constructor.
bool event_on_any_rank()
Check if an event has occurred on any process.
Definition monitor.hpp:167
Provides a C++ wrapper class for an MPI_Comm object.
void check_mpi_call(int errcode, const std::string &mpi_routine)
Check the success of an MPI call.
Definition utils.hpp:72
Macros used in the mpi library.
Provides general utilities related to MPI.