TRIQS/mpi
2.0.0
C++ interface to MPI
Toggle main menu visibility
Loading...
Searching...
No Matches
monitor.hpp
Go to the documentation of this file.
1
// Copyright (c) 2020-2024 Simons Foundation
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
// http://www.apache.org/licenses/LICENSE-2.0.txt
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
//
15
// Authors: Philipp Dumitrescu, Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
21
22
#pragma once
23
24
#include "
./communicator.hpp
"
25
#include "
./macros.hpp
"
26
#include "
./utils.hpp
"
27
28
#include <mpi.h>
29
30
#include <vector>
31
#include <unistd.h>
32
33
namespace
mpi {
34
53
class
monitor
{
54
// Future struct for non-blocking MPI communication.
55
struct
future {
56
// MPI request of the non-blocking MPI call.
57
MPI_Request request{};
58
59
// 0 means that no event has occurred, 1 means that an event has occurred.
60
int
event
= 0;
61
};
62
63
public
:
77
monitor
(
mpi::communicator
c) : comm_(c.duplicate()) {
78
if
(comm_.rank() == 0) {
79
root_futures_.resize(c.size() - 1);
80
for (int rank = 1; rank < c.size(); ++rank) {
81
check_mpi_call(MPI_Irecv(&(root_futures_[rank - 1].event), 1, MPI_INT, rank, rank, comm_.get(), &(root_futures_[rank - 1].request)),
82
"MPI_Irecv"
);
83
}
84
}
else
{
85
check_mpi_call
(MPI_Ibcast(&any_event_, 1, MPI_INT, 0, comm_.
get
(), &req_ibcast_any_),
"MPI_Ibcast"
);
86
check_mpi_call
(MPI_Ibcast(&all_events_, 1, MPI_INT, 0, comm_.
get
(), &req_ibcast_all_),
"MPI_Ibcast"
);
87
}
88
}
89
91
monitor
(
monitor
const
&) =
delete
;
92
94
monitor
&
operator=
(
monitor
const
&) =
delete
;
95
97
~monitor
() {
finalize_communications
(); }
98
110
void
report_local_event
() {
111
// prevent sending multiple signals
112
if
(local_event_ or finalized_) {
return
; }
113
114
// a local event has occurred
115
local_event_ = 1;
116
if
(comm_.rank() == 0) {
117
// on root process, check all other nodes and perform necessary broadcasts
118
root_check_nodes_and_bcast();
119
}
else
{
120
// on non-root processes, let the root process know about the local event
121
check_mpi_call
(MPI_Isend(&local_event_, 1, MPI_INT, 0, comm_.rank(), comm_.get(), &req_isent_),
"MPI_Isend"
);
122
}
123
}
124
140
[[nodiscard]]
bool
event_on_any_rank
() {
141
// if final_communications() has already been called, any_event == 0 if no event has occurred, otherwise it is 1
142
if
(finalized_)
return
any_event_;
143
144
// if a local event has occurred, we return true
145
if
(local_event_)
return
true
;
146
147
// on the root process, we first check the status of all non-root processes, perform the necessary broadcasts and
148
// return true if an event has occurred
149
if
(comm_.rank() == 0) {
150
root_check_nodes_and_bcast();
151
return
any_event_;
152
}
153
154
// on non-root processes, we check the status of the corresponding broadcast and return true if an event has
155
// occurred
156
MPI_Status status;
157
int
has_received = 0;
158
check_mpi_call
(MPI_Test(&req_ibcast_any_, &has_received, &status),
"MPI_Test"
);
159
return
has_received and any_event_;
160
}
161
175
[[nodiscard]]
bool
event_on_all_ranks
() {
176
// if final_communications() has already been called, all_events == 0 if an event has not occurred on every
177
// process, otherwise it is 1
178
if
(finalized_)
return
all_events_;
179
180
// on the root process, we first check the status of all non-root processes, perform the necessary broadcasts and
181
// return true if an event has occurred on all of them
182
if
(comm_.rank() == 0) {
183
root_check_nodes_and_bcast();
184
return
all_events_;
185
}
186
187
// on non-root processes, we check the status of the broadcast and return true if an event has occurred on all
188
// processes
189
MPI_Status status;
190
int
has_received = 0;
191
check_mpi_call
(MPI_Test(&req_ibcast_all_, &has_received, &status),
"MPI_Test"
);
192
return
has_received and all_events_;
193
}
194
203
void
finalize_communications
() {
204
// prevent multiple calls
205
if
(finalized_)
return
;
206
207
if
(comm_.rank() == 0) {
208
// on root process, wait for all non-root processes to finish their MPI_Isend calls
209
while
(root_check_nodes_and_bcast()) {
210
usleep(100);
// 100 us (micro seconds)
211
}
212
// and perform broadcasts in case they have not been done yet
213
if
(not any_event_) {
check_mpi_call
(MPI_Ibcast(&any_event_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_any_),
"MPI_Ibcast"
); }
214
if
(not all_events_) {
check_mpi_call
(MPI_Ibcast(&all_events_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_all_),
"MPI_Ibcast"
); }
215
}
else
{
216
// on non-root processes, perform MPI_Isend call in case it has not been done yet
217
if
(not local_event_) {
check_mpi_call
(MPI_Isend(&local_event_, 1, MPI_INT, 0, comm_.rank(), comm_.get(), &req_isent_),
"MPI_Isend"
); }
218
}
219
220
// all nodes wait for the broadcasts to be completed
221
MPI_Status status_any, status_all;
222
check_mpi_call
(MPI_Wait(&req_ibcast_any_, &status_any),
"MPI_Wait"
);
223
check_mpi_call
(MPI_Wait(&req_ibcast_all_, &status_all),
"MPI_Wait"
);
224
225
// free the communicator
226
comm_.free();
227
finalized_ =
true
;
228
}
229
230
private
:
231
// Root process checks the status of all non-root processes, performs necessary broadcasts and returns a boolean
232
// that is true if at least one non-root process has not performed its MPI_Isend call yet.
233
bool
root_check_nodes_and_bcast() {
234
EXPECTS(!finalized_);
235
EXPECTS(comm_.rank() == 0);
236
bool
any =
false
;
237
bool
all =
true
;
238
bool
finished =
true
;
239
for
(
auto
&[request, rank_event] : root_futures_) {
240
MPI_Status status;
241
int
rank_received = 0;
242
check_mpi_call
(MPI_Test(&request, &rank_received, &status),
"MPI_Test"
);
243
any |= (rank_received and rank_event);
244
all &= (rank_received and rank_event);
245
finished &= rank_received;
246
}
247
if
(not any_event_ and (any or local_event_)) {
248
any_event_ = 1;
249
check_mpi_call
(MPI_Ibcast(&any_event_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_any_),
"MPI_Ibcast"
);
250
}
251
if
(not all_events_ and all and local_event_) {
252
all_events_ = 1;
253
check_mpi_call
(MPI_Ibcast(&all_events_, 1, MPI_INT, 0, comm_.get(), &req_ibcast_all_),
"MPI_Ibcast"
);
254
}
255
return
not finished;
256
}
257
258
private
:
259
// MPI communicator.
260
mpi::communicator comm_;
261
262
// Future objects stored on the root process for local events on non-root processes.
263
std::vector<future> root_futures_;
264
265
// MPI request for the broadcasting done on the root process in case an event has occurred on any rank.
266
MPI_Request req_ibcast_any_{};
267
268
// MPI request for the broadcasting done on the root process in case an event has occurred on all ranks.
269
MPI_Request req_ibcast_all_{};
270
271
// MPI request for the sending done on non-root processes.
272
MPI_Request req_isent_{};
273
274
// Set to 1, if a local event has occurred on this process.
275
int
local_event_ = 0;
276
277
// Set to 1, if an event has occurred on any process.
278
int
any_event_ = 0;
279
280
// Set to 1, if an event has occurred on all processes.
281
int
all_events_ = 0;
282
283
// Set to true, if finalize_communications() has been called.
284
bool
finalized_ =
false
;
285
};
286
287
}
// namespace mpi
mpi::communicator
C++ wrapper around MPI_Comm providing various convenience functions.
Definition
communicator.hpp:51
mpi::communicator::get
MPI_Comm get() const noexcept
Get the wrapped MPI_Comm object.
Definition
communicator.hpp:63
mpi::monitor::finalize_communications
void finalize_communications()
Finalize all pending communications.
Definition
monitor.hpp:203
mpi::monitor::monitor
monitor(mpi::communicator c)
Construct a monitor on top of a given mpi::communicator.
Definition
monitor.hpp:77
mpi::monitor::event_on_all_ranks
bool event_on_all_ranks()
Check if an event has occurred on all processes.
Definition
monitor.hpp:175
mpi::monitor::~monitor
~monitor()
Destructor calls finalize_communications().
Definition
monitor.hpp:97
mpi::monitor::operator=
monitor & operator=(monitor const &)=delete
Deleted copy assignment operator.
mpi::monitor::report_local_event
void report_local_event()
Report a local event to the root process (rank == 0).
Definition
monitor.hpp:110
mpi::monitor::monitor
monitor(monitor const &)=delete
Deleted copy constructor.
mpi::monitor::event_on_any_rank
bool event_on_any_rank()
Check if an event has occurred on any process.
Definition
monitor.hpp:140
communicator.hpp
Provides a C++ wrapper class for an MPI_Comm object.
mpi::check_mpi_call
void check_mpi_call(int errcode, const std::string &mpi_routine)
Check the success of an MPI call.
Definition
utils.hpp:48
macros.hpp
Macros used in the mpi library.
utils.hpp
Provides general utilities related to MPI.
mpi
monitor.hpp
Generated by
1.17.0