TRIQS/nda 1.3.0
Multi-dimensional array library for C++
Loading...
Searching...
No Matches
h5.hpp
Go to the documentation of this file.
1// Copyright (c) 2019-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
17/**
18 * @file
19 * @brief Provides HDF5 support for the nda library.
20 */
21
22#pragma once
23
24#include "./concepts.hpp"
25#include "./declarations.hpp"
26#include "./exceptions.hpp"
27#include "./layout/for_each.hpp"
28#include "./layout/range.hpp"
29#include "./traits.hpp"
30
31#include <h5/h5.hpp>
32
33#include <algorithm>
34#include <array>
35#include <concepts>
36#include <cstddef>
37#include <string>
38#include <tuple>
39#include <type_traits>
40#include <utility>
41#include <vector>
42
43namespace nda {
44
45 /**
46 * @addtogroup av_hdf5
47 * @{
48 */
49
50 namespace detail {
51
52 // Resize a given array to fit a given shape or check if a given view fits the shape.
53 template <MemoryArray A>
54 void resize_or_check(A &a, std::array<long, A::rank> const &shape) {
55 if constexpr (is_regular_v<A>) {
56 a.resize(shape);
57 } else {
58 if (a.shape() != shape) NDA_RUNTIME_ERROR << "Error in nda::detail::resize_or_check: Dimension mismatch: " << shape << " != " << a.shape();
59 }
60 }
61
62 // Given an array/view, prepare and return the corresponding h5::array_view to be written/read into.
63 template <MemoryArray A>
64 auto prepare_h5_array_view(const A &a) {
65 auto [parent_shape, h5_strides] = h5::array_interface::get_parent_shape_and_h5_strides(a.indexmap().strides().data(), A::rank, a.size());
66 auto v = h5::array_interface::array_view{h5::hdf5_type<get_value_t<A>>(), (void *)a.data(), A::rank, is_complex_v<typename A::value_type>};
67 for (int u = 0; u < A::rank; ++u) {
68 v.slab.count[u] = a.shape()[u];
69 v.slab.stride[u] = h5_strides[u];
70 v.parent_shape[u] = parent_shape[u];
71 }
72 return v;
73 }
74
75 // Create an h5::char_buf from an 1-dimensional nda::MemoryArray.
76 template <MemoryArrayOfRank<1> A>
77 h5::char_buf to_char_buf(A const &a) {
78 // get size of longest string
79 size_t s = 0;
80 for (auto &x : a) s = std::max(s, x.size() + 1);
81
82 // copy each string to a buffer and pad with zeros
83 std::vector<char> buf;
84 buf.resize(a.size() * s, 0x00);
85 size_t i = 0;
86 for (auto &x : a) {
87 strcpy(&buf[i * s], x.c_str());
88 ++i;
89 }
90
91 // return h5::char_buf
92 auto len = h5::v_t{size_t(a.size()), s};
93 return {buf, len};
94 }
95
96 // Write an h5::char_buf into a 1-dimensional nda::MemoryArray.
97 template <MemoryArrayOfRank<1> A>
98 void from_char_buf(h5::char_buf const &cb, A &a) {
99 // prepare the array
100 resize_or_check(a, std::array<long, 1>{static_cast<long>(cb.lengths[0])});
101
102 // loop over all strings
103 auto len_string = cb.lengths[1];
104 size_t i = 0;
105 for (auto &x : a) {
106 x = "";
107 x.append(&cb.buffer[i * len_string]);
108 ++i;
109 }
110 }
111
112 } // namespace detail
113
114 /**
115 * @brief Write an nda::MemoryArray to a new dataset/subgroup into an HDF5 file.
116 *
117 * @details The following arrays/views can be written to HDF5:
118 * - 1-dimensional array/view of strings: It is first converted to an `h5::char_buf` before it is written.
119 * - arbitrary array/view of scalars: The data is written directly into an `h5::dataset` with the same shape. If the
120 * stride order is not C-order, the elements of the array/view are first copied into an array with C-order layout.
121 * - arbitrary array/view of some generic type: The elements are written one-by-one into a subgroup.
122 *
123 * @tparam A nda::MemoryArray type.
124 * @param g `h5::group` in which the dataset/subgroup is created.
125 * @param name Name of the dataset/subgroup to which the nda::MemoryArray is written.
126 * @param a nda::MemoryArray to be written.
127 * @param compress Whether to compress the dataset.
128 */
129 template <MemoryArray A>
130 void h5_write(h5::group g, std::string const &name, A const &a, bool compress = true) {
131 if constexpr (std::is_same_v<typename A::value_type, std::string>) {
132 // 1-dimensional array/view of strings
133 h5_write(g, name, detail::to_char_buf(a));
134 } else if constexpr (is_scalar_v<typename A::value_type>) {
135 // n-dimensional array/view of scalars
136 // make a copy if the array/view is not in C-order and write the copy
137 if (not a.indexmap().is_stride_order_C()) {
138 using h5_arr_t = nda::array<get_value_t<A>, A::rank>;
139 auto a_c_layout = h5_arr_t{a.shape()};
140 a_c_layout() = a;
141 h5_write(g, name, a_c_layout, compress);
142 return;
143 }
144
145 // prepare the h5::array_view and write it
146 auto v = detail::prepare_h5_array_view(a);
147 h5::array_interface::write(g, name, v, compress);
148 } else {
149 // n-dimensional array/view of some generic unknown type
150 auto g2 = g.create_group(name);
151 h5_write(g2, "shape", a.shape());
152 auto make_name = [](auto i0, auto... is) { return (std::to_string(i0) + ... + ("_" + std::to_string(is))); };
153 nda::for_each(a.shape(), [&](auto... is) { h5_write(g2, make_name(is...), a(is...)); });
154 }
155 }
156
157 /**
158 * @brief Construct an `h5::array_interface::hyperslab` and the corresponding shape from a given slice, i.e. a tuple
159 * containing integer, `nda::range`, `nda::range::all_t` and nda::ellipsis objects.
160 *
161 * @details The hyperslab will have the same number of dimensions as the underlying dataspace, whereas the shape will
162 * only contain the selected dimensions. For example, a 1-dimensional slice of a 3-dimensional dataset will return a
163 * hyperslab with 3 dimensions and a shape with 1 dimension.
164 *
165 * @tparam NDim Number of selected dimensions (== Rank of the array/view to be written/read into).
166 * @tparam IRs Types in the slice, i.e. integer, `nda::range`, `nda::range::all_t` or nda::ellipsis.
167 * @param slice Tuple specifying the slice of the dataset to which the nda::MemoryArray is written.
168 * @param ds_shape Shape of the underlying dataset.
169 * @param is_complex Whether the data is complex valued.
170 * @return A pair containing the hyperslab and the shape of the selected slice.
171 */
172 template <size_t NDim, typename... IRs>
173 auto hyperslab_and_shape_from_slice(std::tuple<IRs...> const &slice, std::vector<h5::hsize_t> const &ds_shape, bool is_complex) {
174 // number of parameters specifying the slice
175 static constexpr auto size_of_slice = sizeof...(IRs);
176
177 // number of nda::ellipsis objects in the slice (only 1 is allowed)
178 static constexpr auto ellipsis_count = (std::is_same_v<IRs, ellipsis> + ... + 0);
179 static_assert(ellipsis_count < 2, "Error in nda::hyperslab_and_shape_from_slice: Only a single ellipsis is allowed in slicing");
180 static constexpr auto has_ellipsis = (ellipsis_count == 1);
181
182 // position of the ellipsis in the slice parameters
183 static constexpr auto ellipsis_position = [&]<size_t... Is>(std::index_sequence<Is...>) {
184 if constexpr (has_ellipsis) return ((std::is_same_v<IRs, ellipsis> * Is) + ... + 0);
185 return size_of_slice;
186 }(std::index_sequence_for<IRs...>{});
187
188 // number of integers in the slice parameters
189 static constexpr auto integer_count = (std::integral<IRs> + ... + 0);
190
191 // number of nda::range and nda::range::all_t objects in the slice parameters
192 static constexpr auto range_count = size_of_slice - integer_count - ellipsis_count;
193 static_assert((has_ellipsis && range_count <= NDim) || range_count == NDim,
194 "Error in nda::hyperslab_and_shape_from_slice: Rank does not match the number of non-trivial slice dimensions");
195
196 // number of dimensions spanned by the ellipsis
197 static constexpr auto ellipsis_width = NDim - range_count;
198
199 // number of dimensions in the hyperslab
200 static constexpr auto slab_rank = NDim + integer_count;
201
202 // check rank of the dataset (must match the rank of the hyperslabs)
203 auto ds_rank = ds_shape.size() - is_complex;
204 if (slab_rank != ds_rank)
205 NDA_RUNTIME_ERROR << "Error in nda::hyperslab_and_shape_from_slice: Incompatible dataset and slice ranks: " << ds_rank
206 << " != " << size_of_slice;
207
208 // create and return the hyperslab and the shape array
209 auto slab = h5::array_interface::hyperslab(slab_rank, is_complex);
210 auto shape = std::array<long, NDim>{};
211 [&, m = 0]<size_t... Is>(std::index_sequence<Is...>) mutable {
212 (
213 [&]<typename IR>(size_t n, IR const &ir) mutable {
214 if (n > ellipsis_position) n += (ellipsis_width - 1);
215 if constexpr (std::integral<IR>) {
216 slab.offset[n] = ir;
217 slab.count[n] = 1;
218 } else if constexpr (std::is_same_v<IR, nda::ellipsis>) {
219 for (auto k : range(n, n + ellipsis_width)) {
220 slab.count[k] = ds_shape[k];
221 shape[m++] = ds_shape[k];
222 }
223 } else if constexpr (std::is_same_v<IR, nda::range>) {
224 slab.offset[n] = ir.first();
225 slab.stride[n] = ir.step();
226 slab.count[n] = ir.size();
227 shape[m++] = ir.size();
228 } else {
229 static_assert(std::is_same_v<IR, nda::range::all_t>);
230 slab.count[n] = ds_shape[n];
231 shape[m++] = ds_shape[n];
232 }
233 }(Is, std::get<Is>(slice)),
234 ...);
235 }(std::make_index_sequence<size_of_slice>{});
236 return std::make_pair(slab, shape);
237 }
238
239 /**
240 * @brief Write an nda::MemoryArray into a slice (hyperslab) of an existing dataset in an HDF5 file.
241 *
242 * @details The hyperslab in the dataset is specified by providing a tuple of integer, `nda::range`,
243 * `nda::range::all_t` or nda::ellipsis objects. It's shape must match the shape of the nda::MemoryArray to be
244 * written, otherwise an exception is thrown.
245 *
246 * Only arrays/views with scalar types can be written to a slice.
247 *
248 * @tparam A nda::MemoryArray type.
249 * @tparam IRs Types in the slice, i.e. integer, `nda::range`, `nda::range::all_t` or nda::ellipsis.
250 * @param g `h5::group` which contains the dataset.
251 * @param name Name of the dataset.
252 * @param a nda::MemoryArray to be written.
253 * @param slice Tuple specifying the slice of the dataset to which the nda::MemoryArray is written.
254 */
255 template <MemoryArray A, typename... IRs>
256 void h5_write(h5::group g, std::string const &name, A const &a, std::tuple<IRs...> const &slice) {
257 // compile-time checks
258 constexpr int size_of_slice = sizeof...(IRs);
259 static_assert(size_of_slice > 0, "Error in nda::h5_write: Invalid empty slice");
260 static_assert(is_scalar_v<typename A::value_type>, "Error in nda::h5_write: Slicing is only supported for scalar types");
261 static constexpr bool is_complex = is_complex_v<typename A::value_type>;
262
263 // make a copy if the array/view is not in C-order and write the copy
264 if (not a.indexmap().is_stride_order_C()) {
265 using h5_arr_t = nda::array<get_value_t<A>, A::rank>;
266 auto a_c_layout = h5_arr_t{a.shape()};
267 a_c_layout() = a;
268 h5_write(g, name, a_c_layout, slice);
269 return;
270 }
271
272 // get dataset info and check that the datatypes of the dataset and the array match
273 auto ds_info = h5::array_interface::get_dataset_info(g, name);
274 if (is_complex != ds_info.has_complex_attribute)
275 NDA_RUNTIME_ERROR << "Error in nda::h5_write: Dataset and array/view must both be complex or non-complex";
276
277 // get hyperslab and shape from the slice and check that the shapes match
278 auto const [sl, sh] = hyperslab_and_shape_from_slice<A::rank>(slice, ds_info.lengths, is_complex);
279 if (sh != a.shape()) NDA_RUNTIME_ERROR << "Error in nda::h5_write: Incompatible slice and array shape: " << sh << " != " << a.shape();
280
281 // prepare the h5::array_view and write it
282 auto v = detail::prepare_h5_array_view(a);
283 h5::array_interface::write_slice(g, name, v, sl);
284 }
285
286 /**
287 * @brief Read into an nda::MemoryArray from an HDF5 file.
288 *
289 * @details The following arrays/views are supported:
290 * - 1-dimensional arrays/views of strings: The data is read into an `h5::char_buf` and then copied into the
291 * array/view.
292 * - arbitrary arrays/views of scalars: The data is read directly from an `h5::dataset` into the array/view. If the
293 * stride order is not C-order, the data is first read into an array with C-order layout, before it is copied into the
294 * array/view. The data to be read from the dataset can be restricted by providing a slice.
295 * - arbitrary arrays/views of some generic type: The elements are read one-by-one from the subgroup.
296 *
297 * @note While array objects will always be resized to fit the shape of the data read from the file, views must have
298 * the same shape as the data to be read. If this is not the case, an exception is thrown.
299 *
300 * @tparam A nda::MemoryArray type.
301 * @tparam IRs Types in the slice, i.e. integer, `nda::range`, `nda::range::all_t` or nda::ellipsis.
302 * @param g `h5::group` which contains the dataset/subgroup to read from.
303 * @param name Name of the dataset/subgroup.
304 * @param a nda::MemoryArray object to be read into.
305 * @param slice Optional tuple specifying the slice of the dataset to be read.
306 */
307 template <MemoryArray A, typename... IRs>
308 void h5_read(h5::group g, std::string const &name, A &a, std::tuple<IRs...> const &slice = {}) {
309 constexpr bool slicing = (sizeof...(IRs) > 0);
310
311 if constexpr (std::is_same_v<typename A::value_type, std::string>) {
312 // 1-dimensional array/view of strings
313 static_assert(!slicing, "Error in nda::h5_read: Slicing not supported for arrays/views of strings");
314 h5::char_buf cb;
315 h5_read(g, name, cb);
316 detail::from_char_buf(cb, a);
317 } else if constexpr (is_scalar_v<typename A::value_type>) {
318 // n-dimensional array/view of scalars
319 // read into a temporary array if the array/view is not in C-order and copy the elements
320 if (not a.indexmap().is_stride_order_C()) {
321 using h5_arr_t = nda::array<typename A::value_type, A::rank>;
322 auto a_c_layout = h5_arr_t{};
323 h5_read(g, name, a_c_layout, slice);
324 detail::resize_or_check(a, a_c_layout.shape());
325 a() = a_c_layout;
326 return;
327 }
328
329 // get dataset info
330 auto ds_info = h5::array_interface::get_dataset_info(g, name);
331
332 // allow to read non-complex data into a complex array
333 static constexpr bool is_complex = is_complex_v<typename A::value_type>;
334 if constexpr (is_complex) {
335 if (!ds_info.has_complex_attribute) {
336 array<double, A::rank> tmp;
337 h5_read(g, name, tmp, slice);
338 a = tmp;
339 return;
340 }
341 }
342
343 // get the hyperslab and the shape of the slice
344 std::array<long, A::rank> shape{};
345 auto slab = h5::array_interface::hyperslab{};
346 if constexpr (slicing) {
347 auto const [sl, sh] = hyperslab_and_shape_from_slice<A::rank>(slice, ds_info.lengths, is_complex);
348 slab = sl;
349 shape = sh;
350 } else {
351 for (int u = 0; u < A::rank; ++u) shape[u] = ds_info.lengths[u]; // NB : correct for complex
352 }
353
354 // resize the array or check that the shape matches
355 detail::resize_or_check(a, shape);
356
357 // check the rank of the dataset and the array
358 auto ds_rank = ds_info.rank() - is_complex;
359 if (!slicing && ds_rank != A::rank)
360 NDA_RUNTIME_ERROR << "Error in nda::h5_read: Incompatible dataset and array ranks: " << ds_rank << " != " << A::rank;
361
362 // prepare the h5::array_view and read into it
363 auto v = detail::prepare_h5_array_view(a);
364 h5::array_interface::read(g, name, v, slab);
365 } else {
366 // n-dimensional array/view of some generic unknown type
367 static_assert(!slicing, "Error in nda::h5_read: Slicing not supported for arrays/views of generic types");
368 auto g2 = g.open_group(name);
369
370 // get and check the shape or resize the array
371 std::array<long, A::rank> h5_shape;
372 h5_read(g2, "shape", h5_shape);
373 detail::resize_or_check(a, h5_shape);
374
375 // read element-by-element using the appropriate h5_read implementation
376 auto make_name = [](auto i0, auto... is) { return (std::to_string(i0) + ... + ("_" + std::to_string(is))); };
377 nda::for_each(a.shape(), [&](auto... is) { h5_read(g2, make_name(is...), a(is...)); });
378 }
379 }
380
381 /** @} */
382
383} // namespace nda
#define NDA_RUNTIME_ERROR
void h5_read(h5::group g, std::string const &name, A &a, std::tuple< IRs... > const &slice={})
Read into an nda::MemoryArray from an HDF5 file.
Definition h5.hpp:308
void h5_write(h5::group g, std::string const &name, A const &a, bool compress=true)
Write an nda::MemoryArray to a new dataset/subgroup into an HDF5 file.
Definition h5.hpp:130
void h5_write(h5::group g, std::string const &name, A const &a, std::tuple< IRs... > const &slice)
Write an nda::MemoryArray into a slice (hyperslab) of an existing dataset in an HDF5 file.
Definition h5.hpp:256
auto hyperslab_and_shape_from_slice(std::tuple< IRs... > const &slice, std::vector< h5::hsize_t > const &ds_shape, bool is_complex)
Construct an h5::array_interface::hyperslab and the corresponding shape from a given slice,...
Definition h5.hpp:173
Mimics Python's ... syntax.
Definition range.hpp:49