TRIQS/nda 1.3.0
Multi-dimensional array library for C++
Loading...
Searching...
No Matches
h5.hpp
Go to the documentation of this file.
1// Copyright (c) 2019-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
22#pragma once
23
24#include "./concepts.hpp"
25#include "./declarations.hpp"
26#include "./exceptions.hpp"
27#include "./layout/for_each.hpp"
28#include "./layout/range.hpp"
29#include "./traits.hpp"
30
31#include <h5/h5.hpp>
32
33#include <algorithm>
34#include <array>
35#include <concepts>
36#include <cstddef>
37#include <string>
38#include <tuple>
39#include <type_traits>
40#include <utility>
41#include <vector>
42
43namespace nda {
44
50 namespace detail {
51
52 // Resize a given array to fit a given shape or check if a given view fits the shape.
53 template <MemoryArray A>
54 void resize_or_check(A &a, std::array<long, A::rank> const &shape) {
55 if constexpr (is_regular_v<A>) {
56 a.resize(shape);
57 } else {
58 if (a.shape() != shape) NDA_RUNTIME_ERROR << "Error in nda::detail::resize_or_check: Dimension mismatch: " << shape << " != " << a.shape();
59 }
60 }
61
62 // Given an array/view, prepare and return the corresponding h5::array_view to be written/read into.
63 template <MemoryArray A>
64 auto prepare_h5_array_view(const A &a) {
65 auto [parent_shape, h5_strides] = h5::array_interface::get_parent_shape_and_h5_strides(a.indexmap().strides().data(), A::rank, a.size());
66 auto v = h5::array_interface::array_view{h5::hdf5_type<get_value_t<A>>(), (void *)a.data(), A::rank, is_complex_v<typename A::value_type>};
67 for (int u = 0; u < A::rank; ++u) {
68 v.slab.count[u] = a.shape()[u];
69 v.slab.stride[u] = h5_strides[u];
70 v.parent_shape[u] = parent_shape[u];
71 }
72 return v;
73 }
74
75 // Create an h5::char_buf from an 1-dimensional nda::MemoryArray.
76 template <MemoryArrayOfRank<1> A>
77 h5::char_buf to_char_buf(A const &a) {
78 // get size of longest string
79 size_t s = 0;
80 for (auto &x : a) s = std::max(s, x.size() + 1);
81
82 // copy each string to a buffer and pad with zeros
83 std::vector<char> buf;
84 buf.resize(a.size() * s, 0x00);
85 size_t i = 0;
86 for (auto &x : a) {
87 strcpy(&buf[i * s], x.c_str());
88 ++i;
89 }
90
91 // return h5::char_buf
92 auto len = h5::v_t{size_t(a.size()), s};
93 return {buf, len};
94 }
95
96 // Write an h5::char_buf into a 1-dimensional nda::MemoryArray.
97 template <MemoryArrayOfRank<1> A>
98 void from_char_buf(h5::char_buf const &cb, A &a) {
99 // prepare the array
100 resize_or_check(a, std::array<long, 1>{static_cast<long>(cb.lengths[0])});
101
102 // loop over all strings
103 auto len_string = cb.lengths[1];
104 size_t i = 0;
105 for (auto &x : a) {
106 x = "";
107 x.append(&cb.buffer[i * len_string]);
108 ++i;
109 }
110 }
111
112 } // namespace detail
113
129 template <MemoryArray A>
130 void h5_write(h5::group g, std::string const &name, A const &a, bool compress = true) {
131 if constexpr (std::is_same_v<nda::get_value_t<A>, std::string>) {
132 // 1-dimensional array/view of strings
133 h5_write(g, name, detail::to_char_buf(a));
134 } else if constexpr (is_scalar_v<typename A::value_type>) {
135 // n-dimensional array/view of scalars
136 // make a copy if the array/view is not in C-order and write the copy
137 if (not a.indexmap().is_stride_order_C()) {
138 using h5_arr_t = nda::array<get_value_t<A>, A::rank>;
139 auto a_c_layout = h5_arr_t{a.shape()};
140 a_c_layout() = a;
141 h5_write(g, name, a_c_layout, compress);
142 return;
143 }
144
145 // prepare the h5::array_view and write it
146 auto v = detail::prepare_h5_array_view(a);
147 h5::array_interface::write(g, name, v, compress);
148 } else {
149 // n-dimensional array/view of some generic unknown type
150 auto g2 = g.create_group(name);
151 h5_write(g2, "shape", a.shape());
152 auto make_name = [](auto i0, auto... is) { return (std::to_string(i0) + ... + ("_" + std::to_string(is))); };
153 nda::for_each(a.shape(), [&](auto... is) { h5_write(g2, make_name(is...), a(is...)); });
154 }
155 }
156
172 template <size_t NDim, typename... IRs>
173 auto hyperslab_and_shape_from_slice(std::tuple<IRs...> const &slice, std::vector<h5::hsize_t> const &ds_shape, bool is_complex) {
174 // number of parameters specifying the slice
175 static constexpr auto size_of_slice = sizeof...(IRs);
176
177 // number of nda::ellipsis objects in the slice (only 1 is allowed)
178 static constexpr auto ellipsis_count = (std::is_same_v<IRs, ellipsis> + ... + 0);
179 static_assert(ellipsis_count < 2, "Error in nda::hyperslab_and_shape_from_slice: Only a single ellipsis is allowed in slicing");
180 static constexpr auto has_ellipsis = (ellipsis_count == 1);
181
182 // position of the ellipsis in the slice parameters
183 static constexpr auto ellipsis_position = [&]<size_t... Is>(std::index_sequence<Is...>) {
184 if constexpr (has_ellipsis) return ((std::is_same_v<IRs, ellipsis> * Is) + ... + 0);
185 return size_of_slice;
186 }(std::index_sequence_for<IRs...>{});
187
188 // number of integers in the slice parameters
189 static constexpr auto integer_count = (std::integral<IRs> + ... + 0);
190
191 // number of nda::range and nda::range::all_t objects in the slice parameters
192 static constexpr auto range_count = size_of_slice - integer_count - ellipsis_count;
193 static_assert((has_ellipsis && range_count <= NDim) || range_count == NDim,
194 "Error in nda::hyperslab_and_shape_from_slice: Rank does not match the number of non-trivial slice dimensions");
195
196 // number of dimensions spanned by the ellipsis
197 static constexpr auto ellipsis_width = NDim - range_count;
198
199 // number of dimensions in the hyperslab
200 static constexpr auto slab_rank = NDim + integer_count;
201
202 // check rank of the dataset (must match the rank of the hyperslabs)
203 auto ds_rank = ds_shape.size() - is_complex;
204 if (slab_rank != ds_rank)
205 NDA_RUNTIME_ERROR << "Error in nda::hyperslab_and_shape_from_slice: Incompatible dataset and slice ranks: " << ds_rank
206 << " != " << size_of_slice;
207
208 // create and return the hyperslab and the shape array
209 auto slab = h5::array_interface::hyperslab(slab_rank, is_complex);
210 auto shape = std::array<long, NDim>{};
211 [&, m = 0]<size_t... Is>(std::index_sequence<Is...>) mutable {
212 (
213 [&]<typename IR>(size_t n, IR const &ir) mutable {
214 if (n > ellipsis_position) n += (ellipsis_width - 1);
215 if constexpr (std::integral<IR>) {
216 slab.offset[n] = ir;
217 slab.count[n] = 1;
218 } else if constexpr (std::is_same_v<IR, nda::ellipsis>) {
219 for (auto k : range(n, n + ellipsis_width)) {
220 slab.count[k] = ds_shape[k];
221 shape[m++] = ds_shape[k];
222 }
223 } else if constexpr (std::is_same_v<IR, nda::range>) {
224 slab.offset[n] = ir.first();
225 slab.stride[n] = ir.step();
226 slab.count[n] = ir.size();
227 shape[m++] = ir.size();
228 } else {
229 static_assert(std::is_same_v<IR, nda::range::all_t>);
230 slab.count[n] = ds_shape[n];
231 shape[m++] = ds_shape[n];
232 }
233 }(Is, std::get<Is>(slice)),
234 ...);
235 }(std::make_index_sequence<size_of_slice>{});
236 return std::make_pair(slab, shape);
237 }
238
255 template <MemoryArray A, typename... IRs>
256 void h5_write(h5::group g, std::string const &name, A const &a, std::tuple<IRs...> const &slice) {
257 // compile-time checks
258 constexpr int size_of_slice = sizeof...(IRs);
259 static_assert(size_of_slice > 0, "Error in nda::h5_write: Invalid empty slice");
260 static_assert(is_scalar_v<typename A::value_type>, "Error in nda::h5_write: Slicing is only supported for scalar types");
261 static constexpr bool is_complex = is_complex_v<typename A::value_type>;
262
263 // make a copy if the array/view is not in C-order and write the copy
264 if (not a.indexmap().is_stride_order_C()) {
265 using h5_arr_t = nda::array<get_value_t<A>, A::rank>;
266 auto a_c_layout = h5_arr_t{a.shape()};
267 a_c_layout() = a;
268 h5_write(g, name, a_c_layout, slice);
269 return;
270 }
271
272 // get dataset info and check that the datatypes of the dataset and the array match
273 auto ds_info = h5::array_interface::get_dataset_info(g, name);
274 if (is_complex != ds_info.has_complex_attribute)
275 NDA_RUNTIME_ERROR << "Error in nda::h5_write: Dataset and array/view must both be complex or non-complex";
276
277 // get hyperslab and shape from the slice and check that the shapes match
278 auto const [sl, sh] = hyperslab_and_shape_from_slice<A::rank>(slice, ds_info.lengths, is_complex);
279 if (sh != a.shape()) NDA_RUNTIME_ERROR << "Error in nda::h5_write: Incompatible slice and array shape: " << sh << " != " << a.shape();
280
281 // prepare the h5::array_view and write it
282 auto v = detail::prepare_h5_array_view(a);
283 h5::array_interface::write_slice(g, name, v, sl);
284 }
285
307 template <MemoryArray A, typename... IRs>
308 void h5_read(h5::group g, std::string const &name, A &a, std::tuple<IRs...> const &slice = {}) {
309 constexpr bool slicing = (sizeof...(IRs) > 0);
310
311 if constexpr (std::is_same_v<typename A::value_type, std::string>) {
312 // 1-dimensional array/view of strings
313 static_assert(!slicing, "Error in nda::h5_read: Slicing not supported for arrays/views of strings");
314 h5::char_buf cb;
315 h5_read(g, name, cb);
316 detail::from_char_buf(cb, a);
317 } else if constexpr (is_scalar_v<typename A::value_type>) {
318 // n-dimensional array/view of scalars
319 // read into a temporary array if the array/view is not in C-order and copy the elements
320 if (not a.indexmap().is_stride_order_C()) {
322 auto a_c_layout = h5_arr_t{};
323 h5_read(g, name, a_c_layout, slice);
324 detail::resize_or_check(a, a_c_layout.shape());
325 a() = a_c_layout;
326 return;
327 }
328
329 // get dataset info
330 auto ds_info = h5::array_interface::get_dataset_info(g, name);
331
332 // allow to read non-complex data into a complex array
333 static constexpr bool is_complex = is_complex_v<typename A::value_type>;
334 if constexpr (is_complex) {
335 if (!ds_info.has_complex_attribute) {
337 h5_read(g, name, tmp, slice);
338 a = tmp;
339 return;
340 }
341 }
342
343 // get the hyperslab and the shape of the slice
344 std::array<long, A::rank> shape{};
345 auto slab = h5::array_interface::hyperslab{};
346 if constexpr (slicing) {
347 auto const [sl, sh] = hyperslab_and_shape_from_slice<A::rank>(slice, ds_info.lengths, is_complex);
348 slab = sl;
349 shape = sh;
350 } else {
351 for (int u = 0; u < A::rank; ++u) shape[u] = ds_info.lengths[u]; // NB : correct for complex
352 }
353
354 // resize the array or check that the shape matches
355 detail::resize_or_check(a, shape);
356
357 // check the rank of the dataset and the array
358 auto ds_rank = ds_info.rank() - is_complex;
359 if (!slicing && ds_rank != A::rank)
360 NDA_RUNTIME_ERROR << "Error in nda::h5_read: Incompatible dataset and array ranks: " << ds_rank << " != " << A::rank;
361
362 // prepare the h5::array_view and read into it
363 auto v = detail::prepare_h5_array_view(a);
364 h5::array_interface::read(g, name, v, slab);
365 } else {
366 // n-dimensional array/view of some generic unknown type
367 static_assert(!slicing, "Error in nda::h5_read: Slicing not supported for arrays/views of generic types");
368 auto g2 = g.open_group(name);
369
370 // get and check the shape or resize the array
371 std::array<long, A::rank> h5_shape;
372 h5_read(g2, "shape", h5_shape);
373 detail::resize_or_check(a, h5_shape);
374
375 // read element-by-element using the appropriate h5_read implementation
376 auto make_name = [](auto i0, auto... is) { return (std::to_string(i0) + ... + ("_" + std::to_string(is))); };
377 nda::for_each(a.shape(), [&](auto... is) { h5_read(g2, make_name(is...), a(is...)); });
378 }
379 }
380
383} // namespace nda
A generic multi-dimensional array.
Provides concepts for the nda library.
Provides various convenient aliases and helper functions for nda::basic_array and nda::basic_array_vi...
Provides a custom runtime error class and macros to assert conditions and throw exceptions.
Provides for_each functions for multi-dimensional arrays/views.
void h5_read(h5::group g, std::string const &name, A &a, std::tuple< IRs... > const &slice={})
Read into an nda::MemoryArray from an HDF5 file.
Definition h5.hpp:308
void h5_write(h5::group g, std::string const &name, A const &a, bool compress=true)
Write an nda::MemoryArray to a new dataset/subgroup into an HDF5 file.
Definition h5.hpp:130
auto hyperslab_and_shape_from_slice(std::tuple< IRs... > const &slice, std::vector< h5::hsize_t > const &ds_shape, bool is_complex)
Construct an h5::array_interface::hyperslab and the corresponding shape from a given slice,...
Definition h5.hpp:173
basic_array< ValueType, Rank, Layout, 'A', ContainerPolicy > array
Alias template of an nda::basic_array with an 'A' algebra.
__inline__ void for_each(std::array< Int, R > const &shape, F &&f)
Loop over all possible index values of a given shape and apply a function to them.
Definition for_each.hpp:129
std::string to_string(std::array< T, R > const &a)
Get a string representation of a std::array.
Definition array.hpp:65
constexpr bool is_complex_v
Constexpr variable that is true if type T is a std::complex type.
Definition traits.hpp:75
constexpr bool is_scalar_v
Constexpr variable that is true if type S is a scalar type, i.e. arithmetic or complex.
Definition traits.hpp:79
Includes the itertools header and provides some additional utilities.
Provides type traits for the nda library.