TRIQS/h5 2.0.0
C++ interface to HDF5
Loading...
Searching...
No Matches
array_interface.cpp
Go to the documentation of this file.
1// Copyright (c) 2019-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
21
22#include "./array_interface.hpp"
23#include "./complex.hpp"
24#include "./macros.hpp"
25#include "./stl/string.hpp"
26
27#include <hdf5.h>
28#include <hdf5_hl.h>
29
30#include <algorithm>
31#include <iostream>
32#include <numeric>
33#include <stdexcept>
34#include <utility>
35#include <vector>
36
37namespace h5::array_interface {
38
39 namespace {
40
41 // Create an HDF5 memory dataspace.
42 dataspace make_mem_dspace(array_view const &v) {
43 // scalar case
44 if (v.rank() == 0) return H5Screate(H5S_SCALAR);
45
46 // create a dataspace of rank v.slab.rank() and with shape v.parent_shape.data()
47 dataspace dspace = H5Screate_simple(v.slab.rank(), v.parent_shape.data(), nullptr);
48 if (!dspace.is_valid()) throw std::runtime_error("Error in make_mem_dspace: Creating the dataspace for an array_view failed");
49
50 // select the hyperslab according to v.slab
51 herr_t err = H5Sselect_hyperslab(dspace, H5S_SELECT_SET, v.slab.offset.data(), v.slab.stride.data(), v.slab.count.data(),
52 (v.slab.block.empty() ? nullptr : v.slab.block.data()));
53 if (err < 0) throw std::runtime_error("Error in make_mem_dspace: Selecting the hyperslab failed");
54
55 // return the dataspace
56 return dspace;
57 }
58
59 } // namespace
60
61 std::pair<v_t, v_t> get_parent_shape_and_h5_strides(long const *np_strides, int rank, long const *view_shape) {
62 // scalar case: return empty vectors
63 if (rank == 0) return {};
64
65 // empty view case: return (0,0,0), (1,1,1)
66 auto const view_size = std::accumulate(view_shape, view_shape + rank, 1l, std::multiplies<>());
67 if (view_size == 0) return {v_t(rank, 0), v_t(rank, 1)};
68
69 // for the general case, we would like to find a parent_shape and h5_strides such that the following equations hold (rank = N):
70 // 1. np_strides[N - 1] = h5_strides[N - 1]
71 // 2. np_strides[N - 2] = h5_strides[N - 2] * parent_shape[N - 1]
72 // 3. np_strides[N - 3] = h5_strides[N - 3] * parent_shape[N - 1] * parent_shape[N - 2]
73 // ...
74 // N. np_strides[N - N] = h5_strides[N - N] * parent_shape[N - 1] * parent_shape[N - 2] * ... * parent_shape[1]
75 //
76 // Note: The shape of the parent array as well as the HDF5 strides that fulfill the above equations are not unique.
77 // This is not a problem as long as HDF5 manages to select the correct elements in memory.
78
79 // create the result vectors
80 v_t parent_shape(rank), h5_strides(rank);
81
82 // We initialize the h5_strides array to the values of np_strides
83 // and successively divide off the parent_shape values as they appear
84 // in the equations above.
85 for (int u = 0; u < rank; ++u) h5_strides[u] = np_strides[u];
86 for (int u = rank - 2; u >= 0; --u) {
87 hsize_t gcd = h5_strides[u];
88 for (int v = u - 1; v >= 0; --v) { gcd = std::gcd(gcd, h5_strides[v]); }
89 parent_shape[u + 1] = gcd;
90 for (int v = u; v >= 0; --v) { h5_strides[v] /= gcd; }
91 }
92
93 // from the above equations it follows that parent_shape[0] (size of the slowest varying dimension)
94 // is arbitrary as long as it is big enough to contain the elements selected by the hyperslab
95 parent_shape[0] = view_shape[0] * h5_strides[0];
96
97 return {parent_shape, h5_strides};
98 }
99
101 // retrieve shape and datatype information
102 datatype ty = H5Dget_type(ds);
103 bool has_complex_attribute = H5LTfind_attribute(ds, "__complex__");
104 dataspace dspace = H5Dget_space(ds);
105 int rank = H5Sget_simple_extent_ndims(dspace);
106 v_t dims_out(rank);
107 H5Sget_simple_extent_dims(dspace, dims_out.data(), nullptr);
108
109 return {.lengths = std::move(dims_out), .ty = ty, .has_complex_attribute = has_complex_attribute};
110 }
111
112 dataset_info get_dataset_info(group g, std::string const &name) {
113 dataset ds = g.open_dataset(name);
114 return get_dataset_info(ds);
115 }
116
117 void write(group g, std::string const &name, array_view const &v, bool compress) {
118 // unlink the dataset if it already exists
119 g.unlink(name);
120
121 // shape of the hyperslab in memory
122 auto hs_shape = v.slab.shape();
123
124 // chunk the dataset and add compression
125 proplist cparms = H5P_DEFAULT;
126 if (compress and (v.rank() != 0)) {
127 int n_dims = v.rank();
128 std::vector<hsize_t> chunk_dims(n_dims);
129 hsize_t const max_chunk_size = hsize_t{1UL << 32} - hsize_t{1}; // 2^32 - 1 = 4 GB
130 hsize_t chunk_size = H5Tget_size(v.ty);
131 for (int i = v.rank() - 1; i >= 0; --i) {
132 H5_ASSERT(max_chunk_size >= chunk_size);
133 hsize_t max_dim = max_chunk_size / chunk_size;
134 chunk_dims[i] = std::clamp(hs_shape[i], hsize_t{1}, max_dim);
135 chunk_size *= chunk_dims[i];
136 }
137 cparms = H5Pcreate(H5P_DATASET_CREATE);
138 H5Pset_chunk(cparms, n_dims, chunk_dims.data());
139 H5Pset_deflate(cparms, 1);
140 }
141
142 // dataspace for the dataset in the file
143 dataspace file_dspace = H5Screate_simple(v.slab.rank(), hs_shape.data(), nullptr);
144
145 // create the dataset in the file
146 dataset ds = H5Dcreate2(g, name.c_str(), v.ty, file_dspace, H5P_DEFAULT, cparms, H5P_DEFAULT);
147 if (!ds.is_valid())
148 throw std::runtime_error("Error in h5::array_interface::write: Creating the dataset " + name + " in the group " + g.name() + " failed");
149
150 // memory dataspace
151 dataspace mem_dspace = make_mem_dspace(v);
152
153 // write to the file dataset
154 if (H5Sget_simple_extent_npoints(mem_dspace) > 0) { // avoid writing empty arrays
155 herr_t err = H5Dwrite(ds, v.ty, mem_dspace, H5S_ALL, H5P_DEFAULT, v.start);
156 if (err < 0)
157 throw std::runtime_error("Error in h5::array_interface::write: Writing to the dataset " + name + " in the group" + g.name() + " failed");
158 }
159
160 // mark TRIQS trailing-2 complex datasets with the __complex__ attribute
161 if (v.has_cplx_trailing_dim) h5_write_attribute(ds, "__complex__", "1");
162 }
163
164 void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl) {
165 // empty hyperslab
166 if (sl.empty()) return;
167
168 // check consistency of input
169 if (v.slab.size() != sl.size()) throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible sizes");
170
171 auto ds_info = get_dataset_info(g, name);
172 if (not hdf5_type_equal(v.ty, ds_info.ty))
173 throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
174 + " != " + get_name_of_h5_type(ds_info.ty));
175
176 // open existing dataset, get dataspace and select hyperslab
177 dataset ds = g.open_dataset(name);
178 dataspace file_dspace = H5Dget_space(ds);
179 herr_t err = H5Sselect_hyperslab(file_dspace, H5S_SELECT_SET, sl.offset.data(), sl.stride.data(), sl.count.data(),
180 (sl.block.empty() ? nullptr : sl.block.data()));
181 if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_slice: Selecting the hyperslab failed");
182
183 // memory dataspace
184 dataspace mem_dspace = make_mem_dspace(v);
185
186 // write to the selected hyperslab of the file dataset
187 if (H5Sget_simple_extent_npoints(file_dspace) > 0) {
188 err = H5Dwrite(ds, v.ty, mem_dspace, file_dspace, H5P_DEFAULT, v.start);
189 if (err < 0)
190 throw std::runtime_error("Error in h5::array_interface::write_slice: Writing the dataset " + name + " in the group " + g.name() + " failed");
191 }
192 }
193
194 void write_attribute(object obj, std::string const &name, array_view v) {
195 // check if the attribute already exists
196 if (H5LTfind_attribute(obj, name.c_str()) != 0)
197 throw std::runtime_error("Error in h5::array_interface::write_attribute: Attribute " + name + " already exists");
198
199 // memory dataspace
200 dataspace mem_dspace = make_mem_dspace(v);
201
202 // create attribute to write to
203 attribute attr = H5Acreate2(obj, name.c_str(), v.ty, mem_dspace, H5P_DEFAULT, H5P_DEFAULT);
204 if (!attr.is_valid()) throw std::runtime_error("Error in h5::array_interface::write_attribute: Creating the attribute " + name + " failed");
205
206 // write to the attribute
207 herr_t err = H5Awrite(attr, v.ty, v.start);
208 if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_attribute: Writing to the attribute " + name + " failed");
209 }
210
211 void read(group g, std::string const &name, array_view v, hyperslab sl) {
212 // open dataset and get dataspace
213 dataset ds = g.open_dataset(name);
214 dataspace file_dspace = H5Dget_space(ds);
215
216 // if provided, select the hyperslab of the file dataspace
217 if (not sl.empty()) {
218 herr_t err = H5Sselect_hyperslab(file_dspace, H5S_SELECT_SET, sl.offset.data(), sl.stride.data(), sl.count.data(),
219 (sl.block.empty() ? nullptr : sl.block.data()));
220 if (err < 0) throw std::runtime_error("Error in h5::array_interface::read: selecting the hyperslab failed");
221 }
222
223 // check consistency of input
224 auto ds_info = get_dataset_info(g, name);
225
226 // file dataset uses the {r:double, i:double} compound type (Julia HDF5.jl, h5py): retype the view to dcplx_t and drop the trailing-2 dim
229 v.has_cplx_trailing_dim = false;
230 v.parent_shape.pop_back();
231 v.slab.offset.pop_back();
232 v.slab.stride.pop_back();
233 v.slab.count.pop_back();
234 v.slab.block.pop_back();
235 }
236
237 if (H5Tget_class(v.ty) != H5Tget_class(ds_info.ty))
238 throw std::runtime_error("Error in h5::array_interface::read: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
239 + " != " + get_name_of_h5_type(ds_info.ty));
240
241 if (not hdf5_type_equal(v.ty, ds_info.ty))
242 std::cerr << "WARNING: HDF5 type mismatch while reading into an array_view: " + get_name_of_h5_type(v.ty)
243 + " != " + get_name_of_h5_type(ds_info.ty) + "\n";
244
245 auto sl_size = sl.size();
246 if (sl.empty()) { sl_size = std::accumulate(ds_info.lengths.begin(), ds_info.lengths.end(), (hsize_t)1, std::multiplies<>()); }
247 if (sl_size != v.slab.size()) throw std::runtime_error("Error in h5::array_interface::read: Incompatible sizes");
248
249 // memory dataspace
250 dataspace mem_dspace = make_mem_dspace(v);
251
252 // read the selected hyperslab from the file dataset
253 if (H5Sget_simple_extent_npoints(file_dspace) > 0) {
254 herr_t err = H5Dread(ds, v.ty, mem_dspace, file_dspace, H5P_DEFAULT, v.start);
255 if (err < 0)
256 throw std::runtime_error("Error in h5::array_interface::read: Reading the dataset " + name + " in the group " + g.name() + " failed");
257 }
258 }
259
260 void read_attribute(object obj, std::string const &name, array_view v) {
261 // open attribute
262 attribute attr = H5Aopen(obj, name.c_str(), H5P_DEFAULT);
263 if (!attr.is_valid()) throw std::runtime_error("Error in h5::array_interface::read_attribute: Opening the attribute " + name + " failed");
264
265 // check that the attribute's rank matches the view's rank (write_attribute writes v.rank(), which is 1
266 // for complex scalars due to the trailing-2 FLOAT convention)
267 dataspace space = H5Aget_space(attr);
268 int rank = H5Sget_simple_extent_ndims(space);
269 if (rank != v.rank())
270 throw std::runtime_error("Error in h5::array_interface::read_attribute: Attribute " + name + " has rank " + std::to_string(rank)
271 + " but the view has rank " + std::to_string(v.rank()));
272
273 // get datatype information
274 auto eq = H5Tequal(H5Aget_type(attr), v.ty);
275 if (eq < 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: H5Tequal call failed");
276 if (eq == 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Incompatible HDF5 types");
277
278 // read the attribute
279 auto err = H5Aread(attr, v.ty, v.start);
280 if (err < 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Reading the attribute " + name + " failed");
281 }
282
283} // namespace h5::array_interface
Provides a generic interface to read/write n-dimensional arrays from/to HDF5.
A handle to an HDF5 group.
Definition group.hpp:44
dataset open_dataset(std::string const &key) const
Open a dataset with the given key in the group.
Definition group.cpp:140
std::string name() const
Get the name of the group.
Definition group.cpp:39
void unlink(std::string const &key, bool error_if_absent=false) const
Remove a link with the given key from the group.
Definition group.cpp:83
bool is_valid() const
Ensure that the wrapped HDF5 ID is valid (by calling H5Iis_valid).
Definition object.cpp:117
Provides a compound type and type traits for complex numbers.
object datatype
Type alias for an HDF5 datatype.
Definition object.hpp:123
object attribute
Type alias for an HDF5 attribute.
Definition object.hpp:132
object dataset
Type alias for an HDF5 dataset.
Definition object.hpp:120
object dataspace
Type alias for an HDF5 dataspace.
Definition object.hpp:126
object proplist
Type alias for an HDF5 property list.
Definition object.hpp:129
bool hdf5_type_equal(datatype dt1, datatype dt2)
Check if two HDF5 datatypes are equal.
Definition object.cpp:204
datatype hdf5_type()
Map a given C++ type to an HDF5 datatype.
Definition object.hpp:156
std::string get_name_of_h5_type(datatype dt)
Get the name of an h5::datatype (for error messages).
Definition object.cpp:184
std::string get_name_of_h5_type()
Get the name of the HDF5 datatype corresponding to a C++ type.
Definition object.hpp:177
void read_attribute(object obj, std::string const &name, array_view v)
Read from an HDF5 attribute into an array view.
void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl)
Write an array view to a selected hyperslab of an existing HDF5 dataset.
void read(group g, std::string const &name, array_view v, hyperslab sl)
Read a given hyperslab from an HDF5 dataset into an array view.
void write_attribute(object obj, std::string const &name, array_view v)
Write an array view to an HDF5 attribute.
dataset_info get_dataset_info(dataset ds)
Retrieve the shape and the h5::datatype from a dataset.
void write(group g, std::string const &name, array_view const &v, bool compress)
Write an array view to an HDF5 dataset.
std::pair< v_t, v_t > get_parent_shape_and_h5_strides(long const *np_strides, int rank, long const *view_shape)
Given a view on an n-dimensional array (dataspace) by specifying its numpy/nda-style strides and its ...
void h5_write_attribute(object, std::string const &, std::string const &)
Write a std::string to an HDF5 attribute.
Definition string.cpp:99
std::vector< hsize_t > v_t
Vector of h5::hsize_t used throughout the h5 library.
Definition utils.hpp:59
unsigned long long hsize_t
Size type used in HDF5.
Definition utils.hpp:55
Macros used in the h5 library.
Provides functions to read/write std::string, char* and h5::char_buf objects from/to HDF5.
Struct representing a view on an n-dimensional array/dataspace.
void * start
Pointer to the data of the array.
v_t parent_shape
Shape of the (contiguous) parent array.
datatype ty
h5::datatype stored in the array.
int rank() const
Get the rank of the view (including the possible added imaginary dimension).
hyperslab slab
h5::array_interface::hyperslab specifying the selection of the view.
Simple struct to store basic information about an HDF5 dataset.
Struct representing an HDF5 hyperslab.
auto size() const
Get the total number of elements in the hyperslab.
v_t block
Shape of a single block selected from the dataspace.
bool empty() const
Check whether the hyperslab is empty (has been initialized).
v_t stride
Stride in each dimension (in the HDF5 sense).
v_t offset
Index offset for each dimension.
v_t count
Number of elements or blocks to select along each dimension.
v_t shape() const
Get the shape of the selected hyperslab.
int rank() const
Get the rank of the hyperslab (including the possible added imaginary dimension).