TRIQS/h5 1.3.0
C++ interface to HDF5
Loading...
Searching...
No Matches
array_interface.cpp
Go to the documentation of this file.
1// Copyright (c) 2019-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
21
22#include "./array_interface.hpp"
23#include "./macros.hpp"
24#include "./stl/string.hpp"
25
26#include <hdf5.h>
27#include <hdf5_hl.h>
28
29#include <algorithm>
30#include <iostream>
31#include <numeric>
32#include <stdexcept>
33#include <utility>
34#include <vector>
35
36namespace h5::array_interface {
37
38 namespace {
39
40 // Create an HDF5 memory dataspace.
41 dataspace make_mem_dspace(array_view const &v) {
42 // scalar case
43 if (v.rank() == 0) return H5Screate(H5S_SCALAR);
44
45 // create a dataspace of rank v.rank() and with shape v.parent_shape
46 dataspace dspace = H5Screate_simple(v.slab.rank(), v.parent_shape.data(), nullptr);
47 if (!dspace.is_valid()) throw std::runtime_error("Error in make_mem_dspace: Creating the dataspace for an array_view failed");
48
49 // select the hyperslab according to v.slab
50 herr_t err = H5Sselect_hyperslab(dspace, H5S_SELECT_SET, v.slab.offset.data(), v.slab.stride.data(), v.slab.count.data(),
51 (v.slab.block.empty() ? nullptr : v.slab.block.data()));
52 if (err < 0) throw std::runtime_error("Error in make_mem_dspace: Selecting the hyperslab failed");
53
54 // return the dataspace
55 return dspace;
56 }
57
58 } // namespace
59
60 std::pair<v_t, v_t> get_parent_shape_and_h5_strides(long const *np_strides, int rank, long const *view_shape) {
61 // scalar case: return empty vectors
62 if (rank == 0) return {};
63
64 // empty view case: return (0,0,0), (1,1,1)
65 auto const view_size = std::accumulate(view_shape, view_shape + rank, 1l, std::multiplies<>());
66 if (view_size == 0) return {v_t(rank, 0), v_t(rank, 1)};
67
68 // for the general case, we would like to find a parent_shape and h5_strides such that the following equations hold (rank = N):
69 // 1. np_strides[N - 1] = h5_strides[N - 1]
70 // 2. np_strides[N - 2] = h5_strides[N - 2] * parent_shape[N - 1]
71 // 3. np_strides[N - 3] = h5_strides[N - 3] * parent_shape[N - 1] * parent_shape[N - 2]
72 // ...
73 // N. np_strides[N - N] = h5_strides[N - N] * parent_shape[N - 1] * parent_shape[N - 2] * ... * parent_shape[1]
74 //
75 // Note: The shape of the parent array as well as the HDF5 strides that fulfill the above equations are not unique.
76 // This is not a problem as long as HDF5 manages to select the correct elements in memory.
77
78 // create the result vectors
79 v_t parent_shape(rank), h5_strides(rank);
80
81 // We initialize the h5_strides array to the values of np_strides
82 // and successively divide off the parent_shape values as they appear
83 // in the equations above.
84 for (int u = 0; u < rank; ++u) h5_strides[u] = np_strides[u];
85 for (int u = rank - 2; u >= 0; --u) {
86 hsize_t gcd = h5_strides[u];
87 for (int v = u - 1; v >= 0; --v) { gcd = std::gcd(gcd, h5_strides[v]); }
88 parent_shape[u + 1] = gcd;
89 for (int v = u; v >= 0; --v) { h5_strides[v] /= gcd; }
90 }
91
92 // from the above equations it follows that parent_shape[0] (size of the slowest varying dimension)
93 // is arbitrary as long as it is big enough to contain the elements selected by the hyperslab
94 parent_shape[0] = view_shape[0] * h5_strides[0];
95
96 return {parent_shape, h5_strides};
97 }
98
100 // retrieve shape and datatype information
101 datatype ty = H5Dget_type(ds);
102 bool has_complex_attribute = H5LTfind_attribute(ds, "__complex__");
103 dataspace dspace = H5Dget_space(ds);
104 int rank = H5Sget_simple_extent_ndims(dspace);
105 v_t dims_out(rank);
106 H5Sget_simple_extent_dims(dspace, dims_out.data(), nullptr);
107
108 return {std::move(dims_out), ty, has_complex_attribute};
109 }
110
111 dataset_info get_dataset_info(group g, std::string const &name) {
112 dataset ds = g.open_dataset(name);
113 return get_dataset_info(ds);
114 }
115
116 void write(group g, std::string const &name, array_view const &v, bool compress) {
117 // unlink the dataset if it already exists
118 g.unlink(name);
119
120 // shape of the hyperslab in memory
121 auto hs_shape = v.slab.shape();
122
123 // chunk the dataset and add compression
124 proplist cparms = H5P_DEFAULT;
125 if (compress and (v.rank() != 0)) {
126 int n_dims = v.rank();
127 std::vector<hsize_t> chunk_dims(n_dims);
128 hsize_t const max_chunk_size = hsize_t{1UL << 32} - hsize_t{1}; // 2^32 - 1 = 4 GB
129 hsize_t chunk_size = H5Tget_size(v.ty);
130 for (int i = v.rank() - 1; i >= 0; --i) {
131 H5_ASSERT(max_chunk_size >= chunk_size);
132 hsize_t max_dim = max_chunk_size / chunk_size;
133 chunk_dims[i] = std::clamp(hs_shape[i], hsize_t{1}, max_dim);
134 chunk_size *= chunk_dims[i];
135 }
136 cparms = H5Pcreate(H5P_DATASET_CREATE);
137 H5Pset_chunk(cparms, n_dims, chunk_dims.data());
138 H5Pset_deflate(cparms, 1);
139 }
140
141 // dataspace for the dataset in the file
142 dataspace file_dspace = H5Screate_simple(v.slab.rank(), hs_shape.data(), nullptr);
143
144 // create the dataset in the file
145 dataset ds = H5Dcreate2(g, name.c_str(), v.ty, file_dspace, H5P_DEFAULT, cparms, H5P_DEFAULT);
146 if (!ds.is_valid())
147 throw std::runtime_error("Error in h5::array_interface::write: Creating the dataset " + name + " in the group " + g.name() + " failed");
148
149 // memory dataspace
150 dataspace mem_dspace = make_mem_dspace(v);
151
152 // write to the file dataset
153 if (H5Sget_simple_extent_npoints(mem_dspace) > 0) { // avoid writing empty arrays
154 herr_t err = H5Dwrite(ds, v.ty, mem_dspace, H5S_ALL, H5P_DEFAULT, v.start);
155 if (err < 0)
156 throw std::runtime_error("Error in h5::array_interface::write: Writing to the dataset " + name + " in the group" + g.name() + " failed");
157 }
158
159 // add complex attribute if the data is complex valued
160 if (v.is_complex) h5_write_attribute(ds, "__complex__", "1");
161 }
162
163 void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl) {
164 // empty hyperslab
165 if (sl.empty()) return;
166
167 // check consistency of input
168 if (v.slab.size() != sl.size()) throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible sizes");
169
170 auto ds_info = get_dataset_info(g, name);
171 if (not hdf5_type_equal(v.ty, ds_info.ty))
172 throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
173 + " != " + get_name_of_h5_type(ds_info.ty));
174
175 // open existing dataset, get dataspace and select hyperslab
176 dataset ds = g.open_dataset(name);
177 dataspace file_dspace = H5Dget_space(ds);
178 herr_t err = H5Sselect_hyperslab(file_dspace, H5S_SELECT_SET, sl.offset.data(), sl.stride.data(), sl.count.data(),
179 (sl.block.empty() ? nullptr : sl.block.data()));
180 if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_slice: Selecting the hyperslab failed");
181
182 // memory dataspace
183 dataspace mem_dspace = make_mem_dspace(v);
184
185 // write to the selected hyperslab of the file dataset
186 if (H5Sget_simple_extent_npoints(file_dspace) > 0) {
187 err = H5Dwrite(ds, v.ty, mem_dspace, file_dspace, H5P_DEFAULT, v.start);
188 if (err < 0)
189 throw std::runtime_error("Error in h5::array_interface::write_slice: Writing the dataset " + name + " in the group " + g.name() + " failed");
190 }
191 }
192
193 void write_attribute(object obj, std::string const &name, array_view v) {
194 // check if the attribute already exists
195 if (H5LTfind_attribute(obj, name.c_str()) != 0)
196 throw std::runtime_error("Error in h5::array_interface::write_attribute: Attribute " + name + " already exists");
197
198 // memory dataspace
199 dataspace mem_dspace = make_mem_dspace(v);
200
201 // create attribute to write to
202 attribute attr = H5Acreate2(obj, name.c_str(), v.ty, mem_dspace, H5P_DEFAULT, H5P_DEFAULT);
203 if (!attr.is_valid()) throw std::runtime_error("Error in h5::array_interface::write_attribute: Creating the attribute " + name + " failed");
204
205 // write to the attribute
206 herr_t err = H5Awrite(attr, v.ty, v.start);
207 if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_attribute: Writing to the attribute " + name + " failed");
208 }
209
210 void read(group g, std::string const &name, array_view v, hyperslab sl) {
211 // open dataset and get dataspace
212 dataset ds = g.open_dataset(name);
213 dataspace file_dspace = H5Dget_space(ds);
214
215 // if provided, select the hyperslab of the file dataspace
216 if (not sl.empty()) {
217 herr_t err = H5Sselect_hyperslab(file_dspace, H5S_SELECT_SET, sl.offset.data(), sl.stride.data(), sl.count.data(),
218 (sl.block.empty() ? nullptr : sl.block.data()));
219 if (err < 0) throw std::runtime_error("Error in h5::array_interface::read: selecting the hyperslab failed");
220 }
221
222 // check consistency of input
223 auto ds_info = get_dataset_info(g, name);
224 if (H5Tget_class(v.ty) != H5Tget_class(ds_info.ty))
225 throw std::runtime_error("Error in h5::array_interface::read: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
226 + " != " + get_name_of_h5_type(ds_info.ty));
227
228 if (not hdf5_type_equal(v.ty, ds_info.ty))
229 std::cerr << "WARNING: HDF5 type mismatch while reading into an array_view: " + get_name_of_h5_type(v.ty)
230 + " != " + get_name_of_h5_type(ds_info.ty) + "\n";
231
232 auto sl_size = sl.size();
233 if (sl.empty()) { sl_size = std::accumulate(ds_info.lengths.begin(), ds_info.lengths.end(), (hsize_t)1, std::multiplies<>()); }
234 if (sl_size != v.slab.size()) throw std::runtime_error("Error in h5::array_interface::read: Incompatible sizes");
235
236 // memory dataspace
237 dataspace mem_dspace = make_mem_dspace(v);
238
239 // read the selected hyperslab from the file dataset
240 if (H5Sget_simple_extent_npoints(file_dspace) > 0) {
241 herr_t err = H5Dread(ds, v.ty, mem_dspace, file_dspace, H5P_DEFAULT, v.start);
242 if (err < 0)
243 throw std::runtime_error("Error in h5::array_interface::read: Reading the dataset " + name + " in the group " + g.name() + " failed");
244 }
245 }
246
247 void read_attribute(object obj, std::string const &name, array_view v) {
248 // open attribute
249 attribute attr = H5Aopen(obj, name.c_str(), H5P_DEFAULT);
250 if (!attr.is_valid()) throw std::runtime_error("Error in h5::array_interface::read_attribute: Opening the attribute " + name + " failed");
251
252 // get dataspace information
253 dataspace space = H5Aget_space(attr);
254 int rank = H5Sget_simple_extent_ndims(space);
255 if (rank != 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Attribute " + name + " has a rank != 0");
256
257 // get datatype information
258 auto eq = H5Tequal(H5Aget_type(attr), v.ty);
259 if (eq < 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: H5Tequal call failed");
260 if (eq == 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Incompatible HDF5 types");
261
262 // read the attribute
263 auto err = H5Aread(attr, v.ty, v.start);
264 if (err < 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Reading the attribute " + name + " failed");
265 }
266
267} // namespace h5::array_interface
Provides a generic interface to read/write n-dimensional arrays from/to HDF5.
A handle to an HDF5 group.
Definition group.hpp:44
dataset open_dataset(std::string const &key) const
Open a dataset with the given key in the group.
Definition group.cpp:140
std::string name() const
Get the name of the group.
Definition group.cpp:39
void unlink(std::string const &key, bool error_if_absent=false) const
Remove a link with the given key from the group.
Definition group.cpp:83
bool is_valid() const
Ensure that the wrapped HDF5 ID is valid (by calling H5Iis_valid).
Definition object.cpp:116
object datatype
Type alias for an HDF5 datatype.
Definition object.hpp:123
object attribute
Type alias for an HDF5 attribute.
Definition object.hpp:132
object dataset
Type alias for an HDF5 dataset.
Definition object.hpp:120
object dataspace
Type alias for an HDF5 dataspace.
Definition object.hpp:126
object proplist
Type alias for an HDF5 property list.
Definition object.hpp:129
bool hdf5_type_equal(datatype dt1, datatype dt2)
Check if two HDF5 datatypes are equal.
Definition object.cpp:198
std::string get_name_of_h5_type(datatype dt)
Get the name of an h5::datatype (for error messages).
Definition object.cpp:183
void read_attribute(object obj, std::string const &name, array_view v)
Read from an HDF5 attribute into an array view.
void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl)
Write an array view to a selected hyperslab of an existing HDF5 dataset.
void read(group g, std::string const &name, array_view v, hyperslab sl)
Read a given hyperslab from an HDF5 dataset into an array view.
void write_attribute(object obj, std::string const &name, array_view v)
Write an array view to an HDF5 attribute.
dataset_info get_dataset_info(dataset ds)
Retrieve the shape and the h5::datatype from a dataset.
void write(group g, std::string const &name, array_view const &v, bool compress)
Write an array view to an HDF5 dataset.
std::pair< v_t, v_t > get_parent_shape_and_h5_strides(long const *np_strides, int rank, long const *view_shape)
Given a view on an n-dimensional array (dataspace) by specifying its numpy/nda-style strides and its ...
void h5_write_attribute(object obj, std::string const &name, T const &x) H5_REQUIRES(std
Write a scalar to an HDF5 attribute.
Definition scalar.hpp:125
std::vector< hsize_t > v_t
Vector of h5::hsize_t used throughout the h5 library.
Definition utils.hpp:59
unsigned long long hsize_t
Size type used in HDF5.
Definition utils.hpp:55
Macros used in the h5 library.
Provides functions to read/write std::string, char* and h5::char_buf objects from/to HDF5.
Struct representing a view on an n-dimensional array/dataspace.
void * start
Pointer to the data of the array.
datatype ty
h5::datatype stored in the array.
bool is_complex
Whether the data is complex valued.
int rank() const
Get the rank of the view (including the possible added imaginary dimension).
hyperslab slab
h5::array_interface::hyperslab specifying the selection of the view.
Simple struct to store basic information about an HDF5 dataset.
Struct representing an HDF5 hyperslab.
auto size() const
Get the total number of elements in the hyperslab.
v_t block
Shape of a single block selected from the dataspace.
bool empty() const
Check whether the hyperslab is empty (has been initialized).
v_t stride
Stride in each dimension (in the HDF5 sense).
v_t offset
Index offset for each dimension.
v_t count
Number of elements or blocks to select along each dimension.
v_t shape() const
Get the shape of the selected hyperslab.
int rank() const
Get the rank of the hyperslab (including the possible added imaginary dimension).