TRIQS/h5 1.3.0
C++ interface to HDF5
Loading...
Searching...
No Matches
array_interface.cpp
Go to the documentation of this file.
1// Copyright (c) 2019-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
22#include "./array_interface.hpp"
23#include "./macros.hpp"
24#include "./stl/string.hpp"
25
26#include <hdf5.h>
27#include <hdf5_hl.h>
28
29#include <numeric>
30#include <algorithm>
31#include <iostream>
32#include <stdexcept>
33#include <utility>
34#include <vector>
35
36namespace h5::array_interface {
37
38 namespace {
39
40 // Create an HDF5 memory dataspace.
41 dataspace make_mem_dspace(array_view const &v) {
42 // scalar case
43 if (v.rank() == 0) return H5Screate(H5S_SCALAR);
44
45 // create a dataspace of rank v.rank() and with shape v.parent_shape
46 dataspace dspace = H5Screate_simple(v.slab.rank(), v.parent_shape.data(), nullptr);
47 if (!dspace.is_valid()) throw std::runtime_error("Error in make_mem_dspace: Creating the dataspace for an array_view failed");
48
49 // select the hyperslab according to v.slab
50 herr_t err = H5Sselect_hyperslab(dspace, H5S_SELECT_SET, v.slab.offset.data(), v.slab.stride.data(), v.slab.count.data(),
51 (v.slab.block.empty() ? nullptr : v.slab.block.data()));
52 if (err < 0) throw std::runtime_error("Error in make_mem_dspace: Selecting the hyperslab failed");
53
54 // return the dataspace
55 return dspace;
56 }
57
58 } // namespace
59
60 std::pair<v_t, v_t> get_parent_shape_and_h5_strides(long const *np_strides, int rank, long view_size) {
61 // scalar case: return empty vectors
62 if (rank == 0) return {};
63
64 // empty view case: return (0,0,0), (1,1,1)
65 if (view_size == 0) return {v_t(rank, 0), v_t(rank, 1)};
66
67 // for the general case, we would like to find a parent_shape and h5_strides such that the following equations hold (rank = N):
68 // 1. np_strides[N - 1] = h5_strides[N - 1]
69 // 2. np_strides[N - 2] = h5_strides[N - 2] * parent_shape[N - 1]
70 // 3. np_strides[N - 3] = h5_strides[N - 3] * parent_shape[N - 1] * parent_shape[N - 2]
71 // ...
72 // N. np_strides[N - N] = h5_strides[N - N] * parent_shape[N - 1] * parent_shape[N - 2] * ... * parent_shape[1]
73 //
74 // Note: The shape of the parent array as well as the HDF5 strides that fulfill the above equations are not unique.
75 // This is not a problem as long as HDF5 manages to select the correct elements in memory.
76
77 // create the result vectors
78 v_t parent_shape(rank), h5_strides(rank);
79
80 // We initialize the h5_strides array to the values of np_strides
81 // and successively divide off the parent_shape values as they appear
82 // in the equations above.
83 for (int u = 0; u < rank; ++u) h5_strides[u] = np_strides[u];
84 for (int u = rank - 2; u >= 0; --u) {
85 hsize_t gcd = h5_strides[u];
86 for (int v = u - 1; v >= 0; --v) { gcd = std::gcd(gcd, h5_strides[v]); }
87 parent_shape[u + 1] = gcd;
88 for (int v = u; v >= 0; --v) { h5_strides[v] /= gcd; }
89 }
90
91 // from the above equations it follows that parent_shape[0] (size of the slowest varying dimension)
92 // is arbitrary as long as it is big enough to contain the elements selected by the hyperslab
93 parent_shape[0] = view_size * h5_strides[0];
94
95 return {parent_shape, h5_strides};
96 }
97
99 // retrieve shape and datatype information
100 datatype ty = H5Dget_type(ds);
101 bool has_complex_attribute = H5LTfind_attribute(ds, "__complex__");
102 dataspace dspace = H5Dget_space(ds);
103 int rank = H5Sget_simple_extent_ndims(dspace);
104 v_t dims_out(rank);
105 H5Sget_simple_extent_dims(dspace, dims_out.data(), nullptr);
106
107 return {std::move(dims_out), ty, has_complex_attribute};
108 }
109
110 dataset_info get_dataset_info(group g, std::string const &name) {
111 dataset ds = g.open_dataset(name);
112 return get_dataset_info(ds);
113 }
114
115 void write(group g, std::string const &name, array_view const &v, bool compress) {
116 // unlink the dataset if it already exists
117 g.unlink(name);
118
119 // shape of the hyperslab in memory
120 auto hs_shape = v.slab.shape();
121
122 // chunk the dataset and add compression
123 proplist cparms = H5P_DEFAULT;
124 if (compress and (v.rank() != 0)) {
125 int n_dims = v.rank();
126 std::vector<hsize_t> chunk_dims(n_dims);
127 hsize_t const max_chunk_size = hsize_t{1UL << 32} - hsize_t{1}; // 2^32 - 1 = 4 GB
128 hsize_t chunk_size = H5Tget_size(v.ty);
129 for (int i = v.rank() - 1; i >= 0; --i) {
130 H5_ASSERT(max_chunk_size >= chunk_size);
131 hsize_t max_dim = max_chunk_size / chunk_size;
132 chunk_dims[i] = std::clamp(hs_shape[i], hsize_t{1}, max_dim);
133 chunk_size *= chunk_dims[i];
134 }
135 cparms = H5Pcreate(H5P_DATASET_CREATE);
136 H5Pset_chunk(cparms, n_dims, chunk_dims.data());
137 H5Pset_deflate(cparms, 1);
138 }
139
140 // dataspace for the dataset in the file
141 dataspace file_dspace = H5Screate_simple(v.slab.rank(), hs_shape.data(), nullptr);
142
143 // create the dataset in the file
144 dataset ds = H5Dcreate2(g, name.c_str(), v.ty, file_dspace, H5P_DEFAULT, cparms, H5P_DEFAULT);
145 if (!ds.is_valid())
146 throw std::runtime_error("Error in h5::array_interface::write: Creating the dataset " + name + " in the group " + g.name() + " failed");
147
148 // memory dataspace
149 dataspace mem_dspace = make_mem_dspace(v);
150
151 // write to the file dataset
152 if (H5Sget_simple_extent_npoints(mem_dspace) > 0) { // avoid writing empty arrays
153 herr_t err = H5Dwrite(ds, v.ty, mem_dspace, H5S_ALL, H5P_DEFAULT, v.start);
154 if (err < 0)
155 throw std::runtime_error("Error in h5::array_interface::write: Writing to the dataset " + name + " in the group" + g.name() + " failed");
156 }
157
158 // add complex attribute if the data is complex valued
159 if (v.is_complex) h5_write_attribute(ds, "__complex__", "1");
160 }
161
162 void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl) {
163 // empty hyperslab
164 if (sl.empty()) return;
165
166 // check consistency of input
167 if (v.slab.size() != sl.size()) throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible sizes");
168
169 auto ds_info = get_dataset_info(g, name);
170 if (not hdf5_type_equal(v.ty, ds_info.ty))
171 throw std::runtime_error("Error in h5::array_interface::write_slice: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
172 + " != " + get_name_of_h5_type(ds_info.ty));
173
174 // open existing dataset, get dataspace and select hyperslab
175 dataset ds = g.open_dataset(name);
176 dataspace file_dspace = H5Dget_space(ds);
177 herr_t err = H5Sselect_hyperslab(file_dspace, H5S_SELECT_SET, sl.offset.data(), sl.stride.data(), sl.count.data(),
178 (sl.block.empty() ? nullptr : sl.block.data()));
179 if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_slice: Selecting the hyperslab failed");
180
181 // memory dataspace
182 dataspace mem_dspace = make_mem_dspace(v);
183
184 // write to the selected hyperslab of the file dataset
185 if (H5Sget_simple_extent_npoints(file_dspace) > 0) {
186 err = H5Dwrite(ds, v.ty, mem_dspace, file_dspace, H5P_DEFAULT, v.start);
187 if (err < 0)
188 throw std::runtime_error("Error in h5::array_interface::write_slice: Writing the dataset " + name + " in the group " + g.name() + " failed");
189 }
190 }
191
192 void write_attribute(object obj, std::string const &name, array_view v) {
193 // check if the attribute already exists
194 if (H5LTfind_attribute(obj, name.c_str()) != 0)
195 throw std::runtime_error("Error in h5::array_interface::write_attribute: Attribute " + name + " already exists");
196
197 // memory dataspace
198 dataspace mem_dspace = make_mem_dspace(v);
199
200 // create attribute to write to
201 attribute attr = H5Acreate2(obj, name.c_str(), v.ty, mem_dspace, H5P_DEFAULT, H5P_DEFAULT);
202 if (!attr.is_valid()) throw std::runtime_error("Error in h5::array_interface::write_attribute: Creating the attribute " + name + " failed");
203
204 // write to the attribute
205 herr_t err = H5Awrite(attr, v.ty, v.start);
206 if (err < 0) throw std::runtime_error("Error in h5::array_interface::write_attribute: Writing to the attribute " + name + " failed");
207 }
208
209 void read(group g, std::string const &name, array_view v, hyperslab sl) {
210 // open dataset and get dataspace
211 dataset ds = g.open_dataset(name);
212 dataspace file_dspace = H5Dget_space(ds);
213
214 // if provided, select the hyperslab of the file dataspace
215 if (not sl.empty()) {
216 herr_t err = H5Sselect_hyperslab(file_dspace, H5S_SELECT_SET, sl.offset.data(), sl.stride.data(), sl.count.data(),
217 (sl.block.empty() ? nullptr : sl.block.data()));
218 if (err < 0) throw std::runtime_error("Error in h5::array_interface::read: selecting the hyperslab failed");
219 }
220
221 // check consistency of input
222 auto ds_info = get_dataset_info(g, name);
223 if (H5Tget_class(v.ty) != H5Tget_class(ds_info.ty))
224 throw std::runtime_error("Error in h5::array_interface::read: Incompatible HDF5 types: " + get_name_of_h5_type(v.ty)
225 + " != " + get_name_of_h5_type(ds_info.ty));
226
227 if (not hdf5_type_equal(v.ty, ds_info.ty))
228 std::cerr << "WARNING: HDF5 type mismatch while reading into an array_view: " + get_name_of_h5_type(v.ty)
229 + " != " + get_name_of_h5_type(ds_info.ty) + "\n";
230
231 auto sl_size = sl.size();
232 if (sl.empty()) { sl_size = std::accumulate(ds_info.lengths.begin(), ds_info.lengths.end(), (hsize_t)1, std::multiplies<>()); }
233 if (sl_size != v.slab.size()) throw std::runtime_error("Error in h5::array_interface::read: Incompatible sizes");
234
235 // memory dataspace
236 dataspace mem_dspace = make_mem_dspace(v);
237
238 // read the selected hyperslab from the file dataset
239 if (H5Sget_simple_extent_npoints(file_dspace) > 0) {
240 herr_t err = H5Dread(ds, v.ty, mem_dspace, file_dspace, H5P_DEFAULT, v.start);
241 if (err < 0)
242 throw std::runtime_error("Error in h5::array_interface::read: Reading the dataset " + name + " in the group " + g.name() + " failed");
243 }
244 }
245
246 void read_attribute(object obj, std::string const &name, array_view v) {
247 // open attribute
248 attribute attr = H5Aopen(obj, name.c_str(), H5P_DEFAULT);
249 if (!attr.is_valid()) throw std::runtime_error("Error in h5::array_interface::read_attribute: Opening the attribute " + name + " failed");
250
251 // get dataspace information
252 dataspace space = H5Aget_space(attr);
253 int rank = H5Sget_simple_extent_ndims(space);
254 if (rank != 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Attribute " + name + " has a rank != 0");
255
256 // get datatype information
257 auto eq = H5Tequal(H5Aget_type(attr), v.ty);
258 if (eq < 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: H5Tequal call failed");
259 if (eq == 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Incompatible HDF5 types");
260
261 // read the attribute
262 auto err = H5Aread(attr, v.ty, v.start);
263 if (err < 0) throw std::runtime_error("Error in h5::array_interface::read_attribute: Reading the attribute " + name + " failed");
264 }
265
266} // namespace h5::array_interface
Provides a generic interface to read/write n-dimensional arrays from/to HDF5.
A handle to an HDF5 group.
Definition group.hpp:44
dataset open_dataset(std::string const &key) const
Open a dataset with the given key in the group.
Definition group.cpp:140
std::string name() const
Get the name of the group.
Definition group.cpp:39
void unlink(std::string const &key, bool error_if_absent=false) const
Remove a link with the given key from the group.
Definition group.cpp:83
A generic handle for HDF5 objects.
Definition object.hpp:49
bool is_valid() const
Ensure that the wrapped HDF5 ID is valid (by calling H5Iis_valid).
Definition object.cpp:116
object dataspace
Type alias for an HDF5 dataspace.
Definition object.hpp:126
bool hdf5_type_equal(datatype dt1, datatype dt2)
Check if two HDF5 datatypes are equal.
Definition object.cpp:198
std::string get_name_of_h5_type(datatype dt)
Get the name of an h5::datatype (for error messages).
Definition object.cpp:183
void read_attribute(object obj, std::string const &name, array_view v)
Read from an HDF5 attribute into an array view.
void write_slice(group g, std::string const &name, array_view const &v, hyperslab sl)
Write an array view to a selected hyperslab of an existing HDF5 dataset.
void read(group g, std::string const &name, array_view v, hyperslab sl)
Read a given hyperslab from an HDF5 dataset into an array view.
void write_attribute(object obj, std::string const &name, array_view v)
Write an array view to an HDF5 attribute.
dataset_info get_dataset_info(dataset ds)
Retrieve the shape and the h5::datatype from a dataset.
void write(group g, std::string const &name, array_view const &v, bool compress)
Write an array view to an HDF5 dataset.
std::pair< v_t, v_t > get_parent_shape_and_h5_strides(long const *np_strides, int rank, long view_size)
Given a view on an n-dimensional array (dataspace) by specifying its numpy/nda-style strides and its ...
void h5_write_attribute(object obj, std::string const &name, T const &x) H5_REQUIRES(std
Write a scalar to an HDF5 attribute.
Definition scalar.hpp:125
std::vector< hsize_t > v_t
Vector of h5::hsize_t used throughout the h5 library.
Definition utils.hpp:59
unsigned long long hsize_t
Size type used in HDF5.
Definition utils.hpp:55
Macros used in the h5 library.
Provides functions to read/write std::string, char* and h5::char_buf objects from/to HDF5.
Struct representing a view on an n-dimensional array/dataspace.
void * start
Pointer to the data of the array.
datatype ty
h5::datatype stored in the array.
bool is_complex
Whether the data is complex valued.
int rank() const
Get the rank of the view (including the possible added imaginary dimension).
hyperslab slab
h5::array_interface::hyperslab specifying the selection of the view.
Simple struct to store basic information about an HDF5 dataset.
Struct representing an HDF5 hyperslab.
auto size() const
Get the total number of elements in the hyperslab.
v_t block
Shape of a single block selected from the dataspace.
bool empty() const
Check whether the hyperslab is empty (has been initialized).
v_t stride
Stride in each dimension (in the HDF5 sense).
v_t offset
Index offset for each dimension.
v_t count
Number of elements or blocks to select along each dimension.
v_t shape() const
Get the shape of the selected hyperslab.
int rank() const
Get the rank of the hyperslab (including the possible added imaginary dimension).