TRIQS/h5 2.0.0
C++ interface to HDF5
Loading...
Searching...
No Matches
h5py_io.cpp
Go to the documentation of this file.
1
5
6// Don't import array API here - it's imported in the main module (module.wrap.cxx)
7#define NO_IMPORT_ARRAY
8#define PY_ARRAY_UNIQUE_SYMBOL _cpp2py_ARRAY_API
9#include <Python.h>
10#include <numpy/arrayobject.h>
11#include <c2py/c2py.hpp>
12
13#include "./h5py_io.hpp"
14#include <h5/generic.hpp>
15#include <h5/object.hpp>
16#include <h5/scalar.hpp>
17#include <h5/stl/string.hpp>
18#include <h5/stl/vector.hpp>
19
20#include <hdf5.h>
21#include <hdf5_hl.h>
22
23#include <algorithm>
24#include <cstddef>
25#include <complex>
26#include <stdexcept>
27#include <string>
28#include <vector>
29
30namespace h5 {
31
32 // anonymous namespace for internal functions/types
33 namespace {
34
35 // Mapping between an HDF5 type and its numpy type.
36 struct h5_py_type_t {
37 datatype hdf5_type;
38 int numpy_type;
39 size_t size;
40 };
41
42 // Table of mappings between basic HDF5 types and numpy types.
43 const auto h5_py_type_table = std::vector<h5_py_type_t>{
44 {hdf5_type<char>(), NPY_STRING, sizeof(char)},
45 {hdf5_type<signed char>(), NPY_BYTE, sizeof(signed char)},
46 {hdf5_type<unsigned char>(), NPY_UBYTE, sizeof(unsigned char)},
47 {hdf5_type<bool>(), NPY_BOOL, sizeof(bool)},
48 {hdf5_type<short>(), NPY_SHORT, sizeof(short)},
49 {hdf5_type<unsigned short>(), NPY_USHORT, sizeof(unsigned short)},
50 {hdf5_type<int>(), NPY_INT, sizeof(int)},
51 {hdf5_type<unsigned int>(), NPY_UINT, sizeof(unsigned int)},
52 {hdf5_type<long>(), NPY_LONG, sizeof(long)},
53 {hdf5_type<unsigned long>(), NPY_ULONG, sizeof(unsigned long)},
54 {hdf5_type<long long>(), NPY_LONGLONG, sizeof(long long)},
55 {hdf5_type<unsigned long long>(), NPY_ULONGLONG, sizeof(unsigned long long)},
56 {hdf5_type<float>(), NPY_FLOAT, sizeof(float)},
57 {hdf5_type<double>(), NPY_DOUBLE, sizeof(double)},
58 {hdf5_type<long double>(), NPY_LONGDOUBLE, sizeof(long double)},
59 {hdf5_type<std::complex<float>>(), NPY_CFLOAT, sizeof(std::complex<float>)},
60 {hdf5_type<std::complex<double>>(), NPY_CDOUBLE, sizeof(std::complex<double>)},
61 {hdf5_type<std::complex<long double>>(), NPY_CLONGDOUBLE, sizeof(std::complex<long double>)} //
62 };
63
64 // Given an HDF5 datatype, return the size of the corresponding C data type in bytes.
65 long h5_c_size(datatype t) {
66 auto pos = std::ranges::find_if(h5_py_type_table, [t](auto const &x) { return hdf5_type_equal(x.hdf5_type, t); });
67 if (pos == h5_py_type_table.end()) throw std::runtime_error("HDF5/Python Error: HDF5 type not supported");
68 return static_cast<long>(pos->size);
69 }
70
71 // Given an HDF5 datatype, return the corresponding numpy type.
72 int h5_to_npy(datatype t, bool is_complex) {
73 auto pos = std::ranges::find_if(h5_py_type_table, [t](auto const &x) { return hdf5_type_equal(x.hdf5_type, t); });
74 if (pos == h5_py_type_table.end()) throw std::runtime_error("HDF5/Python Error: HDF5 type not supported");
75 int res = pos->numpy_type;
76 if (is_complex) {
77 if (res == NPY_DOUBLE) res = NPY_CDOUBLE;
78 if (res == NPY_FLOAT) res = NPY_CFLOAT;
79 if (res == NPY_LONGDOUBLE) res = NPY_CLONGDOUBLE;
80 }
81 return res;
82 }
83
84 // Given a numpy type, return the corresponding HDF5 type.
85 datatype npy_to_h5(int t) {
86 auto pos = std::ranges::find_if(h5_py_type_table, [t](auto const &x) { return x.numpy_type == t; });
87 if (pos == h5_py_type_table.end()) throw std::runtime_error("HDF5/Python Error: Numpy type not supported");
88 return pos->hdf5_type;
89 }
90
91 // Make an h5::array_interface::array_view from a given numpy array object.
92 array_interface::array_view make_av_from_npy(PyArrayObject *arr_obj) {
93 // get element type and rank of numpy array
94 int numpy_type = PyArray_DESCR(arr_obj)->type_num;
95 int rank = PyArray_NDIM(arr_obj);
96
97 // get corresponding HDF5 type
98 datatype dt = npy_to_h5(numpy_type);
99 const bool is_complex = (numpy_type == NPY_CDOUBLE) or (numpy_type == NPY_CLONGDOUBLE) or (numpy_type == NPY_CFLOAT);
100
101 // initialize array view and get the shape of the array and the numpy strides
102 array_interface::array_view res{dt, PyArray_DATA(arr_obj), rank, is_complex};
103 std::vector<long> c_strides(rank + is_complex, 0), c_shape(rank + is_complex, 2);
104 for (int i = 0; i < rank; ++i) {
105 c_shape[i] = PyArray_DIMS(arr_obj)[i];
106 res.slab.count[i] = static_cast<size_t>(c_shape[i]);
107 c_strides[i] = PyArray_STRIDES(arr_obj)[i] / static_cast<long>(h5_c_size(dt));
108 }
109
110 // get the parent shape and HDF5 strides from the numpy strides
111 auto [Ltot, stri] = h5::array_interface::get_parent_shape_and_h5_strides(c_strides.data(), rank + is_complex, c_shape.data());
112 for (int i = 0; i < rank; ++i) {
113 res.parent_shape[i] = Ltot[i];
114 res.slab.stride[i] = stri[i];
115 }
116
117 return res;
118 }
119
120 // Read any integer type from HDF5 and return a Python long.
121 PyObject *h5_read_any_int(group g, std::string const &name, datatype ty) {
122 if (H5Tequal(ty, H5T_NATIVE_SHORT)) {
123 return PyLong_FromLong(h5_read<short>(g, name));
124 } else if (H5Tequal(ty, H5T_NATIVE_INT)) {
125 return PyLong_FromLong(h5_read<int>(g, name));
126 } else if (H5Tequal(ty, H5T_NATIVE_LONG)) {
127 return PyLong_FromLong(h5_read<long>(g, name));
128 } else if (H5Tequal(ty, H5T_NATIVE_LLONG)) {
129 return PyLong_FromLongLong(h5_read<long long>(g, name));
130 } else if (H5Tequal(ty, H5T_NATIVE_USHORT)) {
131 return PyLong_FromUnsignedLong(h5_read<unsigned short>(g, name));
132 } else if (H5Tequal(ty, H5T_NATIVE_UINT)) {
133 return PyLong_FromUnsignedLong(h5_read<unsigned int>(g, name));
134 } else if (H5Tequal(ty, H5T_NATIVE_ULONG)) {
135 return PyLong_FromUnsignedLong(h5_read<unsigned long>(g, name));
136 } else if (H5Tequal(ty, H5T_NATIVE_ULLONG)) {
137 return PyLong_FromUnsignedLongLong(h5_read<unsigned long long>(g, name));
138 } else {
139 PyErr_SetString(PyExc_RuntimeError, "h5::h5_read_bare: Integer type can not be read from HDF5");
140 return nullptr;
141 }
142 }
143
144 } // namespace
145
146 void h5_write_bare(group g, std::string const &name, PyObject *ob) {
147 if (PyArray_Check(ob)) {
148 auto *arr_obj = (PyArrayObject *)ob; // NOLINT
149 array_interface::write(g, name, make_av_from_npy(arr_obj), true);
150 } else if (PyArray_CheckScalar(ob)) {
151 // treat numpy scalars as 0-dimensional ndarrays
152 c2py::pyref obsc = PyArray_FromScalar(ob, nullptr);
153 h5_write_bare(g, name, obsc);
154 } else if (PyFloat_Check(ob)) {
155 h5_write(g, name, PyFloat_AsDouble(ob));
156 } else if (PyBool_Check(ob)) {
157 h5_write(g, name, static_cast<bool>(PyLong_AsLong(ob)));
158 } else if (PyLong_Check(ob)) {
159 h5_write(g, name, static_cast<long>(PyLong_AsLong(ob)));
160 } else if (PyUnicode_Check(ob)) {
161 h5_write(g, name, static_cast<const char *>(PyUnicode_AsUTF8(ob)));
162 } else if (PyComplex_Check(ob)) {
163 h5_write(g, name, std::complex<double>{PyComplex_RealAsDouble(ob), PyComplex_ImagAsDouble(ob)});
164 } else {
165 PyErr_SetString(PyExc_RuntimeError, "h5::h5_write_bare: Python object can not be written to HDF5");
166 return;
167 }
168 }
169
170 c2py::pyref h5_read_bare(group g, std::string const &name) {
171 auto ds_info = array_interface::get_dataset_info(g, name);
172
173 // rank 0 - scalar case
174 if (ds_info.rank() == 0) {
175 // float
176 if (H5Tget_class(ds_info.ty) == H5T_FLOAT) {
177 double x{};
178 h5_read(g, name, x);
179 return PyFloat_FromDouble(x);
180 }
181
182 // integer
183 if (H5Tget_class(ds_info.ty) == H5T_INTEGER) { return h5_read_any_int(g, name, ds_info.ty); }
184
185 // bool
186 if (H5Tequal(ds_info.ty, h5::hdf5_type<bool>())) {
187 bool x{};
188 h5_read(g, name, x);
189 return PyBool_FromLong(long(x));
190 }
191
192 // string
193 if (H5Tget_class(ds_info.ty) == H5T_STRING) {
194 std::string x;
195 h5_read(g, name, x);
196 return PyUnicode_FromString(x.c_str());
197 }
198
199 // complex
200 if (H5Tequal(ds_info.ty, hdf5_type<dcplx_t>())) {
201 dcplx_t x{};
202 h5_read(g, name, x);
203 return PyComplex_FromDoubles(x.r, x.i);
204 }
205
206 // otherwise throw and error and return nullptr
207 PyErr_SetString(PyExc_RuntimeError, "h5::h5_read_bare: Scalar type can not be read from HDF5");
208 return nullptr;
209 }
210
211 // rank 1 - complex scalar case
212 if ((ds_info.rank() == 1) and ds_info.has_complex_attribute) {
213 std::complex<double> z{};
214 h5_read(g, name, z);
215 return PyComplex_FromDoubles(z.real(), z.imag());
216 }
217
218 // rank 1 or 2 - string array case
219 if (H5Tget_class(ds_info.ty) == H5T_STRING) {
220 if (ds_info.rank() == 1) {
221 auto x = h5_read<std::vector<std::string>>(g, name);
222 return c2py::cxx2py(x);
223 }
224
225 if (ds_info.rank() == 2) {
227 return c2py::cxx2py(x);
228 }
229
230 PyErr_SetString(PyExc_RuntimeError, "h5::h5_read_bare: String dataset with rank > 2 is not allowed");
231 return nullptr;
232 }
233
234 // rank > 0 - general array case
235 auto shape = std::vector<npy_intp>(ds_info.lengths.begin(), ds_info.lengths.end());
236 auto numpy_type = h5_to_npy(ds_info.ty, ds_info.has_complex_attribute);
237
238 // get rid of complex h5 dimension if necessary
239 if (ds_info.has_complex_attribute) shape.pop_back();
240
241 // create numpy array and read into it
242 PyObject *ob = PyArray_SimpleNewFromDescr(int(shape.size()), &shape[0], PyArray_DescrFromType(numpy_type));
243 if (PyErr_Occurred()) return nullptr;
244 array_interface::read(g, name, make_av_from_npy((PyArrayObject *)ob)); // NOLINT
245 return ob;
246 }
247
248} // namespace h5
A handle to an HDF5 group.
Definition group.hpp:44
Provides a generic interface for reading/writing data from/to various HDF5 objects.
object datatype
Type alias for an HDF5 datatype.
Definition object.hpp:123
bool hdf5_type_equal(datatype dt1, datatype dt2)
Check if two HDF5 datatypes are equal.
Definition object.cpp:198
datatype hdf5_type()
Map a given C++ type to an HDF5 datatype.
Definition object.hpp:156
c2py::pyref h5_read_bare(group g, std::string const &name)
Read a dataset from an HDF5 group and return it as a Python object.
Definition h5py_io.cpp:170
void h5_write_bare(group g, std::string const &name, PyObject *ob)
Write a Python object to an HDF5 group.
Definition h5py_io.cpp:146
void read(group g, std::string const &name, array_view v, hyperslab sl)
Read a given hyperslab from an HDF5 dataset into an array view.
dataset_info get_dataset_info(dataset ds)
Retrieve the shape and the h5::datatype from a dataset.
void write(group g, std::string const &name, array_view const &v, bool compress)
Write an array view to an HDF5 dataset.
std::pair< v_t, v_t > get_parent_shape_and_h5_strides(long const *np_strides, int rank, long const *view_shape)
Given a view on an n-dimensional array (dataspace) by specifying its numpy/nda-style strides and its ...
T h5_read(group g, std::string const &key)
Generic implementation for reading from an HDF5 dataset/subgroup.
Definition generic.hpp:53
void h5_write(group g, std::string const &name, T const &x)
Write a scalar to an HDF5 dataset.
Definition scalar.hpp:71
Provides helper functions to read/write Python objects to/from HDF5 files.
Provides a generic handle for HDF5 objects.
Provides a generic interface to read/write scalars from/to HDF5.
Provides functions to read/write std::string, char* and h5::char_buf objects from/to HDF5.
Struct representing a view on an n-dimensional array/dataspace.
A complex compound type consisting of two doubles to represent a complex number.
Definition complex.hpp:39
double r
Real part.
Definition complex.hpp:41
double i
Imaginary part.
Definition complex.hpp:44
Provides functions to read/write std::vector objects from/to HDF5.