TRIQS/h5 1.3.0
C++ interface to HDF5
Loading...
Searching...
No Matches
string.cpp
Go to the documentation of this file.
1// Copyright (c) 2019-2024 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Thomas Hahn, Olivier Parcollet, Nils Wentzell
16
22#include "./string.hpp"
23#include "../macros.hpp"
24#include "../utils.hpp"
25
26#include <hdf5.h>
27#include <hdf5_hl.h>
28
29#include <algorithm>
30#include <array>
31#include <cstddef>
32#include <functional>
33#include <numeric>
34#include <stdexcept>
35#include <string>
36#include <utility>
37#include <vector>
38
39namespace h5 {
40
41 namespace {
42
43 // Returns an HDF5 datatype for a fixed-sized string with the given size or a variable-sized
44 // string if size == H5T_VARIABLE.
45 datatype str_dtype(size_t size = H5T_VARIABLE) {
46 datatype dt = H5Tcopy(H5T_C_S1);
47 auto err = H5Tset_size(dt, size);
48 H5Tset_cset(dt, H5T_CSET_UTF8);
49 if (err < 0) throw std::runtime_error("Error in str_dtype: H5Tset_size call failed");
50 return dt;
51 }
52
53 } // namespace
54
55 void h5_write(group g, std::string const &name, std::string const &s) {
56 // create the dataset for a variable-sized string
57 datatype dt = str_dtype();
58 dataspace space = H5Screate(H5S_SCALAR);
59 dataset ds = g.create_dataset(name, dt, space);
60
61 // write the string to dataset
62 auto *s_ptr = s.c_str();
63 auto err = H5Dwrite(ds, dt, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s_ptr);
64 if (err < 0) throw std::runtime_error("Error in h5_write: Writing a string to the dataset " + name + " in the group " + g.name() + " failed");
65 }
66
67 void h5_read(group g, std::string const &name, std::string &s) {
68 // clear the string
69 s = "";
70
71 // open the dataset and get dataspace and datatype information
72 dataset ds = g.open_dataset(name);
73 dataspace dspace = H5Dget_space(ds);
74 int rank = H5Sget_simple_extent_ndims(dspace);
75 if (rank != 0) throw std::runtime_error("Error in h5_read: Reading a string from a dataspace with rank != 0 is not possible");
76
77 datatype dt = H5Dget_type(ds);
78 H5_ASSERT(H5Tget_class(dt) == H5T_STRING);
79
80 // variable-sized string
81 if (H5Tis_variable_str(dt)) {
82 // first read into a char* pointer, then copy into the string
83 std::array<char *, 1> rd_ptr{nullptr};
84 auto err = H5Dread(ds, dt, H5S_ALL, H5S_ALL, H5P_DEFAULT, rd_ptr.data());
85 if (err < 0) throw std::runtime_error("Error in h5_read: Reading a string from the dataset " + name + " in the group " + g.name() + " failed");
86 s.append(rd_ptr[0]);
87
88 // free the resources allocated in the variable-length read
89 err = H5Dvlen_reclaim(dt, dspace, H5P_DEFAULT, rd_ptr.data());
90 if (err < 0) throw std::runtime_error("Error in h5_read: Freeing resources after reading a variable-length string failed");
91 } else { // fixed-sized string
92 std::vector<char> buf(H5Tget_size(dt) + 1, 0x00);
93 auto err = H5Dread(ds, dt, H5S_ALL, H5S_ALL, H5P_DEFAULT, &buf[0]);
94 if (err < 0) throw std::runtime_error("Error in h5_read: Reading a string from the dataset " + name + " in the group " + g.name() + " failed");
95 s.append(&buf.front());
96 }
97 }
98
99 void h5_write_attribute(object obj, std::string const &name, std::string const &s) {
100 // create the variable-sized string datatype and the dataspace
101 datatype dt = str_dtype();
102 dataspace space = H5Screate(H5S_SCALAR);
103
104 // create the attribute
105 attribute attr = H5Acreate2(obj, name.c_str(), dt, space, H5P_DEFAULT, H5P_DEFAULT);
106 if (!attr.is_valid()) throw std::runtime_error("Error in h5_write_attribute: Creating the attribute " + name + " failed");
107
108 // write the string to attribute
109 auto *s_ptr = s.c_str();
110 herr_t err = H5Awrite(attr, dt, &s_ptr);
111 if (err < 0) throw std::runtime_error("Error in h5_write_attribute: Writing a string to the attribute " + name + " failed");
112 }
113
114 void h5_read_attribute(object obj, std::string const &name, std::string &s) {
115 // clear the string and return if the attribute is not present
116 s = "";
117 if (H5LTfind_attribute(obj, name.c_str()) == 0) return;
118
119 // open the attribute and get dataspace and datatype information
120 attribute attr = H5Aopen(obj, name.c_str(), H5P_DEFAULT);
121 dataspace dspace = H5Aget_space(attr);
122 int rank = H5Sget_simple_extent_ndims(dspace);
123 if (rank != 0) throw std::runtime_error("Error in h5_read_attribute: Reading a string from a dataspace with rank != 0 is not possible");
124
125 datatype dt = H5Aget_type(attr);
126 H5_ASSERT(H5Tget_class(dt) == H5T_STRING);
127
128 // variable-sized string
129 if (H5Tis_variable_str(dt)) {
130 // first read into a char* pointer, then copy into the string
131 std::array<char *, 1> rd_ptr{nullptr};
132 auto err = H5Aread(attr, dt, rd_ptr.data());
133 if (err < 0) throw std::runtime_error("Error in h5_read_attribute: Reading a string from the attribute " + name + " failed");
134 s.append(rd_ptr[0]);
135
136 // free the resources allocated in the variable-length read
137 err = H5Dvlen_reclaim(dt, dspace, H5P_DEFAULT, rd_ptr.data());
138 if (err < 0) throw std::runtime_error("Error in h5_read_attribute: Freeing resources after reading a variable-length string failed");
139 } else { // fixed-sized string
140 std::vector<char> buf(H5Tget_size(dt) + 1, 0x00);
141 auto err = H5Aread(attr, dt, (void *)(&buf[0]));
142 if (err < 0) throw std::runtime_error("Error in h5_read_attribute: Reading a string from the attribute " + name + " failed");
143 s.append(&buf.front());
144 }
145 }
146
147 void h5_write_attribute_to_key(group g, std::string const &key, std::string const &name, std::string const &s) {
148 // create the variable-sized string datatype and dataspace
149 datatype dt = str_dtype();
150 dataspace dspace = H5Screate(H5S_SCALAR);
151
152 // create the attribute for a given key
153 attribute attr = H5Acreate_by_name(g, key.c_str(), name.c_str(), dt, dspace, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
154 if (!attr.is_valid()) throw std::runtime_error("Error in h5_write_attribute_to_key: Creating the attribute " + name + " failed");
155
156 // write the string to the attribute
157 herr_t err = H5Awrite(attr, dt, (void *)(s.c_str()));
158 if (err < 0) throw std::runtime_error("Error in h5_write_attribute_to_key: Writing a string to the attribute " + name + " failed");
159 }
160
161 void h5_read_attribute_from_key(group g, std::string const &key, std::string const &name, std::string &s) {
162 // clear the string and return if the attribute is not present
163 s = "";
164 if (H5Aexists_by_name(g, key.c_str(), name.c_str(), H5P_DEFAULT) == 0) return;
165
166 // open the attribute and get dataspace and datatype information
167 attribute attr = H5Aopen_by_name(g, key.c_str(), name.c_str(), H5P_DEFAULT, H5P_DEFAULT);
168 dataspace dspace = H5Aget_space(attr);
169 int rank = H5Sget_simple_extent_ndims(dspace);
170 if (rank != 0) throw std::runtime_error("Error in h5_read_attribute_to_key: Reading a string from a dataspace with rank != 0 is not possible");
171
172 datatype dt = H5Aget_type(attr);
173 H5_ASSERT(H5Tget_class(dt) == H5T_STRING);
174
175 // variable-sized string
176 if (H5Tis_variable_str(dt)) {
177 // first read into a char* pointer, then copy into the string
178 std::array<char *, 1> rd_ptr{nullptr};
179 auto err = H5Aread(attr, dt, rd_ptr.data());
180 if (err < 0) throw std::runtime_error("Error in h5_read_attribute_to_key: Reading a string from the attribute " + name + " failed");
181 s.append(rd_ptr[0]);
182
183 // free the resources allocated in the variable-length read
184 err = H5Dvlen_reclaim(dt, dspace, H5P_DEFAULT, rd_ptr.data());
185 if (err < 0) throw std::runtime_error("Error in h5_read_attribute_to_key: Rreeing resources after reading a variable-length string failed");
186 } else { // fixed-sized string
187 std::vector<char> buf(H5Tget_size(dt) + 1, 0x00);
188 auto err = H5Aread(attr, dt, &buf[0]);
189 if (err < 0) throw std::runtime_error("Error in h5_read_attribute_to_key: Reading a string from the attribute " + name + " failed");
190 s.append(&buf.front());
191 }
192 }
193
194 // HDF5 datatype of a char_buf is a fixed-sized string
195 datatype char_buf::dtype() const { return str_dtype(lengths.back()); }
196
197 // dataspace is an n-dimensional array of fixed-sized strings, each of length max_length + 1
199 dataspace ds = H5Screate_simple(static_cast<int>(lengths.size()) - 1, lengths.data(), nullptr);
200 if (!ds.is_valid()) throw make_runtime_error("Error in h5::char_buf: Creating the dataspace for the char_buf failed");
201 return ds;
202 }
203
204 void h5_write(group g, std::string const &name, char_buf const &cb) {
205 // create the dataset for the char_buf
206 auto dt = cb.dtype();
207 auto dspace = cb.dspace();
208 dataset ds = g.create_dataset(name, dt, dspace);
209
210 // write to the dataset
211 auto err = H5Dwrite(ds, dt, dspace, H5S_ALL, H5P_DEFAULT, (void *)cb.buffer.data());
212 if (err < 0) throw make_runtime_error("Error in h5_write: Writing a char_buf to the dataset ", name, " in the group ", g.name(), " failed");
213 }
214
215 void h5_read(group g, std::string const &name, char_buf &cb) {
216 // open the dataset and get dataspace and datatype information
217 dataset ds = g.open_dataset(name);
218 dataspace dspace = H5Dget_space(ds);
219 datatype ty = H5Dget_type(ds);
220
221 // prepare the char_buf to be read into
222 char_buf cb_out;
223 // number of strings
224 int dim = H5Sget_simple_extent_ndims(dspace);
225 cb_out.lengths.resize(dim);
226 H5Sget_simple_extent_dims(dspace, cb_out.lengths.data(), nullptr);
227 // max. length of the strings + 1
228 size_t size = H5Tget_size(ty);
229 cb_out.lengths.push_back(size);
230 // resize the buffer
231 long ltot = std::accumulate(cb_out.lengths.begin(), cb_out.lengths.end(), 1, std::multiplies<>());
232 cb_out.buffer.resize(std::max(ltot, 1l), 0x00);
233
234 // read into the buffer
235 H5_ASSERT(hdf5_type_equal(ty, cb_out.dtype()));
236 auto err = H5Dread(ds, ty, cb_out.dspace(), H5S_ALL, H5P_DEFAULT, (void *)cb_out.buffer.data());
237 if (err < 0) throw make_runtime_error("Error in h5_read: Reading a char_buf from the dataset ", name, " in the group ", g.name(), " failed");
238
239 // move to output char_buf
240 cb = std::move(cb_out);
241 }
242
243 void h5_write_attribute(object obj, std::string const &name, char_buf const &cb) {
244 // datatype and dataspace of char_buf
245 auto dt = cb.dtype();
246 auto dspace = cb.dspace();
247
248 // create the attribute
249 attribute attr = H5Acreate2(obj, name.c_str(), dt, dspace, H5P_DEFAULT, H5P_DEFAULT);
250 if (!attr.is_valid()) throw make_runtime_error("Error in h5_write_attribute: Creating the attribute ", name, " failed");
251
252 // write the char_buf to the attribute
253 herr_t status = H5Awrite(attr, dt, (void *)cb.buffer.data());
254 if (status < 0) throw make_runtime_error("Error in h5_write_attribute: Writing a char_buf to the attribute ", name, " failed");
255 }
256
257 void h5_read_attribute(object obj, std::string const &name, char_buf &cb) {
258 // open the attribute and get dataspace and datatype information
259 attribute attr = H5Aopen(obj, name.c_str(), H5P_DEFAULT);
260 if (!attr.is_valid()) throw make_runtime_error("Error in h5_read_attribute: Opening the attribute ", name, " failed");
261
262 dataspace d_space = H5Aget_space(attr);
263 datatype ty = H5Aget_type(attr);
264
265 // prepare the char_buf to be read into
266 char_buf cb_out;
267 // number of strings
268 int dim = H5Sget_simple_extent_ndims(d_space);
269 cb_out.lengths.resize(dim);
270 H5Sget_simple_extent_dims(d_space, cb_out.lengths.data(), nullptr);
271 // max. length of the strings + 1
272 size_t size = H5Tget_size(ty);
273 cb_out.lengths.push_back(size);
274 // resize the buffer
275 long ltot = std::accumulate(cb_out.lengths.begin(), cb_out.lengths.end(), 1, std::multiplies<>());
276 cb_out.buffer.resize(std::max(ltot, 1l), 0x00);
277
278 // read into the buffer
279 H5_ASSERT(hdf5_type_equal(ty, cb_out.dtype()));
280 auto err = H5Aread(attr, ty, (void *)cb_out.buffer.data());
281 if (err < 0) throw make_runtime_error("Error in h5_read_attribute: Reading a char_buf from the attribute ", name, " failed");
282
283 // move to output char_buf
284 cb = std::move(cb_out);
285 }
286
287} // namespace h5
A handle to an HDF5 group.
Definition group.hpp:44
dataset create_dataset(std::string const &key, datatype ty, dataspace sp, hid_t pl) const
Create a dataset with the given key, datatype, dataspace and dataset creation property list in this group.
Definition group.cpp:150
dataset open_dataset(std::string const &key) const
Open a dataset with the given key in the group.
Definition group.cpp:140
std::string name() const
Get the name of the group.
Definition group.cpp:39
A generic handle for HDF5 objects.
Definition object.hpp:49
bool is_valid() const
Ensure that the wrapped HDF5 ID is valid (by calling H5Iis_valid).
Definition object.cpp:116
object datatype
Type alias for an HDF5 datatype.
Definition object.hpp:123
bool hdf5_type_equal(datatype dt1, datatype dt2)
Check if two HDF5 datatypes are equal.
Definition object.cpp:198
T h5_read(group g, std::string const &key)
Generic implementation for reading from an HDF5 dataset/subgroup.
Definition generic.hpp:51
T h5_read_attribute_from_key(group g, std::string const &key, std::string const &name)
Generic implementation for reading an HDF5 attribute.
Definition generic.hpp:184
T h5_read_attribute(object obj, std::string const &name)
Generic implementation for reading an HDF5 attribute.
Definition generic.hpp:120
void h5_write(group g, std::string const &name, T const &x) H5_REQUIRES(std
Write a scalar to an HDF5 dataset.
Definition scalar.hpp:70
void h5_write_attribute(object obj, std::string const &name, T const &x) H5_REQUIRES(std
Write a scalar to an HDF5 attribute.
Definition scalar.hpp:125
void h5_write_attribute_to_key(group g, std::string const &key, std::string const &name, std::string const &s)
Write a std::string to an HDF5 attribute.
Definition string.cpp:147
std::runtime_error make_runtime_error(Ts const &...ts)
Create a std::runtime_error with an error message constructed from the given arguments.
Definition utils.hpp:69
Macros used in the h5 library.
Provides functions to read/write std::string, char* and h5::char_buf objects from/to HDF5.
Stores an arbitrary number of strings in a 1-dimensional std::vector<char>.
Definition string.hpp:160
dataspace dspace() const
Get the HDF5 dataspace.
Definition string.cpp:198
datatype dtype() const
Get the HDF5 datatype.
Definition string.cpp:195
v_t lengths
Stores the number of strings in each dimension and the max. allowed length of the strings + 1.
Definition string.hpp:165
std::vector< char > buffer
Stores strings in a 1-dimensional vector.
Definition string.hpp:162
Provides some utility functions for h5.