TRIQS/nda 1.3.0
Multi-dimensional array library for C++
Loading...
Searching...
No Matches
fill.hpp
1// Copyright (c) 2022-2023 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Miguel Morales, Nils Wentzell
16
17#pragma once
18
19#include <cstdlib>
20#include <algorithm>
21#include <vector>
22#include <span>
23#include <ranges>
24
25#include "address_space.hpp"
26#include "../traits.hpp"
27
28namespace nda::mem {
29
42 template <AddressSpace AdrSp, typename T>
44 T *fill_n(T *first, size_t count, const T &value) {
46 static_assert(nda::have_device == nda::have_cuda, "Adjust function for new device types");
47
48 if constexpr (AdrSp == Host) {
49 return std::fill_n(first, count, value);
50 } else { // Device or Unified
51 auto value_bytes = std::as_bytes(std::span(&value, 1));
52 bool is_zero = std::ranges::all_of(value_bytes, [](auto b) { return b == std::byte{0}; });
53 if (is_zero) {
54 device_error_check(cudaMemset(first, 0, count * sizeof(T)), "cudaMemset");
55 } else {
56 for (int n = 0; n < sizeof(T); ++n) {
57 const int byte_value [[maybe_unused]] = static_cast<int>(value_bytes[n]);
58 device_error_check(cudaMemset2D((char *)(first) + n, sizeof(T), byte_value, 1, count), "cudaMemset2D");
59 }
60 }
61 return first + count;
62 }
63 }
64
77 template <AddressSpace AdrSp, typename T>
79 T *fill(T *first, T *end, const T &value) {
80 if (std::distance(first, end) > 0) return fill_n<AdrSp>(first, std::distance(first, end), value);
81 return first;
82 }
83
97 template <AddressSpace AdrSp, typename T>
99 void fill2D_n(T *first [[maybe_unused]], size_t pitch [[maybe_unused]], size_t width, size_t height, const T &value) {
101 static_assert(nda::have_device == nda::have_cuda, "Adjust function for new device types");
102 static_assert(AdrSp == mem::Device or AdrSp == mem::Unified, "Not implemented for host memory");
103
104 bool is_zero = std::ranges::all_of(std::as_bytes(std::span(&value, 1)), [](auto b) { return b == std::byte{0}; });
105 if (is_zero) {
106 device_error_check(cudaMemset2D(first, pitch * sizeof(T), 0, width * sizeof(T), height), "cudaMemset2D");
107 } else {
108 std::vector<T> v(width * height, value);
109 device_error_check(cudaMemcpy2D(first, pitch * sizeof(T), v.data(), width * sizeof(T), width * sizeof(T), height, cudaMemcpyDefault),
110 "cudaMemcpy2D");
111 }
112 }
113
114} // namespace nda::mem
Provides definitions and type traits involving the different memory address spaces supported by nda.
static const auto check_adr_sp_valid
Check validity of a set of nda::mem::AddressSpace values.
static constexpr bool have_cuda
Constexpr variable that is true if the project is configured with CUDA support.
Definition device.hpp:135
#define device_error_check(ARG1, ARG2)
Trigger a compilation error every time the nda::device_error_check function is called.
Definition device.hpp:129
static constexpr bool have_device
Constexpr variable that is true if the project is configured with GPU support.
Definition device.hpp:132
constexpr bool is_scalar_or_convertible_v
Constexpr variable that is true if type S is a scalar type (see nda::is_scalar_v) or if a std::comple...
Definition traits.hpp:76
Provides type traits for the nda library.