TRIQS/nda 1.3.0
Multi-dimensional array library for C++
Loading...
Searching...
No Matches
fill.hpp
1// Copyright (c) 2022-2023 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Miguel Morales, Nils Wentzell
16
17#pragma once
18
19#include "./address_space.hpp"
20#include "../device.hpp"
21#include "../traits.hpp"
22
23#include <algorithm>
24#include <cstddef>
25#include <cstdlib>
26#include <iterator>
27#include <ranges>
28#include <span>
29#include <vector>
30
31namespace nda::mem {
32
48 template <AddressSpace AdrSp, typename T>
50 T *fill_n(T *first, size_t count, const T &value) {
52 static_assert(nda::have_device == nda::have_cuda, "Adjust function for new device types");
53
54 if constexpr (AdrSp == Host) {
55 return std::fill_n(first, count, value);
56 } else { // Device or Unified
57 auto value_bytes = std::as_bytes(std::span(&value, 1));
58 bool is_zero = std::ranges::all_of(value_bytes, [](auto b) { return b == std::byte{0}; });
59 if (is_zero) {
60 device_error_check(cudaMemset(first, 0, count * sizeof(T)), "cudaMemset");
61 } else {
62 for (int n = 0; n < sizeof(T); ++n) {
63 device_error_check(cudaMemset2D((char *)(first) + n, sizeof(T), static_cast<int>(value_bytes[n]), 1, count), "cudaMemset2D");
64 }
65 }
66 return first + count;
67 }
68 }
69
82 template <AddressSpace AdrSp, typename T>
84 T *fill(T *first, T *end, const T &value) {
85 if (std::distance(first, end) > 0) return fill_n<AdrSp>(first, std::distance(first, end), value);
86 return first;
87 }
88
105 template <AddressSpace AdrSp, typename T>
107 void fill2D_n(T *first [[maybe_unused]], size_t pitch [[maybe_unused]], size_t width, size_t height, const T &value) {
109 static_assert(nda::have_device == nda::have_cuda, "Adjust function for new device types");
110 static_assert(AdrSp == mem::Device or AdrSp == mem::Unified, "Not implemented for host memory");
111
112 bool is_zero = std::ranges::all_of(std::as_bytes(std::span(&value, 1)), [](auto b) { return b == std::byte{0}; });
113 if (is_zero) {
114 device_error_check(cudaMemset2D(first, pitch * sizeof(T), 0, width * sizeof(T), height), "cudaMemset2D");
115 } else {
116 std::vector<T> v(width * height, value);
117 device_error_check(cudaMemcpy2D(first, pitch * sizeof(T), v.data(), width * sizeof(T), width * sizeof(T), height, cudaMemcpyDefault),
118 "cudaMemcpy2D");
119 }
120 }
121
122} // namespace nda::mem
Provides definitions and type traits involving the different memory address spaces supported by nda.
Provides GPU and non-GPU specific functionality.
static const auto check_adr_sp_valid
Check validity of a set of nda::mem::AddressSpace values.
static constexpr bool have_cuda
Constexpr variable that is true if the project is configured with CUDA support.
Definition device.hpp:135
#define device_error_check(ARG1, ARG2)
Trigger a compilation error every time the nda::device_error_check function is called.
Definition device.hpp:129
static constexpr bool have_device
Constexpr variable that is true if the project is configured with GPU support.
Definition device.hpp:132
constexpr bool is_scalar_or_convertible_v
Constexpr variable that is true if type S is a scalar type (see nda::is_scalar_v) or if a std::comple...
Definition traits.hpp:76
Provides type traits for the nda library.