nda/unstable/fill_8hpp_source.html

// Copyright (c) 2022-2023 Simons Foundation

//

// Licensed under the Apache License, Version 2.0 (the "License");

// you may not use this file except in compliance with the License.

// You may obtain a copy of the License at

//

//     http://www.apache.org/licenses/LICENSE-2.0.txt

//

// Unless required by applicable law or agreed to in writing, software

// distributed under the License is distributed on an "AS IS" BASIS,

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

// See the License for the specific language governing permissions and

// limitations under the License.

//

// Authors: Miguel Morales, Nils Wentzell


#pragma once


#include <cstdlib>

#include <algorithm>

#include <vector>

#include <span>

#include <ranges>


#include "address_space.hpp"

#include "../traits.hpp"


namespace nda::mem {


  /**
   * @brief Fill `count` consecutive elements starting at `first` with `value`.
   *
   * @details The strategy is selected at compile time by `AdrSp`:
   * - `Host`: forwards to `std::fill_n`.
   * - otherwise (`Device` or `Unified`): the object representation of `value` is inspected byte by byte.
   *   - If every byte is zero, a single `cudaMemset` clears `count * sizeof(T)` bytes.
   *   - Otherwise one `cudaMemset2D` call is issued per byte of `T`. Each call treats the range as `count` rows
   *     with a pitch of `sizeof(T)` bytes and sets a one-byte-wide column at byte offset `n`, i.e. byte `n` of
   *     every element.
   *
   * The result of each CUDA call is passed to `device_error_check`.
   *
   * @tparam AdrSp Address space selecting the fill strategy.
   * @tparam T Element type; must satisfy `nda::is_scalar_or_convertible_v`.
   * @param first Pointer to the first element to fill.
   * @param count Number of elements to fill.
   * @param value Value written to every element.
   * @return Pointer one past the last filled element.
   */
  template <AddressSpace AdrSp, typename T>
    requires(nda::is_scalar_or_convertible_v<T>)
  T *fill_n(T *first, size_t count, const T &value) {
    check_adr_sp_valid<AdrSp>();
    static_assert(nda::have_device == nda::have_cuda, "Adjust function for new device types");

    if constexpr (AdrSp == Host) {
      return std::fill_n(first, count, value);
    } else { // Device or Unified
      // The CUDA memset routines write one byte value at a time, so work on the raw bytes of `value`.
      auto value_bytes = std::as_bytes(std::span(&value, 1));
      bool is_zero     = std::ranges::all_of(value_bytes, [](auto b) { return b == std::byte{0}; });
      if (is_zero) {
        device_error_check(cudaMemset(first, 0, count * sizeof(T)), "cudaMemset");
      } else {
        // Unsigned index: same type as sizeof(T) and as the span index, so no signed/unsigned comparison.
        for (size_t n = 0; n < sizeof(T); ++n) {
          // NOTE(review): [[maybe_unused]] presumably guards against builds where device_error_check
          // discards its arguments - confirm against device.hpp.
          const int byte_value [[maybe_unused]] = static_cast<int>(value_bytes[n]);
          device_error_check(cudaMemset2D(reinterpret_cast<char *>(first) + n, sizeof(T), byte_value, 1, count), "cudaMemset2D");
        }
      }
      return first + count;
    }
  }


  /**
   * @brief Fill the pointer range `[first, end)` with `value`.
   *
   * @details Delegates to `fill_n` with the number of elements between `first` and `end`. If that number is
   * not positive, nothing is written.
   *
   * @tparam AdrSp Address space forwarded to `fill_n`.
   * @tparam T Element type; must satisfy `nda::is_scalar_or_convertible_v`.
   * @param first Pointer to the first element of the range.
   * @param end Pointer one past the last element of the range.
   * @param value Value written to every element.
   * @return The result of `fill_n` for a non-empty range, `first` otherwise.
   */
  template <AddressSpace AdrSp, typename T>
    requires(nda::is_scalar_or_convertible_v<T>)
  T *fill(T *first, T *end, const T &value) {
    const auto n_elems = end - first;

    // Empty (or reversed) range: leave the memory untouched.
    if (n_elems <= 0) return first;

    return fill_n<AdrSp>(first, n_elems, value);
  }


  /**
   * @brief Fill a pitched 2D region with `value` using the CUDA 2D routines.
   *
   * @details `pitch` and `width` are given in elements and converted to bytes with `sizeof(T)` before being
   * handed to CUDA; `height` is passed through as the number of rows.
   * - If every byte of `value` is zero, a single `cudaMemset2D` clears the region.
   * - Otherwise a host `std::vector` holding `width * height` copies of `value` is built and transferred
   *   with `cudaMemcpy2D` (`cudaMemcpyDefault`), using `width * sizeof(T)` as the source pitch.
   *
   * Only `Device` and `Unified` address spaces are accepted; anything else fails a `static_assert`.
   *
   * @tparam AdrSp Address space; must be `mem::Device` or `mem::Unified`.
   * @tparam T Element type; must satisfy `nda::is_scalar_or_convertible_v`.
   * @param first Pointer to the first element of the region.
   * @param pitch Distance between the starts of consecutive rows, in elements.
   * @param width Number of elements to fill per row.
   * @param height Number of rows to fill.
   * @param value Value written to every element.
   */
  template <AddressSpace AdrSp, typename T>
    requires(nda::is_scalar_or_convertible_v<T>)
  void fill2D_n(T *first [[maybe_unused]], size_t pitch [[maybe_unused]], size_t width, size_t height, const T &value) {
    check_adr_sp_valid<AdrSp>();
    static_assert(nda::have_device == nda::have_cuda, "Adjust function for new device types");
    static_assert(AdrSp == mem::Device or AdrSp == mem::Unified, "Not implemented for host memory");

    // Inspect the object representation of the value: a byte-wise memset is only usable for all-zero bytes.
    const auto raw_bytes = std::as_bytes(std::span(&value, 1));
    const bool all_zero  = std::ranges::none_of(raw_bytes, [](std::byte b) { return b != std::byte{0}; });

    if (all_zero) {
      device_error_check(cudaMemset2D(first, pitch * sizeof(T), 0, width * sizeof(T), height), "cudaMemset2D");
      return;
    }

    // Non-zero pattern: stage a densely packed host buffer and copy it row by row into the pitched region.
    std::vector<T> host_buf(width * height, value);
    device_error_check(cudaMemcpy2D(first, pitch * sizeof(T), host_buf.data(), width * sizeof(T), width * sizeof(T), height, cudaMemcpyDefault),
                       "cudaMemcpy2D");
  }


} // namespace nda::mem

address_space.hpp
Provides definitions and type traits involving the different memory address spaces supported by nda.

nda::mem::check_adr_sp_valid
static const auto check_adr_sp_valid
Check validity of a set of nda::mem::AddressSpace values.
Definition address_space.hpp:140

nda::have_cuda
static constexpr bool have_cuda
Constexpr variable that is true if the project is configured with CUDA support.
Definition device.hpp:135

device_error_check
#define device_error_check(ARG1, ARG2)
Trigger a compilation error every time the nda::device_error_check function is called.
Definition device.hpp:129

nda::have_device
static constexpr bool have_device
Constexpr variable that is true if the project is configured with GPU support.
Definition device.hpp:132

nda::is_scalar_or_convertible_v
constexpr bool is_scalar_or_convertible_v
Constexpr variable that is true if type S is a scalar type (see nda::is_scalar_v) or if a std::complex<double> can be constructed from it.
Definition traits.hpp:76

traits.hpp
Provides type traits for the nda library.