nda/unstable/gemm_8hpp_source.html

// Copyright (c) 2019--present, The Simons Foundation

// This file is part of TRIQS/nda and is licensed under the Apache License, Version 2.0.

// SPDX-License-Identifier: Apache-2.0

// See LICENSE in the root of this distribution for details.


#pragma once


#include "./interface/cxx_interface.hpp"

#include "./tools.hpp"

#include "../concepts.hpp"

#include "../layout_transforms.hpp"

#include "../macros.hpp"

#include "../mem/address_space.hpp"

#include "../traits.hpp"


#ifndef NDA_HAVE_DEVICE

#include "../device.hpp"

#endif


#include <tuple>

#include <utility>


namespace nda::blas {


  /**
   * @brief Generic nda::blas::gemm implementation for types not supported by BLAS/LAPACK.
   *
   * @details Evaluates `c = alpha * a * b + beta * c` with a plain triple loop over the elements of the
   * operands. When `beta` compares equal to zero, `c` is overwritten with zeros (it is not scaled), so
   * whatever `c` held before the call does not enter the result.
   *
   * Preconditions (checked with `EXPECTS`): the second extent of `a` equals the first extent of `b`, and
   * `c` has the first extent of `a` and the second extent of `b`.
   *
   * @tparam A Type satisfying the `Matrix` concept.
   * @tparam B Type satisfying the `Matrix` concept.
   * @tparam C Type satisfying the `MemoryMatrix` concept.
   * @param alpha Scalar multiplying the product `a * b`.
   * @param a Left-hand side matrix of the product.
   * @param b Right-hand side matrix of the product.
   * @param beta Scalar multiplying the input value of `c`.
   * @param c Matrix that is updated in place with the result.
   */
  template <Matrix A, Matrix B, MemoryMatrix C>
  void gemm_generic(typename A::value_type alpha, A const &a, B const &b, typename A::value_type beta,
                    C &&c) { // NOLINT (temporary views are allowed here)
    // the three operands must have conforming shapes
    EXPECTS(a.extent(1) == b.extent(0));
    EXPECTS(a.extent(0) == c.extent(0));
    EXPECTS(b.extent(1) == c.extent(1));

    // prepare the output: reset it to zero for beta == 0, otherwise scale the existing content by beta
    if (beta == 0.0)
      c = 0 * alpha;
    else
      c *= beta;

    // accumulate alpha * a * b on top of the prepared output, one element of c at a time
    for (int row = 0; row < a.extent(0); ++row)
      for (int col = 0; col < b.extent(1); ++col)
        for (int inner = 0; inner < a.extent(1); ++inner) c(row, col) += alpha * a(row, inner) * b(inner, col);
  }


  /**
   * @brief Interface to the BLAS `gemm` routine.
   *
   * @details Forwards the update of `c` with `alpha`, `a`, `b` and `beta` to either `device::gemm` or
   * `f77::gemm`, depending on the address spaces of the operands. `a` and `b` may each be a memory matrix
   * or a lazy conjugate expression; in the latter case the wrapped matrix is passed on and the conjugation
   * is encoded in the operation tag obtained from `get_op`.
   *
   * If `c` has C layout, the function calls itself with all operands transposed and `a` and `b` swapped,
   * i.e. it computes the transpose of the product in Fortran order.
   *
   * Preconditions (checked with `EXPECTS`): the shapes of the operands conform, and the minimal stride of
   * each of the three matrices is 1. Incompatible memory address spaces are rejected at compile time.
   *
   * @tparam A Memory matrix or conjugate lazy expression type.
   * @tparam B Memory matrix or conjugate lazy expression type.
   * @tparam C Memory matrix type with the same value type as `A` and `B`.
   * @param alpha Scalar passed to the backend as `alpha`.
   * @param a Left-hand side operand of the product.
   * @param b Right-hand side operand of the product.
   * @param beta Scalar passed to the backend as `beta`.
   * @param c Matrix that receives the result.
   */
  template <Matrix A, Matrix B, MemoryMatrix C>
    requires((MemoryMatrix<A> or is_conj_array_expr<A>) and (MemoryMatrix<B> or is_conj_array_expr<B>)
             and have_same_value_type_v<A, B, C> and is_blas_lapack_v<get_value_t<A>>)
  void gemm(get_value_t<A> alpha, A const &a, B const &b, get_value_t<A> beta, C &&c) {
    // strip a lazy conjugate expression down to the matrix it wraps; anything else is passed through as is
    auto unwrap = []<typename Z>(Z const &z) -> auto & {
      if constexpr (is_conj_array_expr<Z>) {
        return std::get<0>(z.a);
      } else {
        return z;
      }
    };
    auto &mat_a      = unwrap(a);
    auto &mat_b      = unwrap(b);
    using mat_a_type = decltype(mat_a);
    using mat_b_type = decltype(mat_b);

    // all three operands have to live in compatible address spaces
    static_assert(mem::have_compatible_addr_space<mat_a_type, mat_b_type, C>, "Error in nda::blas::gemm: Incompatible memory address spaces");

    // conforming shapes ...
    EXPECTS(mat_a.extent(1) == mat_b.extent(0));
    EXPECTS(mat_a.extent(0) == c.extent(0));
    EXPECTS(mat_b.extent(1) == c.extent(1));
    // ... and a minimal stride of one for every matrix handed to the backend
    EXPECTS(mat_a.indexmap().min_stride() == 1);
    EXPECTS(mat_b.indexmap().min_stride() == 1);
    EXPECTS(c.indexmap().min_stride() == 1);

    if constexpr (not has_C_layout<C>) {
      // c is in Fortran order: call the backend directly; conjugation and C layout of a and b are
      // expressed through the operation tags
      char trans_a       = get_op<is_conj_array_expr<A>, /* transpose = */ has_C_layout<mat_a_type>>;
      char trans_b       = get_op<is_conj_array_expr<B>, /* transpose = */ has_C_layout<mat_b_type>>;
      auto [rows, inner] = mat_a.shape();
      auto cols          = mat_b.extent(1);

      if constexpr (mem::have_device_compatible_addr_space<mat_a_type, mat_b_type, C>) {
#if defined(NDA_HAVE_DEVICE)
        device::gemm(trans_a, trans_b, rows, cols, inner, alpha, mat_a.data(), get_ld(mat_a), mat_b.data(), get_ld(mat_b), beta, c.data(),
                     get_ld(c));
#else
        // device memory requested, but the build has no device support
        compile_error_no_gpu();
#endif
      } else {
        f77::gemm(trans_a, trans_b, rows, cols, inner, alpha, mat_a.data(), get_ld(mat_a), mat_b.data(), get_ld(mat_b), beta, c.data(),
                  get_ld(c));
      }
    } else {
      // c is in C order: compute the transpose of the product in Fortran order
      gemm(alpha, transpose(b), transpose(a), beta, transpose(std::forward<C>(c)));
    }
  }


} // namespace nda::blas

address_space.hpp
Provides definitions and type traits involving the different memory address spaces supported by nda.

cxx_interface.hpp
Provides a C++ interface for various BLAS routines.

nda::MemoryMatrix
Check if a given type is a memory matrix, i.e. an nda::MemoryArrayOfRank<2>.
Definition concepts.hpp:306

concepts.hpp
Provides concepts for the nda library.

device.hpp
Provides GPU and non-GPU specific functionality.

nda::transpose
auto transpose(A &&a)
Transpose the memory layout of an nda::MemoryArray or an nda::expr_call.
Definition layout_transforms.hpp:180

nda::have_same_value_type_v
constexpr bool have_same_value_type_v
Constexpr variable that is true if all types in As have the same value type as A0.
Definition traits.hpp:186

nda::get_value_t
std::decay_t< decltype(get_first_element(std::declval< A const  >()))> get_value_t
Get the value type of an array/view or a scalar type.
Definition traits.hpp:182

nda::blas::get_ld
int get_ld(A const &a)
Get the leading dimension in LAPACK jargon of an nda::MemoryMatrix.
Definition tools.hpp:98

nda::blas::has_C_layout
static constexpr bool has_C_layout
Constexpr variable that is true if the given nda::Array type has a C memory layout.
Definition tools.hpp:65

nda::blas::gemm_generic
void gemm_generic(typename A::value_type alpha, A const &a, B const &b, typename A::value_type beta, C &&c)
Generic nda::blas::gemm implementation for types not supported by BLAS/LAPACK.
Definition gemm.hpp:48

nda::blas::is_conj_array_expr
static constexpr bool is_conj_array_expr
Constexpr variable that is true if the given type is a conjugate lazy expression.
Definition tools.hpp:41

nda::blas::gemm
void gemm(get_value_t< A > alpha, A const &a, B const &b, get_value_t< A > beta, C &&c)
Interface to the BLAS gemm routine.
Definition gemm.hpp:96

nda::blas::get_op
const char get_op
Variable template that determines the BLAS matrix operation tag ('N','T','C') based on the given boolean flags for conjugation and transposition.
Definition tools.hpp:80

nda::mem::have_compatible_addr_space
static constexpr bool have_compatible_addr_space
Constexpr variable that is true if all given types have compatible address spaces.
Definition address_space.hpp:175

nda::mem::have_device_compatible_addr_space
static constexpr bool have_device_compatible_addr_space
Constexpr variable that is true if all given types have an address space compatible with Device.
Definition address_space.hpp:171

nda::compile_error_no_gpu
void compile_error_no_gpu()
Trigger a compilation error in case GPU specific functionality is used without configuring the project with GPU support.
Definition device.hpp:36

nda::is_blas_lapack_v
constexpr bool is_blas_lapack_v
Alias for nda::is_double_or_complex_v.
Definition traits.hpp:92

layout_transforms.hpp
Provides functions to transform the memory layout of an nda::basic_array or nda::basic_array_view.

macros.hpp
Macros used in the nda library.

tools.hpp
Provides various traits and utilities for the BLAS interface.

traits.hpp
Provides type traits for the nda library.