TRIQS/nda 1.3.0
Multi-dimensional array library for C++
Loading...
Searching...
No Matches
gemm.hpp
Go to the documentation of this file.
1// Copyright (c) 2019-2023 Simons Foundation
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0.txt
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// Authors: Miguel Morales, Olivier Parcollet, Nils Wentzell
16
17/**
18 * @file
19 * @brief Provides a generic interface to the BLAS `gemm` routine.
20 */
21
22#pragma once
23
24#include "./interface/cxx_interface.hpp"
25#include "./tools.hpp"
26#include "../concepts.hpp"
27#include "../layout_transforms.hpp"
28#include "../macros.hpp"
29#include "../mem/address_space.hpp"
30#include "../traits.hpp"
31
32#ifndef NDA_HAVE_DEVICE
33#include "../device.hpp"
34#endif
35
36#include <tuple>
37#include <utility>
38
39namespace nda::blas {
40
41 /**
42 * @addtogroup linalg_blas
43 * @{
44 */
45
46 /**
47 * @brief Generic nda::blas::gemm implementation for types not supported by BLAS/LAPACK.
48 *
49 * @tparam A Some matrix type.
50 * @tparam B Some matrix type.
51 * @tparam C Some matrix type.
52 * @param alpha Input scalar.
53 * @param a Input matrix of size m-by-k.
54 * @param b Input matrix of size k-by-n.
55 * @param beta Input scalar.
56 * @param c Input/Output matrix of size m-by-n.
57 */
58 template <Matrix A, Matrix B, MemoryMatrix C>
59 void gemm_generic(typename A::value_type alpha, A const &a, B const &b, typename A::value_type beta,
60 C &&c) { // NOLINT (temporary views are allowed here)
61 EXPECTS(a.extent(1) == b.extent(0));
62 EXPECTS(a.extent(0) == c.extent(0));
63 EXPECTS(b.extent(1) == c.extent(1));
64 for (int i = 0; i < a.extent(0); ++i) {
65 for (int j = 0; j < b.extent(1); ++j) {
66 c(i, j) = beta * c(i, j);
67 for (int k = 0; k < a.extent(1); ++k) c(i, j) += alpha * a(i, k) * b(k, j);
68 }
69 }
70 }
71
72 /**
73 * @brief Interface to the BLAS `gemm` routine.
74 *
75 * @details This function performs one of the matrix-matrix operations
76 * \f[
77 * \mathbf{C} \leftarrow \alpha \mathrm{op}(\mathbf{A}) \mathrm{op}(\mathbf{B}) + \beta \mathbf{C} \;,
78 * \f]
79 * where \f$ \mathrm{op}(\mathbf{X}) \f$ is one of
80 *
81 * - \f$ \mathrm{op}(\mathbf{X}) = \mathbf{X} \f$,
82 * - \f$ \mathrm{op}(\mathbf{X}) = \mathbf{X}^T \f$ or
83 * - \f$ \mathrm{op}(\mathbf{X}) = \mathbf{X}^H \f$.
84 *
85 * Here, \f$ \alpha \f$ and \f$ \beta \f$ are scalars, and \f$ \mathbf{A} \f$, \f$ \mathbf{B} \f$ are matrices with
86 * \f$ \mathrm{op}(\mathbf{A}) \f$ is an m-by-k matrix, \f$ \mathrm{op}(\mathbf{B}) \f$ is a k-by-n matrix and
87 * \f$ \mathrm{op}(\mathbf{C}) \f$ is an m-by-n matrix.
88 *
89 * @tparam A nda::Matrix type.
90 * @tparam B nda::Matrix type.
91 * @tparam C nda::MemoryMatrix type.
92 * @param alpha Input scalar.
93 * @param a Input matrix of size m-by-k.
94 * @param b Input matrix of size k-by-n.
95 * @param beta Input scalar.
96 * @param c Input/Output matrix of size m-by-n.
97 */
98 template <Matrix A, Matrix B, MemoryMatrix C>
99 requires((MemoryMatrix<A> or is_conj_array_expr<A>) and (MemoryMatrix<B> or is_conj_array_expr<B>)
100 and have_same_value_type_v<A, B, C> and is_blas_lapack_v<get_value_t<A>>)
101 void gemm(get_value_t<A> alpha, A const &a, B const &b, get_value_t<A> beta, C &&c) {
102 // get underlying matrix in case it is given as a lazy expression
103 auto to_mat = []<typename Z>(Z const &z) -> auto & {
104 if constexpr (is_conj_array_expr<Z>)
105 return std::get<0>(z.a);
106 else
107 return z;
108 };
109 auto &mat_a = to_mat(a);
110 auto &mat_b = to_mat(b);
111
112 // compile-time checks
113 using mat_a_type = decltype(mat_a);
114 using mat_b_type = decltype(mat_b);
115 static_assert(mem::have_compatible_addr_space<mat_a_type, mat_b_type, C>, "Error in nda::blas::gemm: Incompatible memory address spaces");
116
117 // runtime checks
118 EXPECTS(mat_a.extent(1) == mat_b.extent(0));
119 EXPECTS(mat_a.extent(0) == c.extent(0));
120 EXPECTS(mat_b.extent(1) == c.extent(1));
121 EXPECTS(mat_a.indexmap().min_stride() == 1);
122 EXPECTS(mat_b.indexmap().min_stride() == 1);
123 EXPECTS(c.indexmap().min_stride() == 1);
124
125 // c is in C order: compute the transpose of the product in Fortran order
126 if constexpr (has_C_layout<C>) {
127 gemm(alpha, transpose(b), transpose(a), beta, transpose(std::forward<C>(c)));
128 } else { // c is in Fortran order
129 static constexpr bool conj_A = is_conj_array_expr<A>;
130 static constexpr bool conj_B = is_conj_array_expr<B>;
131 char op_a = get_op<conj_A, /* transpose = */ has_C_layout<mat_a_type>>;
132 char op_b = get_op<conj_B, /* transpose = */ has_C_layout<mat_b_type>>;
133 auto [m, k] = mat_a.shape();
134 auto n = mat_b.extent(1);
135
136 if constexpr (mem::have_device_compatible_addr_space<mat_a_type, mat_b_type, C>) {
137#if defined(NDA_HAVE_DEVICE)
138 device::gemm(op_a, op_b, m, n, k, alpha, mat_a.data(), get_ld(mat_a), mat_b.data(), get_ld(mat_b), beta, c.data(), get_ld(c));
139#else
141#endif
142 } else {
143 f77::gemm(op_a, op_b, m, n, k, alpha, mat_a.data(), get_ld(mat_a), mat_b.data(), get_ld(mat_b), beta, c.data(), get_ld(c));
144 }
145 }
146 }
147
148 /** @} */
149
150} // namespace nda::blas
void gemm_generic(typename A::value_type alpha, A const &a, B const &b, typename A::value_type beta, C &&c)
Generic nda::blas::gemm implementation for types not supported by BLAS/LAPACK.
Definition gemm.hpp:59
void gemm(get_value_t< A > alpha, A const &a, B const &b, get_value_t< A > beta, C &&c)
Interface to the BLAS gemm routine.
Definition gemm.hpp:101
void compile_error_no_gpu()
Trigger a compilation error in case GPU specific functionality is used without configuring the project with GPU support.
Definition device.hpp:47
#define EXPECTS(X)
Definition macros.hpp:59