// Generic (non-BLAS) matrix-matrix product: every entry of c is overwritten with
//   beta * c(i, j) + alpha * sum_k a(i, k) * b(k, j),
// evaluated with plain nested loops.
// alpha and beta are scalars of A's value type; a and b are read-only operands.
// NOTE(review): this listing carries embedded original line numbers and omits some
// original lines (template header, the trailing output parameter, closing braces) —
// consult the real source before editing the code itself.
59 void gemm_generic(
typename A::value_type alpha, A
const &a, B
const &b,
typename A::value_type beta,
// Shape requirements stated via EXPECTS: the inner dimensions of a and b must agree,
// and c must have a.extent(0) rows and b.extent(1) columns.
61 EXPECTS(a.extent(1) == b.extent(0));
62 EXPECTS(a.extent(0) == c.extent(0));
63 EXPECTS(b.extent(1) == c.extent(1));
// Visit every entry (i, j) of the result.
64 for (
int i = 0; i < a.extent(0); ++i) {
65 for (
int j = 0; j < b.extent(1); ++j) {
// Scale the existing entry by beta first, then accumulate the alpha-weighted
// products over the shared index k.
66 c(i, j) = beta * c(i, j);
67 for (
int k = 0; k < a.extent(1); ++k) c(i, j) += alpha * a(i, k) * b(k, j);
// Tail of the constraint on this overload: A, B and C must share one value type,
// and that value type must satisfy is_blas_lapack_v.
// NOTE(review): the start of the constraint and the template header are not part of
// this listing; several later lines (else branches, #else/#endif, closing braces) are
// missing as well.
100 and have_same_value_type_v<A, B, C>
and is_blas_lapack_v<get_value_t<A>>)
// Matrix product forwarded to a backend gemm routine (device::gemm or f77::gemm below).
// alpha and beta are scalars of A's value type; a and b are read-only operands and the
// result goes through c.data().
// NOTE(review): presumably the usual BLAS contract c <- alpha * op(a) * op(b) + beta * c;
// confirm against the backend wrappers.
101 void gemm(get_value_t<A> alpha, A
const &a, B
const &b, get_value_t<A> beta, C &&c) {
// Helper mapping an operand to the matrix object used by the rest of the function.
// For a conjugate-expression operand it returns the first element of the tuple-like
// member z.a; the branch taken for other operand types is not visible here.
103 auto to_mat = []<
typename Z>(Z
const &z) ->
auto & {
104 if constexpr (is_conj_array_expr<Z>)
105 return std::get<0>(z.a);
// Operands after unwrapping; all size/stride checks and data pointers below use these.
109 auto &mat_a = to_mat(a);
110 auto &mat_b = to_mat(b);
// decltype of the reference variables above, so these aliases are reference types.
113 using mat_a_type =
decltype(mat_a);
114 using mat_b_type =
decltype(mat_b);
// Compile-time rejection of operand combinations living in incompatible address spaces.
115 static_assert(mem::have_compatible_addr_space<mat_a_type, mat_b_type, C>,
"Error in nda::blas::gemm: Incompatible memory address spaces");
// Shape requirements: inner dimensions must agree and c must be
// mat_a.extent(0) x mat_b.extent(1).
118 EXPECTS(mat_a.extent(1) == mat_b.extent(0));
119 EXPECTS(mat_a.extent(0) == c.extent(0));
120 EXPECTS(mat_b.extent(1) == c.extent(1));
// Every operand must have a minimal stride of 1.
// NOTE(review): presumably this means one dimension is contiguous in memory, as a
// leading-dimension based BLAS call needs — confirm min_stride() semantics.
121 EXPECTS(mat_a.indexmap().min_stride() == 1);
122 EXPECTS(mat_b.indexmap().min_stride() == 1);
123 EXPECTS(c.indexmap().min_stride() == 1);
// When the output has C layout, recurse on the transposed problem with the operands
// swapped, using (a * b)^T = b^T * a^T.
// NOTE(review): the lines that follow presumably sit in the matching else branch,
// which is not visible in this listing.
126 if constexpr (has_C_layout<C>) {
127 gemm(alpha, transpose(b), transpose(a), beta, transpose(std::forward<C>(c)));
// Whether each original operand was a conjugate expression.
129 static constexpr bool conj_A = is_conj_array_expr<A>;
130 static constexpr bool conj_B = is_conj_array_expr<B>;
// Operation codes passed to the backend, selected from the conjugation flag and the
// layout of the unwrapped matrix. NOTE(review): the get_op mapping is defined elsewhere.
131 char op_a = get_op<conj_A, has_C_layout<mat_a_type>>;
132 char op_b = get_op<conj_B, has_C_layout<mat_b_type>>;
// Problem sizes: mat_a has shape (m, k); n is the second extent of mat_b.
133 auto [m, k] = mat_a.shape();
134 auto n = mat_b.extent(1);
// Backend dispatch: the device routine is used when all address spaces are device
// compatible and NDA_HAVE_DEVICE is defined.
// NOTE(review): the #else/#endif and the surrounding else are missing from this
// listing; f77::gemm is presumably the host path — confirm.
136 if constexpr (mem::have_device_compatible_addr_space<mat_a_type, mat_b_type, C>) {
137#if defined(NDA_HAVE_DEVICE)
138 device::gemm(op_a, op_b, m, n, k, alpha, mat_a.data(), get_ld(mat_a), mat_b.data(), get_ld(mat_b), beta, c.data(), get_ld(c));
143 f77::gemm(op_a, op_b, m, n, k, alpha, mat_a.data(), get_ld(mat_a), mat_b.data(), get_ld(mat_b), beta, c.data(), get_ld(c));
void gemm_generic(typename A::value_type alpha, A const &a, B const &b, typename A::value_type beta, C &&c)
Generic nda::blas::gemm implementation for types not supported by BLAS/LAPACK.
void gemm(get_value_t< A > alpha, A const &a, B const &b, get_value_t< A > beta, C &&c)
Interface to the BLAS gemm routine.