26#if defined(NDA_HAVE_CUDA)
30namespace nda::blas::f77 {
/// @brief GPU (CUDA) version of the BLAS axpy routine: Y := alpha * x + Y
/// (GPU variant — this header is guarded by NDA_HAVE_CUDA).
/// @param N Number of elements in the vectors.
/// @param alpha Scalar prefactor.
/// @param x Pointer to the input vector x.
/// @param incx Stride between consecutive elements of x.
/// @param Y Pointer to the input/output vector Y.
/// @param incy Stride between consecutive elements of Y.
void axpy(int N, double alpha, const double *x, int incx, double *Y, int incy);
/// @brief GPU (CUDA) version of the BLAS copy routine: Y := x.
/// @param N Number of elements in the vectors.
/// @param x Pointer to the input vector x.
/// @param incx Stride between consecutive elements of x.
/// @param Y Pointer to the output vector Y.
/// @param incy Stride between consecutive elements of Y.
void copy(int N, const double *x, int incx, double *Y, int incy);
/// @brief GPU (CUDA) version of the BLAS dot routine: returns sum_i x[i] * Y[i].
/// @param M Number of elements in the vectors (named M here, N in the other
///   Level-1 declarations of this header — kept for signature consistency).
/// @param x Pointer to the first input vector.
/// @param incx Stride between consecutive elements of x.
/// @param Y Pointer to the second input vector.
/// @param incy Stride between consecutive elements of Y.
/// @return The dot product of the two vectors.
double dot(int M, const double *x, int incx, const double *Y, int incy);
/// @brief GPU (CUDA) version of the BLAS gemm routine:
/// C := alpha * op(A) * op(B) + beta * C.
/// @param op_a Operation applied to A ('N', 'T' or 'C').
/// @param op_b Operation applied to B ('N', 'T' or 'C').
/// @param M Number of rows of op(A) and of C.
/// @param N Number of columns of op(B) and of C.
/// @param K Number of columns of op(A) / rows of op(B).
/// @param alpha Scalar prefactor of the product.
/// @param A Pointer to the matrix A.
/// @param LDA Leading dimension of A.
/// @param B Pointer to the matrix B.
/// @param LDB Leading dimension of B.
/// @param beta Scalar prefactor of C.
/// @param C Pointer to the input/output matrix C.
/// @param LDC Leading dimension of C.
/// NOTE(review): the trailing "double *C, int LDC);" portion was truncated in the
/// extracted text; it has been reconstructed from the parallel batched overloads
/// in this header — confirm against the original header file.
void gemm(char op_a, char op_b, int M, int N, int K, double alpha, const double *A, int LDA,
          const double *B, int LDB, double beta, double *C, int LDC);
44 void gemm(
char op_a,
char op_b,
int M,
int N,
int K,
dcomplex alpha,
const dcomplex *A,
int LDA,
const dcomplex *B,
int LDB,
dcomplex beta,
/// @brief GPU (CUDA) batched version of the BLAS gemm routine. Performs
/// batch_count independent gemm operations, one per pointer in A/B/C.
/// @param op_a Operation applied to each A ('N', 'T' or 'C').
/// @param op_b Operation applied to each B ('N', 'T' or 'C').
/// @param M Number of rows of op(A) and of C (shared by all batches).
/// @param N Number of columns of op(B) and of C.
/// @param K Number of columns of op(A) / rows of op(B).
/// @param alpha Scalar prefactor of each product.
/// @param A Array of batch_count pointers to the A matrices.
/// @param LDA Leading dimension of every A.
/// @param B Array of batch_count pointers to the B matrices.
/// @param LDB Leading dimension of every B.
/// @param beta Scalar prefactor of each C.
/// @param C Array of batch_count pointers to the input/output C matrices.
/// @param LDC Leading dimension of every C.
/// @param batch_count Number of gemm operations to perform.
void gemm_batch(char op_a, char op_b, int M, int N, int K, double alpha, const double **A,
                int LDA, const double **B, int LDB, double beta, double **C, int LDC,
                int batch_count);
49 void gemm_batch(
char op_a,
char op_b,
int M,
int N,
int K,
dcomplex alpha,
const dcomplex **A,
int LDA,
const dcomplex **B,
int LDB,
dcomplex beta,
50 dcomplex **C,
int LDC,
int batch_count);
/// @brief GPU (CUDA) variable-size batched version of the BLAS gemm routine.
/// Like gemm_batch, but the dimensions and leading dimensions are given per
/// batch entry (M, N, K, LDA, LDB, LDC are arrays of length batch_count).
/// @param op_a Operation applied to each A ('N', 'T' or 'C').
/// @param op_b Operation applied to each B ('N', 'T' or 'C').
/// @param M Per-batch numbers of rows of op(A) and of C.
/// @param N Per-batch numbers of columns of op(B) and of C.
/// @param K Per-batch numbers of columns of op(A) / rows of op(B).
/// @param alpha Scalar prefactor of each product (shared by all batches).
/// @param A Array of batch_count pointers to the A matrices.
/// @param LDA Per-batch leading dimensions of A.
/// @param B Array of batch_count pointers to the B matrices.
/// @param LDB Per-batch leading dimensions of B.
/// @param beta Scalar prefactor of each C (shared by all batches).
/// @param C Array of batch_count pointers to the input/output C matrices.
/// @param LDC Per-batch leading dimensions of C.
/// @param batch_count Number of gemm operations to perform.
void gemm_vbatch(char op_a, char op_b, int *M, int *N, int *K, double alpha, const double **A,
                 int *LDA, const double **B, int *LDB, double beta, double **C, int *LDC,
                 int batch_count);
54 void gemm_vbatch(
char op_a,
char op_b,
int *M,
int *N,
int *K,
dcomplex alpha,
const dcomplex **A,
int *LDA,
const dcomplex **B,
int *LDB,
/// @brief GPU (CUDA) strided batched version of the BLAS gemm routine. The
/// batch matrices are stored contiguously: entry i uses A + i * strideA,
/// B + i * strideB and C + i * strideC.
/// @param op_a Operation applied to each A ('N', 'T' or 'C').
/// @param op_b Operation applied to each B ('N', 'T' or 'C').
/// @param M Number of rows of op(A) and of C (shared by all batches).
/// @param N Number of columns of op(B) and of C.
/// @param K Number of columns of op(A) / rows of op(B).
/// @param alpha Scalar prefactor of each product.
/// @param A Pointer to the first A matrix.
/// @param LDA Leading dimension of every A.
/// @param strideA Element offset between consecutive A matrices.
/// @param B Pointer to the first B matrix.
/// @param LDB Leading dimension of every B.
/// @param strideB Element offset between consecutive B matrices.
/// @param beta Scalar prefactor of each C.
/// @param C Pointer to the first input/output C matrix.
/// @param LDC Leading dimension of every C.
/// @param strideC Element offset between consecutive C matrices.
/// @param batch_count Number of gemm operations to perform.
void gemm_batch_strided(char op_a, char op_b, int M, int N, int K, double alpha, const double *A,
                        int LDA, int strideA, const double *B, int LDB, int strideB, double beta,
                        double *C, int LDC, int strideC, int batch_count);
59 void gemm_batch_strided(
char op_a,
char op_b,
int M,
int N,
int K,
dcomplex alpha,
const dcomplex *A,
int LDA,
int strideA,
const dcomplex *B,
60 int LDB,
int srideB,
dcomplex beta,
dcomplex *C,
int LDC,
int strideC,
int batch_count);
/// @brief GPU (CUDA) version of the BLAS gemv routine:
/// Y := alpha * op(A) * x + beta * Y.
/// @param op Operation applied to A ('N', 'T' or 'C').
/// @param M Number of rows of A.
/// @param N Number of columns of A.
/// @param alpha Scalar prefactor of the product.
/// @param A Pointer to the matrix A.
/// @param LDA Leading dimension of A.
/// @param x Pointer to the input vector x.
/// @param incx Stride between consecutive elements of x.
/// @param beta Scalar prefactor of Y.
/// @param Y Pointer to the input/output vector Y.
/// @param incy Stride between consecutive elements of Y.
void gemv(char op, int M, int N, double alpha, const double *A, int LDA, const double *x,
          int incx, double beta, double *Y, int incy);
63 void gemv(
char op,
int M,
int N,
dcomplex alpha,
const dcomplex *A,
int LDA,
const dcomplex *x,
int incx,
dcomplex beta,
dcomplex *Y,
int incy);
/// @brief GPU (CUDA) version of the BLAS ger routine (rank-1 update):
/// A := alpha * x * Y^T + A.
/// @param M Number of rows of A / elements of x.
/// @param N Number of columns of A / elements of Y.
/// @param alpha Scalar prefactor of the outer product.
/// @param x Pointer to the input vector x.
/// @param incx Stride between consecutive elements of x.
/// @param Y Pointer to the input vector Y.
/// @param incy Stride between consecutive elements of Y.
/// @param A Pointer to the input/output matrix A.
/// @param LDA Leading dimension of A.
void ger(int M, int N, double alpha, const double *x, int incx, const double *Y, int incy,
         double *A, int LDA);
/// @brief GPU (CUDA) version of the BLAS scal routine: x := alpha * x.
/// @param M Number of elements in the vector (named M here, N elsewhere in this
///   header — kept for signature consistency).
/// @param alpha Scalar factor.
/// @param x Pointer to the input/output vector x.
/// @param incx Stride between consecutive elements of x.
void scal(int M, double alpha, double *x, int incx);
/// @brief GPU (CUDA) version of the BLAS swap routine: exchanges the elements
/// of the two vectors x and Y.
/// @param N Number of elements in the vectors.
/// @param x Pointer to the first input/output vector.
/// @param incx Stride between consecutive elements of x.
/// @param Y Pointer to the second input/output vector.
/// @param incy Stride between consecutive elements of Y.
void swap(int N, double *x, int incx, double *Y, int incy);
void swap(nda::basic_array_view< V1, R1, LP1, A1, AP1, OP1 > &a, nda::basic_array_view< V2, R2, LP2, A2, AP2, OP2 > &b)=delete
std::swap is deleted for nda::basic_array_view.
Provides a C++ interface for the GPU versions of various BLAS routines.
void gemm_batch_strided(get_value_t< A > alpha, A const &a, B const &b, get_value_t< A > beta, C &&c)
Implements a strided batched version of nda::blas::gemm taking 3-dimensional arrays as arguments.
std::complex< double > dcomplex
Alias for std::complex<double> type.
void gemm_vbatch(get_value_t< A > alpha, std::vector< A > const &va, std::vector< B > const &vb, get_value_t< A > beta, std::vector< C > &vc)
Wrapper of nda::blas::gemm_batch that allows variable sized matrices.
auto dotc(X const &x, Y const &y)
Interface to the BLAS dotc routine.
void gemv(get_value_t< A > alpha, A const &a, X const &x, get_value_t< A > beta, Y &&y)
Interface to the BLAS gemv routine.
auto dot(X const &x, Y const &y)
Interface to the BLAS dot routine.
void gemm(get_value_t< A > alpha, A const &a, B const &b, get_value_t< A > beta, C &&c)
Interface to the BLAS gemm routine.
void ger(get_value_t< X > alpha, X const &x, Y const &y, M &&m)
Interface to the BLAS ger routine.
void gemm_batch(get_value_t< A > alpha, std::vector< A > const &va, std::vector< B > const &vb, get_value_t< A > beta, std::vector< C > &vc)
Implements a batched version of nda::blas::gemm taking vectors of matrices as arguments.