15#if defined(NDA_HAVE_CUDA) 
   19namespace nda::blas::f77 {
 
   /// BLAS `axpy` interface for `double` data (declaration only; the definition is not visible here).
   /// NOTE(review): presumably implements the standard BLAS update Y <- alpha * x + Y over N elements
   /// with strides incx / incy — confirm against the implementation.
   void axpy(int N, double alpha, const double *x, int incx, double *Y, int incy);
 
   /// BLAS `copy` interface for `double` data (declaration only; the definition is not visible here).
   /// NOTE(review): presumably copies N elements from x (stride incx) into Y (stride incy), as in
   /// reference BLAS — confirm against the implementation.
   void copy(int N, const double *x, int incx, double *Y, int incy);
 
   /// BLAS `dot` interface for `double` data; returns a `double` (declaration only).
   /// NOTE(review): presumably the inner product of x and Y over M elements with strides incx / incy,
   /// as in reference BLAS — confirm against the implementation.
   double dot(int M, const double *x, int incx, const double *Y, int incy);
 
   /// BLAS `gemm` interface for `double` matrices (declaration only; the definition is not visible here).
   /// op_a / op_b are single-character operation selectors; LDA / LDB / LDC accompany A / B / C.
   /// NOTE(review): the trailing `int LDC` parameter was missing from this view and has been restored to
   /// mirror the `C, LDC` pair of the gemm_batch declarations — confirm against the original header.
   void gemm(char op_a, char op_b, int M, int N, int K, double alpha, const double *A, int LDA, const double *B, int LDB, double beta, double *C,
             int LDC);
 
   33  void gemm(
char op_a, 
char op_b, 
int M, 
int N, 
int K, 
dcomplex alpha, 
const dcomplex *A, 
int LDA, 
const dcomplex *B, 
int LDB, 
dcomplex beta,
 
   /// Batched `gemm` interface for `double` matrices (declaration only; the definition is not visible here).
   /// A, B and C are arrays of matrix pointers; a single set of M / N / K / LDA / LDB / LDC values is passed
   /// for the whole batch, and batch_count is passed alongside.
   /// NOTE(review): presumably batch_count is the number of entries in A, B and C — confirm.
   void gemm_batch(char op_a, char op_b, int M, int N, int K, double alpha, const double **A, int LDA, const double **B, int LDB, double beta,
                   double **C, int LDC, int batch_count);
 
   38  void gemm_batch(
char op_a, 
char op_b, 
int M, 
int N, 
int K, 
dcomplex alpha, 
const dcomplex **A, 
int LDA, 
const dcomplex **B, 
int LDB, 
dcomplex beta,
 
   39                  dcomplex **C, 
int LDC, 
int batch_count);
 
   /// Variable-size batched `gemm` interface for `double` matrices (declaration only).
   /// Unlike gemm_batch, the dimensions M / N / K and the leading dimensions LDA / LDB / LDC are passed as
   /// `int *` rather than `int`.
   /// NOTE(review): presumably each of those arrays holds one entry per batch member — confirm.
   void gemm_vbatch(char op_a, char op_b, int *M, int *N, int *K, double alpha, const double **A, int *LDA, const double **B, int *LDB, double beta,
                    double **C, int *LDC, int batch_count);
 
   43  void gemm_vbatch(
char op_a, 
char op_b, 
int *M, 
int *N, 
int *K, 
dcomplex alpha, 
const dcomplex **A, 
int *LDA, 
const dcomplex **B, 
int *LDB,
 
   /// Strided batched `gemm` interface for `double` matrices (declaration only).
   /// Each of A, B and C is a single pointer accompanied by a leading dimension and an integer stride
   /// (strideA / strideB / strideC); batch_count is passed alongside.
   /// NOTE(review): presumably the stride is the element offset between consecutive matrices of the
   /// batch — confirm against the implementation.
   void gemm_batch_strided(char op_a, char op_b, int M, int N, int K, double alpha, const double *A, int LDA, int strideA, const double *B, int LDB,
                           int strideB, double beta, double *C, int LDC, int strideC, int batch_count);
 
   48  void gemm_batch_strided(
char op_a, 
char op_b, 
int M, 
int N, 
int K, 
dcomplex alpha, 
const dcomplex *A, 
int LDA, 
int strideA, 
const dcomplex *B,
 
   49                          int LDB, 
int srideB, 
dcomplex beta, 
dcomplex *C, 
int LDC, 
int strideC, 
int batch_count);
 
   /// BLAS `gemv` interface for `double` data (declaration only; the definition is not visible here).
   /// op is a single-character operation selector for A; LDA accompanies A, incx / incy accompany x / Y.
   /// NOTE(review): presumably implements the standard BLAS update Y <- alpha * op(A) * x + beta * Y —
   /// confirm against the implementation.
   void gemv(char op, int M, int N, double alpha, const double *A, int LDA, const double *x, int incx, double beta, double *Y, int incy);
 
   52  void gemv(
char op, 
int M, 
int N, 
dcomplex alpha, 
const dcomplex *A, 
int LDA, 
const dcomplex *x, 
int incx, 
dcomplex beta, 
dcomplex *Y, 
int incy);
 
   /// BLAS `ger` interface for `double` data (declaration only; the definition is not visible here).
   /// x and Y are read-only inputs with strides incx / incy; A is the writable matrix with leading dimension LDA.
   /// NOTE(review): presumably implements the standard BLAS rank-1 update A <- alpha * x * Y^T + A —
   /// confirm against the implementation.
   void ger(int M, int N, double alpha, const double *x, int incx, const double *Y, int incy, double *A, int LDA);
 
   /// BLAS `scal` interface for `double` data (declaration only; the definition is not visible here).
   /// NOTE(review): presumably scales the M elements of x (stride incx) in place by alpha, as in
   /// reference BLAS — confirm against the implementation.
   void scal(int M, double alpha, double *x, int incx);
 
   /// BLAS `swap` interface for `double` data (declaration only; the definition is not visible here).
   /// Both x and Y are writable.
   /// NOTE(review): presumably exchanges N elements between x (stride incx) and Y (stride incy), as in
   /// reference BLAS — confirm against the implementation.
   void swap(int N, double *x, int incx, double *Y, int incy);
 
Provides a C++ interface for the GPU versions of various BLAS routines.
 
dcomplex: alias for the std::complex<double> type.