26#if defined(NDA_HAVE_CUDA) 
   30namespace nda::blas::f77 {
 
   /**
    * @brief Declaration of the real (double) `axpy` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably computes
    * Y <- alpha * x + Y - TODO confirm against the definition.
    *
    * @param N Presumably the number of elements to process.
    * @param alpha Scalar factor.
    * @param x Read-only input pointer.
    * @param incx Presumably the stride between consecutive elements of `x`.
    * @param Y Mutable pointer; presumably updated in place.
    * @param incy Presumably the stride between consecutive elements of `Y`.
    */
   void axpy(int N, double alpha, const double *x, int incx, double *Y, int incy);
 
   /**
    * @brief Declaration of the real (double) `copy` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably copies the
    * elements of `x` into `Y` - TODO confirm against the definition.
    *
    * @param N Presumably the number of elements to copy.
    * @param x Read-only source pointer.
    * @param incx Presumably the stride between consecutive elements of `x`.
    * @param Y Mutable destination pointer.
    * @param incy Presumably the stride between consecutive elements of `Y`.
    */
   void copy(int N, const double *x, int incx, double *Y, int incy);
 
   /**
    * @brief Declaration of the real (double) `dot` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably returns the
    * dot product of `x` and `Y` - TODO confirm against the definition.
    *
    * @param M Presumably the number of elements in each vector.
    * @param x Read-only pointer to the first operand.
    * @param incx Presumably the stride between consecutive elements of `x`.
    * @param Y Read-only pointer to the second operand.
    * @param incy Presumably the stride between consecutive elements of `Y`.
    * @return A `double`; presumably the accumulated product sum.
    */
   double dot(int M, const double *x, int incx, const double *Y, int incy);
 
   /**
    * @brief Declaration of the real (double) `gemm` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably computes
    * C <- alpha * op_a(A) * op_b(B) + beta * C - TODO confirm against the definition.
    *
    * NOTE(review): the parameter list was cut off after `C` in the reviewed text. The trailing `int LDC` was
    * restored to match the `gemm_batch` overloads, where `LDC` directly follows `C` - confirm.
    *
    * @param op_a Character flag for `A`; presumably selects none/transpose/conjugate-transpose.
    * @param op_b Character flag for `B`; presumably the same convention as `op_a`.
    * @param M, N, K Integer problem dimensions; presumably rows of C, columns of C and the contracted dimension.
    * @param alpha Scalar factor; presumably applied to the matrix product.
    * @param A Read-only pointer to the first matrix.
    * @param LDA Presumably the leading dimension of `A`.
    * @param B Read-only pointer to the second matrix.
    * @param LDB Presumably the leading dimension of `B`.
    * @param beta Scalar factor; presumably applied to the existing contents of `C`.
    * @param C Mutable pointer to the result matrix.
    * @param LDC Presumably the leading dimension of `C`.
    */
   void gemm(char op_a, char op_b, int M, int N, int K, double alpha, const double *A, int LDA, const double *B, int LDB, double beta, double *C,
             int LDC);
 
   44  void gemm(
char op_a, 
char op_b, 
int M, 
int N, 
int K, 
dcomplex alpha, 
const dcomplex *A, 
int LDA, 
const dcomplex *B, 
int LDB, 
dcomplex beta,
 
   /**
    * @brief Declaration of the real (double) `gemm_batch` overload.
    *
    * Only the prototype is visible here. Presumably runs one `gemm`-style product per batch entry, all entries
    * sharing the same dimensions and scalars - TODO confirm against the definition.
    *
    * @param op_a Character flag for the `A` matrices; presumably selects none/transpose/conjugate-transpose.
    * @param op_b Character flag for the `B` matrices; presumably the same convention as `op_a`.
    * @param M, N, K Integer problem dimensions, passed by value.
    * @param alpha Scalar factor; presumably applied to each matrix product.
    * @param A Pointer to read-only `double` pointers; presumably one matrix pointer per batch entry.
    * @param LDA Presumably the leading dimension of every `A` matrix.
    * @param B Pointer to read-only `double` pointers; presumably one matrix pointer per batch entry.
    * @param LDB Presumably the leading dimension of every `B` matrix.
    * @param beta Scalar factor; presumably applied to the existing contents of each `C` matrix.
    * @param C Pointer to mutable `double` pointers; presumably one result matrix per batch entry.
    * @param LDC Presumably the leading dimension of every `C` matrix.
    * @param batch_count Presumably the number of batch entries.
    */
   void gemm_batch(char op_a, char op_b, int M, int N, int K, double alpha, const double **A, int LDA, const double **B, int LDB, double beta,
                   double **C, int LDC, int batch_count);
 
   49  void gemm_batch(
char op_a, 
char op_b, 
int M, 
int N, 
int K, 
dcomplex alpha, 
const dcomplex **A, 
int LDA, 
const dcomplex **B, 
int LDB, 
dcomplex beta,
 
   50                  dcomplex **C, 
int LDC, 
int batch_count);
 
   /**
    * @brief Declaration of the real (double) `gemm_vbatch` overload.
    *
    * Only the prototype is visible here. Unlike `gemm_batch`, the dimensions and leading dimensions are passed
    * as `int *`; presumably they hold one value per batch entry (variable-size batch) - TODO confirm against
    * the definition.
    *
    * @param op_a Character flag for the `A` matrices; presumably selects none/transpose/conjugate-transpose.
    * @param op_b Character flag for the `B` matrices; presumably the same convention as `op_a`.
    * @param M, N, K Pointers to `int`; presumably per-entry problem dimensions.
    * @param alpha Scalar factor; presumably applied to each matrix product.
    * @param A Pointer to read-only `double` pointers; presumably one matrix pointer per batch entry.
    * @param LDA Pointer to `int`; presumably per-entry leading dimensions of the `A` matrices.
    * @param B Pointer to read-only `double` pointers; presumably one matrix pointer per batch entry.
    * @param LDB Pointer to `int`; presumably per-entry leading dimensions of the `B` matrices.
    * @param beta Scalar factor; presumably applied to the existing contents of each `C` matrix.
    * @param C Pointer to mutable `double` pointers; presumably one result matrix per batch entry.
    * @param LDC Pointer to `int`; presumably per-entry leading dimensions of the `C` matrices.
    * @param batch_count Presumably the number of batch entries.
    */
   void gemm_vbatch(char op_a, char op_b, int *M, int *N, int *K, double alpha, const double **A, int *LDA, const double **B, int *LDB, double beta,
                    double **C, int *LDC, int batch_count);
 
   54  void gemm_vbatch(
char op_a, 
char op_b, 
int *M, 
int *N, 
int *K, 
dcomplex alpha, 
const dcomplex **A, 
int *LDA, 
const dcomplex **B, 
int *LDB,
 
   /**
    * @brief Declaration of the real (double) `gemm_batch_strided` overload.
    *
    * Only the prototype is visible here. Each matrix operand is a single pointer plus an integer stride;
    * presumably batch entry `i` lives at `base + i * stride` - TODO confirm against the definition.
    *
    * @param op_a Character flag for the `A` matrices; presumably selects none/transpose/conjugate-transpose.
    * @param op_b Character flag for the `B` matrices; presumably the same convention as `op_a`.
    * @param M, N, K Integer problem dimensions, passed by value.
    * @param alpha Scalar factor; presumably applied to each matrix product.
    * @param A Read-only base pointer for the `A` matrices.
    * @param LDA Presumably the leading dimension of every `A` matrix.
    * @param strideA Presumably the element offset between consecutive `A` matrices.
    * @param B Read-only base pointer for the `B` matrices.
    * @param LDB Presumably the leading dimension of every `B` matrix.
    * @param strideB Presumably the element offset between consecutive `B` matrices.
    * @param beta Scalar factor; presumably applied to the existing contents of each `C` matrix.
    * @param C Mutable base pointer for the `C` matrices.
    * @param LDC Presumably the leading dimension of every `C` matrix.
    * @param strideC Presumably the element offset between consecutive `C` matrices.
    * @param batch_count Presumably the number of batch entries.
    */
   void gemm_batch_strided(char op_a, char op_b, int M, int N, int K, double alpha, const double *A, int LDA, int strideA, const double *B, int LDB,
                           int strideB, double beta, double *C, int LDC, int strideC, int batch_count);
 
   59  void gemm_batch_strided(
char op_a, 
char op_b, 
int M, 
int N, 
int K, 
dcomplex alpha, 
const dcomplex *A, 
int LDA, 
int strideA, 
const dcomplex *B,
 
   60                          int LDB, 
int srideB, 
dcomplex beta, 
dcomplex *C, 
int LDC, 
int strideC, 
int batch_count);
 
   /**
    * @brief Declaration of the real (double) `gemv` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably computes
    * Y <- alpha * op(A) * x + beta * Y - TODO confirm against the definition.
    *
    * @param op Character flag for `A`; presumably selects none/transpose/conjugate-transpose.
    * @param M, N Integer dimensions; presumably rows and columns of `A`.
    * @param alpha Scalar factor; presumably applied to the matrix-vector product.
    * @param A Read-only pointer to the matrix.
    * @param LDA Presumably the leading dimension of `A`.
    * @param x Read-only pointer to the input vector.
    * @param incx Presumably the stride between consecutive elements of `x`.
    * @param beta Scalar factor; presumably applied to the existing contents of `Y`.
    * @param Y Mutable pointer to the result vector.
    * @param incy Presumably the stride between consecutive elements of `Y`.
    */
   void gemv(char op, int M, int N, double alpha, const double *A, int LDA, const double *x, int incx, double beta, double *Y, int incy);
 
   63  void gemv(
char op, 
int M, 
int N, 
dcomplex alpha, 
const dcomplex *A, 
int LDA, 
const dcomplex *x, 
int incx, 
dcomplex beta, 
dcomplex *Y, 
int incy);
 
   /**
    * @brief Declaration of the real (double) `ger` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably performs
    * the rank-1 update A <- alpha * x * Y^T + A - TODO confirm against the definition.
    *
    * @param M, N Integer dimensions; presumably rows and columns of `A`.
    * @param alpha Scalar factor; presumably applied to the outer product.
    * @param x Read-only pointer to the first vector.
    * @param incx Presumably the stride between consecutive elements of `x`.
    * @param Y Read-only pointer to the second vector.
    * @param incy Presumably the stride between consecutive elements of `Y`.
    * @param A Mutable pointer to the matrix; presumably updated in place.
    * @param LDA Presumably the leading dimension of `A`.
    */
   void ger(int M, int N, double alpha, const double *x, int incx, const double *Y, int incy, double *A, int LDA);
 
   /**
    * @brief Declaration of the real (double) `scal` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably scales
    * `x` in place by `alpha` - TODO confirm against the definition.
    *
    * @param M Presumably the number of elements to scale.
    * @param alpha Scalar factor.
    * @param x Mutable pointer to the vector.
    * @param incx Presumably the stride between consecutive elements of `x`.
    */
   void scal(int M, double alpha, double *x, int incx);
 
   /**
    * @brief Declaration of the real (double) `swap` overload.
    *
    * Only the prototype is visible here. Going by the BLAS routine of the same name, this presumably exchanges
    * the elements of `x` and `Y` - TODO confirm against the definition.
    *
    * @param N Presumably the number of elements to exchange.
    * @param x Mutable pointer to the first vector.
    * @param incx Presumably the stride between consecutive elements of `x`.
    * @param Y Mutable pointer to the second vector.
    * @param incy Presumably the stride between consecutive elements of `Y`.
    */
   void swap(int N, double *x, int incx, double *Y, int incy);
 
Provides a C++ interface for the GPU versions of various BLAS routines.
std::complex< double > dcomplex
Alias for std::complex<double> type.