nda/unstable/cusolver__interface_8cpp_source.html

// Copyright (c) 2022--present, The Simons Foundation

// This file is part of TRIQS/nda and is licensed under the Apache License, Version 2.0.

// SPDX-License-Identifier: Apache-2.0

// See LICENSE in the root of this distribution for details.


#include "./cusolver_interface.hpp"

#include "../../basic_array.hpp"

#include "../../blas/tools.hpp"

#include "../../declarations.hpp"

#include "../../device.hpp"

#include "../../exceptions.hpp"

#include "../../macros.hpp"

#include "../../mem/allocators.hpp"

#include "../../mem/handle.hpp"


#include <cusolverDn.h>


#include <string>


namespace nda::lapack::device {


  // Local function to get the unique cuSOLVER dense handle shared by all wrappers in this file.
  //
  // The handle lives in a function-local static: it is created by cusolverDnCreate the first time this function
  // is called and released by cusolverDnDestroy when that static object is destroyed.
  //
  // Throws (via NDA_RUNTIME_ERROR) if cusolverDnCreate does not return CUSOLVER_STATUS_SUCCESS, instead of
  // silently handing out an unusable handle.
  inline cusolverDnHandle_t &get_handle() {
    struct handle_storage_t { // owner of the handle: create in constructor, destroy in destructor
      handle_storage_t() {
        auto status = cusolverDnCreate(&handle);
        if (status != CUSOLVER_STATUS_SUCCESS) {
          NDA_RUNTIME_ERROR << "cusolverDnCreate failed with error code " << std::to_string(status);
        }
      }
      ~handle_storage_t() { cusolverDnDestroy(handle); }

      // The handle must be destroyed exactly once, so the owner is not copyable.
      handle_storage_t(handle_storage_t const &)            = delete;
      handle_storage_t &operator=(handle_storage_t const &) = delete;

      cusolverDnHandle_t handle = {};
    };
    static handle_storage_t sto;
    return sto.handle;
  }


  // Get an integer pointer in unified memory to return info from lapack routines.

  int *get_info_ptr() {

    static auto info_u_handle = mem::handle_heap<int, mem::mallocator<mem::Unified>>(1);

    return info_u_handle.data();

  }


  // Global option to turn on/off the cudaDeviceSynchronize after cusolver library calls.
  // It is read by the CUSOLVER_CHECK macro in this file. Declared `static`, so it has internal linkage and can
  // only be changed from code inside this translation unit.

  static bool synchronize = true; // NOLINT  (global option is on purpose)


// Macro to call a cusolver routine and check the outcome.
//
//   X     cusolver routine to call; it is invoked as X(get_handle(), <args...>, get_info_ptr()).
//   info  lvalue that receives the value stored behind get_info_ptr() after the call.
//   ...   arguments forwarded to X between the handle and the info pointer.
//
// Behavior:
//   - throws (NDA_RUNTIME_ERROR) if X does not return CUSOLVER_STATUS_SUCCESS,
//   - if `synchronize` is true, calls cudaDeviceSynchronize() and throws if it does not return cudaSuccess,
//   - finally assigns *get_info_ptr() to `info`.
//
// The expansion is wrapped in do { ... } while (0) so that the macro acts as one statement (safe inside an
// unbraced if/else) and its temporaries do not leak into the calling scope. Invoke it with a trailing semicolon.
#define CUSOLVER_CHECK(X, info, ...)                                                                                                                 \
  do {                                                                                                                                               \
    auto cusolver_check_err_ = X(get_handle(), __VA_ARGS__, get_info_ptr());                                                                         \
    if (cusolver_check_err_ != CUSOLVER_STATUS_SUCCESS) {                                                                                            \
      NDA_RUNTIME_ERROR << AS_STRING(X) << " failed with error code " << std::to_string(cusolver_check_err_);                                        \
    }                                                                                                                                                \
    if (synchronize) {                                                                                                                               \
      auto cusolver_check_sync_ = cudaDeviceSynchronize();                                                                                           \
      if (cusolver_check_sync_ != cudaSuccess) {                                                                                                     \
        NDA_RUNTIME_ERROR << " cudaDeviceSynchronize failed after call to: " << AS_STRING(X) " \n "                                                  \
                          << " cudaGetErrorName: " << std::string(cudaGetErrorName(cusolver_check_sync_)) << "\n"                                    \
                          << " cudaGetErrorString: " << std::string(cudaGetErrorString(cusolver_check_sync_)) << "\n";                               \
      }                                                                                                                                              \
    }                                                                                                                                                \
    (info) = *get_info_ptr();                                                                                                                        \
  } while (0)


  void gesvd(char JOBU, char JOBVT, int M, int N, double *A, int LDA, double *S, double *U, int LDU, double *VT, int LDVT, double *WORK, int LWORK,

             double *RWORK, int &INFO) {

    // Replicate behavior of Netlib gesvd

    if (LWORK == -1) {

      int bufferSize = 0;

      cusolverDnDgesvd_bufferSize(get_handle(), M, N, &bufferSize);

      *WORK = bufferSize;

    } else {

      CUSOLVER_CHECK(cusolverDnDgesvd, INFO, JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, RWORK);

    }

  }

  void gesvd(char JOBU, char JOBVT, int M, int N, dcomplex *A, int LDA, double *S, dcomplex *U, int LDU, dcomplex *VT, int LDVT, dcomplex *WORK,

             int LWORK, double *RWORK, int &INFO) {

    // Replicate behavior of Netlib gesvd

    if (LWORK == -1) {

      int bufferSize = 0;

      cusolverDnZgesvd_bufferSize(get_handle(), M, N, &bufferSize);

      *WORK = bufferSize;

    } else {

      CUSOLVER_CHECK(cusolverDnZgesvd, INFO, JOBU, JOBVT, M, N, cucplx(A), LDA, S, cucplx(U), LDU, cucplx(VT), LDVT, cucplx(WORK), LWORK,

                     RWORK); // NOLINT

    }

  }


  void getrf(int M, int N, double *A, int LDA, int *ipiv, int &info) {

    int bufferSize = 0;

    cusolverDnDgetrf_bufferSize(get_handle(), M, N, A, LDA, &bufferSize);

    auto Workspace = nda::cuvector<double>(bufferSize);

    CUSOLVER_CHECK(cusolverDnDgetrf, info, M, N, A, LDA, Workspace.data(), ipiv);

  }

  void getrf(int M, int N, dcomplex *A, int LDA, int *ipiv, int &info) {

    int bufferSize = 0;

    cusolverDnZgetrf_bufferSize(get_handle(), M, N, cucplx(A), LDA, &bufferSize);

    auto Workspace = nda::cuvector<dcomplex>(bufferSize);

    CUSOLVER_CHECK(cusolverDnZgetrf, info, M, N, cucplx(A), LDA, cucplx(Workspace.data()), ipiv);

  }


  // Real (double) getrs: converts the operation character with get_cublas_op and forwards everything to
  // cusolverDnDgetrs through CUSOLVER_CHECK. `info` receives the info value reported by the routine.
  void getrs(char op, int N, int NRHS, double const *A, int LDA, int const *ipiv, double *B, int LDB, int &info) {
    auto const trans = get_cublas_op(op);
    CUSOLVER_CHECK(cusolverDnDgetrs, info, trans, N, NRHS, A, LDA, ipiv, B, LDB);
  }

  // Complex (dcomplex) getrs: converts the operation character with get_cublas_op, the complex pointers with
  // cucplx, and forwards everything to cusolverDnZgetrs through CUSOLVER_CHECK. `info` receives the info value
  // reported by the routine.
  void getrs(char op, int N, int NRHS, dcomplex const *A, int LDA, int const *ipiv, dcomplex *B, int LDB, int &info) {
    auto const trans = get_cublas_op(op);
    CUSOLVER_CHECK(cusolverDnZgetrs, info, trans, N, NRHS, cucplx(A), LDA, ipiv, cucplx(B), LDB);
  }


} // namespace nda::lapack::device

allocators.hpp
Provides custom allocators for the nda library.

basic_array.hpp
Provides the generic class for arrays.

cusolver_interface.hpp
Provides a C++ interface for the GPU versions of various LAPACK routines.

declarations.hpp
Provides various convenient aliases and helper functions for nda::basic_array and nda::basic_array_view.

device.hpp
Provides GPU and non-GPU specific functionality.

exceptions.hpp
Provides a custom runtime error class and macros to assert conditions and throw exceptions.

nda::cuvector
basic_array< ValueType, 1, C_layout, 'V', heap< mem::Device > > cuvector
Similar to nda::vector except the memory is stored on the device.
Definition declarations.hpp:284

nda::dcomplex
std::complex< double > dcomplex
Alias for std::complex<double> type.
Definition tools.hpp:28

handle.hpp
Provides various handles to take care of memory management for nda::basic_array and nda::basic_array_view.

macros.hpp
Macros used in the nda library.

tools.hpp
Provides various traits and utilities for the BLAS interface.