31namespace nda::lapack {
36 template <
bool run_on_device>
37 auto getrf_batch_impl(
auto &&a,
auto &&ipiv, [[maybe_unused]]
auto &&work) {
39 auto const m = a.extent(0);
40 auto const n = a.extent(1);
41 auto const n_b = a.extent(2);
45 EXPECTS(a.indexmap().min_stride() == 1);
46 EXPECTS(ipiv.indexmap().min_stride() == 1);
48#if defined(__has_feature)
49#if __has_feature(memory_sanitizer)
55 auto loop_getrf = [n_b, &a, &ipiv, &work](
auto &info) {
56 for (
int i = 0; i < n_b; ++i) {
57 auto a_i = a(range::all, range::all, i);
58 auto ipiv_i = ipiv(range::all, i);
59 info(i) =
getrf(a_i, ipiv_i, work);
65 if constexpr (run_on_device) {
68 using arr_t = std::remove_cvref_t<
decltype(a)>;
71 blas::device::getrf_batch(n, ptr_d.data(),
get_ld(a(range::all, range::all, 0)), ipiv.data(), info_d.data(), n_b);
121 template <BlasArray<3> A, PivotArrayFor<A, 2> IPIV, BlasArrayFor<A, 1> W = vector_value_t<A>>
131 return detail::getrf_batch_impl<run_on_device>(
transpose(a), std::forward<IPIV>(ipiv), std::forward<W>(work));
133 return detail::getrf_batch_impl<run_on_device>(std::forward<A>(a), std::forward<IPIV>(ipiv), std::forward<W>(work));
144 template <BlasArray<3> A, PivotArrayFor<A, 2> IPIV, BlasArrayFor<A, 1> W = vector_value_t<A>>
147 return getrf_batch(std::forward<A>(a), std::forward<IPIV>(ipiv), std::forward<W>(work));
Provides definitions and type traits involving the different memory address spaces supported by nda.
Provides the generic class for arrays.
Provides basic functions to create and manipulate arrays and views.
Provides a C++ interface for various BLAS routines.
Provides concepts for the nda library.
Provides various convenient aliases and helper functions for nda::basic_array and nda::basic_array_view.
Provides GPU and non-GPU specific functionality.
Provides a generic interface to the LAPACK/cuSOLVER getrf routine.
void resize_or_check_if_view(A &a, std::array< long, A::rank > const &sha)
Resize a given regular array to the given shape or check if a given view has the correct shape.
auto transpose(A &&a)
Transpose the memory layout of an nda::MemoryArray or an nda::expr_call.
decltype(auto) to_device(A &&a)
Convert an nda::MemoryArray to its regular type on device memory.
basic_array< ValueType, Rank, Layout, 'A', ContainerPolicy > array
Alias template of an nda::basic_array with an 'A' algebra.
basic_array< ValueType, 1, C_layout, 'V', ContainerPolicy > vector
Alias template of an nda::basic_array with rank 1 and a 'V' algebra.
int get_ld(A const &a)
Get the leading dimension of an nda::MemoryArray with rank 1 or 2 for BLAS/LAPACK calls.
auto batch_ptrs(A &&a)
Given a 2- or 3-dimensional array get an array of pointers to each of the submatrices/subvectors inde...
static constexpr bool has_C_layout
Constexpr variable that is true if all given nda::Array types have nda::C_layout.
vector< get_value_t< A >, heap< mem::get_addr_space< A > > > vector_value_t
Alias for an nda::vector with the same value type and address space as the given type.
static constexpr bool has_F_layout
Constexpr variable that is true if all given nda::Array types have nda::F_layout.
auto getrf_batch(A &&a, IPIV &&ipiv, W &&work=vector_value_t< A >{})
Interface to batched versions of the LAPACK/cuSOLVER getrf routine.
int getrf(A &&a, IPIV &&ipiv, W &&work=vector_value_t< A >{})
Interface to the LAPACK/cuSOLVER getrf routine.
static constexpr bool have_device_compatible_addr_space
Constexpr variable that is true if all given types have an address space compatible with Device.
Provides a C++ interface for various LAPACK routines.
Macros used in the nda library.
Provides type traits for the nda library.