TRIQS/nda 2.0.0
Multi-dimensional array library for C++
Loading...
Searching...
No Matches
LAPACK/cuSOLVER interface

Detailed Description

Low-level interface to parts of the LAPACK/cuSOLVER library.

Classes

class  nda::lapack::gelss_worker< T >
 Worker class for solving linear least squares problems. More...
class  nda::lapack::gelss_worker_hermitian
 Specialized worker class for solving linear least squares problems while enforcing a certain hermitian symmetry. More...

Functions

template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, VL, VR>)
int nda::lapack::geev (A &&a, W &&w, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{})
 Interface to the LAPACK geev routine for complex matrices.
template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > WR, BlasArrayFor< A, 1 > WI, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, VL, VR>)
int nda::lapack::geev (A &&a, WR &&wr, WI &&wi, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{})
 Interface to the LAPACK geev routine for real matrices.
template<BlasArray< 2 > A, BlasArrayFor< A > B, BlasArrayRealFor< A, 1 > S, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and (get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<A, B>)
int nda::lapack::gelss (A &&a, B &&b, S &&s, get_fp_t< A > rcond, int &rank, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{})
 Interface to the LAPACK gelss routine.
template<BlasArray< 2 > A, PivotArrayFor< A, 1 > JPVT, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>)
int nda::lapack::geqp3 (A &&a, JPVT &&jpvt, TAU &&tau, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{})
 Interface to the LAPACK geqp3 routine.
template<BlasArray< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A>)
int nda::lapack::geqrf (A &&a, TAU &&tau, W &&work=vector_value_t< A >{})
 Interface to the LAPACK/cuSOLVER geqrf routine.
template<BlasArray< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A, TAU>)
int nda::lapack::geqrf_batch (A &&a, TAU &&tau, W &&work=vector_value_t< A >{})
 Interface to batched versions of the LAPACK/cuSOLVER geqrf routine.
template<BlasArray< 2 > A, BlasArrayRealFor< A, 1 > S, BlasArrayFor< A, 2 > U, BlasArrayFor< A, 2 > VH, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (has_C_layout<A> == has_C_layout<U> and has_C_layout<A> == has_C_layout<VH>)
int nda::lapack::gesvd (A &&a, S &&s, U &&u, VH &&vh, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{})
 Interface to the LAPACK/cuSOLVER gesvd routine.
template<BlasArray< 2 > A, PivotArrayFor< A, 1 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
int nda::lapack::getrf (A &&a, IPIV &&ipiv, W &&work=vector_value_t< A >{})
 Interface to the LAPACK/cuSOLVER getrf routine.
template<BlasArray< 3 > A, PivotArrayFor< A, 2 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A> or has_C_layout<A>)
auto nda::lapack::getrf_batch (A &&a, IPIV &&ipiv, W &&work=vector_value_t< A >{})
 Interface to batched versions of the LAPACK/cuSOLVER getrf routine.
template<BlasArray< 2 > A, PivotArrayFor< A, 1 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
int nda::lapack::getri (A &&a, IPIV const &ipiv, W &&work=vector_value_t< A >{})
 Interface to the LAPACK getri routine.
template<BlasArray< 3 > A, PivotArrayFor< A, 2 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A> or has_C_layout<A>)
auto nda::lapack::getri_batch (A &&a, IPIV const &ipiv, W &&work=vector_value_t< A >{})
 Interface to batched versions of the LAPACK/cuSOLVER getri routine.
template<BlasArrayOrConj< 2 > A, BlasArrayFor< A > B, PivotArrayFor< A, 1 > IPIV>
requires ((get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<B>)
int nda::lapack::getrs (A const &a, B &&b, IPIV const &ipiv)
 Interface to the LAPACK/cuSOLVER getrs routine.
template<BlasArrayOrConj< 3 > A, BlasArrayFor< A, 3 > B, PivotArrayFor< A, 2 > IPIV>
requires ((has_F_layout<A> or has_C_layout<A>) and has_F_layout<B> and (not is_conj_array_expr<A> or has_C_layout<A>))
int nda::lapack::getrs_batch (A const &a, B &&b, IPIV const &ipiv)
 Interface to batched versions of the LAPACK/cuSOLVER getrs routine.
template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > A2, BlasArrayFor< A, 1 > B2, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B, VL, VR>)
int nda::lapack::ggev (A &&a, B &&b, A2 &&alpha, B2 &&beta, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{})
 Interface to the LAPACK ggev routine for complex matrices.
template<BlasArrayReal< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > AR, BlasArrayFor< A, 1 > AI, BlasArrayFor< A, 1 > B2, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B, VL, VR>)
int nda::lapack::ggev (A &&a, B &&b, AR &&alphar, AI &&alphai, B2 &&beta, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{})
 Interface to the LAPACK ggev routine for real matrices.
template<BlasArray< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A>)
int nda::lapack::gqr (A &&a, TAU &&tau, W &&work=vector_value_t< A >{})
 Dispatcher to nda::lapack::orgqr for real value types and to nda::lapack::ungqr for complex value types.
template<BlasArray< 1 > DL, BlasArrayFor< DL, 1 > D, BlasArrayFor< DL, 1 > DU, BlasArrayFor< DL > B>
requires (mem::have_host_compatible_addr_space<DL> and (get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<B>)
int nda::lapack::gtsv (DL &&dl, D &&d, DU &&du, B &&b)
 Interface to the LAPACK gtsv routine.
template<BlasArrayCplx< 2 > A, BlasArrayRealFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>)
int nda::lapack::heev (A &&a, W &&w, char jobz='V', W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{})
 Interface to the LAPACK heev routine.
template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayRealFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B>)
int nda::lapack::hegv (A &&a, B &&b, W &&w, char jobz='V', int itype=1, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{})
 Interface to the LAPACK hegv routine.
template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A>)
int nda::lapack::orgqr (A &&a, TAU &&tau, W &&work=vector_value_t< A >{})
 Interface to the LAPACK/cuSOLVER orgqr routine.
template<BlasArrayReal< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A, TAU>)
int nda::lapack::orgqr_batch (A &&a, TAU &&tau, W &&work=vector_value_t< A >{})
 Interface to batched versions of the LAPACK/cuSOLVER orgqr routine.
template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>)
int nda::lapack::syev (A &&a, W &&w, char jobz='V', W1 &&work=vector_value_t< A >{})
 Interface to the LAPACK syev routine.
template<BlasArrayReal< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B>)
int nda::lapack::sygv (A &&a, B &&b, W &&w, char jobz='V', int itype=1, W1 &&work=vector_value_t< A >{})
 Interface to the LAPACK sygv routine.
template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A>)
int nda::lapack::ungqr (A &&a, TAU &&tau, W &&work=vector_value_t< A >{})
 Interface to the LAPACK/cuSOLVER ungqr routine.
template<BlasArrayCplx< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A, TAU>)
int nda::lapack::ungqr_batch (A &&a, TAU &&tau, W &&work=vector_value_t< A >{})
 Interface to batched versions of the LAPACK/cuSOLVER ungqr routine.

Function Documentation

◆ geev() [1/2]

template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, VL, VR>)
int nda::lapack::geev ( A && a,
W && w,
VL && vl,
VR && vr,
char jobvl = 'N',
char jobvr = 'V',
W1 && work = vector_value_t<A>{},
W2 && rwork = vector_fp_t<A>{} )

#include <nda/lapack/geev.hpp>

Interface to the LAPACK geev routine for complex matrices.

Computes all eigenvalues \( \lambda_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a complex eigenvalue problem.

The right eigenvector \( \mathbf{v}_j \) satisfies

\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{v}_j \; , \]

whereas the left eigenvector \( \mathbf{u}_j \) satisfies

\[ \mathbf{u}_j^H \mathbf{A} = \lambda_j \mathbf{u}_j^H \; . \]

Here, \( \mathbf{u}_j^H \) denotes the conjugate-transpose of \( \mathbf{u}_j \).

The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real.

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and all input matrices are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayCplx<2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
VLnda::blas_lapack::BlasArrayFor<A, 2> type.
VRnda::blas_lapack::BlasArrayFor<A, 2> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
W2nda::blas_lapack::BlasArrayRealFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten.
wOutput vector \( \mathbf{w} \). The computed eigenvalues, i.e. \( w_j = \lambda_j \).
vlOutput matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors. If jobvl = N, \( \mathbf{V}_L \) is not referenced.
vrOutput matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors. If jobvr = N, \( \mathbf{V}_R \) is not referenced.
jobvlCharacter indicating whether to compute left eigenvectors ('V') or not ('N').
jobvrCharacter indicating whether to compute right eigenvectors ('V') or not ('N').
workOutput vector. Workspace array used by the LAPACK routine.
rworkOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 165 of file geev.hpp.

◆ geev() [2/2]

template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > WR, BlasArrayFor< A, 1 > WI, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, VL, VR>)
int nda::lapack::geev ( A && a,
WR && wr,
WI && wi,
VL && vl,
VR && vr,
char jobvl = 'N',
char jobvr = 'V',
W1 && work = vector_value_t<A>{} )

#include <nda/lapack/geev.hpp>

Interface to the LAPACK geev routine for real matrices.

Computes all eigenvalues \( \lambda_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a real eigenvalue problem.

The right eigenvector \( \mathbf{v}_j \) satisfies

\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{v}_j \; , \]

whereas the left eigenvector \( \mathbf{u}_j \) satisfies

\[ \mathbf{u}_j^T \mathbf{A} = \lambda_j \mathbf{u}_j^T \; . \]

Here, \( \mathbf{u}_j^T \) denotes the transpose of \( \mathbf{u}_j \).

The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real.

For real matrices, complex eigenvalues always occur in complex conjugate pairs and the corresponding eigenvectors are stored in a special packed format (see nda::linalg::unpack_eigenvectors).

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and all input matrices are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayReal<2> type.
WRnda::blas_lapack::BlasArrayFor<A, 1> type.
WInda::blas_lapack::BlasArrayFor<A, 1> type.
VLnda::blas_lapack::BlasArrayFor<A, 2> type.
VRnda::blas_lapack::BlasArrayFor<A, 2> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten.
wrOutput vector \( \mathbf{w}^{(r)} \). The real parts of the computed eigenvalues, i.e. \( w_j^{(r)} = \mathrm{Re}(\lambda_j) \).
wiOutput vector \( \mathbf{w}^{(i)} \). The imaginary parts of the computed eigenvalues, i.e. \(w_j^{(i)} = \mathrm{Im}(\lambda_j) \).
vlOutput matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors (in packed format for complex pairs). If jobvl = N, \( \mathbf{V}_L \) is not referenced.
vrOutput matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors (in packed format for complex pairs). If jobvr = N, \( \mathbf{V}_R \) is not referenced.
jobvlCharacter indicating whether to compute left eigenvectors ('V') or not ('N').
jobvrCharacter indicating whether to compute right eigenvectors ('V') or not ('N').
workOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 77 of file geev.hpp.

◆ gelss()

template<BlasArray< 2 > A, BlasArrayFor< A > B, BlasArrayRealFor< A, 1 > S, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and (get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<A, B>)
int nda::lapack::gelss ( A && a,
B && b,
S && s,
get_fp_t< A > rcond,
int & rank,
W1 && work = vector_value_t<A>{},
W2 && rwork = vector_fp_t<A>{} )

#include <nda/lapack/gelss.hpp>

Interface to the LAPACK gelss routine.

Computes the minimum norm solution to a linear least squares problem:

\[ \min_{\mathbf{x}} \| \mathbf{b} - \mathbf{A x} \|_2 \]

using the singular value decomposition (SVD) of \( \mathbf{A} \), an \( m \times n \) matrix which may be rank-deficient.

Several right hand side vectors \( \mathbf{b} \) and solution vectors \( \mathbf{x} \) can be handled in a single call; they are stored as the columns of the \( m \times n_{\mathrm{rhs}} \) right hand side matrix \(\mathbf{B} \) and the \( n \times n_{\mathrm{rhs}} \) solution matrix \( \mathbf{X} \).

The effective rank of \( \mathbf{A} \) is determined by treating as zero those singular values which are less than \( r_{\mathrm{cond}} \) times the largest singular value.

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and all input matrices are required to have nda::F_layout. Since we do not resize the input array representing the right hand side, it is required to be large enough to hold the solution, i.e. it needs to have at least \( \max(m, n) \) rows.
Template Parameters
Anda::blas_lapack::BlasArray<2> type.
Bnda::blas_lapack::BlasArrayFor<A> type.
Snda::blas_lapack::BlasArrayRealFor<A, 1> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
W2nda::blas_lapack::BlasArrayRealFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the first \(\min(m,n) \) rows of \( \mathbf{A} \) are overwritten with its right singular vectors, stored rowwise.
bInput/output array. On entry, the \( m \times n_{\mathrm{rhs}} \) right hand side matrix \( \mathbf{B} \) or vector \( \mathbf{b} \). On exit, it is overwritten by the \( n \times n_{\mathrm{rhs}} \) solution matrix \( \mathbf{X} \) or vector \( \mathbf{x} \). If \( m \geq n \) and if the effective rank is equal to \( n \), the residual sum-of-squares for the solution in the ith column is given by the sum of squares of the modulus of elements \( n + 1 \) to \( m \) in that column.
sOutput vector. The singular values of \( \mathbf{A} \) in decreasing order. The condition number of \(\mathbf{A} \) in the 2-norm is \( s_1 / s_{\min(m,n)} \).
rcondIt is used to determine the effective rank of \( \mathbf{A} \). Singular values \( s_i \leq r_{\mathrm{cond}} s_1 \) are treated as zero. If \( r_{\mathrm{cond}} < 0 \), machine precision is used instead.
rankOutput variable. The effective rank of \( \mathbf{A} \), i.e. the number of singular values which are greater than \( r_{\mathrm{cond}} s_1 \).
workOutput vector. Workspace array used by the LAPACK routine.
rworkOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 77 of file gelss.hpp.

◆ geqp3()

template<BlasArray< 2 > A, PivotArrayFor< A, 1 > JPVT, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>)
int nda::lapack::geqp3 ( A && a,
JPVT && jpvt,
TAU && tau,
W1 && work = vector_value_t<A>{},
W2 && rwork = vector_fp_t<A>{} )

#include <nda/lapack/geqp3.hpp>

Interface to the LAPACK geqp3 routine.

Computes a QR factorization of a matrix \( \mathbf{A} \) with column pivoting:

\[ \mathbf{A P} = \mathbf{Q R} \]

using Level 3 BLAS.

The matrix \( \mathbf{Q} \) is represented as a product of elementary reflectors

\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]

where \( k = \min(m,n) \).

Each \( \mathbf{H}(i) \) has the form

\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^H \]

where \( \tau_i \) is a real/complex scalar, and \( \mathbf{v}_i \) is a real/complex vector with

  • elements \( 1 \) to \( i - 1 \) equal to 0,
  • element \( i \) equal to 1 and
  • elements \( i + 1 \) to \( m \) stored on exit in the elements \( i + 1 \) to \( m \) in the column \( i \) of \( \mathbf{A} \).
Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and \( \mathbf{A} \) is required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArray<2> type.
JPVTnda::blas_lapack::PivotArrayFor<A, 1> type.
TAUnda::blas_lapack::BlasArrayFor<A, 1> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
W2nda::blas_lapack::BlasArrayRealFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the upper triangle of the array contains the \( \min(m,n) \times n \) upper trapezoidal matrix \( \mathbf{R} \); the elements below the diagonal, together with the array \( \mathbf{\tau} \), represent the unitary matrix \(\mathbf{Q} \) as a product of \( \min(m,n) \) elementary reflectors.
jpvtInput/output vector. On entry, if the jth element is \( \neq 0 \), the jth column of \( \mathbf{A} \) is permuted to the front of \( \mathbf{A P} \) (a leading column); if the jth element is equal to 0, the jth column of \( \mathbf{A} \) is a free column. On exit, if the jth element is equal to \( k \), then the jth column of \( \mathbf{A P} \) was the kth column of \( \mathbf{A} \).
tauOutput vector. The scalar factors \( \tau_i \) of the elementary reflectors \( \mathbf{H}(i) \).
workOutput vector. Workspace array used by the LAPACK routine.
rworkOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 81 of file geqp3.hpp.

◆ geqrf()

template<BlasArray< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A>)
int nda::lapack::geqrf ( A && a,
TAU && tau,
W && work = vector_value_t<A>{} )

#include <nda/lapack/geqrf.hpp>

Interface to the LAPACK/cuSOLVER geqrf routine.

Computes a QR factorization of a matrix \( \mathbf{A} \):

\[ \mathbf{A} = \mathbf{Q R} \; . \]

The matrix \( \mathbf{Q} \) is represented as a product of elementary reflectors

\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]

where \( k = \min(m,n) \).

Each \( \mathbf{H}(i) \) has the form

\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^H \]

where \( \tau_i \) is a real/complex scalar, and \( \mathbf{v}_i \) is a real/complex vector with

  • elements \( 1 \) to \( i - 1 \) equal to 0,
  • element \( i \) equal to 1 and
  • elements \( i + 1 \) to \( m \) stored on exit in the elements \( i + 1 \) to \( m \) in the column \( i \) of \( \mathbf{A} \).

If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.

Note
\( \mathbf{A} \) is required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArray<2> type.
TAUnda::blas_lapack::BlasArrayFor<A, 1> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the upper triangle of the array contains the \( \min(m,n) \times n \) upper trapezoidal matrix \( \mathbf{R} \); the elements below the diagonal, together with the array \( \mathbf{\tau} \), represent the unitary matrix \(\mathbf{Q} \) as a product of \( \min(m,n) \) elementary reflectors.
tauOutput vector. The scalar factors \( \tau_i \) of the elementary reflectors \( \mathbf{H}(i) \).
workOutput vector. Workspace array used by the LAPACK/cuSOLVER routine.
Returns
Integer return code from the LAPACK/cuSOLVER call.

Definition at line 72 of file geqrf.hpp.

◆ geqrf_batch()

template<BlasArray< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A, TAU>)
int nda::lapack::geqrf_batch ( A && a,
TAU && tau,
W && work = vector_value_t<A>{} )

#include <nda/lapack/geqrf_batch.hpp>

Interface to batched versions of the LAPACK/cuSOLVER geqrf routine.

This function computes a QR factorization

\[ \mathbf{A}_i = \mathbf{Q}_i \mathbf{R}_i \; , \]

for a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::geqrf.

A batch of matrices is just a 3-dimensional array in nda::F_layout where the last dimension indexes the individual matrices such that A(:,:,i) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.

Depending on the input array types, the function does the following:

Note
\( \mathbf{A} \) and \( \mathbf{T} \) are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArray<3> type.
TAUnda::blas_lapack::BlasArrayFor<A, 2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output array. On entry, the 3-dimensional array \( \mathbf{A} \) containing the matrices \(\mathbf{A}_i \) to be factored. On exit, the corresponding upper trapezoidal matrices \( \mathbf{R}_i \) and the elementary reflectors representing \( \mathbf{Q}_i \).
tauOutput matrix \( \mathbf{T} \). The \( i \)-th column contains the scalar factors of the elementary reflectors representing \( \mathbf{Q}_i \).
workOutput vector. Workspace array only used by the LAPACK routine.
Returns
Integer return code from the batched LAPACK/cuBLAS call(s). If zero, all calls were successful.

Definition at line 67 of file geqrf_batch.hpp.

◆ gesvd()

template<BlasArray< 2 > A, BlasArrayRealFor< A, 1 > S, BlasArrayFor< A, 2 > U, BlasArrayFor< A, 2 > VH, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (has_C_layout<A> == has_C_layout<U> and has_C_layout<A> == has_C_layout<VH>)
int nda::lapack::gesvd ( A && a,
S && s,
U && u,
VH && vh,
W1 && work = vector_value_t<A>{},
W2 && rwork = vector_fp_t<A>{} )

#include <nda/lapack/gesvd.hpp>

Interface to the LAPACK/cuSOLVER gesvd routine.

Computes the singular value decomposition (SVD) of an \( m \times n \) matrix \( \mathbf{A} \). The SVD is written as

\[ \mathbf{A} = \mathbf{U} \mathbf{S} \mathbf{V}^H \; , \]

where \( \mathbf{S} \) is an \( m \times n \) matrix which is zero except for its \( \min(m,n) \) diagonal elements, \( \mathbf{U} \) is an \( m \times m \) unitary matrix, and \( \mathbf{V} \) is an \( n \times n \) unitary matrix. The diagonal elements of \( \mathbf{S} \) are the singular values of \( \mathbf{A} \); they are real and non-negative, and are returned in descending order. The first \( \min(m,n) \) columns of \(\mathbf{U} \) and \( \mathbf{V} \) are the left and right singular vectors of \( \mathbf{A} \).

If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.

Note
\( \mathbf{A} \), \( \mathbf{U} \) and \( \mathbf{V}^H \) are required to have the same memory layout.
Template Parameters
Anda::blas_lapack::BlasArray<2> type.
Snda::blas_lapack::BlasArrayRealFor<A, 1> type.
Unda::blas_lapack::BlasArrayFor<A, 2> type.
VHnda::blas_lapack::BlasArrayFor<A, 2> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
W2nda::blas_lapack::BlasArrayRealFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the contents of \( \mathbf{A} \) are destroyed.
sOutput vector. The singular values of \( \mathbf{A} \), sorted so that \( s_i \geq s_{i+1} \).
uOutput matrix. It contains the \( m \times m \) unitary matrix \( \mathbf{U} \).
vhOutput matrix. It contains the \( n \times n \) unitary matrix \( \mathbf{V}^H \).
workOutput vector. Workspace array used by the LAPACK/cuSOLVER routine.
rworkOutput vector. Workspace array used by the LAPACK/cuSOLVER routine.
Returns
Integer return code from the LAPACK/cuSOLVER call.

Definition at line 71 of file gesvd.hpp.

◆ getrf()

template<BlasArray< 2 > A, PivotArrayFor< A, 1 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
int nda::lapack::getrf ( A && a,
IPIV && ipiv,
W && work = vector_value_t<A>{} )

#include <nda/lapack/getrf.hpp>

Interface to the LAPACK/cuSOLVER getrf routine.

Computes an LU factorization of a general \( m \times n \) matrix \( \mathbf{A} \) using partial pivoting with row interchanges.

The factorization has the form

\[ \mathbf{A} = \mathbf{P L U} \]

where \( \mathbf{P} \) is a permutation matrix, \( \mathbf{L} \) is lower triangular with unit diagonal elements (lower trapezoidal if \( m > n \)), and \( \mathbf{U} \) is upper triangular (upper trapezoidal if \(m < n \)).

If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.

Note
If \( \mathbf{A} \) is stored in nda::C_layout, the factorization is actually performed on \( \mathbf{A}^T \). When the result is further used in nda::lapack::getrs or nda::lapack::getri, this is automatically taken into account and works as expected.
Template Parameters
Anda::blas_lapack::BlasArray<2> type.
IPIVnda::blas_lapack::PivotArrayFor<A, 1> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the \( m \times n \) matrix to be factored. On exit, the factors \(\mathbf{L} \) and \( \mathbf{U} \) from the factorization \( \mathbf{A} = \mathbf{P L U} \); the unit diagonal elements of \( \mathbf{L} \) are not stored.
ipivOutput vector. The pivot indices, i.e. for \( 1 \leq i \leq \min(m,n) \), row \( i \) of the matrix was interchanged with row ipiv(i-1).
workOutput vector. Workspace array only used by the cuSOLVER routine.
Returns
Integer return code from the LAPACK/cuSOLVER call.

Definition at line 61 of file getrf.hpp.

◆ getrf_batch()

template<BlasArray< 3 > A, PivotArrayFor< A, 2 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A> or has_C_layout<A>)
auto nda::lapack::getrf_batch ( A && a,
IPIV && ipiv,
W && work = vector_value_t<A>{} )

#include <nda/lapack/getrf_batch.hpp>

Interface to batched versions of the LAPACK/cuSOLVER getrf routine.

This function computes LU factorizations

\[ \mathbf{A}_i = \mathbf{P}_i \mathbf{L}_i \mathbf{U}_i \; , \]

for a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::getrf.

A batch of matrices is just a 3-dimensional array in either nda::C_layout or nda::F_layout. For a Fortran/C layout array, the last/first dimension indexes the individual matrices such that A(:,:,i)/A(i,:,:) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.

Depending on the input array types, the function does the following:

Note
If \( \mathbf{A} \) is stored in nda::C_layout, the factorizations are actually performed on \(\mathbf{A}_i^T \). When the result is further used in nda::lapack::getrs_batch or nda::lapack::getri_batch, this is automatically taken into account and works as expected.
Template Parameters
Anda::blas_lapack::BlasArray<3> type.
IPIVnda::blas_lapack::PivotArrayFor<A, 2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output array. On entry, the 3-dimensional array \( \mathbf{A} \) containing the matrices \(\mathbf{A}_i \) to be factored. On exit, the corresponding \( \mathbf{L}_i \) and \( \mathbf{U}_i \) matrices from the factorization.
ipivOutput matrix. If the matrix is in Fortran (C) layout, the \( i \)-th column (row) contains the pivot indices from the factorization of \( \mathbf{A}_i \).
workOutput vector. Workspace array only used by the cuSOLVER routine.
Returns
nda::array of integer return codes from the LAPACK/cuBLAS/cuSOLVER call(s).

Definition at line 123 of file getrf_batch.hpp.

◆ getri()

template<BlasArray< 2 > A, PivotArrayFor< A, 1 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
int nda::lapack::getri ( A && a,
IPIV const & ipiv,
W && work = vector_value_t<A>{} )

#include <nda/lapack/getri.hpp>

Interface to the LAPACK getri routine.

Computes the inverse of an \( n \times n \) matrix \( \mathbf{A} \) using the LU factorization computed by nda::lapack::getrf.

This method inverts \( \mathbf{U} \) and then computes \( \mathbf{A}^{-1} \) by solving the system \( \mathbf{A}^{-1} \mathbf{L} = \mathbf{U}^{-1} \) for \( \mathbf{A}^{-1} \).

If the input arrays satisfy nda::mem::have_device_compatible_addr_space, a custom implementation that builds an \( n \times n \) identity matrix in the workspace and calls cuSOLVER ?getrs is used.

Template Parameters
Anda::blas_lapack::BlasArray<2> type.
IPIVnda::blas_lapack::PivotArrayFor<A, 1> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the factors \( \mathbf{L} \) and \( \mathbf{U} \) from the factorization \( \mathbf{A} = \mathbf{P L U} \) as computed by nda::lapack::getrf. On exit \( \mathbf{A}^{-1} \), the inverse of the original matrix \( \mathbf{A} \).
ipivInput vector. The pivot indices from nda::lapack::getrf, i.e. for \( 1 \leq i \leq n \), row \( i \) of the matrix was interchanged with row ipiv(i-1).
workOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 53 of file getri.hpp.

◆ getri_batch()

template<BlasArray< 3 > A, PivotArrayFor< A, 2 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A> or has_C_layout<A>)
auto nda::lapack::getri_batch ( A && a,
IPIV const & ipiv,
W && work = vector_value_t<A>{} )

#include <nda/lapack/getri_batch.hpp>

Interface to batched versions of the LAPACK/cuSOLVER getri routine.

Computes the inverse of a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::getri.

A batch of matrices is just a 3-dimensional array in either nda::C_layout or nda::F_layout. For a Fortran/C layout array, the last/first dimension indexes the individual matrices such that A(:,:,i)/A(i,:,:) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.

Depending on the input array types, the function does the following:

Note
\( \mathbf{A} \) is required to have nda::F_layout or nda::C_layout.
Template Parameters
Anda::blas_lapack::BlasArray<3> type.
IPIVnda::blas_lapack::PivotArrayFor<A, 2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output array. On entry, the 3-dimensional array \( \mathbf{A} \) containing LU factorized matrices \( \mathbf{A}_i \) as computed by nda::lapack::getrf_batch. On exit, the corresponding inverse matrices \(\mathbf{A}_i^{-1} \).
ipivInput matrix. The pivot indices from nda::lapack::getrf_batch. If the matrix is in Fortran (C) layout, the \( i \)-th column (row) contains the pivot indices from the factorization of \( \mathbf{A}_i \).
workOutput vector. Workspace array used by the LAPACK/cuBLAS routine.
Returns
nda::array of integer return codes from the LAPACK/cuBLAS call(s).

Definition at line 111 of file getri_batch.hpp.

◆ getrs()

template<BlasArrayOrConj< 2 > A, BlasArrayFor< A > B, PivotArrayFor< A, 1 > IPIV>
requires ((get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<B>)
int nda::lapack::getrs ( A const & a,
B && b,
IPIV const & ipiv )

#include <nda/lapack/getrs.hpp>

Interface to the LAPACK/cuSOLVER getrs routine.

Solves a system of linear equations

  • \( \mathbf{A} \mathbf{X} = \mathbf{B} \) or
  • \( \mathbf{A} \mathbf{x} = \mathbf{b} \),

with a general \( n \times n \) matrix \( \mathbf{A} \) and either \( n \times n_{\mathrm{rhs}} \) matrices \( \mathbf{X} \) and \( \mathbf{B} \) or vectors \( \mathbf{x} \) and \( \mathbf{b} \) of size \( n \).

If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.

Note
\( \mathbf{A} \) is allowed to be a lazy conjugate expression (see nda::blas_lapack::is_conj_array_expr), in which case it is required to be in nda::C_layout. \( \mathbf{B} \) is required to be in nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayOrConj<2> type.
Bnda::blas_lapack::BlasArrayFor type.
IPIVnda::blas_lapack::PivotArrayFor<A, 1> type.
Parameters
aInput matrix. The factors \( \mathbf{L} \) and \( \mathbf{U} \) from the factorization \( \mathbf{A} = \mathbf{P L U} \) as computed by nda::lapack::getrf.
bInput/output matrix/vector. On entry, the right hand side matrix \( \mathbf{B} \) or vector \(\mathbf{b} \). On exit, the solution matrix \( \mathbf{X} \) or vector \( \mathbf{x} \).
ipivInput vector. The pivot indices from nda::lapack::getrf, i.e. for \( 1 \leq i \leq n \), row \( i \) of the matrix was interchanged with row ipiv(i-1).
Returns
Integer return code from the LAPACK call.

Definition at line 56 of file getrs.hpp.

◆ getrs_batch()

template<BlasArrayOrConj< 3 > A, BlasArrayFor< A, 3 > B, PivotArrayFor< A, 2 > IPIV>
requires ((has_F_layout<A> or has_C_layout<A>) and has_F_layout<B> and (not is_conj_array_expr<A> or has_C_layout<A>))
int nda::lapack::getrs_batch ( A const & a,
B && b,
IPIV const & ipiv )

#include <nda/lapack/getrs_batch.hpp>

Interface to batched versions of the LAPACK/cuSOLVER getrs routine.

This function solves systems of linear equations

\[ \mathbf{A}_i \mathbf{X}_i = \mathbf{B}_i \; , \]

for a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::getrs.

A batch of matrices is just a 3-dimensional array in either nda::C_layout or nda::F_layout. For a Fortran/C layout array, the last/first dimension indexes the individual matrices such that A(:,:,i)/A(i,:,:) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.

Depending on the input array types, the function does the following:

Note
\( \mathbf{A} \) is allowed to be a lazy conjugate expression (see nda::blas_lapack::is_conj_array_expr), in which case it is required to be in nda::C_layout. Otherwise, it must have nda::F_layout or nda::C_layout. \(\mathbf{B} \) is required to be in nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayOrConj<3> type.
Bnda::blas_lapack::BlasArrayFor<A, 3> type.
IPIVnda::blas_lapack::PivotArrayFor<A, 2> type.
Parameters
aInput array. The 3-dimensional array \( \mathbf{A} \) containing LU factorized matrices \( \mathbf{A}_i \) as computed by nda::lapack::getrf_batch.
bInput/output array. On entry, the 3-dimensional array \( \mathbf{B} \) containing the right hand side matrices \( \mathbf{B}_i \). On exit, it contains the corresponding solution matrices \( \mathbf{X}_i \).
ipivInput matrix. The pivot indices from nda::lapack::getrf_batch. If the matrix is in Fortran (C) layout, the \( i \)-th column (row) contains the pivot indices from the factorization of \( \mathbf{A}_i \).
Returns
Integer return code from the batched LAPACK/cuBLAS call(s). If zero, all calls were successful.

Definition at line 114 of file getrs_batch.hpp.

◆ ggev() [1/2]

template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > A2, BlasArrayFor< A, 1 > B2, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B, VL, VR>)
int nda::lapack::ggev ( A && a,
B && b,
A2 && alpha,
B2 && beta,
VL && vl,
VR && vr,
char jobvl = 'N',
char jobvr = 'V',
W1 && work = vector_value_t<A>{},
W2 && rwork = vector_fp_t<A>{} )

#include <nda/lapack/ggev.hpp>

Interface to the LAPACK ggev routine for complex matrices.

Computes the generalized eigenvalues \( \lambda_j = \alpha_j / \beta_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a complex generalized eigenvalue problem.

The right eigenvector \( \mathbf{v}_j \) satisfies

\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{B} \mathbf{v}_j \; , \]

whereas the left eigenvector \( \mathbf{u}_j \) satisfies

\[ \mathbf{u}_j^H \mathbf{A} = \lambda_j \mathbf{u}_j^H \mathbf{B} \; . \]

Here, \( \mathbf{u}_j^H \) denotes the conjugate-transpose of \( \mathbf{u}_j \).

The eigenvalues are stored as complex pairs \( \alpha_j \) and \( \beta_j \). If \( \beta_j \neq 0 \), the eigenvalue is \( \lambda_j = \alpha_j / \beta_j \).

The quotient \( \alpha_j / \beta_j \) may easily over- or underflow, and \( \beta_j \) may even be zero. Thus, the user should avoid naively computing the ratio. However, \( \alpha_j \) will always be less than and usually comparable with \( ||A|| \) in magnitude, and \( \beta_j \) always less than and usually comparable with \(||B|| \).

The computed eigenvectors are scaled so that their largest components \( z \) satisfy \( |\mathrm{Re}(z)| + |\mathrm{Im}(z)| = 1 \).

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and all input matrices are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayCplx<2> type.
Bnda::blas_lapack::BlasArrayFor<A, 2> type.
A2nda::blas_lapack::BlasArrayFor<A, 1> type.
B2nda::blas_lapack::BlasArrayFor<A, 1> type.
VLnda::blas_lapack::BlasArrayFor<A, 2> type.
VRnda::blas_lapack::BlasArrayFor<A, 2> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
W2nda::blas_lapack::BlasArrayRealFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten.
bInput/output matrix. On entry, the matrix \( \mathbf{B} \). On exit, \( \mathbf{B} \) is overwritten.
alphaOutput vector \( \boldsymbol{\alpha} \). The numerators of the computed eigenvalues, i.e. \(\alpha_j \).
betaOutput vector \( \boldsymbol{\beta} \). The denominators of the computed eigenvalues, i.e. \(\beta_j \).
vlOutput matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors. If jobvl = N, \( \mathbf{V}_L \) is not referenced.
vrOutput matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors. If jobvr = N, \( \mathbf{V}_R \) is not referenced.
jobvlCharacter indicating whether to compute left eigenvectors ('V') or not ('N').
jobvrCharacter indicating whether to compute right eigenvectors ('V') or not ('N').
workOutput vector. Workspace array used by the LAPACK routine.
rworkOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 202 of file ggev.hpp.

◆ ggev() [2/2]

template<BlasArrayReal< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > AR, BlasArrayFor< A, 1 > AI, BlasArrayFor< A, 1 > B2, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B, VL, VR>)
int nda::lapack::ggev ( A && a,
B && b,
AR && alphar,
AI && alphai,
B2 && beta,
VL && vl,
VR && vr,
char jobvl = 'N',
char jobvr = 'V',
W1 && work = vector_value_t<A>{} )

#include <nda/lapack/ggev.hpp>

Interface to the LAPACK ggev routine for real matrices.

Computes the generalized eigenvalues \( \lambda_j = \alpha_j / \beta_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a real generalized eigenvalue problem.

The right eigenvector \( \mathbf{v}_j \) satisfies

\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{B} \mathbf{v}_j \; , \]

whereas the left eigenvector \( \mathbf{u}_j \) satisfies

\[ \mathbf{u}_j^T \mathbf{A} = \lambda_j \mathbf{u}_j^T \mathbf{B} \; . \]

Here, \( \mathbf{u}_j^T \) denotes the transpose of \( \mathbf{u}_j \).

The eigenvalues are stored as real triples \( \alpha^{(r)}_j \), \( \alpha^{(i)}_j \) and \( \beta_j \) such that \( \alpha_j = \alpha^{(r)}_j + i \alpha^{(i)}_j \). If \( \alpha^{(i)}_j = 0 \) and \( \beta_j \neq 0 \), then the eigenvalue is real.

The quotients \( \alpha^{(r)}_j / \beta_j \) and \( \alpha^{(i)}_j / \beta_j \) may easily over- or underflow, and \( \beta_j \) may even be zero. Thus, the user should avoid naively computing the ratios. However, \(\alpha^{(r)}_j \) and \( \alpha^{(i)}_j \) will always be less than and usually comparable with \( ||A|| \) in magnitude, and \( \beta_j \) always less than and usually comparable with \( ||B|| \).

The computed eigenvectors are scaled so that their largest components \( z \) satisfy \( |\mathrm{Re}(z)| + |\mathrm{Im}(z)| = 1 \).

For real matrices, complex eigenvalues always occur in complex conjugate pairs and the corresponding eigenvectors are stored in a special packed format (see nda::linalg::unpack_eigenvectors).

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and all input matrices are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayReal<2> type.
Bnda::blas_lapack::BlasArrayFor<A, 2> type.
ARnda::blas_lapack::BlasArrayFor<A, 1> type.
AInda::blas_lapack::BlasArrayFor<A, 1> type.
B2nda::blas_lapack::BlasArrayFor<A, 1> type.
VLnda::blas_lapack::BlasArrayFor<A, 2> type.
VRnda::blas_lapack::BlasArrayFor<A, 2> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten.
bInput/output matrix. On entry, the matrix \( \mathbf{B} \). On exit, \( \mathbf{B} \) is overwritten.
alpharOutput vector \( \boldsymbol{\alpha}^{(r)} \). The real parts of the numerator of the computed eigenvalues, i.e. \( \alpha^{(r)}_j = \mathrm{Re}(\alpha_j) \).
alphaiOutput vector \( \boldsymbol{\alpha}^{(i)} \). The imaginary parts of the numerator of the computed eigenvalues, i.e. \( \alpha^{(i)}_j = \mathrm{Im}(\alpha_j) \).
betaOutput vector \( \boldsymbol{\beta} \). The denominators of the computed eigenvalues, i.e. \(\beta_j \).
vlOutput matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors (in packed format for complex pairs). If jobvl = N, \( \mathbf{V}_L \) is not referenced.
vrOutput matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors (in packed format for complex pairs). If jobvr = N, \( \mathbf{V}_R \) is not referenced.
jobvlCharacter indicating whether to compute left eigenvectors ('V') or not ('N').
jobvrCharacter indicating whether to compute right eigenvectors ('V') or not ('N').
workOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 92 of file ggev.hpp.

◆ gtsv()

template<BlasArray< 1 > DL, BlasArrayFor< DL, 1 > D, BlasArrayFor< DL, 1 > DU, BlasArrayFor< DL > B>
requires (mem::have_host_compatible_addr_space<DL> and (get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<B>)
int nda::lapack::gtsv ( DL && dl,
D && d,
DU && du,
B && b )

#include <nda/lapack/gtsv.hpp>

Interface to the LAPACK gtsv routine.

Solves a system of linear equations

  • \( \mathbf{A} \mathbf{X} = \mathbf{B} \) or
  • \( \mathbf{A} \mathbf{x} = \mathbf{b} \),

with a tridiagonal \( n \times n \) matrix \( \mathbf{A} \) and either \( n \times n_{\mathrm{rhs}} \) matrices \( \mathbf{X} \) and \( \mathbf{B} \) or vectors \( \mathbf{x} \) and \( \mathbf{b} \) of size \( n \). It uses Gaussian elimination with partial pivoting.

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and \( \mathbf{B} \) is required to have nda::F_layout.
Template Parameters
DLnda::blas_lapack::BlasArray<1> type.
Dnda::blas_lapack::BlasArrayFor<DL, 1> type.
DUnda::blas_lapack::BlasArrayFor<DL, 1> type.
Bnda::blas_lapack::BlasArrayFor<DL> type.
Parameters
dlInput/output vector. On entry, it must contain the \( n - 1 \) subdiagonal elements of \( \mathbf{A} \). On exit, it is overwritten by the \( n - 2 \) elements of the second superdiagonal of the upper triangular matrix \( \mathbf{U} \) from the LU factorization of \( \mathbf{A} \).
dInput/output vector. On entry, it must contain the diagonal elements of \( \mathbf{A} \). On exit, it is overwritten by the \( n \) diagonal elements of \( \mathbf{U} \).
duInput/output vector. On entry, it must contain the \( n - 1 \) superdiagonal elements of \( \mathbf{A} \). On exit, it is overwritten by the \( n - 1 \) elements of the first superdiagonal of \( \mathbf{U} \).
bInput/output array. On entry, the \( n \times n_{\mathrm{rhs}} \) right hand side matrix \( \mathbf{B} \) or the vector \( \mathbf{b} \). On exit, the \( n \times n_{\mathrm{rhs}} \) solution matrix \( \mathbf{X} \) or the vector \( \mathbf{x} \).
Returns
Integer return code from the LAPACK call.

Definition at line 57 of file gtsv.hpp.

◆ heev()

template<BlasArrayCplx< 2 > A, BlasArrayRealFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>)
int nda::lapack::heev ( A && a,
W && w,
char jobz = 'V',
W1 && work = vector_value_t<A>{},
W2 && rwork = vector_fp_t<A>{} )

#include <nda/lapack/heev.hpp>

Interface to the LAPACK heev routine.

Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a complex hermitian matrix eigenvalue problem of the form

\[ \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \; , \]

for a given complex hermitian matrix \( \mathbf{A} \).

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and \( \mathbf{A} \) is required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayCplx<2> type.
Wnda::blas_lapack::BlasArrayRealFor<A, 1> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
W2nda::blas_lapack::BlasArrayRealFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the hermitian matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the orthonormal eigenvectors \( \mathbf{v}_i \) of the matrix \( \mathbf{A} \). If jobz = N, then on exit \( \mathbf{A} \) is destroyed.
wOutput vector. The eigenvalues \( \lambda_i \) in ascending order.
jobzCharacter indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N').
workOutput vector. Workspace array used by the LAPACK routine.
rworkOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 59 of file heev.hpp.

◆ hegv()

template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayRealFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B>)
int nda::lapack::hegv ( A && a,
B && b,
W && w,
char jobz = 'V',
int itype = 1,
W1 && work = vector_value_t<A>{},
W2 && rwork = vector_fp_t<A>{} )

#include <nda/lapack/hegv.hpp>

Interface to the LAPACK hegv routine.

Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a complex generalized Hermitian-definite eigenvalue problem of the form

  • \( \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{B} \mathbf{v}_i \) (itype = 1),
  • \( \mathbf{A} \mathbf{B} \mathbf{v}_i = \lambda_i \mathbf{v}_i \) (itype = 2) or
  • \( \mathbf{B} \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \) (itype = 3).

Here \( \mathbf{A} \) and \( \mathbf{B} \) are assumed to be Hermitian and \( \mathbf{B} \) is also positive definite.

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and \( \mathbf{A} \) and \( \mathbf{B} \) are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayCplx<2> type.
Bnda::blas_lapack::BlasArrayFor<A, 2> type.
Wnda::blas_lapack::BlasArrayRealFor<A, 1> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
W2nda::blas_lapack::BlasArrayRealFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the Hermitian matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the matrix \( \mathbf{V} \) of normalized eigenvectors such that \( \mathbf{V}^H \mathbf{B} \mathbf{V} = \mathbf{I} \) (if itype = 1 or itype = 2) or \( \mathbf{V}^H \mathbf{B}^{-1} \mathbf{V} = \mathbf{I} \) (if itype = 3). If jobz = N, then on exit \( \mathbf{A} \) is destroyed.
bInput/output matrix. On entry, the Hermitian, positive definite matrix \( \mathbf{B} \). On exit, the part of \( \mathbf{B} \) containing the matrix is overwritten by the triangular factor \( \mathbf{U} \) or \( \mathbf{L} \) from a Cholesky factorization.
wOutput vector. The eigenvalues \( \lambda_i \) in ascending order.
jobzCharacter indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N').
itypeSpecifies the problem to be solved.
workOutput vector. Workspace array used by the LAPACK routine.
rworkOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 69 of file hegv.hpp.

◆ orgqr()

template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A>)
int nda::lapack::orgqr ( A && a,
TAU && tau,
W && work = vector_value_t<A>{} )

#include <nda/lapack/orgqr.hpp>

Interface to the LAPACK/cuSOLVER orgqr routine.

Generates an \( m \times n \) real matrix \( \mathbf{Q} \) with orthonormal columns, which is defined as the first \( n \) columns of a product of \( k \) elementary reflectors of order \( m \)

\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]

as returned by nda::lapack::geqp3 or nda::lapack::geqrf.

Each \( \mathbf{H}(i) \) has the form

\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^T \]

where \( \tau_i \) is a real scalar, and \( \mathbf{v}_i \) is a real vector with

  • elements \( 1 \) to \( i - 1 \) equal to 0,
  • element \( i \) equal to 1 and
  • elements \( i + 1 \) to \( m \) stored in the elements \( i + 1 \) to \( m \) in column \( i \) of matrix \( \mathbf{A} \).

If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.

Note
\( \mathbf{A} \) is required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayReal<2> type.
TAUnda::blas_lapack::BlasArrayFor<A, 1> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the ith column must contain the vector which defines the elementary reflector \( H(i) \; , i = 1,2,...,k \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf. On exit, the \( m \times n \) matrix \( \mathbf{Q} \).
tauInput vector. \( \tau_i \) must contain the scalar factor of the elementary reflector \(\mathbf{H}(i) \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf.
workOutput vector. Workspace array used by the LAPACK/cuSOLVER routine.
Returns
Integer return code from the LAPACK/cuSOLVER call.

Definition at line 66 of file orgqr.hpp.

◆ orgqr_batch()

template<BlasArrayReal< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A, TAU>)
int nda::lapack::orgqr_batch ( A && a,
TAU && tau,
W && work = vector_value_t<A>{} )

#include <nda/lapack/orgqr_batch.hpp>

Interface to batched versions of the LAPACK/cuSOLVER orgqr routine.

This function generates the orthogonal matrix \( \mathbf{Q}_i \) for each matrix in a batch indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \) from the elementary reflectors and scalar factors produced by nda::lapack::geqrf_batch. Here, \( N_b \) is the batch size. See also nda::lapack::orgqr.

A batch of matrices is just a 3-dimensional array in nda::F_layout where the last dimension indexes the individual matrices such that A(:,:,i) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.

No library-level batching is currently available for this function. Instead, the function simply loops over all matrices in the batch and calls nda::lapack::orgqr on the first \( \min(m, n) \) columns of each slice. For wide matrices ( \( m < n \)), this produces an \( m \times m \) orthogonal matrix in the first \( m \) columns of each slice; the remaining columns are left untouched.

Note
\( \mathbf{A} \) and tau are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayReal<3> type.
TAUnda::blas_lapack::BlasArrayFor<A, 2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output array. On entry, the 3-dimensional array containing the elementary reflectors for each batch (as returned by nda::lapack::geqrf_batch). On exit, the first \( \min(m, n) \) columns of each slice contain \( \mathbf{Q}_i \).
tauInput matrix. The \( i \)-th column contains the scalar factors of the elementary reflectors representing \( \mathbf{Q}_i \).
workWorkspace array used by the underlying single-matrix call (resized as needed).
Returns
First non-zero LAPACK/cuSOLVER info code observed across the batch (0 on success).

Definition at line 53 of file orgqr_batch.hpp.

◆ syev()

template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>)
int nda::lapack::syev ( A && a,
W && w,
char jobz = 'V',
W1 && work = vector_value_t<A>{} )

#include <nda/lapack/syev.hpp>

Interface to the LAPACK syev routine.

Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a real symmetric eigenvalue problem of the form

\[ \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \; , \]

for a given real symmetric matrix \( \mathbf{A} \).

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and \( \mathbf{A} \) is required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayReal<2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the symmetric matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the orthonormal eigenvectors \( \mathbf{v}_i \) of the matrix \( \mathbf{A} \). If jobz = N, then on exit \( \mathbf{A} \) is destroyed.
wOutput vector. The eigenvalues \( \lambda_i \) in ascending order.
jobzCharacter indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N').
workOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 55 of file syev.hpp.

◆ sygv()

template<BlasArrayReal< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>>
requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B>)
int nda::lapack::sygv ( A && a,
B && b,
W && w,
char jobz = 'V',
int itype = 1,
W1 && work = vector_value_t<A>{} )

#include <nda/lapack/sygv.hpp>

Interface to the LAPACK sygv routine.

Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a real generalized symmetric-definite eigenvalue problem of the form

  • \( \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{B} \mathbf{v}_i \) (itype = 1),
  • \( \mathbf{A} \mathbf{B} \mathbf{v}_i = \lambda_i \mathbf{v}_i \) (itype = 2) or
  • \( \mathbf{B} \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \) (itype = 3).

Here \( \mathbf{A} \) and \( \mathbf{B} \) are assumed to be symmetric and \( \mathbf{B} \) is also positive definite.

Note
All input arrays are required to satisfy nda::mem::have_host_compatible_addr_space and \( \mathbf{A} \) and \( \mathbf{B} \) are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayReal<2> type.
Bnda::blas_lapack::BlasArrayFor<A, 2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
W1nda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the symmetric matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the matrix \( \mathbf{V} \) of normalized eigenvectors such that \( \mathbf{V}^T \mathbf{B} \mathbf{V} = \mathbf{I} \) (if itype = 1 or itype = 2) or \( \mathbf{V}^T \mathbf{B}^{-1} \mathbf{V} = \mathbf{I} \) (if itype = 3). If jobz = N, then on exit \( \mathbf{A} \) is destroyed.
bInput/output matrix. On entry, the symmetric, positive definite matrix \( \mathbf{B} \). On exit, the part of \( \mathbf{B} \) containing the matrix is overwritten by the triangular factor \( \mathbf{U} \) or \( \mathbf{L} \) from a Cholesky factorization.
wOutput vector. The eigenvalues \( \lambda_i \) in ascending order.
jobzCharacter indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N').
itypeSpecifies the problem to be solved.
workOutput vector. Workspace array used by the LAPACK routine.
Returns
Integer return code from the LAPACK call.

Definition at line 64 of file sygv.hpp.

◆ ungqr()

template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A>)
int nda::lapack::ungqr ( A && a,
TAU && tau,
W && work = vector_value_t<A>{} )

#include <nda/lapack/ungqr.hpp>

Interface to the LAPACK/cuSOLVER ungqr routine.

Generates an \( m \times n \) complex matrix \( \mathbf{Q} \) with orthonormal columns, which is defined as the first \( n \) columns of a product of \( k \) elementary reflectors of order \( m \)

\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]

as returned by nda::lapack::geqp3 or nda::lapack::geqrf.

Each \( \mathbf{H}(i) \) has the form

\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^H \]

where \( \tau_i \) is a complex scalar, and \( \mathbf{v}_i \) is a complex vector with

  • elements \( 1 \) to \( i - 1 \) equal to 0,
  • element \( i \) equal to 1 and
  • elements \( i + 1 \) to \( m \) stored in the elements \( i + 1 \) to \( m \) in column \( i \) of matrix \( \mathbf{A} \).

If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.

Note
\( \mathbf{A} \) is required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayCplx<2> type.
TAUnda::blas_lapack::BlasArrayFor<A, 1> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output matrix. On entry, the ith column must contain the vector which defines the elementary reflector \( H(i) \; , i = 1,2,...,k \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf. On exit, the \( m \times n \) matrix \( \mathbf{Q} \).
tauInput vector. \( \tau_i \) must contain the scalar factor of the elementary reflector \(\mathbf{H}(i) \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf.
workOutput vector. Workspace array used by the LAPACK/cuSOLVER routine.
Returns
Integer return code from the LAPACK/cuSOLVER call.

Definition at line 67 of file ungqr.hpp.

◆ ungqr_batch()

template<BlasArrayCplx< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>>
requires (has_F_layout<A, TAU>)
int nda::lapack::ungqr_batch ( A && a,
TAU && tau,
W && work = vector_value_t<A>{} )

#include <nda/lapack/ungqr_batch.hpp>

Interface to batched versions of the LAPACK/cuSOLVER ungqr routine.

This function generates the unitary matrix \( \mathbf{Q}_i \) for each matrix in a batch indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \) from the elementary reflectors and scalar factors produced by nda::lapack::geqrf_batch. Here, \( N_b \) is the batch size. See also nda::lapack::ungqr.

A batch of matrices is just a 3-dimensional array in nda::F_layout where the last dimension indexes the individual matrices such that A(:,:,i) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.

No library-level batching is currently available for this function. Instead, the function simply loops over all matrices in the batch and calls nda::lapack::ungqr on the first \( \min(m, n) \) columns of each slice. For wide matrices ( \( m < n \)), this produces an \( m \times m \) unitary matrix in the first \( m \) columns of each slice; the remaining columns are left untouched.

Note
\( \mathbf{A} \) and tau are required to have nda::F_layout.
Template Parameters
Anda::blas_lapack::BlasArrayCplx<3> type.
TAUnda::blas_lapack::BlasArrayFor<A, 2> type.
Wnda::blas_lapack::BlasArrayFor<A, 1> type.
Parameters
aInput/output array. On entry, the 3-dimensional array containing the elementary reflectors for each batch (as returned by nda::lapack::geqrf_batch). On exit, the first \( \min(m, n) \) columns of each slice contain \( \mathbf{Q}_i \).
tauInput matrix. The \( i \)-th column contains the scalar factors of the elementary reflectors representing \( \mathbf{Q}_i \).
workWorkspace array used by the underlying single-matrix call (resized as needed).
Returns
First non-zero LAPACK/cuSOLVER info code observed across the batch (0 on success).

Definition at line 53 of file ungqr_batch.hpp.