|
TRIQS/nda 2.0.0
Multi-dimensional array library for C++
|
Low-level interface to parts of the LAPACK/cuSOLVER library.
Classes | |
| class | nda::lapack::gelss_worker< T > |
| Worker class for solving linear least squares problems. More... | |
| class | nda::lapack::gelss_worker_hermitian |
| Specialized worker class for solving linear least squares problems while enforcing a certain hermitian symmetry. More... | |
Functions | |
| template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, VL, VR>) | |
| int | nda::lapack::geev (A &&a, W &&w, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{}) |
| Interface to the LAPACK geev routine for complex matrices. | |
| template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > WR, BlasArrayFor< A, 1 > WI, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, VL, VR>) | |
| int | nda::lapack::geev (A &&a, WR &&wr, WI &&wi, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{}) |
| Interface to the LAPACK geev routine for real matrices. | |
| template<BlasArray< 2 > A, BlasArrayFor< A > B, BlasArrayRealFor< A, 1 > S, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>> requires (mem::have_host_compatible_addr_space<A> and (get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<A, B>) | |
| int | nda::lapack::gelss (A &&a, B &&b, S &&s, get_fp_t< A > rcond, int &rank, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{}) |
| Interface to the LAPACK gelss routine. | |
| template<BlasArray< 2 > A, PivotArrayFor< A, 1 > JPVT, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>) | |
| int | nda::lapack::geqp3 (A &&a, JPVT &&jpvt, TAU &&tau, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{}) |
| Interface to the LAPACK geqp3 routine. | |
| template<BlasArray< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A>) | |
| int | nda::lapack::geqrf (A &&a, TAU &&tau, W &&work=vector_value_t< A >{}) |
| Interface to the LAPACK/cuSOLVER geqrf routine. | |
| template<BlasArray< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A, TAU>) | |
| int | nda::lapack::geqrf_batch (A &&a, TAU &&tau, W &&work=vector_value_t< A >{}) |
| Interface to batched versions of the LAPACK/cuSOLVER geqrf routine. | |
| template<BlasArray< 2 > A, BlasArrayRealFor< A, 1 > S, BlasArrayFor< A, 2 > U, BlasArrayFor< A, 2 > VH, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>> requires (has_C_layout<A> == has_C_layout<U> and has_C_layout<A> == has_C_layout<VH>) | |
| int | nda::lapack::gesvd (A &&a, S &&s, U &&u, VH &&vh, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{}) |
| Interface to the LAPACK/cuSOLVER gesvd routine. | |
| template<BlasArray< 2 > A, PivotArrayFor< A, 1 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>> | |
| int | nda::lapack::getrf (A &&a, IPIV &&ipiv, W &&work=vector_value_t< A >{}) |
| Interface to the LAPACK/cuSOLVER getrf routine. | |
| template<BlasArray< 3 > A, PivotArrayFor< A, 2 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A> or has_C_layout<A>) | |
| auto | nda::lapack::getrf_batch (A &&a, IPIV &&ipiv, W &&work=vector_value_t< A >{}) |
| Interface to batched versions of the LAPACK/cuSOLVER getrf routine. | |
| template<BlasArray< 2 > A, PivotArrayFor< A, 1 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>> | |
| int | nda::lapack::getri (A &&a, IPIV const &ipiv, W &&work=vector_value_t< A >{}) |
| Interface to the LAPACK getri routine. | |
| template<BlasArray< 3 > A, PivotArrayFor< A, 2 > IPIV, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A> or has_C_layout<A>) | |
| auto | nda::lapack::getri_batch (A &&a, IPIV const &ipiv, W &&work=vector_value_t< A >{}) |
| Interface to batched versions of the LAPACK/cuSOLVER getri routine. | |
| template<BlasArrayOrConj< 2 > A, BlasArrayFor< A > B, PivotArrayFor< A, 1 > IPIV> requires ((get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<B>) | |
| int | nda::lapack::getrs (A const &a, B &&b, IPIV const &ipiv) |
| Interface to the LAPACK/cuSOLVER getrs routine. | |
| template<BlasArrayOrConj< 3 > A, BlasArrayFor< A, 3 > B, PivotArrayFor< A, 2 > IPIV> requires ((has_F_layout<A> or has_C_layout<A>) and has_F_layout<B> and (not is_conj_array_expr<A> or has_C_layout<A>)) | |
| int | nda::lapack::getrs_batch (A const &a, B &&b, IPIV const &ipiv) |
| Interface to batched versions of the LAPACK/cuSOLVER getrs routine. | |
| template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > A2, BlasArrayFor< A, 1 > B2, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B, VL, VR>) | |
| int | nda::lapack::ggev (A &&a, B &&b, A2 &&alpha, B2 &&beta, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{}) |
| Interface to the LAPACK ggev routine for complex matrices. | |
| template<BlasArrayReal< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > AR, BlasArrayFor< A, 1 > AI, BlasArrayFor< A, 1 > B2, BlasArrayFor< A, 2 > VL, BlasArrayFor< A, 2 > VR, BlasArrayFor< A, 1 > W1 = vector_value_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B, VL, VR>) | |
| int | nda::lapack::ggev (A &&a, B &&b, AR &&alphar, AI &&alphai, B2 &&beta, VL &&vl, VR &&vr, char jobvl='N', char jobvr='V', W1 &&work=vector_value_t< A >{}) |
| Interface to the LAPACK ggev routine for real matrices. | |
| template<BlasArray< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A>) | |
| int | nda::lapack::gqr (A &&a, TAU &&tau, W &&work=vector_value_t< A >{}) |
| Dispatcher to nda::lapack::orgqr for real value types and to nda::lapack::ungqr for complex value types. | |
| template<BlasArray< 1 > DL, BlasArrayFor< DL, 1 > D, BlasArrayFor< DL, 1 > DU, BlasArrayFor< DL > B> requires (mem::have_host_compatible_addr_space<DL> and (get_rank<B> == 1 or get_rank<B> == 2) and has_F_layout<B>) | |
| int | nda::lapack::gtsv (DL &&dl, D &&d, DU &&du, B &&b) |
| Interface to the LAPACK gtsv routine. | |
| template<BlasArrayCplx< 2 > A, BlasArrayRealFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>) | |
| int | nda::lapack::heev (A &&a, W &&w, char jobz='V', W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{}) |
| Interface to the LAPACK heev routine. | |
| template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayRealFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>, BlasArrayRealFor< A, 1 > W2 = vector_fp_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B>) | |
| int | nda::lapack::hegv (A &&a, B &&b, W &&w, char jobz='V', int itype=1, W1 &&work=vector_value_t< A >{}, W2 &&rwork=vector_fp_t< A >{}) |
| Interface to the LAPACK hegv routine. | |
| template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A>) | |
| int | nda::lapack::orgqr (A &&a, TAU &&tau, W &&work=vector_value_t< A >{}) |
| Interface to the LAPACK/cuSOLVER orgqr routine. | |
| template<BlasArrayReal< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A, TAU>) | |
| int | nda::lapack::orgqr_batch (A &&a, TAU &&tau, W &&work=vector_value_t< A >{}) |
| Interface to batched versions of the LAPACK/cuSOLVER orgqr routine. | |
| template<BlasArrayReal< 2 > A, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A>) | |
| int | nda::lapack::syev (A &&a, W &&w, char jobz='V', W1 &&work=vector_value_t< A >{}) |
| Interface to the LAPACK syev routine. | |
| template<BlasArrayReal< 2 > A, BlasArrayFor< A, 2 > B, BlasArrayFor< A, 1 > W, BlasArrayFor< A, 1 > W1 = vector_value_t<A>> requires (mem::have_host_compatible_addr_space<A> and has_F_layout<A, B>) | |
| int | nda::lapack::sygv (A &&a, B &&b, W &&w, char jobz='V', int itype=1, W1 &&work=vector_value_t< A >{}) |
| Interface to the LAPACK sygv routine. | |
| template<BlasArrayCplx< 2 > A, BlasArrayFor< A, 1 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A>) | |
| int | nda::lapack::ungqr (A &&a, TAU &&tau, W &&work=vector_value_t< A >{}) |
| Interface to the LAPACK/cuSOLVER ungqr routine. | |
| template<BlasArrayCplx< 3 > A, BlasArrayFor< A, 2 > TAU, BlasArrayFor< A, 1 > W = vector_value_t<A>> requires (has_F_layout<A, TAU>) | |
| int | nda::lapack::ungqr_batch (A &&a, TAU &&tau, W &&work=vector_value_t< A >{}) |
| Interface to batched versions of the LAPACK/cuSOLVER ungqr routine. | |
| int nda::lapack::geev | ( | A && | a, |
| W && | w, | ||
| VL && | vl, | ||
| VR && | vr, | ||
| char | jobvl = 'N', | ||
| char | jobvr = 'V', | ||
| W1 && | work = vector_value_t<A>{}, | ||
| W2 && | rwork = vector_fp_t<A>{} ) |
#include <nda/lapack/geev.hpp>
Interface to the LAPACK geev routine for complex matrices.
Computes all eigenvalues \( \lambda_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a complex eigenvalue problem.
The right eigenvector \( \mathbf{v}_j \) satisfies
\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{v}_j \; , \]
whereas the left eigenvector \( \mathbf{u}_j \) satisfies
\[ \mathbf{u}_j^H \mathbf{A} = \lambda_j \mathbf{u}_j^H \; . \]
Here, \( \mathbf{u}_j^H \) denotes the conjugate-transpose of \( \mathbf{u}_j \).
The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real.
| a | Input/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten. |
| w | Output vector \( \mathbf{w} \). The computed eigenvalues, i.e. \( w_j = \lambda_j \). |
| vl | Output matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors. If jobvl = N, \( \mathbf{V}_L \) is not referenced. |
| vr | Output matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors. If jobvr = N, \( \mathbf{V}_R \) is not referenced. |
| jobvl | Character indicating whether to compute left eigenvectors ('V') or not ('N'). |
| jobvr | Character indicating whether to compute right eigenvectors ('V') or not ('N'). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| rwork | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::geev | ( | A && | a, |
| WR && | wr, | ||
| WI && | wi, | ||
| VL && | vl, | ||
| VR && | vr, | ||
| char | jobvl = 'N', | ||
| char | jobvr = 'V', | ||
| W1 && | work = vector_value_t<A>{} ) |
#include <nda/lapack/geev.hpp>
Interface to the LAPACK geev routine for real matrices.
Computes all eigenvalues \( \lambda_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a real eigenvalue problem.
The right eigenvector \( \mathbf{v}_j \) satisfies
\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{v}_j \; , \]
whereas the left eigenvector \( \mathbf{u}_j \) satisfies
\[ \mathbf{u}_j^T \mathbf{A} = \lambda_j \mathbf{u}_j^T \; . \]
Here, \( \mathbf{u}_j^T \) denotes the transpose of \( \mathbf{u}_j \).
The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real.
For real matrices, complex eigenvalues always occur in complex conjugate pairs and the corresponding eigenvectors are stored in a special packed format (see nda::linalg::unpack_eigenvectors).
| a | Input/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten. |
| wr | Output vector \( \mathbf{w}^{(r)} \). The real parts of the computed eigenvalues, i.e. \( w_j^{(r)} = \mathrm{Re}(\lambda_j) \). |
| wi | Output vector \( \mathbf{w}^{(i)} \). The imaginary parts of the computed eigenvalues, i.e. \(w_j^{(i)} = \mathrm{Im}(\lambda_j) \). |
| vl | Output matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors (in packed format for complex pairs). If jobvl = N, \( \mathbf{V}_L \) is not referenced. |
| vr | Output matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors (in packed format for complex pairs). If jobvr = N, \( \mathbf{V}_R \) is not referenced. |
| jobvl | Character indicating whether to compute left eigenvectors ('V') or not ('N'). |
| jobvr | Character indicating whether to compute right eigenvectors ('V') or not ('N'). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::gelss | ( | A && | a, |
| B && | b, | ||
| S && | s, | ||
| get_fp_t< A > | rcond, | ||
| int & | rank, | ||
| W1 && | work = vector_value_t<A>{}, | ||
| W2 && | rwork = vector_fp_t<A>{} ) |
#include <nda/lapack/gelss.hpp>
Interface to the LAPACK gelss routine.
Computes the minimum norm solution to a linear least squares problem:
\[ \min_{\mathbf{x}} \| \mathbf{b} - \mathbf{A x} \|_2 \]
using the singular value decomposition (SVD) of \( \mathbf{A} \), an \( m \times n \) matrix which may be rank-deficient.
Several right hand side vectors \( \mathbf{b} \) and solution vectors \( \mathbf{x} \) can be handled in a single call; they are stored as the columns of the \( m \times n_{\mathrm{rhs}} \) right hand side matrix \(\mathbf{B} \) and the \( n \times n_{\mathrm{rhs}} \) solution matrix \( \mathbf{X} \).
The effective rank of \( \mathbf{A} \) is determined by treating as zero those singular values which are less than \( r_{\mathrm{cond}} \) times the largest singular value.
| a | Input/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the first \(\min(m,n) \) rows of \( \mathbf{A} \) are overwritten with its right singular vectors, stored rowwise. |
| b | Input/output array. On entry, the \( m \times n_{\mathrm{rhs}} \) right hand side matrix \( \mathbf{B} \) or vector \( \mathbf{b} \). On exit, it is overwritten by the \( n \times n_{\mathrm{rhs}} \) solution matrix \( \mathbf{X} \) or vector \( \mathbf{x} \). If \( m \geq n \) and if the effective rank is equal to \( n \), the residual sum-of-squares for the solution in the ith column is given by the sum of squares of the modulus of elements \( n + 1 \) to \( m \) in that column. |
| s | Output vector. The singular values of \( \mathbf{A} \) in decreasing order. The condition number of \(\mathbf{A} \) in the 2-norm is \( s_1 / s_{\min(m,n)} \). |
| rcond | It is used to determine the effective rank of \( \mathbf{A} \). Singular values \( s_i \leq r_{\mathrm{cond}} s_1 \) are treated as zero. If \( r_{\mathrm{cond}} < 0 \), machine precision is used instead. |
| rank | Output variable. The effective rank of \( \mathbf{A} \), i.e. the number of singular values which are greater than \( r_{\mathrm{cond}} s_1 \). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| rwork | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::geqp3 | ( | A && | a, |
| JPVT && | jpvt, | ||
| TAU && | tau, | ||
| W1 && | work = vector_value_t<A>{}, | ||
| W2 && | rwork = vector_fp_t<A>{} ) |
#include <nda/lapack/geqp3.hpp>
Interface to the LAPACK geqp3 routine.
Computes a QR factorization of a matrix \( \mathbf{A} \) with column pivoting:
\[ \mathbf{A P} = \mathbf{Q R} \]
using Level 3 BLAS.
The matrix \( \mathbf{Q} \) is represented as a product of elementary reflectors
\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]
where \( k = \min(m,n) \).
Each \( \mathbf{H}(i) \) has the form
\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^H \]
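where \( \tau_i \) is a real/complex scalar, and \( \mathbf{v}_i \) is a real/complex vector whose first \( i - 1 \) elements are zero, whose \( i \)-th element is 1, and whose remaining elements are stored on exit below the diagonal in the \( i \)-th column of \( \mathbf{A} \).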
| A | nda::blas_lapack::BlasArray<2> type. |
| JPVT | nda::blas_lapack::PivotArrayFor<A, 1> type. |
| TAU | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W1 | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W2 | nda::blas_lapack::BlasArrayRealFor<A, 1> type. |
| a | Input/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the upper triangle of the array contains the \( \min(m,n) \times n \) upper trapezoidal matrix \( \mathbf{R} \); the elements below the diagonal, together with the array \( \boldsymbol{\tau} \), represent the unitary matrix \(\mathbf{Q} \) as a product of \( \min(m,n) \) elementary reflectors. |
| jpvt | Input/output vector. On entry, if the jth element is \( \neq 0 \), the jth column of \( \mathbf{A} \) is permuted to the front of \( \mathbf{A P} \) (a leading column); if the jth element is equal to 0, the jth column of \( \mathbf{A} \) is a free column. On exit, if the jth element is equal to \( k \), then the jth column of \( \mathbf{A P} \) was the kth column of \( \mathbf{A} \). |
| tau | Output vector. The scalar factors \( \tau_i \) of the elementary reflectors \( \mathbf{H}(i) \). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| rwork | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::geqrf | ( | A && | a, |
| TAU && | tau, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/geqrf.hpp>
Interface to the LAPACK/cuSOLVER geqrf routine.
Computes a QR factorization of a matrix \( \mathbf{A} \):
\[ \mathbf{A} = \mathbf{Q R} \; . \]
The matrix \( \mathbf{Q} \) is represented as a product of elementary reflectors
\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]
where \( k = \min(m,n) \).
Each \( \mathbf{H}(i) \) has the form
\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^H \]
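where \( \tau_i \) is a real/complex scalar, and \( \mathbf{v}_i \) is a real/complex vector whose first \( i - 1 \) elements are zero, whose \( i \)-th element is 1, and whose remaining elements are stored on exit below the diagonal in the \( i \)-th column of \( \mathbf{A} \).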
If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.
| A | nda::blas_lapack::BlasArray<2> type. |
| TAU | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the upper triangle of the array contains the \( \min(m,n) \times n \) upper trapezoidal matrix \( \mathbf{R} \); the elements below the diagonal, together with the array \( \boldsymbol{\tau} \), represent the unitary matrix \(\mathbf{Q} \) as a product of \( \min(m,n) \) elementary reflectors. |
| tau | Output vector. The scalar factors \( \tau_i \) of the elementary reflectors \( \mathbf{H}(i) \). |
| work | Output vector. Workspace array used by the LAPACK/cuSOLVER routine. |
| int nda::lapack::geqrf_batch | ( | A && | a, |
| TAU && | tau, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/geqrf_batch.hpp>
Interface to batched versions of the LAPACK/cuSOLVER geqrf routine.
This function computes a QR factorization
\[ \mathbf{A}_i = \mathbf{Q}_i \mathbf{R}_i \; , \]
for a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::geqrf.
A batch of matrices is just a 3-dimensional array in nda::F_layout where the last dimension indexes the individual matrices such that A(:,:,i) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.
Depending on the input array types, the function does the following:
| A | nda::blas_lapack::BlasArray<3> type. |
| TAU | nda::blas_lapack::BlasArrayFor<A, 2> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output array. On entry, the 3-dimensional array \( \mathbf{A} \) containing the matrices \(\mathbf{A}_i \) to be factored. On exit, the corresponding upper trapezoidal matrices \( \mathbf{R}_i \) and the elementary reflectors representing \( \mathbf{Q}_i \). |
| tau | Output matrix \( \mathbf{T} \). The \( i \)-th column contains the scalar factors of the elementary reflectors representing \( \mathbf{Q}_i \). |
| work | Output vector. Workspace array only used by the LAPACK routine. |
Definition at line 67 of file geqrf_batch.hpp.
| int nda::lapack::gesvd | ( | A && | a, |
| S && | s, | ||
| U && | u, | ||
| VH && | vh, | ||
| W1 && | work = vector_value_t<A>{}, | ||
| W2 && | rwork = vector_fp_t<A>{} ) |
#include <nda/lapack/gesvd.hpp>
Interface to the LAPACK/cuSOLVER gesvd routine.
Computes the singular value decomposition (SVD) of an \( m \times n \) matrix \( \mathbf{A} \). The SVD is written as
\[ \mathbf{A} = \mathbf{U} \mathbf{S} \mathbf{V}^H \; , \]
where \( \mathbf{S} \) is an \( m \times n \) matrix which is zero except for its \( \min(m,n) \) diagonal elements, \( \mathbf{U} \) is an \( m \times m \) unitary matrix, and \( \mathbf{V} \) is an \( n \times n \) unitary matrix. The diagonal elements of \( \mathbf{S} \) are the singular values of \( \mathbf{A} \); they are real and non-negative, and are returned in descending order. The first \( \min(m,n) \) columns of \(\mathbf{U} \) and \( \mathbf{V} \) are the left and right singular vectors of \( \mathbf{A} \).
If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.
| a | Input/output matrix. On entry, the \( m \times n \) matrix \( \mathbf{A} \). On exit, the contents of \( \mathbf{A} \) are destroyed. |
| s | Output vector. The singular values of \( \mathbf{A} \), sorted so that \( s_i \geq s_{i+1} \). |
| u | Output matrix. It contains the \( m \times m \) unitary matrix \( \mathbf{U} \). |
| vh | Output matrix. It contains the \( n \times n \) unitary matrix \( \mathbf{V}^H \). |
| work | Output vector. Workspace array used by the LAPACK/cuSOLVER routine. |
| rwork | Output vector. Workspace array used by the LAPACK/cuSOLVER routine. |
| int nda::lapack::getrf | ( | A && | a, |
| IPIV && | ipiv, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/getrf.hpp>
Interface to the LAPACK/cuSOLVER getrf routine.
Computes an LU factorization of a general \( m \times n \) matrix \( \mathbf{A} \) using partial pivoting with row interchanges.
The factorization has the form
\[ \mathbf{A} = \mathbf{P L U} \]
where \( \mathbf{P} \) is a permutation matrix, \( \mathbf{L} \) is lower triangular with unit diagonal elements (lower trapezoidal if \( m > n \)), and \( \mathbf{U} \) is upper triangular (upper trapezoidal if \(m < n \)).
If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.
| A | nda::blas_lapack::BlasArray<2> type. |
| IPIV | nda::blas_lapack::PivotArrayFor<A, 1> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output matrix. On entry, the \( m \times n \) matrix to be factored. On exit, the factors \(\mathbf{L} \) and \( \mathbf{U} \) from the factorization \( \mathbf{A} = \mathbf{P L U} \); the unit diagonal elements of \( \mathbf{L} \) are not stored. |
| ipiv | Output vector. The pivot indices, i.e. for \( 1 \leq i \leq \min(m,n) \), row \( i \) of the matrix was interchanged with row ipiv(i-1). |
| work | Output vector. Workspace array only used by the cuSOLVER routine. |
| auto nda::lapack::getrf_batch | ( | A && | a, |
| IPIV && | ipiv, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/getrf_batch.hpp>
Interface to batched versions of the LAPACK/cuSOLVER getrf routine.
This function computes LU factorizations
\[ \mathbf{A}_i = \mathbf{P}_i \mathbf{L}_i \mathbf{U}_i \; , \]
for a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::getrf.
A batch of matrices is just a 3-dimensional array in either nda::C_layout or nda::F_layout. For a Fortran/C layout array, the last/first dimension indexes the individual matrices such that A(:,:,i)/A(i,:,:) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.
Depending on the input array types, the function does the following:
| A | nda::blas_lapack::BlasArray<3> type. |
| IPIV | nda::blas_lapack::PivotArrayFor<A, 2> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output array. On entry, the 3-dimensional array \( \mathbf{A} \) containing the matrices \(\mathbf{A}_i \) to be factored. On exit, the corresponding \( \mathbf{L}_i \) and \( \mathbf{U}_i \) matrices from the factorization. |
| ipiv | Output matrix. If the matrix is in Fortran (C) layout, the \( i \)-th column (row) contains the pivot indices from the factorization of \( \mathbf{A}_i \). |
| work | Output vector. Workspace array only used by the cuSOLVER routine. |
Definition at line 123 of file getrf_batch.hpp.
| int nda::lapack::getri | ( | A && | a, |
| IPIV const & | ipiv, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/getri.hpp>
Interface to the LAPACK getri routine.
Computes the inverse of an \( n \times n \) matrix \( \mathbf{A} \) using the LU factorization computed by nda::lapack::getrf.
This method inverts \( \mathbf{U} \) and then computes \( \mathbf{A}^{-1} \) by solving the system \( \mathbf{A}^{-1} \mathbf{L} = \mathbf{U}^{-1} \) for \( \mathbf{A}^{-1} \).
If the input arrays satisfy nda::mem::have_device_compatible_addr_space, a custom implementation that builds an \( n \times n \) identity matrix in the workspace and calls cuSOLVER ?getrs is used.
| A | nda::blas_lapack::BlasArray<2> type. |
| IPIV | nda::blas_lapack::PivotArrayFor<A, 1> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output matrix. On entry, the factors \( \mathbf{L} \) and \( \mathbf{U} \) from the factorization \( \mathbf{A} = \mathbf{P L U} \) as computed by nda::lapack::getrf. On exit \( \mathbf{A}^{-1} \), the inverse of the original matrix \( \mathbf{A} \). |
| ipiv | Input vector. The pivot indices from nda::lapack::getrf, i.e. for \( 1 \leq i \leq n \), row \( i \) of the matrix was interchanged with row ipiv(i-1). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| auto nda::lapack::getri_batch | ( | A && | a, |
| IPIV const & | ipiv, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/getri_batch.hpp>
Interface to batched versions of the LAPACK/cuSOLVER getri routine.
Computes the inverse of a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::getri.
A batch of matrices is just a 3-dimensional array in either nda::C_layout or nda::F_layout. For a Fortran/C layout array, the last/first dimension indexes the individual matrices such that A(:,:,i)/A(i,:,:) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.
Depending on the input array types, the function does the following:
| A | nda::blas_lapack::BlasArray<3> type. |
| IPIV | nda::blas_lapack::PivotArrayFor<A, 2> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output array. On entry, the 3-dimensional array \( \mathbf{A} \) containing LU factorized matrices \( \mathbf{A}_i \) as computed by nda::lapack::getrf_batch. On exit, the corresponding inverse matrices \(\mathbf{A}_i^{-1} \). |
| ipiv | Input matrix. The pivot indices from nda::lapack::getrf_batch. If the matrix is in Fortran (C) layout, the \( i \)-th column (row) contains the pivot indices from the factorization of \( \mathbf{A}_i \). |
| work | Output vector. Workspace array used by the LAPACK/cuBLAS routine. |
Definition at line 111 of file getri_batch.hpp.
| int nda::lapack::getrs | ( | A const & | a, |
| B && | b, | ||
| IPIV const & | ipiv ) |
#include <nda/lapack/getrs.hpp>
Interface to the LAPACK/cuSOLVER getrs routine.
Solves a system of linear equations
\[ \mathbf{A} \mathbf{X} = \mathbf{B} \]
with a general \( n \times n \) matrix \( \mathbf{A} \) and either \( n \times n_{\mathrm{rhs}} \) matrices \( \mathbf{X} \) and \( \mathbf{B} \) or vectors \( \mathbf{x} \) and \( \mathbf{b} \) of size \( n \).
If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.
| A | nda::blas_lapack::BlasArrayOrConj<2> type. |
| B | nda::blas_lapack::BlasArrayFor<A> type. |
| IPIV | nda::blas_lapack::PivotArrayFor<A, 1> type. |
| a | Input matrix. The factors \( \mathbf{L} \) and \( \mathbf{U} \) from the factorization \( \mathbf{A} = \mathbf{P L U} \) as computed by nda::lapack::getrf. |
| b | Input/output matrix/vector. On entry, the right hand side matrix \( \mathbf{B} \) or vector \(\mathbf{b} \). On exit, the solution matrix \( \mathbf{X} \) or vector \( \mathbf{x} \). |
| ipiv | Input vector. The pivot indices from nda::lapack::getrf, i.e. for \( 1 \leq i \leq n \), row \( i \) of the matrix was interchanged with row ipiv(i-1). |
| int nda::lapack::getrs_batch | ( | A const & | a, |
| B && | b, | ||
| IPIV const & | ipiv ) |
#include <nda/lapack/getrs_batch.hpp>
Interface to batched versions of the LAPACK/cuSOLVER getrs routine.
This function solves systems of linear equations
\[ \mathbf{A}_i \mathbf{X}_i = \mathbf{B}_i \; , \]
for a batch of matrices indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \). Here, \( N_b \) is the batch size. See also nda::lapack::getrs.
A batch of matrices is just a 3-dimensional array in either nda::C_layout or nda::F_layout. For a Fortran/C layout array, the last/first dimension indexes the individual matrices such that A(:,:,i)/A(i,:,:) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.
Depending on the input array types, the function does the following:
| A | nda::blas_lapack::BlasArrayOrConj<3> type. |
| B | nda::blas_lapack::BlasArrayFor<A, 3> type. |
| IPIV | nda::blas_lapack::PivotArrayFor<A, 2> type. |
| a | Input array. The 3-dimensional array \( \mathbf{A} \) containing LU factorized matrices \( \mathbf{A}_i \) as computed by nda::lapack::getrf_batch. |
| b | Input/output array. On entry, the 3-dimensional array \( \mathbf{B} \) containing the right hand side matrices \( \mathbf{B}_i \). On exit, it contains the corresponding solution matrices \( \mathbf{X}_i \). |
| ipiv | Input matrix. The pivot indices from nda::lapack::getrf_batch. If the matrix is in Fortran (C) layout, the \( i \)-th column (row) contains the pivot indices from the factorization of \( \mathbf{A}_i \). |
Definition at line 114 of file getrs_batch.hpp.
| int nda::lapack::ggev | ( | A && | a, |
| B && | b, | ||
| A2 && | alpha, | ||
| B2 && | beta, | ||
| VL && | vl, | ||
| VR && | vr, | ||
| char | jobvl = 'N', | ||
| char | jobvr = 'V', | ||
| W1 && | work = vector_value_t<A>{}, | ||
| W2 && | rwork = vector_fp_t<A>{} ) |
#include <nda/lapack/ggev.hpp>
Interface to the LAPACK ggev routine for complex matrices.
Computes the generalized eigenvalues \( \lambda_j = \alpha_j / \beta_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a complex generalized eigenvalue problem.
The right eigenvector \( \mathbf{v}_j \) satisfies
\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{B} \mathbf{v}_j \; , \]
whereas the left eigenvector \( \mathbf{u}_j \) satisfies
\[ \mathbf{u}_j^H \mathbf{A} = \lambda_j \mathbf{u}_j^H \mathbf{B} \; . \]
Here, \( \mathbf{u}_j^H \) denotes the conjugate-transpose of \( \mathbf{u}_j \).
The eigenvalues are stored as complex pairs \( \alpha_j \) and \( \beta_j \). If \( \beta_j \neq 0 \), the eigenvalue is \( \lambda_j = \alpha_j / \beta_j \).
The quotient \( \alpha_j / \beta_j \) may easily over- or underflow, and \( \beta_j \) may even be zero. Thus, the user should avoid naively computing the ratio. However, \( \alpha_j \) will always be less than and usually comparable with \( \| \mathbf{A} \| \) in magnitude, and \( \beta_j \) will always be less than and usually comparable with \( \| \mathbf{B} \| \).
The computed eigenvectors are scaled so that their largest components \( z \) satisfy \( |\mathrm{Re}(z)| + |\mathrm{Im}(z)| = 1 \).
| a | Input/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten. |
| b | Input/output matrix. On entry, the matrix \( \mathbf{B} \). On exit, \( \mathbf{B} \) is overwritten. |
| alpha | Output vector \( \boldsymbol{\alpha} \). The numerators of the computed eigenvalues, i.e. \(\alpha_j \). |
| beta | Output vector \( \boldsymbol{\beta} \). The denominators of the computed eigenvalues, i.e. \(\beta_j \). |
| vl | Output matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors. If jobvl = N, \( \mathbf{V}_L \) is not referenced. |
| vr | Output matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors. If jobvr = N, \( \mathbf{V}_R \) is not referenced. |
| jobvl | Character indicating whether to compute left eigenvectors ('V') or not ('N'). |
| jobvr | Character indicating whether to compute right eigenvectors ('V') or not ('N'). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| rwork | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::ggev | ( | A && | a, |
| B && | b, | ||
| AR && | alphar, | ||
| AI && | alphai, | ||
| B2 && | beta, | ||
| VL && | vl, | ||
| VR && | vr, | ||
| char | jobvl = 'N', | ||
| char | jobvr = 'V', | ||
| W1 && | work = vector_value_t<A>{} ) |
#include <nda/lapack/ggev.hpp>
Interface to the LAPACK ggev routine for real matrices.
Computes the generalized eigenvalues \( \lambda_j = \alpha_j / \beta_j \) and, optionally, right/left eigenvectors \( \mathbf{v}_j \)/ \( \mathbf{u}_j \) of a real generalized eigenvalue problem.
The right eigenvector \( \mathbf{v}_j \) satisfies
\[ \mathbf{A} \mathbf{v}_j = \lambda_j \mathbf{B} \mathbf{v}_j \; , \]
whereas the left eigenvector \( \mathbf{u}_j \) satisfies
\[ \mathbf{u}_j^T \mathbf{A} = \lambda_j \mathbf{u}_j^T \mathbf{B} \; . \]
Here, \( \mathbf{u}_j^T \) denotes the transpose of \( \mathbf{u}_j \).
The eigenvalues are stored as real triples \( \alpha^{(r)}_j \), \( \alpha^{(i)}_j \) and \( \beta_j \) such that \( \alpha_j = \alpha^{(r)}_j + i \alpha^{(i)}_j \). If \( \alpha^{(i)}_j = 0 \) and \( \beta_j \neq 0 \), then the eigenvalue is real.
The quotients \( \alpha^{(r)}_j / \beta_j \) and \( \alpha^{(i)}_j / \beta_j \) may easily over- or underflow, and \( \beta_j \) may even be zero. Thus, the user should avoid naively computing the ratios. However, \( \alpha^{(r)}_j \) and \( \alpha^{(i)}_j \) will always be less than and usually comparable with \( \| \mathbf{A} \| \) in magnitude, and \( \beta_j \) will always be less than and usually comparable with \( \| \mathbf{B} \| \).
The computed eigenvectors are scaled so that their largest components \( z \) satisfy \( |\mathrm{Re}(z)| + |\mathrm{Im}(z)| = 1 \).
For real matrices, complex eigenvalues always occur in complex conjugate pairs and the corresponding eigenvectors are stored in a special packed format (see nda::linalg::unpack_eigenvectors).
| a | Input/output matrix. On entry, the matrix \( \mathbf{A} \). On exit, \( \mathbf{A} \) is overwritten. |
| b | Input/output matrix. On entry, the matrix \( \mathbf{B} \). On exit, \( \mathbf{B} \) is overwritten. |
| alphar | Output vector \( \boldsymbol{\alpha}^{(r)} \). The real parts of the numerator of the computed eigenvalues, i.e. \( \alpha^{(r)}_j = \mathrm{Re}(\alpha_j) \). |
| alphai | Output vector \( \boldsymbol{\alpha}^{(i)} \). The imaginary parts of the numerator of the computed eigenvalues, i.e. \( \alpha^{(i)}_j = \mathrm{Im}(\alpha_j) \). |
| beta | Output vector \( \boldsymbol{\beta} \). The denominators of the computed eigenvalues, i.e. \(\beta_j \). |
| vl | Output matrix \( \mathbf{V}_L \). If jobvl = V, the matrix contains the left eigenvectors (in packed format for complex pairs). If jobvl = N, \( \mathbf{V}_L \) is not referenced. |
| vr | Output matrix \( \mathbf{V}_R \). If jobvr = V, the matrix contains the right eigenvectors (in packed format for complex pairs). If jobvr = N, \( \mathbf{V}_R \) is not referenced. |
| jobvl | Character indicating whether to compute left eigenvectors ('V') or not ('N'). |
| jobvr | Character indicating whether to compute right eigenvectors ('V') or not ('N'). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::gtsv | ( | DL && | dl, |
| D && | d, | ||
| DU && | du, | ||
| B && | b ) |
#include <nda/lapack/gtsv.hpp>
Interface to the LAPACK gtsv routine.
Solves a system of linear equations
\[ \mathbf{A} \mathbf{X} = \mathbf{B} \; , \]
with a tridiagonal \( n \times n \) matrix \( \mathbf{A} \) and either \( n \times n_{\mathrm{rhs}} \) matrices \( \mathbf{X} \) and \( \mathbf{B} \) or vectors \( \mathbf{x} \) and \( \mathbf{b} \) of size \( n \). It uses Gaussian elimination with partial pivoting.
| DL | nda::blas_lapack::BlasArray<1> type. |
| D | nda::blas_lapack::BlasArrayFor<DL, 1> type. |
| DU | nda::blas_lapack::BlasArrayFor<DL, 1> type. |
| B | nda::blas_lapack::BlasArrayFor<DL> type. |
| dl | Input/Output vector. On entry, it must contain the \( n - 1 \) subdiagonal elements of \( \mathbf{A} \). On exit, it is overwritten by the \( n - 2 \) elements of the second superdiagonal of the upper triangular matrix \( \mathbf{U} \) from the LU factorization of \( \mathbf{A} \). |
| d | Input/Output vector. On entry, it must contain the diagonal elements of \( \mathbf{A} \). On exit, it is overwritten by the \( n \) diagonal elements of \( \mathbf{U} \). |
| du | Input/Output vector. On entry, it must contain the \( n - 1 \) superdiagonal elements of \( \mathbf{A} \). On exit, it is overwritten by the \( n - 1 \) elements of the first superdiagonal of \( \mathbf{U} \). |
| b | Input/Output array. On entry, the \( n \times n_{\mathrm{rhs}} \) right hand side matrix \( \mathbf{B} \) or the vector \( \mathbf{b} \). On exit, the \( n \times n_{\mathrm{rhs}} \) solution matrix \( \mathbf{X} \) or the vector \( \mathbf{x} \). |
| int nda::lapack::heev | ( | A && | a, |
| W && | w, | ||
| char | jobz = 'V', | ||
| W1 && | work = vector_value_t<A>{}, | ||
| W2 && | rwork = vector_fp_t<A>{} ) |
#include <nda/lapack/heev.hpp>
Interface to the LAPACK heev routine.
Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a complex hermitian matrix eigenvalue problem of the form
\[ \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \; , \]
for a given complex hermitian matrix \( \mathbf{A} \).
| A | nda::blas_lapack::BlasArrayCplx<2> type. |
| W | nda::blas_lapack::BlasArrayRealFor<A, 1> type. |
| W1 | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W2 | nda::blas_lapack::BlasArrayRealFor<A, 1> type. |
| a | Input/output matrix. On entry, the hermitian matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the orthonormal eigenvectors \( \mathbf{v}_i \) of the matrix \( \mathbf{A} \). If jobz = N, then on exit \( \mathbf{A} \) is destroyed. |
| w | Output vector. The eigenvalues \( \lambda_i \) in ascending order. |
| jobz | Character indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N'). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| rwork | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::hegv | ( | A && | a, |
| B && | b, | ||
| W && | w, | ||
| char | jobz = 'V', | ||
| int | itype = 1, | ||
| W1 && | work = vector_value_t<A>{}, | ||
| W2 && | rwork = vector_fp_t<A>{} ) |
#include <nda/lapack/hegv.hpp>
Interface to the LAPACK hegv routine.
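Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a complex generalized Hermitian-definite eigenvalue problem of the form
\[ \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{B} \mathbf{v}_i \quad (\mathrm{itype} = 1) \; , \]
\[ \mathbf{A} \mathbf{B} \mathbf{v}_i = \lambda_i \mathbf{v}_i \quad (\mathrm{itype} = 2) \; , \]
\[ \mathbf{B} \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \quad (\mathrm{itype} = 3) \; . \]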
Here \( \mathbf{A} \) and \( \mathbf{B} \) are assumed to be Hermitian and \( \mathbf{B} \) is also positive definite.
| a | Input/output matrix. On entry, the Hermitian matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the matrix \( \mathbf{V} \) of normalized eigenvectors such that \( \mathbf{V}^H \mathbf{B} \mathbf{V} = \mathbf{I} \) (if itype = 1 or itype = 2) or \( \mathbf{V}^H \mathbf{B}^{-1} \mathbf{V} = \mathbf{I} \) (if itype = 3). If jobz = N, then on exit \( \mathbf{A} \) is destroyed. |
| b | Input/output matrix. On entry, the hermitian, positive definite matrix \( \mathbf{B} \). On exit, the part of \( \mathbf{B} \) containing the matrix is overwritten by the triangular factor \( \mathbf{U} \) or \( \mathbf{L} \) from a Cholesky factorization. |
| w | Output vector. The eigenvalues \( \lambda_i \) in ascending order. |
| jobz | Character indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N'). |
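| itype | Specifies the problem to be solved: 1 for \( \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{B} \mathbf{v}_i \), 2 for \( \mathbf{A} \mathbf{B} \mathbf{v}_i = \lambda_i \mathbf{v}_i \), or 3 for \( \mathbf{B} \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \). |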
| work | Output vector. Workspace array used by the LAPACK routine. |
| rwork | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::orgqr | ( | A && | a, |
| TAU && | tau, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/orgqr.hpp>
Interface to the LAPACK/cuSOLVER orgqr routine.
Generates an \( m \times n \) real matrix \( \mathbf{Q} \) with orthonormal columns, which is defined as the first \( n \) columns of a product of \( k \) elementary reflectors of order \( m \)
\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]
as returned by nda::lapack::geqp3 or nda::lapack::geqrf.
Each \( \mathbf{H}(i) \) has the form
\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^T \]
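where \( \tau_i \) is a real scalar, and \( \mathbf{v}_i \) is a real vector with \( (\mathbf{v}_i)_j = 0 \) for \( j < i \) and \( (\mathbf{v}_i)_i = 1 \). The remaining elements \( (\mathbf{v}_i)_j \), \( j > i \), are stored below the diagonal in the \( i \)-th column of the input matrix.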
If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.
| A | nda::blas_lapack::BlasArrayReal<2> type. |
| TAU | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output matrix. On entry, the ith column must contain the vector which defines the elementary reflector \( H(i) \; , i = 1,2,...,k \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf. On exit, the \( m \times n \) matrix \( \mathbf{Q} \). |
| tau | Input vector. \( \tau_i \) must contain the scalar factor of the elementary reflector \(\mathbf{H}(i) \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf. |
| work | Output vector. Workspace array used by the LAPACK/cuSOLVER routine. |
| int nda::lapack::orgqr_batch | ( | A && | a, |
| TAU && | tau, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/orgqr_batch.hpp>
Interface to batched versions of the LAPACK/cuSOLVER orgqr routine.
This function generates the orthogonal matrix \( \mathbf{Q}_i \) for each matrix in a batch indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \) from the elementary reflectors and scalar factors produced by nda::lapack::geqrf_batch. Here, \( N_b \) is the batch size. See also nda::lapack::orgqr.
A batch of matrices is just a 3-dimensional array in nda::F_layout where the last dimension indexes the individual matrices such that A(:,:,i) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.
No library-level batching is currently available for this function. Instead, the function simply loops over all matrices in the batch and calls nda::lapack::orgqr on the first \( \min(m, n) \) columns of each slice. For wide matrices ( \( m < n \)), this produces an \( m \times m \) orthogonal matrix in the first \( m \) columns of each slice; the remaining columns are left untouched.
| A | nda::blas_lapack::BlasArrayReal<3> type. |
| TAU | nda::blas_lapack::BlasArrayFor<A, 2> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output array. On entry, the 3-dimensional array containing the elementary reflectors for each batch (as returned by nda::lapack::geqrf_batch). On exit, the first \( \min(m, n) \) columns of each slice contain \( \mathbf{Q}_i \). |
| tau | Input matrix. The \( i \)-th column contains the scalar factors of the elementary reflectors representing \( \mathbf{Q}_i \). |
| work | Workspace array used by the underlying single-matrix call (resized as needed). |
Definition at line 53 of file orgqr_batch.hpp.
| int nda::lapack::syev | ( | A && | a, |
| W && | w, | ||
| char | jobz = 'V', | ||
| W1 && | work = vector_value_t<A>{} ) |
#include <nda/lapack/syev.hpp>
Interface to the LAPACK syev routine.
Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a real symmetric eigenvalue problem of the form
\[ \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \; , \]
for a given real symmetric matrix \( \mathbf{A} \).
| A | nda::blas_lapack::BlasArrayReal<2> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W1 | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output matrix. On entry, the symmetric matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the orthonormal eigenvectors \( \mathbf{v}_i \) of the matrix \( \mathbf{A} \). If jobz = N, then on exit \( \mathbf{A} \) is destroyed. |
| w | Output vector. The eigenvalues \( \lambda_i \) in ascending order. |
| jobz | Character indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N'). |
| work | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::sygv | ( | A && | a, |
| B && | b, | ||
| W && | w, | ||
| char | jobz = 'V', | ||
| int | itype = 1, | ||
| W1 && | work = vector_value_t<A>{} ) |
#include <nda/lapack/sygv.hpp>
Interface to the LAPACK sygv routine.
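Computes all eigenvalues \( \lambda_i \) and, optionally, eigenvectors \( \mathbf{v}_i \) of a real generalized symmetric-definite eigenvalue problem of the form
\[ \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{B} \mathbf{v}_i \quad (\mathrm{itype} = 1) \; , \]
\[ \mathbf{A} \mathbf{B} \mathbf{v}_i = \lambda_i \mathbf{v}_i \quad (\mathrm{itype} = 2) \; , \]
\[ \mathbf{B} \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \quad (\mathrm{itype} = 3) \; . \]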
Here \( \mathbf{A} \) and \( \mathbf{B} \) are assumed to be symmetric and \( \mathbf{B} \) is also positive definite.
| A | nda::blas_lapack::BlasArrayReal<2> type. |
| B | nda::blas_lapack::BlasArrayFor<A, 2> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W1 | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output matrix. On entry, the symmetric matrix \( \mathbf{A} \). On exit, if jobz = V, \(\mathbf{A} \) contains the matrix \( \mathbf{V} \) of normalized eigenvectors such that \( \mathbf{V}^T \mathbf{B} \mathbf{V} = \mathbf{I} \) (if itype = 1 or itype = 2) or \( \mathbf{V}^T \mathbf{B}^{-1} \mathbf{V} = \mathbf{I} \) (if itype = 3). If jobz = N, then on exit \( \mathbf{A} \) is destroyed. |
| b | Input/output matrix. On entry, the symmetric, positive definite matrix \( \mathbf{B} \). On exit, the part of \( \mathbf{B} \) containing the matrix is overwritten by the triangular factor \( \mathbf{U} \) or \( \mathbf{L} \) from a Cholesky factorization. |
| w | Output vector. The eigenvalues \( \lambda_i \) in ascending order. |
| jobz | Character indicating whether to compute eigenvectors and eigenvalues ('V') or eigenvalues only ('N'). |
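| itype | Specifies the problem to be solved: 1 for \( \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{B} \mathbf{v}_i \), 2 for \( \mathbf{A} \mathbf{B} \mathbf{v}_i = \lambda_i \mathbf{v}_i \), or 3 for \( \mathbf{B} \mathbf{A} \mathbf{v}_i = \lambda_i \mathbf{v}_i \). |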
| work | Output vector. Workspace array used by the LAPACK routine. |
| int nda::lapack::ungqr | ( | A && | a, |
| TAU && | tau, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/ungqr.hpp>
Interface to the LAPACK/cuSOLVER ungqr routine.
Generates an \( m \times n \) complex matrix \( \mathbf{Q} \) with orthonormal columns, which is defined as the first \( n \) columns of a product of \( k \) elementary reflectors of order \( m \)
\[ \mathbf{Q} = \mathbf{H}(1) \mathbf{H}(2) \ldots \mathbf{H}(k) \; , \]
as returned by nda::lapack::geqp3 or nda::lapack::geqrf.
Each \( \mathbf{H}(i) \) has the form
\[ \mathbf{H}(i) = \mathbf{I} - \tau_i * \mathbf{v}_i \mathbf{v}_i^H \]
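where \( \tau_i \) is a complex scalar, and \( \mathbf{v}_i \) is a complex vector with \( (\mathbf{v}_i)_j = 0 \) for \( j < i \) and \( (\mathbf{v}_i)_i = 1 \). The remaining elements \( (\mathbf{v}_i)_j \), \( j > i \), are stored below the diagonal in the \( i \)-th column of the input matrix.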
If the input arrays satisfy nda::mem::have_device_compatible_addr_space, the cuSOLVER implementation is used.
| A | nda::blas_lapack::BlasArrayCplx<2> type. |
| TAU | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output matrix. On entry, the ith column must contain the vector which defines the elementary reflector \( H(i) \; , i = 1,2,...,k \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf. On exit, the \( m \times n \) matrix \( \mathbf{Q} \). |
| tau | Input vector. \( \tau_i \) must contain the scalar factor of the elementary reflector \(\mathbf{H}(i) \), as returned by nda::lapack::geqp3 or nda::lapack::geqrf. |
| work | Output vector. Workspace array used by the LAPACK/cuSOLVER routine. |
| int nda::lapack::ungqr_batch | ( | A && | a, |
| TAU && | tau, | ||
| W && | work = vector_value_t<A>{} ) |
#include <nda/lapack/ungqr_batch.hpp>
Interface to batched versions of the LAPACK/cuSOLVER ungqr routine.
This function generates the unitary matrix \( \mathbf{Q}_i \) for each matrix in a batch indexed by \( i \in \{ 0, \ldots, N_b - 1 \} \) from the elementary reflectors and scalar factors produced by nda::lapack::geqrf_batch. Here, \( N_b \) is the batch size. See also nda::lapack::ungqr.
A batch of matrices is just a 3-dimensional array in nda::F_layout where the last dimension indexes the individual matrices such that A(:,:,i) corresponds to the \( i \)-th matrix \( \mathbf{A}_i \) in the batch.
No library-level batching is currently available for this function. Instead, the function simply loops over all matrices in the batch and calls nda::lapack::ungqr on the first \( \min(m, n) \) columns of each slice. For wide matrices ( \( m < n \)), this produces an \( m \times m \) unitary matrix in the first \( m \) columns of each slice; the remaining columns are left untouched.
| A | nda::blas_lapack::BlasArrayCplx<3> type. |
| TAU | nda::blas_lapack::BlasArrayFor<A, 2> type. |
| W | nda::blas_lapack::BlasArrayFor<A, 1> type. |
| a | Input/output array. On entry, the 3-dimensional array containing the elementary reflectors for each batch (as returned by nda::lapack::geqrf_batch). On exit, the first \( \min(m, n) \) columns of each slice contain \( \mathbf{Q}_i \). |
| tau | Input matrix. The \( i \)-th column contains the scalar factors of the elementary reflectors representing \( \mathbf{Q}_i \). |
| work | Workspace array used by the underlying single-matrix call (resized as needed). |
Definition at line 53 of file ungqr_batch.hpp.