fml  0.1-0
Fused Matrix Library
fml::linalg Namespace Reference

Linear algebra functions. More...

Functions

template<typename REAL >
void add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
 Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T. More...
 
template<typename REAL >
cpumat< REAL > add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const cpumat< REAL > &x, const cpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void chol (cpumat< REAL > &x)
 Compute the Choleski factorization. More...
 
template<typename REAL >
void crossprod (const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
 Computes lower triangle of alpha*x^T*x. More...
 
template<typename REAL >
cpumat< REAL > crossprod (const REAL alpha, const cpumat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void tcrossprod (const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
 Computes lower triangle of alpha*x*x^T. More...
 
template<typename REAL >
cpumat< REAL > tcrossprod (const REAL alpha, const cpumat< REAL > &x)
 
template<typename REAL >
void det (cpumat< REAL > &x, int &sign, REAL &modulus)
 Computes the determinant in logarithmic form. More...
 
template<typename REAL >
REAL dot (const cpuvec< REAL > &x, const cpuvec< REAL > &y)
 Computes the dot product of two vectors, i.e. the sum of the product of the elements. More...
 
template<typename REAL >
REAL dot (const cpuvec< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void eigen_sym (cpumat< REAL > &x, cpuvec< REAL > &values)
 Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix. More...
 
template<typename REAL >
void eigen_sym (cpumat< REAL > &x, cpuvec< REAL > &values, cpumat< REAL > &vectors)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void invert (cpumat< REAL > &x)
 Compute the matrix inverse. More...
 
template<typename REAL >
void trinv (const bool upper, const bool unit_diag, cpumat< REAL > &x)
 Compute the matrix inverse of a triangular matrix. More...
 
template<typename REAL >
void lu (cpumat< REAL > &x, cpuvec< int > &p, int &info)
 Computes the PLU factorization with partial pivoting. More...
 
template<typename REAL >
void lu (cpumat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
 Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T. More...
 
template<typename REAL >
cpumat< REAL > matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpuvec< REAL > &y, cpuvec< REAL > &ret)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
cpuvec< REAL > matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpuvec< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const bool transx, const bool transy, const REAL alpha, const cpuvec< REAL > &x, const cpumat< REAL > &y, cpuvec< REAL > &ret)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
cpuvec< REAL > matmult (const bool transx, const bool transy, const REAL alpha, const cpuvec< REAL > &x, const cpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
REAL norm_1 (const cpumat< REAL > &x)
 Computes the 1 matrix norm, the maximum absolute column sum. More...
 
template<typename REAL >
REAL norm_I (const cpumat< REAL > &x)
 Computes the infinity matrix norm, the maximum absolute row sum. More...
 
template<typename REAL >
REAL norm_F (const cpumat< REAL > &x)
 Computes the Frobenius/Euclidean matrix norm. More...
 
template<typename REAL >
REAL norm_M (const cpumat< REAL > &x)
 Computes the maximum modulus matrix norm. More...
 
template<typename REAL >
REAL norm_2 (cpumat< REAL > &x)
 Computes the 2/spectral matrix norm. More...
 
template<typename REAL >
REAL cond_1 (cpumat< REAL > &x)
 Estimates the condition number under the 1-norm. More...
 
template<typename REAL >
REAL cond_I (cpumat< REAL > &x)
 Estimates the condition number under the infinity norm. More...
 
template<typename REAL >
REAL cond_2 (cpumat< REAL > &x)
 Estimates the condition number under the 2 norm. More...
 
template<typename REAL >
void qr (const bool pivot, cpumat< REAL > &x, cpuvec< REAL > &qraux)
 Computes the QR decomposition. More...
 
template<typename REAL >
void qr_Q (const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
 Recover the Q matrix from a QR decomposition. More...
 
template<typename REAL >
void qr_R (const cpumat< REAL > &QR, cpumat< REAL > &R)
 Recover the R matrix from a QR decomposition. More...
 
template<typename REAL >
void lq (cpumat< REAL > &x, cpuvec< REAL > &lqaux)
 Computes the LQ decomposition. More...
 
template<typename REAL >
void lq_L (const cpumat< REAL > &LQ, cpumat< REAL > &L)
 Recover the L matrix from an LQ decomposition. More...
 
template<typename REAL >
void lq_Q (const cpumat< REAL > &LQ, const cpuvec< REAL > &lqaux, cpumat< REAL > &Q, cpuvec< REAL > &work)
 Recover the Q matrix from an LQ decomposition. More...
 
template<typename REAL >
void solve (cpumat< REAL > &x, cpuvec< REAL > &y)
 Solve a system of equations. More...
 
template<typename REAL >
void solve (cpumat< REAL > &x, cpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void svd (cpumat< REAL > &x, cpuvec< REAL > &s)
 Computes the singular value decomposition. More...
 
template<typename REAL >
void svd (cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void qrsvd (cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
 Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...
 
template<typename REAL >
void qrsvd (cpumat< REAL > &x, cpuvec< REAL > &s)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void cpsvd (const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
 Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...
 
template<typename REAL >
void cpsvd (const cpumat< REAL > &x, cpuvec< REAL > &s)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s)
 Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
REAL trace (const cpumat< REAL > &x)
 Computes the trace, i.e. the sum of the diagonal. More...
 
template<typename REAL >
void xpose (const cpumat< REAL > &x, cpumat< REAL > &tx)
 Computes the transpose out-of-place (i.e. in a copy). More...
 
template<typename REAL >
cpumat< REAL > xpose (const cpumat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const gpumat< REAL > &x, const gpumat< REAL > &y, gpumat< REAL > &ret)
 Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T. More...
 
template<typename REAL >
gpumat< REAL > add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const gpumat< REAL > &x, const gpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void chol (gpumat< REAL > &x)
 Compute the Choleski factorization. More...
 
template<typename REAL >
void crossprod (const REAL alpha, const gpumat< REAL > &x, gpumat< REAL > &ret)
 Computes lower triangle of alpha*x^T*x. More...
 
template<typename REAL >
gpumat< REAL > crossprod (const REAL alpha, const gpumat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void tcrossprod (const REAL alpha, const gpumat< REAL > &x, gpumat< REAL > &ret)
 Computes lower triangle of alpha*x*x^T. More...
 
template<typename REAL >
gpumat< REAL > tcrossprod (const REAL alpha, const gpumat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void det (gpumat< REAL > &x, int &sign, REAL &modulus)
 Computes the determinant in logarithmic form. More...
 
template<typename REAL >
REAL dot (const gpuvec< REAL > &x, const gpuvec< REAL > &y)
 Computes the dot product of two vectors, i.e. the sum of the product of the elements. More...
 
template<typename REAL >
REAL dot (const gpuvec< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void eigen_sym (gpumat< REAL > &x, gpuvec< REAL > &values)
 Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix. More...
 
template<typename REAL >
void eigen_sym (gpumat< REAL > &x, gpuvec< REAL > &values, gpumat< REAL > &vectors)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void invert (gpumat< REAL > &x)
 Compute the matrix inverse. More...
 
template<typename REAL >
void trinv (const bool upper, const bool unit_diag, gpumat< REAL > &x)
 Compute the matrix inverse of a triangular matrix. More...
 
template<typename REAL >
void lu (gpumat< REAL > &x, gpuvec< int > &p, int &info)
 Computes the PLU factorization with partial pivoting. More...
 
template<typename REAL >
void lu (gpumat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpumat< REAL > &y, gpumat< REAL > &ret)
 Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T. More...
 
template<typename REAL >
gpumat< REAL > matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpuvec< REAL > &y, gpuvec< REAL > &ret)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
gpuvec< REAL > matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpuvec< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const bool transx, const bool transy, const REAL alpha, const gpuvec< REAL > &x, const gpumat< REAL > &y, gpuvec< REAL > &ret)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
gpuvec< REAL > matmult (const bool transx, const bool transy, const REAL alpha, const gpuvec< REAL > &x, const gpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
REAL norm_1 (const gpumat< REAL > &x)
 Computes the 1 matrix norm, the maximum absolute column sum. More...
 
template<typename REAL >
REAL norm_I (const gpumat< REAL > &x)
 Computes the infinity matrix norm, the maximum absolute row sum. More...
 
template<typename REAL >
REAL norm_F (const gpumat< REAL > &x)
 Computes the Frobenius/Euclidean matrix norm. More...
 
template<typename REAL >
REAL norm_M (const gpumat< REAL > &x)
 Computes the maximum modulus matrix norm. More...
 
template<typename REAL >
REAL norm_2 (gpumat< REAL > &x)
 Computes the 2/spectral matrix norm. More...
 
template<typename REAL >
REAL cond_1 (gpumat< REAL > &x)
 Estimates the condition number under the 1-norm. More...
 
template<typename REAL >
REAL cond_I (gpumat< REAL > &x)
 Estimates the condition number under the infinity norm. More...
 
template<typename REAL >
REAL cond_2 (gpumat< REAL > &x)
 Estimates the condition number under the 2 norm. More...
 
template<typename REAL >
void qr (const bool pivot, gpumat< REAL > &x, gpuvec< REAL > &qraux)
 Computes the QR decomposition. More...
 
template<typename REAL >
void qr_Q (const gpumat< REAL > &QR, const gpuvec< REAL > &qraux, gpumat< REAL > &Q, gpuvec< REAL > &work)
 Recover the Q matrix from a QR decomposition. More...
 
template<typename REAL >
void qr_R (const gpumat< REAL > &QR, gpumat< REAL > &R)
 Recover the R matrix from a QR decomposition. More...
 
template<typename REAL >
void lq (gpumat< REAL > &x, gpuvec< REAL > &lqaux)
 Computes the LQ decomposition. More...
 
template<typename REAL >
void lq_L (const gpumat< REAL > &LQ, gpumat< REAL > &L)
 Recover the L matrix from an LQ decomposition. More...
 
template<typename REAL >
void lq_Q (const gpumat< REAL > &LQ, const gpuvec< REAL > &lqaux, gpumat< REAL > &Q, gpuvec< REAL > &work)
 Recover the Q matrix from an LQ decomposition. More...
 
template<typename REAL >
void solve (gpumat< REAL > &x, gpuvec< REAL > &y)
 Solve a system of equations. More...
 
template<typename REAL >
void solve (gpumat< REAL > &x, gpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void svd (gpumat< REAL > &x, gpuvec< REAL > &s)
 Computes the singular value decomposition. More...
 
template<typename REAL >
void svd (gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void qrsvd (gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
 Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...
 
template<typename REAL >
void qrsvd (gpumat< REAL > &x, gpuvec< REAL > &s)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void cpsvd (const gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
 Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...
 
template<typename REAL >
void cpsvd (const gpumat< REAL > &x, gpuvec< REAL > &s)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, gpumat< REAL > &x, gpuvec< REAL > &s)
 Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
REAL trace (const gpumat< REAL > &x)
 Computes the trace, i.e. the sum of the diagonal. More...
 
template<typename REAL >
void xpose (const gpumat< REAL > &x, gpumat< REAL > &tx)
 Computes the transpose out-of-place (i.e. in a copy). More...
 
template<typename REAL >
gpumat< REAL > xpose (const gpumat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const mpimat< REAL > &x, const mpimat< REAL > &y, mpimat< REAL > &ret)
 Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T. More...
 
template<typename REAL >
mpimat< REAL > add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const mpimat< REAL > &x, const mpimat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void chol (mpimat< REAL > &x)
 Compute the Choleski factorization. More...
 
template<typename REAL >
void crossprod (const REAL alpha, const mpimat< REAL > &x, mpimat< REAL > &ret)
 Computes lower triangle of alpha*x^T*x. More...
 
template<typename REAL >
mpimat< REAL > crossprod (const REAL alpha, const mpimat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void tcrossprod (const REAL alpha, const mpimat< REAL > &x, mpimat< REAL > &ret)
 Computes lower triangle of alpha*x*x^T. More...
 
template<typename REAL >
mpimat< REAL > tcrossprod (const REAL alpha, const mpimat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void det (mpimat< REAL > &x, int &sign, REAL &modulus)
 Computes the determinant in logarithmic form. More...
 
template<typename REAL >
void eigen_sym (mpimat< REAL > &x, cpuvec< REAL > &values)
 Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix. More...
 
template<typename REAL >
void eigen_sym (mpimat< REAL > &x, cpuvec< REAL > &values, mpimat< REAL > &vectors)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void invert (mpimat< REAL > &x)
 Compute the matrix inverse. More...
 
template<typename REAL >
void trinv (const bool upper, const bool unit_diag, mpimat< REAL > &x)
 Compute the matrix inverse of a triangular matrix. More...
 
template<typename REAL >
void lu (mpimat< REAL > &x, cpuvec< int > &p, int &info)
 Computes the PLU factorization with partial pivoting. More...
 
template<typename REAL >
void lu (mpimat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
mpimat< REAL > matmult (const bool transx, const bool transy, const REAL alpha, const mpimat< REAL > &x, const mpimat< REAL > &y)
 Returns alpha*op(x)*op(y) where op(A) is A or A^T. More...
 
template<typename REAL >
void matmult (const bool transx, const bool transy, const REAL alpha, const mpimat< REAL > &x, const mpimat< REAL > &y, mpimat< REAL > &ret)
 Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T. More...
 
template<typename REAL >
REAL norm_1 (const mpimat< REAL > &x)
 Computes the 1 matrix norm, the maximum absolute column sum. More...
 
template<typename REAL >
REAL norm_I (const mpimat< REAL > &x)
 Computes the infinity matrix norm, the maximum absolute row sum. More...
 
template<typename REAL >
REAL norm_F (const mpimat< REAL > &x)
 Computes the Frobenius/Euclidean matrix norm. More...
 
template<typename REAL >
REAL norm_M (const mpimat< REAL > &x)
 Computes the maximum modulus matrix norm. More...
 
template<typename REAL >
REAL norm_2 (mpimat< REAL > &x)
 Computes the 2/spectral matrix norm. More...
 
template<typename REAL >
REAL cond_1 (mpimat< REAL > &x)
 Estimates the condition number under the 1-norm. More...
 
template<typename REAL >
REAL cond_I (mpimat< REAL > &x)
 Estimates the condition number under the infinity norm. More...
 
template<typename REAL >
REAL cond_2 (mpimat< REAL > &x)
 Estimates the condition number under the 2 norm. More...
 
template<typename REAL >
void qr (const bool pivot, mpimat< REAL > &x, cpuvec< REAL > &qraux)
 Computes the QR decomposition. More...
 
template<typename REAL >
void qr_Q (const mpimat< REAL > &QR, const cpuvec< REAL > &qraux, mpimat< REAL > &Q, cpuvec< REAL > &work)
 Recover the Q matrix from a QR decomposition. More...
 
template<typename REAL >
void qr_R (const mpimat< REAL > &QR, mpimat< REAL > &R)
 Recover the R matrix from a QR decomposition. More...
 
template<typename REAL >
void lq (mpimat< REAL > &x, cpuvec< REAL > &lqaux)
 Computes the LQ decomposition. More...
 
template<typename REAL >
void lq_L (const mpimat< REAL > &LQ, mpimat< REAL > &L)
 Recover the L matrix from a LQ decomposition. More...
 
template<typename REAL >
void lq_Q (const mpimat< REAL > &LQ, const cpuvec< REAL > &lqaux, mpimat< REAL > &Q, cpuvec< REAL > &work)
 Recover the Q matrix from a LQ decomposition. More...
 
template<typename REAL >
void solve (mpimat< REAL > &x, mpimat< REAL > &y)
 Solve a system of equations. More...
 
template<typename REAL >
void qrsvd (mpimat< REAL > &x, cpuvec< REAL > &s, mpimat< REAL > &u, mpimat< REAL > &vt)
 Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...
 
template<typename REAL >
void qrsvd (mpimat< REAL > &x, cpuvec< REAL > &s)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void cpsvd (const mpimat< REAL > &x, cpuvec< REAL > &s, mpimat< REAL > &u, mpimat< REAL > &vt)
 Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...
 
template<typename REAL >
void cpsvd (const mpimat< REAL > &x, cpuvec< REAL > &s)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, mpimat< REAL > &x, cpuvec< REAL > &s)
 Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, mpimat< REAL > &x, cpuvec< REAL > &s, mpimat< REAL > &u, mpimat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
REAL trace (const mpimat< REAL > &x)
 Computes the trace, i.e. the sum of the diagonal. More...
 
template<typename REAL >
void xpose (const mpimat< REAL > &x, mpimat< REAL > &tx)
 Computes the transpose out-of-place (i.e. in a copy). More...
 
template<typename REAL >
mpimat< REAL > xpose (const mpimat< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const parmat_cpu< REAL > &x, const cpumat< REAL > &y, parmat_cpu< REAL > &ret)
 Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed. More...
 
template<typename REAL >
parmat_cpu< REAL > matmult (const parmat_cpu< REAL > &x, const cpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const parmat_cpu< REAL > &x, const parmat_cpu< REAL > &y, cpumat< REAL > &ret)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
cpumat< REAL > matmult (const parmat_cpu< REAL > &x, const parmat_cpu< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void crossprod (const REAL alpha, const parmat_cpu< REAL > &x, cpumat< REAL > &ret)
 Computes lower triangle of alpha*x^T*x. More...
 
template<typename REAL >
cpumat< REAL > crossprod (const REAL alpha, const parmat_cpu< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void qr_R (const int root, parmat_cpu< REAL > &x, cpumat< REAL > &R)
 
template<typename REAL >
void qr_Q (parmat_cpu< REAL > &x, cpuvec< REAL > &qraux, parmat_cpu< REAL > &Q)
 
template<typename REAL >
void cpsvd (const parmat_cpu< REAL > &x, cpuvec< REAL > &s)
 Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...
 
template<typename REAL >
void cpsvd (const parmat_cpu< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void tssvd (parmat_cpu< REAL > &x, cpuvec< REAL > &s)
 Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...
 
template<typename REAL >
void tssvd (parmat_cpu< REAL > &x, cpuvec< REAL > &s, parmat_cpu< REAL > &u, cpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, parmat_cpu< REAL > &x, cpuvec< REAL > &s)
 Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, parmat_cpu< REAL > &x, cpuvec< REAL > &s, parmat_cpu< REAL > &u, cpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const parmat_gpu< REAL > &x, const gpumat< REAL > &y, parmat_gpu< REAL > &ret)
 Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed. More...
 
template<typename REAL >
parmat_gpu< REAL > matmult (const parmat_gpu< REAL > &x, const gpumat< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void matmult (const parmat_gpu< REAL > &x, const parmat_gpu< REAL > &y, gpumat< REAL > &ret)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
gpumat< REAL > matmult (const parmat_gpu< REAL > &x, const parmat_gpu< REAL > &y)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void crossprod (const REAL alpha, const parmat_gpu< REAL > &x, gpumat< REAL > &ret)
 Computes lower triangle of alpha*x^T*x. More...
 
template<typename REAL >
gpumat< REAL > crossprod (const REAL alpha, const parmat_gpu< REAL > &x)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void qr_R (const int root, parmat_gpu< REAL > &x, gpumat< REAL > &R)
 
template<typename REAL >
void qr_Q (parmat_gpu< REAL > &x, gpuvec< REAL > &qraux, parmat_gpu< REAL > &Q)
 
template<typename REAL >
void cpsvd (const parmat_gpu< REAL > &x, gpuvec< REAL > &s)
 Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...
 
template<typename REAL >
void cpsvd (const parmat_gpu< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void tssvd (parmat_gpu< REAL > &x, gpuvec< REAL > &s)
 Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...
 
template<typename REAL >
void tssvd (parmat_gpu< REAL > &x, gpuvec< REAL > &s, parmat_gpu< REAL > &u, gpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, parmat_gpu< REAL > &x, gpuvec< REAL > &s)
 Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...
 
template<typename REAL >
void rsvd (const uint32_t seed, const int k, const int q, parmat_gpu< REAL > &x, gpuvec< REAL > &s, parmat_gpu< REAL > &u, gpumat< REAL > &vt)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
 

Detailed Description

Linear algebra functions.

Function Documentation

◆ add() [1/3]

template<typename REAL >
void fml::linalg::add ( const bool  transx,
const bool  transy,
const REAL  alpha,
const REAL  beta,
const cpumat< REAL > &  x,
const cpumat< REAL > &  y,
cpumat< REAL > &  ret 
)

Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T.

Parameters
[in]transxShould x^T be used?
[in]transyShould y^T be used?
[in]alpha,betaScalars.
[in]x,yThe inputs to the sum.
[out]retThe sum.

Exceptions
If x and y are inappropriately sized for the sum, the method will throw a 'runtime_error' exception.

Template Parameters
REALshould be 'float' or 'double'.

◆ add() [2/3]

template<typename REAL >
void fml::linalg::add ( const bool  transx,
const bool  transy,
const REAL  alpha,
const REAL  beta,
const gpumat< REAL > &  x,
const gpumat< REAL > &  y,
gpumat< REAL > &  ret 
)

Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T.

Parameters
[in]transxShould x^T be used?
[in]transyShould y^T be used?
[in]alpha,betaScalars.
[in]x,yThe inputs to the sum.
[out]retThe sum.

Exceptions
If x and y are inappropriately sized for the sum, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the cuBLAS function cublasXgeam().

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ add() [3/3]

template<typename REAL >
void fml::linalg::add ( const bool  transx,
const bool  transy,
const REAL  alpha,
const REAL  beta,
const mpimat< REAL > &  x,
const mpimat< REAL > &  y,
mpimat< REAL > &  ret 
)

Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T.

Parameters
[in]transxShould x^T be used?
[in]transyShould y^T be used?
[in]alpha,betaScalars.
[in]x,yThe inputs to the sum.
[out]retThe sum.

Exceptions
If x and y are inappropriately sized for the sum, the method will throw a 'runtime_error' exception.

Communication Details
The method will communicate across all processes in the BLACS grid.

Implementation Details
Uses the PBLAS function pXgeadd().

Template Parameters
REALshould be 'float' or 'double'.

◆ chol() [1/3]

template<typename REAL >
void fml::linalg::chol ( cpumat< REAL > &  x)

Compute the Choleski factorization.

The matrix should be 1. square, 2. symmetric, 3. positive-definite. Failure of any of these conditions can lead to a runtime exception. The input is replaced by its lower-triangular Choleski factor.

Parameters
[in,out]xInput data matrix, replaced by its lower-triangular Choleski factor.

Implementation Details
Uses the LAPACK function Xpotrf().

Memory Allocations
Some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ chol() [2/3]

template<typename REAL >
void fml::linalg::chol ( gpumat< REAL > &  x)

Compute the Choleski factorization.

The matrix should be 1. square, 2. symmetric, 3. positive-definite. Failure of any of these conditions can lead to a runtime exception. The input is replaced by its lower-triangular Choleski factor.

Parameters
[in,out]xInput data matrix, replaced by its lower-triangular Choleski factor.

Uses the cuSOLVER function cusolverDnXpotrf().

Memory Allocations
Some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ chol() [3/3]

template<typename REAL >
void fml::linalg::chol ( mpimat< REAL > &  x)

Compute the Choleski factorization.

The matrix should be 1. square, 2. symmetric, 3. positive-definite. Failure of any of these conditions can lead to a runtime exception. The input is replaced by its lower-triangular Choleski factor.

Parameters
[in,out]xInput data matrix, replaced by its lower-triangular Choleski factor.

Implementation Details
Uses the ScaLAPACK function pXpotrf().

Memory Allocations
Some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_1() [1/3]

template<typename REAL >
REAL fml::linalg::cond_1 ( cpumat< REAL > &  x)

Estimates the condition number under the 1-norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_1() [2/3]

template<typename REAL >
REAL fml::linalg::cond_1 ( gpumat< REAL > &  x)

Estimates the condition number under the 1-norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) if the input is not square, and the inverse otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_1() [3/3]

template<typename REAL >
REAL fml::linalg::cond_1 ( mpimat< REAL > &  x)

Estimates the condition number under the 1-norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_2() [1/3]

template<typename REAL >
REAL fml::linalg::cond_2 ( cpumat< REAL > &  x)

Estimates the condition number under the 2 norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Uses linalg::svd().

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_2() [2/3]

template<typename REAL >
REAL fml::linalg::cond_2 ( gpumat< REAL > &  x)

Estimates the condition number under the 2 norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Uses linalg::cpsvd().

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_2() [3/3]

template<typename REAL >
REAL fml::linalg::cond_2 ( mpimat< REAL > &  x)

Estimates the condition number under the 2 norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Uses linalg::svd().

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_I() [1/3]

template<typename REAL >
REAL fml::linalg::cond_I ( cpumat< REAL > &  x)

Estimates the condition number under the infinity norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_I() [2/3]

template<typename REAL >
REAL fml::linalg::cond_I ( gpumat< REAL > &  x)

Estimates the condition number under the infinity norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) if the input is not square, and the inverse otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cond_I() [3/3]

template<typename REAL >
REAL fml::linalg::cond_I ( mpimat< REAL > &  x)

Estimates the condition number under the infinity norm.

Parameters
[in]xInput data matrix.
[in,out]xInput data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cpsvd() [1/5]

template<typename REAL >
void fml::linalg::cpsvd ( const cpumat< REAL > &  x,
cpuvec< REAL > &  s,
cpumat< REAL > &  u,
cpumat< REAL > &  vt 
)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X or X * X^T (whichever is smaller) and then computing the eigenvalue decomposition.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses crossprod() or tcrossprod() (whichever is smaller), and eigen_sym().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cpsvd() [2/5]

template<typename REAL >
void fml::linalg::cpsvd ( const gpumat< REAL > &  x,
gpuvec< REAL > &  s,
gpumat< REAL > &  u,
gpumat< REAL > &  vt 
)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X or X * X^T (whichever is smaller) and then computing the eigenvalue decomposition.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses crossprod() or tcrossprod() (whichever is smaller), and eigen_sym().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cpsvd() [3/5]

template<typename REAL >
void fml::linalg::cpsvd ( const mpimat< REAL > &  x,
cpuvec< REAL > &  s,
mpimat< REAL > &  u,
mpimat< REAL > &  vt 
)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X or X * X^T (whichever is smaller) and then computing the eigenvalue decomposition.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses crossprod() or tcrossprod() (whichever is smaller), and eigen_sym().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cpsvd() [4/5]

template<typename REAL >
void fml::linalg::cpsvd ( const parmat_cpu< REAL > &  x,
cpuvec< REAL > &  s 
)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X and then computing the eigenvalue decomposition.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses a crossproduct which requires communication, followed by a local linalg::eigen_sym() call.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ cpsvd() [5/5]

template<typename REAL >
void fml::linalg::cpsvd ( const parmat_gpu< REAL > &  x,
gpuvec< REAL > &  s 
)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X and then computing the eigenvalue decomposition.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses a crossproduct which requires communication, followed by a local linalg::eigen_sym() call.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ crossprod() [1/5]

template<typename REAL >
void fml::linalg::crossprod ( const REAL  alpha,
const cpumat< REAL > &  x,
cpumat< REAL > &  ret 
)

Computes lower triangle of alpha*x^T*x.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ crossprod() [2/5]

template<typename REAL >
void fml::linalg::crossprod ( const REAL  alpha,
const gpumat< REAL > &  x,
gpumat< REAL > &  ret 
)

Computes lower triangle of alpha*x^T*x.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the cuBLAS function cublasXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ crossprod() [3/5]

template<typename REAL >
void fml::linalg::crossprod ( const REAL  alpha,
const mpimat< REAL > &  x,
mpimat< REAL > &  ret 
)

Computes lower triangle of alpha*x^T*x.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the BLAS function pXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ crossprod() [4/5]

template<typename REAL >
void fml::linalg::crossprod ( const REAL  alpha,
const parmat_cpu< REAL > &  x,
cpumat< REAL > &  ret 
)

Computes lower triangle of alpha*x^T*x.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ crossprod() [5/5]

template<typename REAL >
void fml::linalg::crossprod ( const REAL  alpha,
const parmat_gpu< REAL > &  x,
gpumat< REAL > &  ret 
)

Computes lower triangle of alpha*x^T*x.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ det() [1/3]

template<typename REAL >
void fml::linalg::det ( cpumat< REAL > &  x,
int &  sign,
REAL &  modulus 
)

Computes the determinant in logarithmic form.

The input is replaced by its LU factorization.

Parameters
[in,out]xInput data matrix, replaced by its LU factorization.
[out]signThe sign of the determinant.
[out]modulusLog of the modulus.

Implementation Details
Uses lu().

Memory Allocations
Allocates temporary storage to compute the LU.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ det() [2/3]

template<typename REAL >
void fml::linalg::det ( gpumat< REAL > &  x,
int &  sign,
REAL &  modulus 
)

Computes the determinant in logarithmic form.

The input is replaced by its LU factorization.

Parameters
[in,out]xInput data matrix, replaced by its LU factorization.
[out]signThe sign of the determinant.
[out]modulusLog of the modulus.

Implementation Details
Uses lu().

Memory Allocations
Allocates temporary storage to compute the LU.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ det() [3/3]

template<typename REAL >
void fml::linalg::det ( mpimat< REAL > &  x,
int &  sign,
REAL &  modulus 
)

Computes the determinant in logarithmic form.

The input is replaced by its LU factorization.

Parameters
[in,out]xInput data matrix, replaced by its LU factorization.
[out]signThe sign of the determinant.
[out]modulusLog of the modulus.

Implementation Details
Uses lu().

Memory Allocations
Allocates temporary storage to compute the LU.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ dot() [1/2]

template<typename REAL >
REAL fml::linalg::dot ( const cpuvec< REAL > &  x,
const cpuvec< REAL > &  y 
)

Computes the dot product of two vectors, i.e. the sum of the product of the elements.

NOTE: if the vectors are of different length, the dot product will use only the indices of the smaller-sized vector.

Parameters
[in]x,yVectors.
Returns
The dot product.
Template Parameters
REALshould be 'float' or 'double' ('int' is also ok).

◆ dot() [2/2]

template<typename REAL >
REAL fml::linalg::dot ( const gpuvec< REAL > &  x,
const gpuvec< REAL > &  y 
)

Computes the dot product of two vectors, i.e. the sum of the product of the elements.

NOTE: if the vectors are of different length, the dot product will use only the indices of the smaller-sized vector.

Parameters
[in]x,yVectors.
Returns
The dot product.
Template Parameters
REALshould be 'float' or 'double'.

◆ eigen_sym() [1/3]

template<typename REAL >
void fml::linalg::eigen_sym ( cpumat< REAL > &  x,
cpuvec< REAL > &  values 
)

Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.

The input data is overwritten.

Parameters
[in,out]xInput data matrix. Should be square.
[out]valuesEigenvalues.
[out]vectorsEigenvectors.

Implementation Details
Uses the LAPACK functions Xsyevr().

Memory Allocations
If any output's dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ eigen_sym() [2/3]

template<typename REAL >
void fml::linalg::eigen_sym ( gpumat< REAL > &  x,
gpuvec< REAL > &  values 
)

Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.

The input data is overwritten.

Parameters
[in,out]xInput data matrix. Should be square.
[out]valuesEigenvalues.
[out]vectorsEigenvectors.

Implementation Details
Uses the cuSOLVER functions cusolverDnXsyevd().

Memory Allocations
If any output's dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ eigen_sym() [3/3]

template<typename REAL >
void fml::linalg::eigen_sym ( mpimat< REAL > &  x,
cpuvec< REAL > &  values 
)

Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.

The input data is overwritten.

Parameters
[in,out]xInput data matrix. Should be square.
[out]valuesEigenvalues.
[out]vectorsEigenvectors.

Implementation Details
Uses the ScaLAPACK functions pXsyevr().

Memory Allocations
If any output's dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ invert() [1/3]

template<typename REAL >
void fml::linalg::invert ( cpumat< REAL > &  x)

Compute the matrix inverse.

The input is replaced by its inverse, computed via a PLU.

Parameters
[in,out]xInput data matrix. Should be square.

Implementation Details
Uses the LAPACK functions Xgetrf() (LU) and Xgetri() (solve).

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ invert() [2/3]

template<typename REAL >
void fml::linalg::invert ( gpumat< REAL > &  x)

Compute the matrix inverse.

The input is replaced by its inverse, computed via a PLU.

Parameters
[in,out]xInput data matrix. Should be square.

Implementation Details
Uses the cuSOLVER functions cusolverDnXgetrf() (LU) and cusolverDnXgetrs() (solve).

Memory Allocations
LU pivot data is allocated internally. The inverse is computed in a copy before copying back to the input.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ invert() [3/3]

template<typename REAL >
void fml::linalg::invert ( mpimat< REAL > &  x)

Compute the matrix inverse.

The input is replaced by its inverse, computed via a PLU.

Parameters
[in,out]xInput data matrix. Should be square.

Implementation Details
Uses the ScaLAPACK functions pXgetrf() (LU) and pXgetri() (solve).

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq() [1/3]

template<typename REAL >
void fml::linalg::lq ( cpumat< REAL > &  x,
cpuvec< REAL > &  lqaux 
)

Computes the LQ decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact LQ representation.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]lqauxAuxiliary data for compact LQ.

Implementation Details
Uses the LAPACK function Xgelqf().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq() [2/3]

template<typename REAL >
void fml::linalg::lq ( gpumat< REAL > &  x,
gpuvec< REAL > &  lqaux 
)

Computes the LQ decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact LQ representation.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]lqauxAuxiliary data for compact LQ.

Implementation Details
NOTE: not directly supported by cuSOLVER (vendor gpulapack + cuda backend). In that case, the matrix is transposed and a QR is performed.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq() [3/3]

template<typename REAL >
void fml::linalg::lq ( mpimat< REAL > &  x,
cpuvec< REAL > &  lqaux 
)

Computes the LQ decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact LQ representation.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]lqauxAuxiliary data for compact LQ.

Implementation Details
Uses the ScaLAPACK function pXgelqf().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq_L() [1/3]

template<typename REAL >
void fml::linalg::lq_L ( const cpumat< REAL > &  LQ,
cpumat< REAL > &  L 
)

Recover the L matrix from an LQ decomposition.

Parameters
[in]LQThe compact LQ factorization, as computed via lq().
[out]LThe L matrix.

Implementation Details
Uses the LAPACK function Xlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq_L() [2/3]

template<typename REAL >
void fml::linalg::lq_L ( const gpumat< REAL > &  LQ,
gpumat< REAL > &  L 
)

Recover the L matrix from an LQ decomposition.

Parameters
[in]LQThe compact LQ factorization, as computed via lq().
[out]LThe L matrix.

Implementation Details
NOTE: not directly supported by cuSOLVER (vendor gpulapack + cuda backend). In that case, the matrix is transposed and a QR is performed.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq_L() [3/3]

template<typename REAL >
void fml::linalg::lq_L ( const mpimat< REAL > &  LQ,
mpimat< REAL > &  L 
)

Recover the L matrix from a LQ decomposition.

Parameters
[in]LQThe compact LQ factorization, as computed via lq().
[out]LThe L matrix.

Implementation Details
Uses the ScaLAPACK function pXlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq_Q() [1/3]

template<typename REAL >
void fml::linalg::lq_Q ( const cpumat< REAL > &  LQ,
const cpuvec< REAL > &  lqaux,
cpumat< REAL > &  Q,
cpuvec< REAL > &  work 
)

Recover the Q matrix from an LQ decomposition.

Parameters
[in]LQThe compact LQ factorization, as computed via lq().
[in]lqauxAuxiliary data for compact LQ.
[out]QThe Q matrix.
[out]workWorkspace array. Will be resized as necessary.

Implementation Details
Uses the LAPACK function Xorglq().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq_Q() [2/3]

template<typename REAL >
void fml::linalg::lq_Q ( const gpumat< REAL > &  LQ,
const gpuvec< REAL > &  lqaux,
gpumat< REAL > &  Q,
gpuvec< REAL > &  work 
)

Recover the Q matrix from an LQ decomposition.

Parameters
[in]LQThe compact LQ factorization, as computed via lq().
[in]lqauxAuxiliary data for compact LQ.
[out]QThe Q matrix.
[out]workWorkspace array. Will be resized as necessary.

Implementation Details
NOTE: not directly supported by cuSOLVER (vendor gpulapack + cuda backend). In that case, the matrix is transposed and a QR is performed.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ lq_Q() [3/3]

template<typename REAL >
void fml::linalg::lq_Q ( const mpimat< REAL > &  LQ,
const cpuvec< REAL > &  lqaux,
mpimat< REAL > &  Q,
cpuvec< REAL > &  work 
)

Recover the Q matrix from a LQ decomposition.

Parameters
[in]LQThe compact LQ factorization, as computed via lq().
[in]lqauxAuxiliary data for compact LQ.
[out]QThe Q matrix.
[out]workWorkspace array. Will be resized as necessary.

Implementation Details
Uses the ScaLAPACK function pXorglq().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ lu() [1/3]

template<typename REAL >
void fml::linalg::lu ( cpumat< REAL > &  x,
cpuvec< int > &  p,
int &  info 
)

Computes the PLU factorization with partial pivoting.

The input is replaced by its LU factorization, with L unit-diagonal.

Parameters
[in,out]xInput data matrix, replaced by its LU factorization.
[out]pVector of pivots, representing the diagonal matrix P in the PLU.
[out]infoThe LAPACK return number.

Implementation Details
Uses the LAPACK function Xgetrf().

Memory Allocations
If the pivot vector is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ lu() [2/3]

template<typename REAL >
void fml::linalg::lu ( gpumat< REAL > &  x,
gpuvec< int > &  p,
int &  info 
)

Computes the PLU factorization with partial pivoting.

The input is replaced by its LU factorization, with L unit-diagonal.

Parameters
[in,out]xInput data matrix, replaced by its LU factorization.
[out]pVector of pivots, representing the diagonal matrix P in the PLU.
[out]infoThe LAPACK return number.

Implementation Details
Uses the cuSOLVER function cusolverDnXgetrf().

Memory Allocations
If the pivot vector is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ lu() [3/3]

template<typename REAL >
void fml::linalg::lu ( mpimat< REAL > &  x,
cpuvec< int > &  p,
int &  info 
)

Computes the PLU factorization with partial pivoting.

The input is replaced by its LU factorization, with L unit-diagonal.

Parameters
[in,out]xInput data matrix, replaced by its LU factorization.
[out]pVector of pivots, representing the diagonal matrix P in the PLU.
[out]infoThe ScaLAPACK return number.

Implementation Details
Uses the ScaLAPACK function pXgetrf().

Memory Allocations
If the pivot vector is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ matmult() [1/6]

template<typename REAL >
void fml::linalg::matmult ( const bool  transx,
const bool  transy,
const REAL  alpha,
const cpumat< REAL > &  x,
const cpumat< REAL > &  y,
cpumat< REAL > &  ret 
)

Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters
[in]transxShould x^T be used?
[in]transyShould y^T be used?
[in]alphaScalar.
[in]xLeft multiplicand.
[in]yRight multiplicand.
[out]retThe product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the BLAS function Xgemm().

Template Parameters
REALshould be 'float' or 'double'.

◆ matmult() [2/6]

template<typename REAL >
void fml::linalg::matmult ( const bool  transx,
const bool  transy,
const REAL  alpha,
const gpumat< REAL > &  x,
const gpumat< REAL > &  y,
gpumat< REAL > &  ret 
)

Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters
[in]transxShould x^T be used?
[in]transyShould y^T be used?
[in]alphaScalar.
[in]xLeft multiplicand.
[in]yRight multiplicand.
[out]retThe product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the cuBLAS function cublasXgemm().

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ matmult() [3/6]

template<typename REAL >
mpimat<REAL> fml::linalg::matmult ( const bool  transx,
const bool  transy,
const REAL  alpha,
const mpimat< REAL > &  x,
const mpimat< REAL > &  y 
)

Returns alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters
[in]transxShould x^T be used?
[in]transyShould y^T be used?
[in]alphaScalar.
[in]xLeft multiplicand.
[in]yRight multiplicand.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception. If the inputs are distributed on different grids, a runtime_exception is thrown.

Implementation Details
Uses the PBLAS function pXgemm().

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ matmult() [4/6]

template<typename REAL >
void fml::linalg::matmult ( const bool  transx,
const bool  transy,
const REAL  alpha,
const mpimat< REAL > &  x,
const mpimat< REAL > &  y,
mpimat< REAL > &  ret 
)

Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters
[in]transxShould x^T be used?
[in]transyShould y^T be used?
[in]alphaScalar.
[in]xLeft multiplicand.
[in]yRight multiplicand.
[out]retThe product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception. If the inputs are distributed on different grids, a runtime_exception is thrown.

Implementation Details
Uses the PBLAS function pXgemm().

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ matmult() [5/6]

template<typename REAL >
void fml::linalg::matmult ( const parmat_cpu< REAL > &  x,
const cpumat< REAL > &  y,
parmat_cpu< REAL > &  ret 
)

Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed.

Parameters
[in]alphaScalar.
[in]xLeft multiplicand.
[in]yRight multiplicand.
[out]retThe product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the BLAS function Xgemm().

Template Parameters
REALshould be 'float' or 'double'.

◆ matmult() [6/6]

template<typename REAL >
void fml::linalg::matmult ( const parmat_gpu< REAL > &  x,
const gpumat< REAL > &  y,
parmat_gpu< REAL > &  ret 
)

Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed.

Parameters
[in]alphaScalar.
[in]xLeft multiplicand.
[in]yRight multiplicand.
[out]retThe product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the BLAS function Xgemm().

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_1() [1/3]

template<typename REAL >
REAL fml::linalg::norm_1 ( const cpumat< REAL > &  x)

Computes the 1 matrix norm, the maximum absolute column sum.

Parameters
[in]xInput data matrix.
Returns
Returns the norm.
Template Parameters
REALshould be 'float' or 'double'.

◆ norm_1() [2/3]

template<typename REAL >
REAL fml::linalg::norm_1 ( const gpumat< REAL > &  x)

Computes the 1 matrix norm, the maximum absolute column sum.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.

Memory Allocations
Allocates temporary storage to store the col sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_1() [3/3]

template<typename REAL >
REAL fml::linalg::norm_1 ( const mpimat< REAL > &  x)

Computes the 1 matrix norm, the maximum absolute column sum.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.

Memory Allocations
Allocates temporary storage to store the col sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_2() [1/3]

template<typename REAL >
REAL fml::linalg::norm_2 ( cpumat< REAL > &  x)

Computes the 2/spectral matrix norm.

Returns the largest singular value.

Parameters
[in,out]xInput data matrix. Values are overwritten.
Returns
Returns the norm.

Implementation Details
Uses linalg::svd().

Memory Allocations
Allocates temporary storage to compute the singular values.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_2() [2/3]

template<typename REAL >
REAL fml::linalg::norm_2 ( gpumat< REAL > &  x)

Computes the 2/spectral matrix norm.

Returns the largest singular value.

Parameters
[in,out]xInput data matrix. Values are overwritten.
Returns
Returns the norm.

Implementation Details
Uses linalg::cpsvd().

Memory Allocations
Allocates temporary storage to compute the singular values.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_2() [3/3]

template<typename REAL >
REAL fml::linalg::norm_2 ( mpimat< REAL > &  x)

Computes the 2/spectral matrix norm.

Returns the largest singular value.

Parameters
[in,out]xInput data matrix. Values are overwritten.
Returns
Returns the norm.

Memory Allocations
Allocates temporary storage to compute the singular values.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_F() [1/3]

template<typename REAL >
REAL fml::linalg::norm_F ( const cpumat< REAL > &  x)

Computes the Frobenius/Euclidean matrix norm.

Parameters
[in]xInput data matrix.
Returns
Returns the norm.
Template Parameters
REALshould be 'float' or 'double'.

◆ norm_F() [2/3]

template<typename REAL >
REAL fml::linalg::norm_F ( const gpumat< REAL > &  x)

Computes the Frobenius/Euclidean matrix norm.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.
Template Parameters
REALshould be 'float' or 'double'.

◆ norm_F() [3/3]

template<typename REAL >
REAL fml::linalg::norm_F ( const mpimat< REAL > &  x)

Computes the Frobenius/Euclidean matrix norm.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.
Template Parameters
REALshould be 'float' or 'double'.

◆ norm_I() [1/3]

template<typename REAL >
REAL fml::linalg::norm_I ( const cpumat< REAL > &  x)

Computes the infinity matrix norm, the maximum absolute row sum.

Parameters
[in]xInput data matrix.
Returns
Returns the norm.

Memory Allocations
Allocates temporary storage to store the row sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_I() [2/3]

template<typename REAL >
REAL fml::linalg::norm_I ( const gpumat< REAL > &  x)

Computes the infinity matrix norm, the maximum absolute row sum.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.

Memory Allocations
Allocates temporary storage to store the row sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_I() [3/3]

template<typename REAL >
REAL fml::linalg::norm_I ( const mpimat< REAL > &  x)

Computes the infinity matrix norm, the maximum absolute row sum.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.

Memory Allocations
Allocates temporary storage to store the row sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ norm_M() [1/3]

template<typename REAL >
REAL fml::linalg::norm_M ( const cpumat< REAL > &  x)

Computes the maximum modulus matrix norm.

Parameters
[in]xInput data matrix.
Returns
Returns the norm.
Template Parameters
REALshould be 'float' or 'double'.

◆ norm_M() [2/3]

template<typename REAL >
REAL fml::linalg::norm_M ( const gpumat< REAL > &  x)

Computes the maximum modulus matrix norm.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.
Template Parameters
REALshould be 'float' or 'double'.

◆ norm_M() [3/3]

template<typename REAL >
REAL fml::linalg::norm_M ( const mpimat< REAL > &  x)

Computes the maximum modulus matrix norm.

Parameters
[in]xInput data matrix, replaced by its LU factorization.
Returns
Returns the norm.
Template Parameters
REALshould be 'float' or 'double'.

◆ qr() [1/3]

template<typename REAL >
void fml::linalg::qr ( const bool  pivot,
cpumat< REAL > &  x,
cpuvec< REAL > &  qraux 
)

Computes the QR decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact QR representation.

Parameters
[in]pivotShould the factorization use column pivoting?
[in,out]xInput data matrix. Values are overwritten.
[out]qrauxAuxiliary data for compact QR.

Implementation Details
Uses the LAPACK function Xgeqp3() if pivoting and Xgeqrf() otherwise.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr() [2/3]

template<typename REAL >
void fml::linalg::qr ( const bool  pivot,
gpumat< REAL > &  x,
gpuvec< REAL > &  qraux 
)

Computes the QR decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact QR representation.

Parameters
[in]pivotNOTE Pivoting does not yet work on GPU. Should the factorization use column pivoting?
[in,out]xInput data matrix. Values are overwritten.
[out]qrauxAuxiliary data for compact QR.

Implementation Details
Uses the cuSOLVER function cusolverDnXgeqrf().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr() [3/3]

template<typename REAL >
void fml::linalg::qr ( const bool  pivot,
mpimat< REAL > &  x,
cpuvec< REAL > &  qraux 
)

Computes the QR decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact QR representation.

Parameters
[in]pivotShould the factorization use column pivoting?
[in,out]xInput data matrix. Values are overwritten.
[out]qrauxAuxiliary data for compact QR.

Implementation Details
Uses the ScaLAPACK function pXgeqpf() if pivoting and pXgeqrf() otherwise.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr_Q() [1/3]

template<typename REAL >
void fml::linalg::qr_Q ( const cpumat< REAL > &  QR,
const cpuvec< REAL > &  qraux,
cpumat< REAL > &  Q,
cpuvec< REAL > &  work 
)

Recover the Q matrix from a QR decomposition.

Parameters
[in]QRThe compact QR factorization, as computed via qr().
[in]qrauxAuxiliary data for compact QR.
[out]QThe Q matrix.
[out]workWorkspace array. Will be resized as necessary.

Implementation Details
Uses the LAPACK function Xorgqr().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr_Q() [2/3]

template<typename REAL >
void fml::linalg::qr_Q ( const gpumat< REAL > &  QR,
const gpuvec< REAL > &  qraux,
gpumat< REAL > &  Q,
gpuvec< REAL > &  work 
)

Recover the Q matrix from a QR decomposition.

Parameters
[in]QRThe compact QR factorization, as computed via qr().
[in]qrauxAuxiliary data for compact QR.
[out]QThe Q matrix.
[out]workWorkspace array. Will be resized as necessary.

Implementation Details
Uses the cuSOLVER function cusolverDnXorgqr().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr_Q() [3/3]

template<typename REAL >
void fml::linalg::qr_Q ( const mpimat< REAL > &  QR,
const cpuvec< REAL > &  qraux,
mpimat< REAL > &  Q,
cpuvec< REAL > &  work 
)

Recover the Q matrix from a QR decomposition.

Parameters
[in]QRThe compact QR factorization, as computed via qr().
[in]qrauxAuxiliary data for compact QR.
[out]QThe Q matrix.
[out]workWorkspace array. Will be resized as necessary.

Implementation Details
Uses the ScaLAPACK function pXormgr().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr_R() [1/3]

template<typename REAL >
void fml::linalg::qr_R ( const cpumat< REAL > &  QR,
cpumat< REAL > &  R 
)

Recover the R matrix from a QR decomposition.

Parameters
[in]QRThe compact QR factorization, as computed via qr().
[out]RThe R matrix.

Implementation Details
Uses the LAPACK function Xlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr_R() [2/3]

template<typename REAL >
void fml::linalg::qr_R ( const gpumat< REAL > &  QR,
gpumat< REAL > &  R 
)

Recover the R matrix from a QR decomposition.

Parameters
[in]QRThe compact QR factorization, as computed via qr().
[out]RThe R matrix.

Implementation Details
Uses a custom LAPACK-like lacpy() clone.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qr_R() [3/3]

template<typename REAL >
void fml::linalg::qr_R ( const mpimat< REAL > &  QR,
mpimat< REAL > &  R 
)

Recover the R matrix from a QR decomposition.

Parameters
[in]QRThe compact QR factorization, as computed via qr().
[out]RThe R matrix.

Implementation Details
Uses the ScaLAPACK function pXlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ qrsvd() [1/3]

template<typename REAL >
void fml::linalg::qrsvd ( cpumat< REAL > &  x,
cpuvec< REAL > &  s,
cpumat< REAL > &  u,
cpumat< REAL > &  vt 
)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses linalg::qr() or linalg::lq() (whichever is cheaper) to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::qr_Q() and linalg::qr_R() or linalg::lq_L() and linalg::lq_Q() are called.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qrsvd() [2/3]

template<typename REAL >
void fml::linalg::qrsvd ( gpumat< REAL > &  x,
gpuvec< REAL > &  s,
gpumat< REAL > &  u,
gpumat< REAL > &  vt 
)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses linalg::qr() to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::qr_Q() and linalg::qr_R(). Since cuSOLVER only offers QR, if m<n we operate on a transpose.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ qrsvd() [3/3]

template<typename REAL >
void fml::linalg::qrsvd ( mpimat< REAL > &  x,
cpuvec< REAL > &  s,
mpimat< REAL > &  u,
mpimat< REAL > &  vt 
)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses linalg::qr() or linalg::lq() (whichever is cheaper) to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::qr_Q() and linalg::qr_R() or linalg::lq_L() and linalg::lq_Q() are called.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ rsvd() [1/5]

template<typename REAL >
void fml::linalg::rsvd ( const uint32_t  seed,
const int  k,
const int  q,
cpumat< REAL > &  x,
cpuvec< REAL > &  s 
)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ rsvd() [2/5]

template<typename REAL >
void fml::linalg::rsvd ( const uint32_t  seed,
const int  k,
const int  q,
gpumat< REAL > &  x,
gpuvec< REAL > &  s 
)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ rsvd() [3/5]

template<typename REAL >
void fml::linalg::rsvd ( const uint32_t  seed,
const int  k,
const int  q,
mpimat< REAL > &  x,
cpuvec< REAL > &  s 
)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ rsvd() [4/5]

template<typename REAL >
void fml::linalg::rsvd ( const uint32_t  seed,
const int  k,
const int  q,
parmat_cpu< REAL > &  x,
cpuvec< REAL > &  s 
)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's using the TSQR implementation, and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ rsvd() [5/5]

template<typename REAL >
void fml::linalg::rsvd ( const uint32_t  seed,
const int  k,
const int  q,
parmat_gpu< REAL > &  x,
gpuvec< REAL > &  s 
)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters
[in,out]xInput data matrix.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's using the TSQR implementation, and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ solve() [1/3]

template<typename REAL >
void fml::linalg::solve ( cpumat< REAL > &  x,
cpuvec< REAL > &  y 
)

Solve a system of equations.

The input is replaced by its LU factorization.

Parameters
[in,out]xInput LHS. Should be square. Overwritten by LU.
[in,out]yInput RHS. Overwritten by solution.

Implementation Details
Uses the LAPACK functions Xgesv().

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square or if the RHS is incompatible with the LHS, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ solve() [2/3]

template<typename REAL >
void fml::linalg::solve ( gpumat< REAL > &  x,
gpuvec< REAL > &  y 
)

Solve a system of equations.

The input is replaced by its PLU factorization.

Parameters
[in,out]xInput LHS. Should be square. Overwritten by LU.
[in,out]yInput RHS. Overwritten by solution.

Implementation Details
Uses the cuSOLVER functions cusolverDnXgetrf() (LU) and cusolverDnXgetrs() (solve).

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square or if the RHS is incompatible with the LHS, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ solve() [3/3]

template<typename REAL >
void fml::linalg::solve ( mpimat< REAL > &  x,
mpimat< REAL > &  y 
)

Solve a system of equations.

The input is replaced by its LU factorization.

Parameters
[in,out]xInput LHS. Should be square. Overwritten by LU.
[in,out]yInput RHS. Overwritten by solution.

Implementation Details
Uses the ScaLAPACK functions pXgesv().

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square or if the RHS is incompatible with the LHS, a runtime_error exception is thrown. If the inputs are distributed on different grids, a runtime_exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ svd() [1/2]

template<typename REAL >
void fml::linalg::svd ( cpumat< REAL > &  x,
cpuvec< REAL > &  s 
)

Computes the singular value decomposition.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses the LAPACK function Xgesvd().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ svd() [2/2]

template<typename REAL >
void fml::linalg::svd ( gpumat< REAL > &  x,
gpuvec< REAL > &  s 
)

Computes the singular value decomposition.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singnular vectors.

Implementation Details
Uses the cuSOLVER function cusolverDnXgesvd(). Since cuSOLVER only supports the m>=n case, if m<n we operate on a transpose.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ tcrossprod() [1/3]

template<typename REAL >
void fml::linalg::tcrossprod ( const REAL  alpha,
const cpumat< REAL > &  x,
cpumat< REAL > &  ret 
)

Computes lower triangle of alpha*x*x^T.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ tcrossprod() [2/3]

template<typename REAL >
void fml::linalg::tcrossprod ( const REAL  alpha,
const gpumat< REAL > &  x,
gpumat< REAL > &  ret 
)

Computes lower triangle of alpha*x*x^T.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the cuBLAS function cublasXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ tcrossprod() [3/3]

template<typename REAL >
void fml::linalg::tcrossprod ( const REAL  alpha,
const mpimat< REAL > &  x,
mpimat< REAL > &  ret 
)

Computes lower triangle of alpha*x*x^T.

Parameters
[in]alphaScalar.
[in]xInput data matrix.
[out]retThe product.

Implementation Details
Uses the PBLAS function pXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ trace() [1/3]

template<typename REAL >
REAL fml::linalg::trace ( const cpumat< REAL > &  x)

Computes the trace, i.e. the sum of the diagonal.

Parameters
[in]xInput data matrix.
Template Parameters
REALshould be 'float' or 'double'.

◆ trace() [2/3]

template<typename REAL >
REAL fml::linalg::trace ( const gpumat< REAL > &  x)

Computes the trace, i.e. the sum of the diagonal.

Parameters
[in]xInput data matrix.
Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ trace() [3/3]

template<typename REAL >
REAL fml::linalg::trace ( const mpimat< REAL > &  x)

Computes the trace, i.e. the sum of the diagonal.

Parameters
[in]xInput data matrix.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.

◆ trinv() [1/3]

template<typename REAL >
void fml::linalg::trinv ( const bool  upper,
const bool  unit_diag,
cpumat< REAL > &  x 
)

Compute the matrix inverse of a triangular matrix.

The input is replaced by its inverse.

Parameters
[in]upperShould the upper triangle be used? Otherwise the lower triangle will be used.
[in]unit_diagIs the input matrix unit diagonal?
[in,out]xInput data matrix. Should be square.

Implementation Details
Uses the LAPACK functions Xtrtri().

Exceptions
If the matrix is non-square, a runtime_error exception is thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ trinv() [2/3]

template<typename REAL >
void fml::linalg::trinv ( const bool  upper,
const bool  unit_diag,
gpumat< REAL > &  x 
)

Compute the matrix inverse of a triangular matrix.

The input is replaced by its inverse.

Parameters
[in]upperShould the upper triangle be used? Otherwise the lower triangle will be used.
[in]unit_diagIs the input matrix unit diagonal?
[in,out]xInput data matrix. Should be square.

Implementation Details
Uses the cuBLAS functions cublasXtrsm().

Memory Allocations
The inverse is computed in a copy.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ trinv() [3/3]

template<typename REAL >
void fml::linalg::trinv ( const bool  upper,
const bool  unit_diag,
mpimat< REAL > &  x 
)

Compute the matrix inverse of a triangular matrix.

The input is replaced by its inverse.

Parameters
[in]upperShould the upper triangle be used? Otherwise the lower triangle will be used.
[in]unit_diagIs the input matrix unit diagonal?
[in,out]xInput data matrix. Should be square.

Implementation Details
Uses the LAPACK functions Xtrtri().

Exceptions
If the matrix is non-square, a runtime_error exception is thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ tssvd() [1/2]

template<typename REAL >
void fml::linalg::tssvd ( parmat_cpu< REAL > &  x,
cpuvec< REAL > &  s 
)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses the TSQR implementation to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::trinv() and a series of matrix multiplications are used.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ tssvd() [2/2]

template<typename REAL >
void fml::linalg::tssvd ( parmat_gpu< REAL > &  x,
gpuvec< REAL > &  s 
)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters
[in,out]xInput data matrix. Values are overwritten.
[out]sVector of singular values.
[out]uMatrix of left singular vectors.
[out]vtMatrix of (transposed) right singular vectors.

Implementation Details
Uses the TSQR implementation to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::trinv() and a series of matrix multiplications are used.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ xpose() [1/3]

template<typename REAL >
void fml::linalg::xpose ( const cpumat< REAL > &  x,
cpumat< REAL > &  tx 
)

Computes the transpose out-of-place (i.e. in a copy).

Parameters
[in]xInput data matrix.
[out]txThe transpose.

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be 'float' or 'double'.

◆ xpose() [2/3]

template<typename REAL >
void fml::linalg::xpose ( const gpumat< REAL > &  x,
gpumat< REAL > &  tx 
)

Computes the transpose out-of-place (i.e. in a copy).

Parameters
[in]xInput data matrix.
[out]txThe transpose.

Implementation Details
Uses the cuBLAS function cublasXgeam().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters
REALshould be '__half', 'float', or 'double'.

◆ xpose() [3/3]

template<typename REAL >
void fml::linalg::xpose ( const mpimat< REAL > &  x,
mpimat< REAL > &  tx 
)

Computes the transpose out-of-place (i.e. in a copy).

Parameters
[in]xInput data matrix.
[out]txThe transpose.

Implementation Details
Uses the PBLAS function pXtran().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters
REALshould be 'float' or 'double'.