Linear algebra functions. More...

Functions
template<typename REAL >
void	add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
	Returns alphaop(x) + betaop(y) where op(A) is A or A^T. More...

template<typename REAL >
cpumat< REAL >	add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const cpumat< REAL > &x, const cpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	chol (cpumat< REAL > &x)
	Compute the Choleski factorization. More...

template<typename REAL >
void	crossprod (const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
	Computes lower triangle of alphax^Tx. More...

template<typename REAL >
cpumat< REAL >	crossprod (const REAL alpha, const cpumat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	tcrossprod (const REAL alpha, const cpumat< REAL > &x, cpumat< REAL > &ret)
	Computes lower triangle of alphaxx^T. More...

template<typename REAL >
cpumat< REAL >	tcrossprod (const REAL alpha, const cpumat< REAL > &x)

template<typename REAL >
void	det (cpumat< REAL > &x, int &sign, REAL &modulus)
	Computes the determinant in logarithmic form. More...

template<typename REAL >
REAL	dot (const cpuvec< REAL > &x, const cpuvec< REAL > &y)
	Computes the dot product of two vectors, i.e. the sum of the product of the elements. More...

template<typename REAL >
REAL	dot (const cpuvec< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	eigen_sym (cpumat< REAL > &x, cpuvec< REAL > &values)
	Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix. More...

template<typename REAL >
void	eigen_sym (cpumat< REAL > &x, cpuvec< REAL > &values, cpumat< REAL > &vectors)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	invert (cpumat< REAL > &x)
	Compute the matrix inverse. More...

template<typename REAL >
void	trinv (const bool upper, const bool unit_diag, cpumat< REAL > &x)
	Compute the matrix inverse of a triangular matrix. More...

template<typename REAL >
void	lu (cpumat< REAL > &x, cpuvec< int > &p, int &info)
	Computes the PLU factorization with partial pivoting. More...

template<typename REAL >
void	lu (cpumat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y, cpumat< REAL > &ret)
	Computes ret = alphaop(x)op(y) where op(A) is A or A^T. More...

template<typename REAL >
cpumat< REAL >	matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpuvec< REAL > &y, cpuvec< REAL > &ret)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
cpuvec< REAL >	matmult (const bool transx, const bool transy, const REAL alpha, const cpumat< REAL > &x, const cpuvec< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const bool transx, const bool transy, const REAL alpha, const cpuvec< REAL > &x, const cpumat< REAL > &y, cpuvec< REAL > &ret)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
cpuvec< REAL >	matmult (const bool transx, const bool transy, const REAL alpha, const cpuvec< REAL > &x, const cpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
REAL	norm_1 (const cpumat< REAL > &x)
	Computes the 1 matrix norm, the maximum absolute column sum. More...

template<typename REAL >
REAL	norm_I (const cpumat< REAL > &x)
	Computes the infinity matrix norm, the maximum absolute row sum. More...

template<typename REAL >
REAL	norm_F (const cpumat< REAL > &x)
	Computes the Frobenius/Euclidean matrix norm. More...

template<typename REAL >
REAL	norm_M (const cpumat< REAL > &x)
	Computes the maximum modulus matrix norm. More...

template<typename REAL >
REAL	norm_2 (cpumat< REAL > &x)
	Computes the 2/spectral matrix norm. More...

template<typename REAL >
REAL	cond_1 (cpumat< REAL > &x)
	Estimates the condition number under the 1-norm. More...

template<typename REAL >
REAL	cond_I (cpumat< REAL > &x)
	Estimates the condition number under the infinity norm. More...

template<typename REAL >
REAL	cond_2 (cpumat< REAL > &x)
	Estimates the condition number under the 2 norm. More...

template<typename REAL >
void	qr (const bool pivot, cpumat< REAL > &x, cpuvec< REAL > &qraux)
	Computes the QR decomposition. More...

template<typename REAL >
void	qr_Q (const cpumat< REAL > &QR, const cpuvec< REAL > &qraux, cpumat< REAL > &Q, cpuvec< REAL > &work)
	Recover the Q matrix from a QR decomposition. More...

template<typename REAL >
void	qr_R (const cpumat< REAL > &QR, cpumat< REAL > &R)
	Recover the R matrix from a QR decomposition. More...

template<typename REAL >
void	lq (cpumat< REAL > &x, cpuvec< REAL > &lqaux)
	Computes the LQ decomposition. More...

template<typename REAL >
void	lq_L (const cpumat< REAL > &LQ, cpumat< REAL > &L)
	Recover the L matrix from an LQ decomposition. More...

template<typename REAL >
void	lq_Q (const cpumat< REAL > &LQ, const cpuvec< REAL > &lqaux, cpumat< REAL > &Q, cpuvec< REAL > &work)
	Recover the Q matrix from an LQ decomposition. More...

template<typename REAL >
void	solve (cpumat< REAL > &x, cpuvec< REAL > &y)
	Solve a system of equations. More...

template<typename REAL >
void	solve (cpumat< REAL > &x, cpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	svd (cpumat< REAL > &x, cpuvec< REAL > &s)
	Computes the singular value decomposition. More...

template<typename REAL >
void	svd (cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	qrsvd (cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
	Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...

template<typename REAL >
void	qrsvd (cpumat< REAL > &x, cpuvec< REAL > &s)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	cpsvd (const cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
	Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...

template<typename REAL >
void	cpsvd (const cpumat< REAL > &x, cpuvec< REAL > &s)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s)
	Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, cpumat< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
REAL	trace (const cpumat< REAL > &x)
	Computes the trace, i.e. the sum of the diagonal. More...

template<typename REAL >
void	xpose (const cpumat< REAL > &x, cpumat< REAL > &tx)
	Computes the transpose out-of-place (i.e. in a copy). More...

template<typename REAL >
cpumat< REAL >	xpose (const cpumat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const gpumat< REAL > &x, const gpumat< REAL > &y, gpumat< REAL > &ret)
	Returns alphaop(x) + betaop(y) where op(A) is A or A^T. More...

template<typename REAL >
gpumat< REAL >	add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const gpumat< REAL > &x, const gpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	chol (gpumat< REAL > &x)
	Compute the Choleski factorization. More...

template<typename REAL >
void	crossprod (const REAL alpha, const gpumat< REAL > &x, gpumat< REAL > &ret)
	Computes lower triangle of alphax^Tx. More...

template<typename REAL >
gpumat< REAL >	crossprod (const REAL alpha, const gpumat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	tcrossprod (const REAL alpha, const gpumat< REAL > &x, gpumat< REAL > &ret)
	Computes lower triangle of alphaxx^T. More...

template<typename REAL >
gpumat< REAL >	tcrossprod (const REAL alpha, const gpumat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	det (gpumat< REAL > &x, int &sign, REAL &modulus)
	Computes the determinant in logarithmic form. More...

template<typename REAL >
REAL	dot (const gpuvec< REAL > &x, const gpuvec< REAL > &y)
	Computes the dot product of two vectors, i.e. the sum of the product of the elements. More...

template<typename REAL >
REAL	dot (const gpuvec< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	eigen_sym (gpumat< REAL > &x, gpuvec< REAL > &values)
	Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix. More...

template<typename REAL >
void	eigen_sym (gpumat< REAL > &x, gpuvec< REAL > &values, gpumat< REAL > &vectors)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	invert (gpumat< REAL > &x)
	Compute the matrix inverse. More...

template<typename REAL >
void	trinv (const bool upper, const bool unit_diag, gpumat< REAL > &x)
	Compute the matrix inverse of a triangular matrix. More...

template<typename REAL >
void	lu (gpumat< REAL > &x, gpuvec< int > &p, int &info)
	Computes the PLU factorization with partial pivoting. More...

template<typename REAL >
void	lu (gpumat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpumat< REAL > &y, gpumat< REAL > &ret)
	Computes ret = alphaop(x)op(y) where op(A) is A or A^T. More...

template<typename REAL >
gpumat< REAL >	matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpuvec< REAL > &y, gpuvec< REAL > &ret)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
gpuvec< REAL >	matmult (const bool transx, const bool transy, const REAL alpha, const gpumat< REAL > &x, const gpuvec< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const bool transx, const bool transy, const REAL alpha, const gpuvec< REAL > &x, const gpumat< REAL > &y, gpuvec< REAL > &ret)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
gpuvec< REAL >	matmult (const bool transx, const bool transy, const REAL alpha, const gpuvec< REAL > &x, const gpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
REAL	norm_1 (const gpumat< REAL > &x)
	Computes the 1 matrix norm, the maximum absolute column sum. More...

template<typename REAL >
REAL	norm_I (const gpumat< REAL > &x)
	Computes the infinity matrix norm, the maximum absolute row sum. More...

template<typename REAL >
REAL	norm_F (const gpumat< REAL > &x)
	Computes the Frobenius/Euclidean matrix norm. More...

template<typename REAL >
REAL	norm_M (const gpumat< REAL > &x)
	Computes the maximum modulus matrix norm. More...

template<typename REAL >
REAL	norm_2 (gpumat< REAL > &x)
	Computes the 2/spectral matrix norm. More...

template<typename REAL >
REAL	cond_1 (gpumat< REAL > &x)
	Estimates the condition number under the 1-norm. More...

template<typename REAL >
REAL	cond_I (gpumat< REAL > &x)
	Estimates the condition number under the infinity norm. More...

template<typename REAL >
REAL	cond_2 (gpumat< REAL > &x)
	Estimates the condition number under the 2 norm. More...

template<typename REAL >
void	qr (const bool pivot, gpumat< REAL > &x, gpuvec< REAL > &qraux)
	Computes the QR decomposition. More...

template<typename REAL >
void	qr_Q (const gpumat< REAL > &QR, const gpuvec< REAL > &qraux, gpumat< REAL > &Q, gpuvec< REAL > &work)
	Recover the Q matrix from a QR decomposition. More...

template<typename REAL >
void	qr_R (const gpumat< REAL > &QR, gpumat< REAL > &R)
	Recover the R matrix from a QR decomposition. More...

template<typename REAL >
void	lq (gpumat< REAL > &x, gpuvec< REAL > &lqaux)
	Computes the LQ decomposition. More...

template<typename REAL >
void	lq_L (const gpumat< REAL > &LQ, gpumat< REAL > &L)
	Recover the L matrix from an LQ decomposition. More...

template<typename REAL >
void	lq_Q (const gpumat< REAL > &LQ, const gpuvec< REAL > &lqaux, gpumat< REAL > &Q, gpuvec< REAL > &work)
	Recover the Q matrix from an LQ decomposition. More...

template<typename REAL >
void	solve (gpumat< REAL > &x, gpuvec< REAL > &y)
	Solve a system of equations. More...

template<typename REAL >
void	solve (gpumat< REAL > &x, gpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	svd (gpumat< REAL > &x, gpuvec< REAL > &s)
	Computes the singular value decomposition. More...

template<typename REAL >
void	svd (gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	qrsvd (gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
	Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...

template<typename REAL >
void	qrsvd (gpumat< REAL > &x, gpuvec< REAL > &s)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	cpsvd (const gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
	Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...

template<typename REAL >
void	cpsvd (const gpumat< REAL > &x, gpuvec< REAL > &s)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, gpumat< REAL > &x, gpuvec< REAL > &s)
	Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, gpumat< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
REAL	trace (const gpumat< REAL > &x)
	Computes the trace, i.e. the sum of the diagonal. More...

template<typename REAL >
void	xpose (const gpumat< REAL > &x, gpumat< REAL > &tx)
	Computes the transpose out-of-place (i.e. in a copy). More...

template<typename REAL >
gpumat< REAL >	xpose (const gpumat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const mpimat< REAL > &x, const mpimat< REAL > &y, mpimat< REAL > &ret)
	Returns alphaop(x) + betaop(y) where op(A) is A or A^T. More...

template<typename REAL >
mpimat< REAL >	add (const bool transx, const bool transy, const REAL alpha, const REAL beta, const mpimat< REAL > &x, const mpimat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	chol (mpimat< REAL > &x)
	Compute the Choleski factorization. More...

template<typename REAL >
void	crossprod (const REAL alpha, const mpimat< REAL > &x, mpimat< REAL > &ret)
	Computes lower triangle of alphax^Tx. More...

template<typename REAL >
mpimat< REAL >	crossprod (const REAL alpha, const mpimat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	tcrossprod (const REAL alpha, const mpimat< REAL > &x, mpimat< REAL > &ret)
	Computes lower triangle of alphaxx^T. More...

template<typename REAL >
mpimat< REAL >	tcrossprod (const REAL alpha, const mpimat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	det (mpimat< REAL > &x, int &sign, REAL &modulus)
	Computes the determinant in logarithmic form. More...

template<typename REAL >
void	eigen_sym (mpimat< REAL > &x, cpuvec< REAL > &values)
	Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix. More...

template<typename REAL >
void	eigen_sym (mpimat< REAL > &x, cpuvec< REAL > &values, mpimat< REAL > &vectors)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	invert (mpimat< REAL > &x)
	Compute the matrix inverse. More...

template<typename REAL >
void	trinv (const bool upper, const bool unit_diag, mpimat< REAL > &x)
	Compute the matrix inverse of a triangular matrix. More...

template<typename REAL >
void	lu (mpimat< REAL > &x, cpuvec< int > &p, int &info)
	Computes the PLU factorization with partial pivoting. More...

template<typename REAL >
void	lu (mpimat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
mpimat< REAL >	matmult (const bool transx, const bool transy, const REAL alpha, const mpimat< REAL > &x, const mpimat< REAL > &y)
	Returns alphaop(x)op(y) where op(A) is A or A^T. More...

template<typename REAL >
void	matmult (const bool transx, const bool transy, const REAL alpha, const mpimat< REAL > &x, const mpimat< REAL > &y, mpimat< REAL > &ret)
	Computes ret = alphaop(x)op(y) where op(A) is A or A^T. More...

template<typename REAL >
REAL	norm_1 (const mpimat< REAL > &x)
	Computes the 1 matrix norm, the maximum absolute column sum. More...

template<typename REAL >
REAL	norm_I (const mpimat< REAL > &x)
	Computes the infinity matrix norm, the maximum absolute row sum. More...

template<typename REAL >
REAL	norm_F (const mpimat< REAL > &x)
	Computes the Frobenius/Euclidean matrix norm. More...

template<typename REAL >
REAL	norm_M (const mpimat< REAL > &x)
	Computes the maximum modulus matrix norm. More...

template<typename REAL >
REAL	norm_2 (mpimat< REAL > &x)
	Computes the 2/spectral matrix norm. More...

template<typename REAL >
REAL	cond_1 (mpimat< REAL > &x)
	Estimates the condition number under the 1-norm. More...

template<typename REAL >
REAL	cond_I (mpimat< REAL > &x)
	Estimates the condition number under the infinity norm. More...

template<typename REAL >
REAL	cond_2 (mpimat< REAL > &x)
	Estimates the condition number under the 2 norm. More...

template<typename REAL >
void	qr (const bool pivot, mpimat< REAL > &x, cpuvec< REAL > &qraux)
	Computes the QR decomposition. More...

template<typename REAL >
void	qr_Q (const mpimat< REAL > &QR, const cpuvec< REAL > &qraux, mpimat< REAL > &Q, cpuvec< REAL > &work)
	Recover the Q matrix from a QR decomposition. More...

template<typename REAL >
void	qr_R (const mpimat< REAL > &QR, mpimat< REAL > &R)
	Recover the R matrix from a QR decomposition. More...

template<typename REAL >
void	lq (mpimat< REAL > &x, cpuvec< REAL > &lqaux)
	Computes the LQ decomposition. More...

template<typename REAL >
void	lq_L (const mpimat< REAL > &LQ, mpimat< REAL > &L)
	Recover the L matrix from a LQ decomposition. More...

template<typename REAL >
void	lq_Q (const mpimat< REAL > &LQ, const cpuvec< REAL > &lqaux, mpimat< REAL > &Q, cpuvec< REAL > &work)
	Recover the Q matrix from a LQ decomposition. More...

template<typename REAL >
void	solve (mpimat< REAL > &x, mpimat< REAL > &y)
	Solve a system of equations. More...

template<typename REAL >
void	qrsvd (mpimat< REAL > &x, cpuvec< REAL > &s, mpimat< REAL > &u, mpimat< REAL > &vt)
	Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...

template<typename REAL >
void	qrsvd (mpimat< REAL > &x, cpuvec< REAL > &s)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	cpsvd (const mpimat< REAL > &x, cpuvec< REAL > &s, mpimat< REAL > &u, mpimat< REAL > &vt)
	Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...

template<typename REAL >
void	cpsvd (const mpimat< REAL > &x, cpuvec< REAL > &s)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, mpimat< REAL > &x, cpuvec< REAL > &s)
	Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, mpimat< REAL > &x, cpuvec< REAL > &s, mpimat< REAL > &u, mpimat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
REAL	trace (const mpimat< REAL > &x)
	Computes the trace, i.e. the sum of the diagonal. More...

template<typename REAL >
void	xpose (const mpimat< REAL > &x, mpimat< REAL > &tx)
	Computes the transpose out-of-place (i.e. in a copy). More...

template<typename REAL >
mpimat< REAL >	xpose (const mpimat< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const parmat_cpu< REAL > &x, const cpumat< REAL > &y, parmat_cpu< REAL > &ret)
	Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed. More...

template<typename REAL >
parmat_cpu< REAL >	matmult (const parmat_cpu< REAL > &x, const cpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const parmat_cpu< REAL > &x, const parmat_cpu< REAL > &y, cpumat< REAL > &ret)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
cpumat< REAL >	matmult (const parmat_cpu< REAL > &x, const parmat_cpu< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	crossprod (const REAL alpha, const parmat_cpu< REAL > &x, cpumat< REAL > &ret)
	Computes lower triangle of alphax^Tx. More...

template<typename REAL >
cpumat< REAL >	crossprod (const REAL alpha, const parmat_cpu< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	qr_R (const int root, parmat_cpu< REAL > &x, cpumat< REAL > &R)

template<typename REAL >
void	qr_Q (parmat_cpu< REAL > &x, cpuvec< REAL > &qraux, parmat_cpu< REAL > &Q)

template<typename REAL >
void	cpsvd (const parmat_cpu< REAL > &x, cpuvec< REAL > &s)
	Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...

template<typename REAL >
void	cpsvd (const parmat_cpu< REAL > &x, cpuvec< REAL > &s, cpumat< REAL > &u, cpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	tssvd (parmat_cpu< REAL > &x, cpuvec< REAL > &s)
	Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...

template<typename REAL >
void	tssvd (parmat_cpu< REAL > &x, cpuvec< REAL > &s, parmat_cpu< REAL > &u, cpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, parmat_cpu< REAL > &x, cpuvec< REAL > &s)
	Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, parmat_cpu< REAL > &x, cpuvec< REAL > &s, parmat_cpu< REAL > &u, cpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const parmat_gpu< REAL > &x, const gpumat< REAL > &y, parmat_gpu< REAL > &ret)
	Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed. More...

template<typename REAL >
parmat_gpu< REAL >	matmult (const parmat_gpu< REAL > &x, const gpumat< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	matmult (const parmat_gpu< REAL > &x, const parmat_gpu< REAL > &y, gpumat< REAL > &ret)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
gpumat< REAL >	matmult (const parmat_gpu< REAL > &x, const parmat_gpu< REAL > &y)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	crossprod (const REAL alpha, const parmat_gpu< REAL > &x, gpumat< REAL > &ret)
	Computes lower triangle of alphax^Tx. More...

template<typename REAL >
gpumat< REAL >	crossprod (const REAL alpha, const parmat_gpu< REAL > &x)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	qr_R (const int root, parmat_gpu< REAL > &x, gpumat< REAL > &R)

template<typename REAL >
void	qr_Q (parmat_gpu< REAL > &x, gpuvec< REAL > &qraux, parmat_gpu< REAL > &Q)

template<typename REAL >
void	cpsvd (const parmat_gpu< REAL > &x, gpuvec< REAL > &s)
	Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable. More...

template<typename REAL >
void	cpsvd (const parmat_gpu< REAL > &x, gpuvec< REAL > &s, gpumat< REAL > &u, gpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	tssvd (parmat_gpu< REAL > &x, gpuvec< REAL > &s)
	Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization. More...

template<typename REAL >
void	tssvd (parmat_gpu< REAL > &x, gpuvec< REAL > &s, parmat_gpu< REAL > &u, gpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, parmat_gpu< REAL > &x, gpuvec< REAL > &s)
	Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation. More...

template<typename REAL >
void	rsvd (const uint32_t seed, const int k, const int q, parmat_gpu< REAL > &x, gpuvec< REAL > &s, parmat_gpu< REAL > &u, gpumat< REAL > &vt)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Detailed Description

Linear algebra functions.

Function Documentation

◆ add() [1/3]

template<typename REAL >

void fml::linalg::add	(	const bool	transx,
		const bool	transy,
		const REAL	alpha,
		const REAL	beta,
		const cpumat< REAL > &	x,
		const cpumat< REAL > &	y,
		cpumat< REAL > &	ret
	)

Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T.

Parameters

[in]	transx	Should x^T be used?
[in]	transy	Should y^T be used?
[in]	alpha,beta	Scalars.
[in]	x,y	The inputs to the sum.
[out]	ret	The sum.

Exceptions
If x and y are inappropriately sized for the sum, the method will throw a 'runtime_error' exception.

Template Parameters

REAL	should be 'float' or 'double'.

◆ add() [2/3]

template<typename REAL >

void fml::linalg::add	(	const bool	transx,
		const bool	transy,
		const REAL	alpha,
		const REAL	beta,
		const gpumat< REAL > &	x,
		const gpumat< REAL > &	y,
		gpumat< REAL > &	ret
	)

Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T.

Parameters

[in]	transx	Should x^T be used?
[in]	transy	Should y^T be used?
[in]	alpha,beta	Scalars.
[in]	x,y	The inputs to the sum.
[out]	ret	The sum.

Exceptions
If x and y are inappropriately sized for the sum, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the cuBLAS function cublasXgeam().

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ add() [3/3]

template<typename REAL >

void fml::linalg::add	(	const bool	transx,
		const bool	transy,
		const REAL	alpha,
		const REAL	beta,
		const mpimat< REAL > &	x,
		const mpimat< REAL > &	y,
		mpimat< REAL > &	ret
	)

Returns alpha*op(x) + beta*op(y) where op(A) is A or A^T.

Parameters

[in]	transx	Should x^T be used?
[in]	transy	Should y^T be used?
[in]	alpha,beta	Scalars.
[in]	x,y	The inputs to the sum.
[out]	ret	The sum.

Exceptions
If x and y are inappropriately sized for the sum, the method will throw a 'runtime_error' exception.

Communication Details
The method will communicate across all processes in the BLACS grid.

Implementation Details
Uses the PBLAS function pXgeadd().

Template Parameters

REAL	should be 'float' or 'double'.

◆ chol() [1/3]

template<typename REAL >

void fml::linalg::chol ( cpumat< REAL > & x )

Compute the Choleski factorization.

The matrix should be 1. square, 2. symmetric, 3. positive-definite. Failure of any of these conditions can lead to a runtime exception. The input is replaced by its lower-triangular Choleski factor.

Parameters

[in,out] x Input data matrix, replaced by its lower-triangular Choleski factor.

Implementation Details
Uses the LAPACK function Xpotrf().

Memory Allocations
Some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ chol() [2/3]

template<typename REAL >

void fml::linalg::chol ( gpumat< REAL > & x )

Compute the Choleski factorization.

The matrix should be 1. square, 2. symmetric, 3. positive-definite. Failure of any of these conditions can lead to a runtime exception. The input is replaced by its lower-triangular Choleski factor.

Parameters

[in,out] x Input data matrix, replaced by its lower-triangular Choleski factor.

Uses the cuSOLVER function cusolverDnXpotrf().

Memory Allocations
Some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ chol() [3/3]

template<typename REAL >

void fml::linalg::chol ( mpimat< REAL > & x )

Compute the Choleski factorization.

The matrix should be 1. square, 2. symmetric, 3. positive-definite. Failure of any of these conditions can lead to a runtime exception. The input is replaced by its lower-triangular Choleski factor.

Parameters

[in,out] x Input data matrix, replaced by its lower-triangular Choleski factor.

Implementation Details
Uses the ScaLAPACK function pXpotrf().

Memory Allocations
Some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_1() [1/3]

template<typename REAL >

REAL fml::linalg::cond_1 ( cpumat< REAL > & x )

Estimates the condition number under the 1-norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_1() [2/3]

template<typename REAL >

REAL fml::linalg::cond_1 ( gpumat< REAL > & x )

Estimates the condition number under the 1-norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) if the input is not square, and the inverse otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_1() [3/3]

template<typename REAL >

REAL fml::linalg::cond_1 ( mpimat< REAL > & x )

Estimates the condition number under the 1-norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_2() [1/3]

template<typename REAL >

REAL fml::linalg::cond_2 ( cpumat< REAL > & x )

Estimates the condition number under the 2 norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Uses linalg::svd().

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_2() [2/3]

template<typename REAL >

REAL fml::linalg::cond_2 ( gpumat< REAL > & x )

Estimates the condition number under the 2 norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Uses linalg::cpsvd().

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_2() [3/3]

template<typename REAL >

REAL fml::linalg::cond_2 ( mpimat< REAL > & x )

Estimates the condition number under the 2 norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Uses linalg::svd().

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_I() [1/3]

template<typename REAL >

REAL fml::linalg::cond_I ( cpumat< REAL > & x )

Estimates the condition number under the infinity norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_I() [2/3]

template<typename REAL >

REAL fml::linalg::cond_I ( gpumat< REAL > & x )

Estimates the condition number under the infinity norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) if the input is not square, and the inverse otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cond_I() [3/3]

template<typename REAL >

REAL fml::linalg::cond_I ( mpimat< REAL > & x )

Estimates the condition number under the infinity norm.

Parameters

[in]	x	Input data matrix.
[in,out]	x	Input data matrix. The data is overwritten.

Implementation Details
Computes L or R (whichever is smaller) and the LAPACK function Xtrcon() if the input is not square, and Xgecon() on the LU of the input otherwise.

Memory Allocations
Allocates temporary storage to compute the QR/LQ/LU, as well as workspace arrays for the LAPACK condition number function.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cpsvd() [1/5]

template<typename REAL >

void fml::linalg::cpsvd	(	const cpumat< REAL > &	x,
		cpuvec< REAL > &	s,
		cpumat< REAL > &	u,
		cpumat< REAL > &	vt
	)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X or X * X^T (whichever is smaller) and then computing the eigenvalue decomposition.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses crossprod() or tcrossprod() (whichever is smaller), and eigen_sym().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cpsvd() [2/5]

template<typename REAL >

void fml::linalg::cpsvd	(	const gpumat< REAL > &	x,
		gpuvec< REAL > &	s,
		gpumat< REAL > &	u,
		gpumat< REAL > &	vt
	)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X or X * X^T (whichever is smaller) and then computing the eigenvalue decomposition.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses crossprod() or tcrossprod() (whichever is smaller), and eigen_sym().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cpsvd() [3/5]

template<typename REAL >

void fml::linalg::cpsvd	(	const mpimat< REAL > &	x,
		cpuvec< REAL > &	s,
		mpimat< REAL > &	u,
		mpimat< REAL > &	vt
	)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X or X * X^T (whichever is smaller) and then computing the eigenvalue decomposition.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses crossprod() or tcrossprod() (whichever is smaller), and eigen_sym().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cpsvd() [4/5]

template<typename REAL >

void fml::linalg::cpsvd	(	const parmat_cpu< REAL > &	x,
		cpuvec< REAL > &	s
	)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X and then computing the eigenvalue decomposition.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses a crossproduct which requires communication, followed by a local linalg::eigen_sym() call.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ cpsvd() [5/5]

template<typename REAL >

void fml::linalg::cpsvd	(	const parmat_gpu< REAL > &	x,
		gpuvec< REAL > &	s
	)

Computes the singular value decomposition using the "crossproducts SVD". This method is not numerically stable.

The operation works by computing the crossproducts matrix X^T * X and then computing the eigenvalue decomposition.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses a crossproduct which requires communication, followed by a local linalg::eigen_sym() call.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ crossprod() [1/5]

template<typename REAL >

void fml::linalg::crossprod	(	const REAL	alpha,
		const cpumat< REAL > &	x,
		cpumat< REAL > &	ret
	)

Computes lower triangle of alpha*x^T*x.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ crossprod() [2/5]

template<typename REAL >

void fml::linalg::crossprod	(	const REAL	alpha,
		const gpumat< REAL > &	x,
		gpumat< REAL > &	ret
	)

Computes lower triangle of alpha*x^T*x.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the cuBLAS function cublasXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ crossprod() [3/5]

template<typename REAL >

void fml::linalg::crossprod	(	const REAL	alpha,
		const mpimat< REAL > &	x,
		mpimat< REAL > &	ret
	)

Computes lower triangle of alpha*x^T*x.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the BLAS function pXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ crossprod() [4/5]

template<typename REAL >

void fml::linalg::crossprod	(	const REAL	alpha,
		const parmat_cpu< REAL > &	x,
		cpumat< REAL > &	ret
	)

Computes lower triangle of alpha*x^T*x.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ crossprod() [5/5]

template<typename REAL >

void fml::linalg::crossprod	(	const REAL	alpha,
		const parmat_gpu< REAL > &	x,
		gpumat< REAL > &	ret
	)

Computes lower triangle of alpha*x^T*x.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ det() [1/3]

template<typename REAL >

void fml::linalg::det	(	cpumat< REAL > &	x,
		int &	sign,
		REAL &	modulus
	)

Computes the determinant in logarithmic form.

The input is replaced by its LU factorization.

Parameters

[in,out]	x	Input data matrix, replaced by its LU factorization.
[out]	sign	The sign of the determinant.
[out]	modulus	Log of the modulus.

Implementation Details
Uses lu().

Memory Allocations
Allocates temporary storage to compute the LU.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ det() [2/3]

template<typename REAL >

void fml::linalg::det	(	gpumat< REAL > &	x,
		int &	sign,
		REAL &	modulus
	)

Computes the determinant in logarithmic form.

The input is replaced by its LU factorization.

Parameters

[in,out]	x	Input data matrix, replaced by its LU factorization.
[out]	sign	The sign of the determinant.
[out]	modulus	Log of the modulus.

Implementation Details
Uses lu().

Memory Allocations
Allocates temporary storage to compute the LU.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ det() [3/3]

template<typename REAL >

void fml::linalg::det	(	mpimat< REAL > &	x,
		int &	sign,
		REAL &	modulus
	)

Computes the determinant in logarithmic form.

The input is replaced by its LU factorization.

Parameters

[in,out]	x	Input data matrix, replaced by its LU factorization.
[out]	sign	The sign of the determinant.
[out]	modulus	Log of the modulus.

Implementation Details
Uses lu().

Memory Allocations
Allocates temporary storage to compute the LU.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ dot() [1/2]

template<typename REAL >

REAL fml::linalg::dot	(	const cpuvec< REAL > &	x,
		const cpuvec< REAL > &	y
	)

Computes the dot product of two vectors, i.e. the sum of the product of the elements.

NOTE: if the vectors are of different length, the dot product will use only the indices of the smaller-sized vector.

Parameters

[in] x,y Vectors.

Returns: The dot product.

Template Parameters

REAL	should be 'float' or 'double' ('int' is also ok).

◆ dot() [2/2]

template<typename REAL >

REAL fml::linalg::dot	(	const gpuvec< REAL > &	x,
		const gpuvec< REAL > &	y
	)

Computes the dot product of two vectors, i.e. the sum of the product of the elements.

NOTE: if the vectors are of different length, the dot product will use only the indices of the smaller-sized vector.

Parameters

[in] x,y Vectors.

Returns: The dot product.

Template Parameters

REAL	should be 'float' or 'double'.

◆ eigen_sym() [1/3]

template<typename REAL >

void fml::linalg::eigen_sym	(	cpumat< REAL > &	x,
		cpuvec< REAL > &	values
	)

Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.

The input data is overwritten.

Parameters

[in,out]	x	Input data matrix. Should be square.
[out]	values	Eigenvalues.
[out]	vectors	Eigenvectors.

Implementation Details
Uses the LAPACK functions Xsyevr().

Memory Allocations
If any output's dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ eigen_sym() [2/3]

template<typename REAL >

void fml::linalg::eigen_sym	(	gpumat< REAL > &	x,
		gpuvec< REAL > &	values
	)

Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.

The input data is overwritten.

Parameters

[in,out]	x	Input data matrix. Should be square.
[out]	values	Eigenvalues.
[out]	vectors	Eigenvectors.

Implementation Details
Uses the cuSOLVER functions cusolverDnXsyevd().

Memory Allocations
If any output's dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ eigen_sym() [3/3]

template<typename REAL >

void fml::linalg::eigen_sym	(	mpimat< REAL > &	x,
		cpuvec< REAL > &	values
	)

Compute the eigenvalues and optionally the eigenvectors for a symmetric matrix.

The input data is overwritten.

Parameters

[in,out]	x	Input data matrix. Should be square.
[out]	values	Eigenvalues.
[out]	vectors	Eigenvectors.

Implementation Details
Uses the ScaLAPACK functions pXsyevr().

Memory Allocations
If any output's dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ invert() [1/3]

template<typename REAL >

void fml::linalg::invert ( cpumat< REAL > & x )

Compute the matrix inverse.

The input is replaced by its inverse, computed via a PLU.

Parameters

[in,out] x Input data matrix. Should be square.

Implementation Details
Uses the LAPACK functions Xgetrf() (LU) and Xgetri() (solve).

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ invert() [2/3]

template<typename REAL >

void fml::linalg::invert ( gpumat< REAL > & x )

Compute the matrix inverse.

The input is replaced by its inverse, computed via a PLU.

Parameters

[in,out] x Input data matrix. Should be square.

Implementation Details
Uses the cuSOLVER functions cusolverDnXgetrf() (LU) and cusolverDnXgetrs() (solve).

Memory Allocations
LU pivot data is allocated internally. The inverse is computed in a copy before copying back to the input.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ invert() [3/3]

template<typename REAL >

void fml::linalg::invert ( mpimat< REAL > & x )

Compute the matrix inverse.

The input is replaced by its inverse, computed via a PLU.

Parameters

[in,out] x Input data matrix. Should be square.

Implementation Details
Uses the ScaLAPACK functions pXgetrf() (LU) and pXgetri() (solve).

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq() [1/3]

template<typename REAL >

void fml::linalg::lq	(	cpumat< REAL > &	x,
		cpuvec< REAL > &	lqaux
	)

Computes the LQ decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact LQ representation.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	lqaux	Auxiliary data for compact LQ.

Implementation Details
Uses the LAPACK function Xgelqf().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq() [2/3]

template<typename REAL >

void fml::linalg::lq	(	gpumat< REAL > &	x,
		gpuvec< REAL > &	lqaux
	)

Computes the LQ decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact LQ representation.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	lqaux	Auxiliary data for compact LQ.

Implementation Details
NOTE: not directly supported by cuSOLVER (vendor gpulapack + cuda backend). In that case, the matrix is transposed and a QR is performed.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq() [3/3]

template<typename REAL >

void fml::linalg::lq	(	mpimat< REAL > &	x,
		cpuvec< REAL > &	lqaux
	)

Computes the LQ decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact LQ representation.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	lqaux	Auxiliary data for compact LQ.

Implementation Details
Uses the ScaLAPACK function pXgelqf().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq_L() [1/3]

template<typename REAL >

void fml::linalg::lq_L	(	const cpumat< REAL > &	LQ,
		cpumat< REAL > &	L
	)

Recover the L matrix from an LQ decomposition.

Parameters

[in]	LQ	The compact LQ factorization, as computed via `lq()`.
[out]	L	The L matrix.

Implementation Details
Uses the LAPACK function Xlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq_L() [2/3]

template<typename REAL >

void fml::linalg::lq_L	(	const gpumat< REAL > &	LQ,
		gpumat< REAL > &	L
	)

Recover the L matrix from an LQ decomposition.

Parameters

[in]	LQ	The compact LQ factorization, as computed via `lq()`.
[out]	L	The L matrix.

Implementation Details
NOTE: not directly supported by cuSOLVER (vendor gpulapack + cuda backend). In that case, the matrix is transposed and a QR is performed.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq_L() [3/3]

template<typename REAL >

void fml::linalg::lq_L	(	const mpimat< REAL > &	LQ,
		mpimat< REAL > &	L
	)

Recover the L matrix from a LQ decomposition.

Parameters

[in]	LQ	The compact LQ factorization, as computed via `lq()`.
[out]	L	The L matrix.

Implementation Details
Uses the ScaLAPACK function pXlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq_Q() [1/3]

template<typename REAL >

void fml::linalg::lq_Q	(	const cpumat< REAL > &	LQ,
		const cpuvec< REAL > &	lqaux,
		cpumat< REAL > &	Q,
		cpuvec< REAL > &	work
	)

Recover the Q matrix from an LQ decomposition.

Parameters

[in]	LQ	The compact LQ factorization, as computed via `lq()`.
[in]	lqaux	Auxiliary data for compact LQ.
[out]	Q	The Q matrix.
[out]	work	Workspace array. Will be resized as necessary.

Implementation Details
Uses the LAPACK function Xorglq().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq_Q() [2/3]

template<typename REAL >

void fml::linalg::lq_Q	(	const gpumat< REAL > &	LQ,
		const gpuvec< REAL > &	lqaux,
		gpumat< REAL > &	Q,
		gpuvec< REAL > &	work
	)

Recover the Q matrix from an LQ decomposition.

Parameters

[in]	LQ	The compact LQ factorization, as computed via `lq()`.
[in]	lqaux	Auxiliary data for compact LQ.
[out]	Q	The Q matrix.
[out]	work	Workspace array. Will be resized as necessary.

Implementation Details
NOTE: not directly supported by cuSOLVER (vendor gpulapack + cuda backend). In that case, the matrix is transposed and a QR is performed.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lq_Q() [3/3]

template<typename REAL >

void fml::linalg::lq_Q	(	const mpimat< REAL > &	LQ,
		const cpuvec< REAL > &	lqaux,
		mpimat< REAL > &	Q,
		cpuvec< REAL > &	work
	)

Recover the Q matrix from a LQ decomposition.

Parameters

[in]	LQ	The compact LQ factorization, as computed via `lq()`.
[in]	lqaux	Auxiliary data for compact LQ.
[out]	Q	The Q matrix.
[out]	work	Workspace array. Will be resized as necessary.

Implementation Details
Uses the ScaLAPACK function pXorglq().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lu() [1/3]

template<typename REAL >

void fml::linalg::lu	(	cpumat< REAL > &	x,
		cpuvec< int > &	p,
		int &	info
	)

Computes the PLU factorization with partial pivoting.

The input is replaced by its LU factorization, with L unit-diagonal.

Parameters

[in,out]	x	Input data matrix, replaced by its LU factorization.
[out]	p	Vector of pivots, representing the diagonal matrix P in the PLU.
[out]	info	The LAPACK return number.

Implementation Details
Uses the LAPACK function Xgetrf().

Memory Allocations
If the pivot vector is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ lu() [2/3]

template<typename REAL >

void fml::linalg::lu	(	gpumat< REAL > &	x,
		gpuvec< int > &	p,
		int &	info
	)

Computes the PLU factorization with partial pivoting.

The input is replaced by its LU factorization, with L unit-diagonal.

Parameters

[in,out]	x	Input data matrix, replaced by its LU factorization.
[out]	p	Vector of pivots, representing the diagonal matrix P in the PLU.
[out]	info	The LAPACK return number.

Implementation Details
Uses the cuSOLVER function cusolverDnXgetrf().

Memory Allocations
If the pivot vector is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ lu() [3/3]

template<typename REAL >

void fml::linalg::lu	(	mpimat< REAL > &	x,
		cpuvec< int > &	p,
		int &	info
	)

Computes the PLU factorization with partial pivoting.

The input is replaced by its LU factorization, with L unit-diagonal.

Parameters

[in,out]	x	Input data matrix, replaced by its LU factorization.
[out]	p	Vector of pivots, representing the diagonal matrix P in the PLU.
[out]	info	The ScaLAPACK return number.

Implementation Details
Uses the ScaLAPACK function pXgetrf().

Memory Allocations
If the pivot vector is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ matmult() [1/6]

template<typename REAL >

void fml::linalg::matmult	(	const bool	transx,
		const bool	transy,
		const REAL	alpha,
		const cpumat< REAL > &	x,
		const cpumat< REAL > &	y,
		cpumat< REAL > &	ret
	)

Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters

[in]	transx	Should x^T be used?
[in]	transy	Should y^T be used?
[in]	alpha	Scalar.
[in]	x	Left multiplicand.
[in]	y	Right multiplicand.
[out]	ret	The product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the BLAS function Xgemm().

Template Parameters

REAL	should be 'float' or 'double'.

◆ matmult() [2/6]

template<typename REAL >

void fml::linalg::matmult	(	const bool	transx,
		const bool	transy,
		const REAL	alpha,
		const gpumat< REAL > &	x,
		const gpumat< REAL > &	y,
		gpumat< REAL > &	ret
	)

Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters

[in]	transx	Should x^T be used?
[in]	transy	Should y^T be used?
[in]	alpha	Scalar.
[in]	x	Left multiplicand.
[in]	y	Right multiplicand.
[out]	ret	The product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the cuBLAS function cublasXgemm().

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ matmult() [3/6]

template<typename REAL >

mpimat<REAL> fml::linalg::matmult	(	const bool	transx,
		const bool	transy,
		const REAL	alpha,
		const mpimat< REAL > &	x,
		const mpimat< REAL > &	y
	)

Returns alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters

[in]	transx	Should x^T be used?
[in]	transy	Should y^T be used?
[in]	alpha	Scalar.
[in]	x	Left multiplicand.
[in]	y	Right multiplicand.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception. If the inputs are distributed on different grids, a runtime_exception is thrown.

Implementation Details
Uses the PBLAS function pXgemm().

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ matmult() [4/6]

template<typename REAL >

void fml::linalg::matmult	(	const bool	transx,
		const bool	transy,
		const REAL	alpha,
		const mpimat< REAL > &	x,
		const mpimat< REAL > &	y,
		mpimat< REAL > &	ret
	)

Computes ret = alpha*op(x)*op(y) where op(A) is A or A^T.

Parameters

[in]	transx	Should x^T be used?
[in]	transy	Should y^T be used?
[in]	alpha	Scalar.
[in]	x	Left multiplicand.
[in]	y	Right multiplicand.
[out]	ret	The product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception. If the inputs are distributed on different grids, a runtime_exception is thrown.

Implementation Details
Uses the PBLAS function pXgemm().

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ matmult() [5/6]

template<typename REAL >

void fml::linalg::matmult	(	const parmat_cpu< REAL > &	x,
		const cpumat< REAL > &	y,
		parmat_cpu< REAL > &	ret
	)

Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed.

Parameters

[in]	alpha	Scalar.
[in]	x	Left multiplicand.
[in]	y	Right multiplicand.
[out]	ret	The product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the BLAS function Xgemm().

Template Parameters

REAL	should be 'float' or 'double'.

◆ matmult() [6/6]

template<typename REAL >

void fml::linalg::matmult	(	const parmat_gpu< REAL > &	x,
		const gpumat< REAL > &	y,
		parmat_gpu< REAL > &	ret
	)

Computes the product of a distributed and a non-distributed matrix whose result is distributed, or the transpose of a distributed matrix with a distributed matrix whose result is non-distributed.

Parameters

[in]	alpha	Scalar.
[in]	x	Left multiplicand.
[in]	y	Right multiplicand.
[out]	ret	The product.

Exceptions
If x and y are inappropriately sized for a matrix product, the method will throw a 'runtime_error' exception.

Implementation Details
Uses the BLAS function Xgemm().

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_1() [1/3]

template<typename REAL >

REAL fml::linalg::norm_1 ( const cpumat< REAL > & x )

Computes the 1 matrix norm, the maximum absolute column sum.

Parameters

[in] x Input data matrix.

Returns: Returns the norm.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_1() [2/3]

template<typename REAL >

REAL fml::linalg::norm_1 ( const gpumat< REAL > & x )

Computes the 1 matrix norm, the maximum absolute column sum.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Memory Allocations
Allocates temporary storage to store the col sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_1() [3/3]

template<typename REAL >

REAL fml::linalg::norm_1 ( const mpimat< REAL > & x )

Computes the 1 matrix norm, the maximum absolute column sum.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Memory Allocations
Allocates temporary storage to store the col sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_2() [1/3]

template<typename REAL >

REAL fml::linalg::norm_2 ( cpumat< REAL > & x )

Computes the 2/spectral matrix norm.

Returns the largest singular value.

Parameters

[in,out] x Input data matrix. Values are overwritten.

Returns: Returns the norm.

Implementation Details
Uses linalg::svd().

Memory Allocations
Allocates temporary storage to compute the singular values.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_2() [2/3]

template<typename REAL >

REAL fml::linalg::norm_2 ( gpumat< REAL > & x )

Computes the 2/spectral matrix norm.

Returns the largest singular value.

Parameters

[in,out] x Input data matrix. Values are overwritten.

Returns: Returns the norm.

Implementation Details
Uses linalg::cpsvd().

Memory Allocations
Allocates temporary storage to compute the singular values.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_2() [3/3]

template<typename REAL >

REAL fml::linalg::norm_2 ( mpimat< REAL > & x )

Computes the 2/spectral matrix norm.

Returns the largest singular value.

Parameters

[in,out] x Input data matrix. Values are overwritten.

Returns: Returns the norm.

Memory Allocations
Allocates temporary storage to compute the singular values.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_F() [1/3]

template<typename REAL >

REAL fml::linalg::norm_F ( const cpumat< REAL > & x )

Computes the Frobenius/Euclidean matrix norm.

Parameters

[in] x Input data matrix.

Returns: Returns the norm.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_F() [2/3]

template<typename REAL >

REAL fml::linalg::norm_F ( const gpumat< REAL > & x )

Computes the Frobenius/Euclidean matrix norm.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_F() [3/3]

template<typename REAL >

REAL fml::linalg::norm_F ( const mpimat< REAL > & x )

Computes the Frobenius/Euclidean matrix norm.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_I() [1/3]

template<typename REAL >

REAL fml::linalg::norm_I ( const cpumat< REAL > & x )

Computes the infinity matrix norm, the maximum absolute row sum.

Parameters

[in] x Input data matrix.

Returns: Returns the norm.

Memory Allocations
Allocates temporary storage to store the row sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_I() [2/3]

template<typename REAL >

REAL fml::linalg::norm_I ( const gpumat< REAL > & x )

Computes the infinity matrix norm, the maximum absolute row sum.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Memory Allocations
Allocates temporary storage to store the row sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_I() [3/3]

template<typename REAL >

REAL fml::linalg::norm_I ( const mpimat< REAL > & x )

Computes the infinity matrix norm, the maximum absolute row sum.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Memory Allocations
Allocates temporary storage to store the row sums.

Exceptions
If an allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_M() [1/3]

template<typename REAL >

REAL fml::linalg::norm_M ( const cpumat< REAL > & x )

Computes the maximum modulus matrix norm.

Parameters

[in] x Input data matrix.

Returns: Returns the norm.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_M() [2/3]

template<typename REAL >

REAL fml::linalg::norm_M ( const gpumat< REAL > & x )

Computes the maximum modulus matrix norm.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Template Parameters

REAL	should be 'float' or 'double'.

◆ norm_M() [3/3]

template<typename REAL >

REAL fml::linalg::norm_M ( const mpimat< REAL > & x )

Computes the maximum modulus matrix norm.

Parameters

[in] x Input data matrix, replaced by its LU factorization.

Returns: Returns the norm.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr() [1/3]

template<typename REAL >

void fml::linalg::qr	(	const bool	pivot,
		cpumat< REAL > &	x,
		cpuvec< REAL > &	qraux
	)

Computes the QR decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact QR representation.

Parameters

[in]	pivot	Should the factorization use column pivoting?
[in,out]	x	Input data matrix. Values are overwritten.
[out]	qraux	Auxiliary data for compact QR.

Implementation Details
Uses the LAPACK function Xgeqp3() if pivoting and Xgeqrf() otherwise.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr() [2/3]

template<typename REAL >

void fml::linalg::qr	(	const bool	pivot,
		gpumat< REAL > &	x,
		gpuvec< REAL > &	qraux
	)

Computes the QR decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact QR representation.

Parameters

[in]	pivot	NOTE Pivoting does not yet work on GPU. Should the factorization use column pivoting?
[in,out]	x	Input data matrix. Values are overwritten.
[out]	qraux	Auxiliary data for compact QR.

Implementation Details
Uses the cuSOLVER function cusolverDnXgeqrf().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr() [3/3]

template<typename REAL >

void fml::linalg::qr	(	const bool	pivot,
		mpimat< REAL > &	x,
		cpuvec< REAL > &	qraux
	)

Computes the QR decomposition.

The factorization works mostly in-place by modifying the input data. After execution, the matrix will be the LAPACK-like compact QR representation.

Parameters

[in]	pivot	Should the factorization use column pivoting?
[in,out]	x	Input data matrix. Values are overwritten.
[out]	qraux	Auxiliary data for compact QR.

Implementation Details
Uses the ScaLAPACK function pXgeqpf() if pivoting and pXgeqrf() otherwise.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr_Q() [1/3]

template<typename REAL >

void fml::linalg::qr_Q	(	const cpumat< REAL > &	QR,
		const cpuvec< REAL > &	qraux,
		cpumat< REAL > &	Q,
		cpuvec< REAL > &	work
	)

Recover the Q matrix from a QR decomposition.

Parameters

[in]	QR	The compact QR factorization, as computed via `qr()`.
[in]	qraux	Auxiliary data for compact QR.
[out]	Q	The Q matrix.
[out]	work	Workspace array. Will be resized as necessary.

Implementation Details
Uses the LAPACK function Xorgqr().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr_Q() [2/3]

template<typename REAL >

void fml::linalg::qr_Q	(	const gpumat< REAL > &	QR,
		const gpuvec< REAL > &	qraux,
		gpumat< REAL > &	Q,
		gpuvec< REAL > &	work
	)

Recover the Q matrix from a QR decomposition.

Parameters

[in]	QR	The compact QR factorization, as computed via `qr()`.
[in]	qraux	Auxiliary data for compact QR.
[out]	Q	The Q matrix.
[out]	work	Workspace array. Will be resized as necessary.

Implementation Details
Uses the cuSOLVER function cusolverDnXorgqr().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr_Q() [3/3]

template<typename REAL >

void fml::linalg::qr_Q	(	const mpimat< REAL > &	QR,
		const cpuvec< REAL > &	qraux,
		mpimat< REAL > &	Q,
		cpuvec< REAL > &	work
	)

Recover the Q matrix from a QR decomposition.

Parameters

[in]	QR	The compact QR factorization, as computed via `qr()`.
[in]	qraux	Auxiliary data for compact QR.
[out]	Q	The Q matrix.
[out]	work	Workspace array. Will be resized as necessary.

Implementation Details
Uses the ScaLAPACK function pXormgr().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr_R() [1/3]

template<typename REAL >

void fml::linalg::qr_R	(	const cpumat< REAL > &	QR,
		cpumat< REAL > &	R
	)

Recover the R matrix from a QR decomposition.

Parameters

[in]	QR	The compact QR factorization, as computed via `qr()`.
[out]	R	The R matrix.

Implementation Details
Uses the LAPACK function Xlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr_R() [2/3]

template<typename REAL >

void fml::linalg::qr_R	(	const gpumat< REAL > &	QR,
		gpumat< REAL > &	R
	)

Recover the R matrix from a QR decomposition.

Parameters

[in]	QR	The compact QR factorization, as computed via `qr()`.
[out]	R	The R matrix.

Implementation Details
Uses a custom LAPACK-like lacpy() clone.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qr_R() [3/3]

template<typename REAL >

void fml::linalg::qr_R	(	const mpimat< REAL > &	QR,
		mpimat< REAL > &	R
	)

Recover the R matrix from a QR decomposition.

Parameters

[in]	QR	The compact QR factorization, as computed via `qr()`.
[out]	R	The R matrix.

Implementation Details
Uses the ScaLAPACK function pXlacpy().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qrsvd() [1/3]

template<typename REAL >

void fml::linalg::qrsvd	(	cpumat< REAL > &	x,
		cpuvec< REAL > &	s,
		cpumat< REAL > &	u,
		cpumat< REAL > &	vt
	)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses linalg::qr() or linalg::lq() (whichever is cheaper) to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::qr_Q() and linalg::qr_R() or linalg::lq_L() and linalg::lq_Q() are called.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qrsvd() [2/3]

template<typename REAL >

void fml::linalg::qrsvd	(	gpumat< REAL > &	x,
		gpuvec< REAL > &	s,
		gpumat< REAL > &	u,
		gpumat< REAL > &	vt
	)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses linalg::qr() to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::qr_Q() and linalg::qr_R(). Since cuSOLVER only offers QR, if m<n we operate on a transpose.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ qrsvd() [3/3]

template<typename REAL >

void fml::linalg::qrsvd	(	mpimat< REAL > &	x,
		cpuvec< REAL > &	s,
		mpimat< REAL > &	u,
		mpimat< REAL > &	vt
	)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses linalg::qr() or linalg::lq() (whichever is cheaper) to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::qr_Q() and linalg::qr_R() or linalg::lq_L() and linalg::lq_Q() are called.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ rsvd() [1/5]

template<typename REAL >

void fml::linalg::rsvd	(	const uint32_t	seed,
		const int	k,
		const int	q,
		cpumat< REAL > &	x,
		cpuvec< REAL > &	s
	)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ rsvd() [2/5]

template<typename REAL >

void fml::linalg::rsvd	(	const uint32_t	seed,
		const int	k,
		const int	q,
		gpumat< REAL > &	x,
		gpuvec< REAL > &	s
	)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ rsvd() [3/5]

template<typename REAL >

void fml::linalg::rsvd	(	const uint32_t	seed,
		const int	k,
		const int	q,
		mpimat< REAL > &	x,
		cpuvec< REAL > &	s
	)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ rsvd() [4/5]

template<typename REAL >

void fml::linalg::rsvd	(	const uint32_t	seed,
		const int	k,
		const int	q,
		parmat_cpu< REAL > &	x,
		cpuvec< REAL > &	s
	)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's using the TSQR implementation, and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ rsvd() [5/5]

template<typename REAL >

void fml::linalg::rsvd	(	const uint32_t	seed,
		const int	k,
		const int	q,
		parmat_gpu< REAL > &	x,
		gpuvec< REAL > &	s
	)

Computes the truncated singular value decomposition using the normal projections method of Halko et al. This method is only an approximation.

Parameters

[in,out]	x	Input data matrix.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses a series of QR's using the TSQR implementation, and matrix multiplications.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ solve() [1/3]

template<typename REAL >

void fml::linalg::solve	(	cpumat< REAL > &	x,
		cpuvec< REAL > &	y
	)

Solve a system of equations.

The input is replaced by its LU factorization.

Parameters

[in,out]	x	Input LHS. Should be square. Overwritten by LU.
[in,out]	y	Input RHS. Overwritten by solution.

Implementation Details
Uses the LAPACK functions Xgesv().

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square or if the RHS is incompatible with the LHS, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ solve() [2/3]

template<typename REAL >

void fml::linalg::solve	(	gpumat< REAL > &	x,
		gpuvec< REAL > &	y
	)

Solve a system of equations.

The input is replaced by its PLU factorization.

Parameters

[in,out]	x	Input LHS. Should be square. Overwritten by LU.
[in,out]	y	Input RHS. Overwritten by solution.

Implementation Details
Uses the cuSOLVER functions cusolverDnXgetrf() (LU) and cusolverDnXgetrs() (solve).

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square or if the RHS is incompatible with the LHS, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ solve() [3/3]

template<typename REAL >

void fml::linalg::solve	(	mpimat< REAL > &	x,
		mpimat< REAL > &	y
	)

Solve a system of equations.

The input is replaced by its LU factorization.

Parameters

[in,out]	x	Input LHS. Should be square. Overwritten by LU.
[in,out]	y	Input RHS. Overwritten by solution.

Implementation Details
Uses the ScaLAPACK functions pXgesv().

Memory Allocations
LU pivot data is allocated internally.

Exceptions
If the matrix is non-square or if the RHS is incompatible with the LHS, a runtime_error exception is thrown. If the inputs are distributed on different grids, a runtime_exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ svd() [1/2]

template<typename REAL >

void fml::linalg::svd	(	cpumat< REAL > &	x,
		cpuvec< REAL > &	s
	)

Computes the singular value decomposition.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses the LAPACK function Xgesvd().

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ svd() [2/2]

template<typename REAL >

void fml::linalg::svd	(	gpumat< REAL > &	x,
		gpuvec< REAL > &	s
	)

Computes the singular value decomposition.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singnular vectors.

Implementation Details
Uses the cuSOLVER function cusolverDnXgesvd(). Since cuSOLVER only supports the m>=n case, if m<n we operate on a transpose.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ tcrossprod() [1/3]

template<typename REAL >

void fml::linalg::tcrossprod	(	const REAL	alpha,
		const cpumat< REAL > &	x,
		cpumat< REAL > &	ret
	)

Computes lower triangle of alpha*x*x^T.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the BLAS function Xsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ tcrossprod() [2/3]

template<typename REAL >

void fml::linalg::tcrossprod	(	const REAL	alpha,
		const gpumat< REAL > &	x,
		gpumat< REAL > &	ret
	)

Computes lower triangle of alpha*x*x^T.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the cuBLAS function cublasXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ tcrossprod() [3/3]

template<typename REAL >

void fml::linalg::tcrossprod	(	const REAL	alpha,
		const mpimat< REAL > &	x,
		mpimat< REAL > &	ret
	)

Computes lower triangle of alpha*x*x^T.

Parameters

[in]	alpha	Scalar.
[in]	x	Input data matrix.
[out]	ret	The product.

Implementation Details
Uses the PBLAS function pXsyrk().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ trace() [1/3]

template<typename REAL >

REAL fml::linalg::trace ( const cpumat< REAL > & x )

Computes the trace, i.e. the sum of the diagonal.

Parameters

[in] x Input data matrix.

Template Parameters

REAL	should be 'float' or 'double'.

◆ trace() [2/3]

template<typename REAL >

REAL fml::linalg::trace ( const gpumat< REAL > & x )

Computes the trace, i.e. the sum of the diagonal.

Parameters

[in] x Input data matrix.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ trace() [3/3]

template<typename REAL >

REAL fml::linalg::trace ( const mpimat< REAL > & x )

Computes the trace, i.e. the sum of the diagonal.

Parameters

[in] x Input data matrix.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

◆ trinv() [1/3]

template<typename REAL >

void fml::linalg::trinv	(	const bool	upper,
		const bool	unit_diag,
		cpumat< REAL > &	x
	)

Compute the matrix inverse of a triangular matrix.

The input is replaced by its inverse.

Parameters

[in]	upper	Should the upper triangle be used? Otherwise the lower triangle will be used.
[in]	unit_diag	Is the input matrix unit diagonal?
[in,out]	x	Input data matrix. Should be square.

Implementation Details
Uses the LAPACK functions Xtrtri().

Exceptions
If the matrix is non-square, a runtime_error exception is thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ trinv() [2/3]

template<typename REAL >

void fml::linalg::trinv	(	const bool	upper,
		const bool	unit_diag,
		gpumat< REAL > &	x
	)

Compute the matrix inverse of a triangular matrix.

The input is replaced by its inverse.

Parameters

[in]	upper	Should the upper triangle be used? Otherwise the lower triangle will be used.
[in]	unit_diag	Is the input matrix unit diagonal?
[in,out]	x	Input data matrix. Should be square.

Implementation Details
Uses the cuBLAS functions cublasXtrsm().

Memory Allocations
The inverse is computed in a copy.

Exceptions
If the matrix is non-square, a runtime_error exception is thrown. If an allocation fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ trinv() [3/3]

template<typename REAL >

void fml::linalg::trinv	(	const bool	upper,
		const bool	unit_diag,
		mpimat< REAL > &	x
	)

Compute the matrix inverse of a triangular matrix.

The input is replaced by its inverse.

Parameters

[in]	upper	Should the upper triangle be used? Otherwise the lower triangle will be used.
[in]	unit_diag	Is the input matrix unit diagonal?
[in,out]	x	Input data matrix. Should be square.

Implementation Details
Uses the LAPACK functions Xtrtri().

Exceptions
If the matrix is non-square, a runtime_error exception is thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ tssvd() [1/2]

template<typename REAL >

void fml::linalg::tssvd	(	parmat_cpu< REAL > &	x,
		cpuvec< REAL > &	s
	)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses the TSQR implementation to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::trinv() and a series of matrix multiplications are used.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ tssvd() [2/2]

template<typename REAL >

void fml::linalg::tssvd	(	parmat_gpu< REAL > &	x,
		gpuvec< REAL > &	s
	)

Computes the singular value decomposition by first reducing the rectangular matrix to a square matrix using an orthogonal factorization. If the matrix is square, we skip the orthogonal factorization.

If the matrix has more rows than columns, the operation works by computing a QR and then taking the SVD of the R matrix. The left singular vectors are Q times the left singular vectors from R's SVD, and the singular value and the right singular vectors are those from R's SVD. Likewise, if the matrix has more columns than rows, w take the LQ and then the SVD of the L matrix. The left singular vectors are Q times the right singular vectors from L's SVD, and the singular value and the left singular vectors are those from L's SVD.

Parameters

[in,out]	x	Input data matrix. Values are overwritten.
[out]	s	Vector of singular values.
[out]	u	Matrix of left singular vectors.
[out]	vt	Matrix of (transposed) right singular vectors.

Implementation Details
Uses the TSQR implementation to reduce the matrix to a square matrix, and then calls linalg::svd(). If computing the vectors, linalg::trinv() and a series of matrix multiplications are used.

Memory Allocations
If the any outputs are inappropriately sized, they will automatically be re-allocated. Additionally, some temporary work storage is needed.

Exceptions
If a (re-)allocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ xpose() [1/3]

template<typename REAL >

void fml::linalg::xpose	(	const cpumat< REAL > &	x,
		cpumat< REAL > &	tx
	)

Computes the transpose out-of-place (i.e. in a copy).

Parameters

[in]	x	Input data matrix.
[out]	tx	The transpose.

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be 'float' or 'double'.

◆ xpose() [2/3]

template<typename REAL >

void fml::linalg::xpose	(	const gpumat< REAL > &	x,
		gpumat< REAL > &	tx
	)

Computes the transpose out-of-place (i.e. in a copy).

Parameters

[in]	x	Input data matrix.
[out]	tx	The transpose.

Implementation Details
Uses the cuBLAS function cublasXgeam().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Template Parameters

REAL	should be '__half', 'float', or 'double'.

◆ xpose() [3/3]

template<typename REAL >

void fml::linalg::xpose	(	const mpimat< REAL > &	x,
		mpimat< REAL > &	tx
	)

Computes the transpose out-of-place (i.e. in a copy).

Parameters

[in]	x	Input data matrix.
[out]	tx	The transpose.

Implementation Details
Uses the PBLAS function pXtran().

Memory Allocations
If the output dimension is inappropriately sized, it will automatically be re-allocated.

Exceptions
If a reallocation is triggered and fails, a bad_alloc exception will be thrown.

Communication Details
The method will communicate across all processes in the BLACS grid.

Template Parameters

REAL	should be 'float' or 'double'.

Functions

Detailed Description

Function Documentation

◆ add() [1/3]

◆ add() [2/3]

◆ add() [3/3]

◆ chol() [1/3]

◆ chol() [2/3]

◆ chol() [3/3]

◆ cond_1() [1/3]

◆ cond_1() [2/3]

◆ cond_1() [3/3]

◆ cond_2() [1/3]

◆ cond_2() [2/3]

◆ cond_2() [3/3]

◆ cond_I() [1/3]

◆ cond_I() [2/3]

◆ cond_I() [3/3]

◆ cpsvd() [1/5]

◆ cpsvd() [2/5]

◆ cpsvd() [3/5]

◆ cpsvd() [4/5]

◆ cpsvd() [5/5]

◆ crossprod() [1/5]

◆ crossprod() [2/5]

◆ crossprod() [3/5]

◆ crossprod() [4/5]

◆ crossprod() [5/5]

◆ det() [1/3]

◆ det() [2/3]

◆ det() [3/3]

◆ dot() [1/2]

◆ dot() [2/2]

◆ eigen_sym() [1/3]

◆ eigen_sym() [2/3]

◆ eigen_sym() [3/3]

◆ invert() [1/3]

◆ invert() [2/3]

◆ invert() [3/3]

◆ lq() [1/3]

◆ lq() [2/3]

◆ lq() [3/3]

◆ lq_L() [1/3]

◆ lq_L() [2/3]

◆ lq_L() [3/3]

◆ lq_Q() [1/3]

◆ lq_Q() [2/3]

◆ lq_Q() [3/3]

◆ lu() [1/3]

◆ lu() [2/3]

◆ lu() [3/3]

◆ matmult() [1/6]

◆ matmult() [2/6]

◆ matmult() [3/6]

◆ matmult() [4/6]

◆ matmult() [5/6]

◆ matmult() [6/6]

◆ norm_1() [1/3]

◆ norm_1() [2/3]

◆ norm_1() [3/3]

◆ norm_2() [1/3]

◆ norm_2() [2/3]

◆ norm_2() [3/3]

◆ norm_F() [1/3]

◆ norm_F() [2/3]

◆ norm_F() [3/3]

◆ norm_I() [1/3]

◆ norm_I() [2/3]

◆ norm_I() [3/3]

◆ norm_M() [1/3]

◆ norm_M() [2/3]

◆ norm_M() [3/3]

◆ qr() [1/3]

◆ qr() [2/3]

◆ qr() [3/3]

◆ qr_Q() [1/3]

◆ qr_Q() [2/3]

◆ qr_Q() [3/3]

◆ qr_R() [1/3]

◆ qr_R() [2/3]