Sparse Level 3 Functions#
This module holds all sparse level 3 routines.
The sparse level 3 routines describe operations between a matrix in sparse format and multiple vectors in dense format that can also be seen as a dense matrix.
hipsparseXbsrmm()#
-
hipsparseStatus_t hipsparseSbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const float *alpha, const hipsparseMatDescr_t descrA, const float *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const float *B, int ldb, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const double *alpha, const hipsparseMatDescr_t descrA, const double *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const double *B, int ldb, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Sparse matrix dense matrix multiplication using BSR storage format.
hipsparseXbsrmm
multiplies the scalar \(\alpha\) with a sparse \(mb \times kb\) matrix \(A\), defined in BSR storage format, and the dense \(k \times n\) matrix \(B\) (where \(k = block\_dim \times kb\)) and adds the result to the dense \(m \times n\) matrix \(C\) (where \(m = block\_dim \times mb\)) that is multiplied by the scalar \(\beta\), such that\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ \end{array} \right. \end{split}\]and\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ \end{array} \right. \end{split}\]- Example
// hipSPARSE handle hipsparseHandle_t handle; hipsparseCreate(&handle); // 1 2 0 3 0 0 // A = 0 4 5 0 0 0 // 0 0 0 7 8 0 // 0 0 1 2 4 1 int blockDim = 2; int mb = 2; int kb = 3; int nnzb = 4; hipsparseDirection_t dir = HIPSPARSE_DIRECTION_ROW; int hbsrRowPtr[2 + 1] = {0, 2, 4}; int hbsrColInd[4] = {0, 1, 1, 2}; float hbsrVal[4 * 2 * 2] = {1, 2, 0, 4, 0, 3, 5, 0, 0, 7, 1, 2, 8, 0, 4, 1}; // Set dimension n of B int n = 3; int m = mb * blockDim; int k = kb * blockDim; // Allocate and generate dense matrix B (k x n) float hB[6 * 3] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f}; int* dbsrRowPtr = NULL; int* dbsrColInd = NULL; float* dbsrVal = NULL; hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1)); hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb); hipMalloc((void**)&dbsrVal, sizeof(float) * nnzb * blockDim * blockDim); hipMemcpy(dbsrRowPtr, hbsrRowPtr, sizeof(int) * (mb + 1), hipMemcpyHostToDevice); hipMemcpy(dbsrColInd, hbsrColInd, sizeof(int) * nnzb, hipMemcpyHostToDevice); hipMemcpy(dbsrVal, hbsrVal, sizeof(float) * nnzb * blockDim * blockDim, hipMemcpyHostToDevice); // Copy B to the device float* dB; hipMalloc((void**)&dB, sizeof(float) * k * n); hipMemcpy(dB, hB, sizeof(float) * k * n, hipMemcpyHostToDevice); // alpha and beta float alpha = 1.0f; float beta = 0.0f; // Allocate memory for the resulting matrix C float* dC; hipMalloc((void**)&dC, sizeof(float) * m * n); // Matrix descriptor hipsparseMatDescr_t descr; hipsparseCreateMatDescr(&descr); // Perform the matrix multiplication hipsparseSbsrmm(handle, dir, HIPSPARSE_OPERATION_NON_TRANSPOSE, HIPSPARSE_OPERATION_NON_TRANSPOSE, mb, n, kb, nnzb, &alpha, descr, dbsrVal, dbsrRowPtr, dbsrColInd, blockDim, dB, k, &beta, dC, m); // Copy results to host float hC[6 * 3]; hipMemcpy(hC, dC, sizeof(float) * m * n, hipMemcpyDeviceToHost); hipFree(dbsrRowPtr); hipFree(dbsrColInd); hipFree(dbsrVal); hipFree(dB); hipFree(dC);
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only
transA
== HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.
hipsparseXcsrmm()#
-
hipsparseStatus_t hipsparseScsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Sparse matrix dense matrix multiplication using CSR storage format.
hipsparseXcsrmm
multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the dense \(k \times n\) matrix \(B\) and adds the result to the dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that\[ C := \alpha \cdot op(A) \cdot B + \beta \cdot C, \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]for(i = 0; i < ldc; ++i) { for(j = 0; j < n; ++j) { C[i][j] = beta * C[i][j]; for(k = csrRowPtr[i]; k < csrRowPtr[i + 1]; ++k) { C[i][j] += alpha * csrVal[k] * B[csrColInd[k]][j]; } } }
- Example
// hipSPARSE handle hipsparseHandle_t handle; hipsparseCreate(&handle); // 1 2 0 3 0 0 // A = 0 4 5 0 0 0 // 0 0 0 7 8 0 // 0 0 1 2 4 1 int m = 4; int k = 6; int nnz = 11; hipsparseDirection_t dir = HIPSPARSE_DIRECTION_ROW; int hcsrRowPtr[4 + 1] = {0, 3, 5, 7, 11}; int hcsrColInd[11] = {0, 1, 3, 1, 2, 3, 4, 2, 3, 4, 5}; float hcsrVal[11] = {1, 2, 3, 4, 5, 7, 8, 1, 2, 4, 1}; // Set dimension n of B int n = 3; // Allocate and generate dense matrix B (k x n) float hB[6 * 3] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f}; int* dcsrRowPtr = NULL; int* dcsrColInd = NULL; float* dcsrVal = NULL; hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz); hipMalloc((void**)&dcsrVal, sizeof(float) * nnz); hipMemcpy(dcsrRowPtr, hcsrRowPtr, sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsrColInd, hcsrColInd, sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dcsrVal, hcsrVal, sizeof(float) * nnz, hipMemcpyHostToDevice); // Copy B to the device float* dB; hipMalloc((void**)&dB, sizeof(float) * k * n); hipMemcpy(dB, hB, sizeof(float) * k * n, hipMemcpyHostToDevice); // alpha and beta float alpha = 1.0f; float beta = 0.0f; // Allocate memory for the resulting matrix C float* dC; hipMalloc((void**)&dC, sizeof(float) * m * n); // Matrix descriptor hipsparseMatDescr_t descr; hipsparseCreateMatDescr(&descr); // Perform the matrix multiplication hipsparseScsrmm(handle, HIPSPARSE_OPERATION_NON_TRANSPOSE, m, n, k, nnz, &alpha, descr, dcsrVal, dcsrRowPtr, dcsrColInd, dB, k, &beta, dC, m); // Copy results to host float hC[6 * 3]; hipMemcpy(hC, dC, sizeof(float) * m * n, hipMemcpyDeviceToHost); hipFree(dcsrRowPtr); hipFree(dcsrColInd); hipFree(dcsrVal); hipFree(dB); hipFree(dC);
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
hipsparseXcsrmm2()#
-
hipsparseStatus_t hipsparseScsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Sparse matrix dense matrix multiplication using CSR storage format.
hipsparseXcsrmm2
multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the dense \(k \times n\) matrix \(B\) and adds the result to the dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]and\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]for(i = 0; i < ldc; ++i) { for(j = 0; j < n; ++j) { C[i][j] = beta * C[i][j]; for(k = csrRowPtr[i]; k < csrRowPtr[i + 1]; ++k) { C[i][j] += alpha * csrVal[k] * B[csrColInd[k]][j]; } } }
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
hipsparseXbsrsm2_zeroPivot()#
-
hipsparseStatus_t hipsparseXbsrsm2_zeroPivot(hipsparseHandle_t handle, bsrsm2Info_t info, int *position)#
Sparse triangular system solve using BSR storage format.
hipsparseXbsrsm2_zeroPivot
returns HIPSPARSE_STATUS_ZERO_PIVOT, if either a structural or numerical zero has been found during hipsparseXbsrsm2_analysis() or hipsparseXbsrsm2_solve() computation. The first zero pivot \(j\) at \(A_{j,j}\) is stored inposition
, using same index base as the BSR matrix.position
can be in host or device memory. If no zero pivot has been found,position
is set to -1 and HIPSPARSE_STATUS_SUCCESS is returned instead.Note
hipsparseXbsrsm2_zeroPivot
is a blocking function. It might influence performance negatively.
hipsparseXbsrsm2_bufferSize()#
-
hipsparseStatus_t hipsparseSbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
Sparse triangular system solve using BSR storage format.
hipsparseXbsrsm2_buffer_size
returns the size of the temporary storage buffer in bytes that is required by hipsparseXbsrsm2_analysis() and hipsparseXbsrsm2_solve(). The temporary storage buffer must be allocated by the user.
hipsparseXbsrsm2_analysis()#
-
hipsparseStatus_t hipsparseSbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
Sparse triangular system solve using BSR storage format.
hipsparseXbsrsm2_analysis
performs the analysis step for hipsparseXbsrsm2_solve().Note
If the matrix sparsity pattern changes, the gathered information will become invalid.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
hipsparseXbsrsm2_solve()#
-
hipsparseStatus_t hipsparseSbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const float *alpha, const hipsparseMatDescr_t descrA, const float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const float *B, int ldb, float *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const double *alpha, const hipsparseMatDescr_t descrA, const double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const double *B, int ldb, double *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const hipComplex *B, int ldb, hipComplex *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const hipDoubleComplex *B, int ldb, hipDoubleComplex *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
Sparse triangular system solve using BSR storage format.
hipsparseXbsrsm2_solve
solves a sparse triangular linear system of a sparse \(m \times m\) matrix, defined in BSR storage format, a dense solution matrix \(X\) and the right-hand side matrix \(B\) that is multiplied by \(\alpha\), such that\[ op(A) \cdot op(X) = \alpha \cdot op(B), \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\],\[\begin{split} op(X) = \left\{ \begin{array}{ll} X, & \text{if transX == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ X^T, & \text{if transX == HIPSPARSE_OPERATION_TRANSPOSE} \\ X^H, & \text{if transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]hipsparseXbsrsm2_solve
requires a user allocated temporary buffer. Its size is returned by hipsparseXbsrsm2_bufferSize(). Furthermore, analysis meta data is required. It can be obtained by hipsparseXbsrsm2_analysis().hipsparseXbsrsm2_solve
reports the first zero pivot (either numerical or structural zero). The zero pivot status can be checked calling hipsparseXbsrsm2_zeroPivot(). If hipsparseDiagType_t == HIPSPARSE_DIAG_TYPE_UNIT, no zero pivot will be reported, even if \(A_{j,j} = 0\) for some \(j\).- Example
// hipSPARSE handle hipsparseHandle_t handle; hipsparseCreate(&handle); // A = ( 1.0 0.0 0.0 0.0 ) // ( 2.0 3.0 0.0 0.0 ) // ( 4.0 5.0 6.0 0.0 ) // ( 7.0 0.0 8.0 9.0 ) // // with bsr_dim = 2 // // ------------------- // = | 1.0 0.0 | 0.0 0.0 | // | 2.0 3.0 | 0.0 0.0 | // ------------------- // | 4.0 5.0 | 6.0 0.0 | // | 7.0 0.0 | 8.0 9.0 | // ------------------- // Number of rows and columns int m = 4; // Number of block rows and block columns int mb = 2; int nb = 2; // BSR block dimension int bsr_dim = 2; // Number of right-hand-sides int nrhs = 4; // Number of non-zero blocks int nnzb = 3; // BSR row pointers int hbsrRowPtr[3] = {0, 1, 3}; // BSR column indices int hbsrColInd[3] = {0, 0, 1}; // BSR values double hbsrVal[12] = {1.0, 2.0, 0.0, 3.0, 4.0, 7.0, 5.0, 0.0, 6.0, 8.0, 0.0, 9.0}; // Storage scheme of the BSR blocks hipsparseDirection_t dir = HIPSPARSE_DIRECTION_COLUMN; // Transposition of the matrix and rhs matrix hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOperation_t transX = HIPSPARSE_OPERATION_NON_TRANSPOSE; // Solve policy hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL; // Scalar alpha and beta double alpha = 1.0; // rhs and solution matrix int ldb = nb * bsr_dim; int ldx = mb * bsr_dim; double hB[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; double hX[16]; // Offload data to device int* dbsrRowPtr; int* dbsrColInd; double* dbsrVal; double* dB; double* dX; hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1)); hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb); hipMalloc((void**)&dbsrVal, sizeof(double) * nnzb * bsr_dim * bsr_dim); hipMalloc((void**)&dB, sizeof(double) * nb * bsr_dim * nrhs); hipMalloc((void**)&dX, sizeof(double) * mb * bsr_dim * nrhs); hipMemcpy(dbsrRowPtr, hbsrRowPtr, sizeof(int) * (mb + 1), hipMemcpyHostToDevice); hipMemcpy(dbsrColInd, hbsrColInd, sizeof(int) * nnzb, hipMemcpyHostToDevice); hipMemcpy(dbsrVal, hbsrVal, sizeof(double) * nnzb * bsr_dim * bsr_dim, hipMemcpyHostToDevice); hipMemcpy(dB, hB, sizeof(double) * nb * bsr_dim * nrhs, hipMemcpyHostToDevice); // Matrix descriptor hipsparseMatDescr_t descr; hipsparseCreateMatDescr(&descr); // Matrix fill mode hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER); // Matrix diagonal type hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT); // Matrix info structure bsrsm2Info_t info; hipsparseCreateBsrsm2Info(&info); // Obtain required buffer size int buffer_size; hipsparseDbsrsm2_bufferSize(handle, dir, transA, transX, mb, nrhs, nnzb, descr, dbsrVal, dbsrRowPtr, dbsrColInd, bsr_dim, info, &buffer_size); // Allocate temporary buffer void* dbuffer; hipMalloc(&dbuffer, buffer_size); // Perform analysis step hipsparseDbsrsm2_analysis(handle, dir, transA, transX, mb, nrhs, nnzb, descr, dbsrVal, dbsrRowPtr, dbsrColInd, bsr_dim, info, solve_policy, dbuffer); // Call dbsrsm to perform lower triangular solve LX = B hipsparseDbsrsm2_solve(handle, dir, transA, transX, mb, nrhs, nnzb, &alpha, descr, dbsrVal, dbsrRowPtr, dbsrColInd, bsr_dim, info, dB, ldb, dX, ldx, solve_policy, dbuffer); // Check for zero pivots int pivot; hipsparseStatus_t status = hipsparseXbsrsm2_zeroPivot(handle, info, &pivot); if(status == HIPSPARSE_STATUS_ZERO_PIVOT) { std::cout << "Found zero pivot in matrix row " << pivot << std::endl; } // Copy result back to host hipMemcpy(hX, dX, sizeof(double) * mb * bsr_dim * nrhs, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroyBsrsm2Info(info); hipsparseDestroyMatDescr(descr); hipsparseDestroy(handle); // Clear device memory hipFree(dbsrRowPtr); hipFree(dbsrColInd); hipFree(dbsrVal); hipFree(dB); hipFree(dX); hipFree(dbuffer);
Note
The sparse BSR matrix has to be sorted.
Note
Operation type of B and X must match, if \(op(B)=B, op(X)=X\).
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only
transA
!= HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE andtransX
!= HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE is supported.
hipsparseXcsrsm2_zeroPivot()#
-
hipsparseStatus_t hipsparseXcsrsm2_zeroPivot(hipsparseHandle_t handle, csrsm2Info_t info, int *position)#
Sparse triangular system solve using CSR storage format.
hipsparseXcsrsm2_zeroPivot
returns HIPSPARSE_STATUS_ZERO_PIVOT, if either a structural or numerical zero has been found during hipsparseXcsrsm2_analysis() or hipsparseXcsrsm2_solve() computation. The first zero pivot \(j\) at \(A_{j,j}\) is stored inposition
, using same index base as the CSR matrix.position
can be in host or device memory. If no zero pivot has been found,position
is set to -1 and HIPSPARSE_STATUS_SUCCESS is returned instead.Note
hipsparseXcsrsm2_zeroPivot
is a blocking function. It might influence performance negatively.
hipsparseXcsrsm2_bufferSizeExt()#
-
hipsparseStatus_t hipsparseScsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
Sparse triangular system solve using CSR storage format.
hipsparseXcsrsm2_bufferSizeExt
returns the size of the temporary storage buffer in bytes that is required by hipsparseXcsrsm2_analysis() and hipsparseXcsrsm2_solve(). The temporary storage buffer must be allocated by the user.
hipsparseXcsrsm2_analysis()#
-
hipsparseStatus_t hipsparseScsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
Sparse triangular system solve using CSR storage format.
hipsparseXcsrsm2_analysis
performs the analysis step for hipsparseXcsrsm2_solve().Note
If the matrix sparsity pattern changes, the gathered information will become invalid.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
hipsparseXcsrsm2_solve()#
-
hipsparseStatus_t hipsparseScsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
Sparse triangular system solve using CSR storage format.
hipsparseXcsrsm2_solve
solves a sparse triangular linear system of a sparse \(m \times m\) matrix, defined in CSR storage format, a dense solution matrix \(X\) and the right-hand side matrix \(B\) that is multiplied by \(\alpha\), such that\[ op(A) \cdot op(X) = \alpha \cdot op(B), \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\],\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]and\[\begin{split} op(X) = \left\{ \begin{array}{ll} X, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ X^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ X^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]hipsparseXcsrsm2_solve
requires a user allocated temporary buffer. Its size is returned by hipsparseXcsrsm2_bufferSizeExt(). Furthermore, analysis meta data is required. It can be obtained by hipsparseXcsrsm2_analysis().hipsparseXcsrsm2_solve
reports the first zero pivot (either numerical or structural zero). The zero pivot status can be checked calling hipsparseXcsrsm2_zeroPivot(). If hipsparseDiagType_t == HIPSPARSE_DIAG_TYPE_UNIT, no zero pivot will be reported, even if \(A_{j,j} = 0\) for some \(j\).- Example
// hipSPARSE handle hipsparseHandle_t handle; hipsparseCreate(&handle); // A = ( 1.0 0.0 0.0 0.0 ) // ( 2.0 3.0 0.0 0.0 ) // ( 4.0 5.0 6.0 0.0 ) // ( 7.0 0.0 8.0 9.0 ) // Number of rows and columns int m = 4; int n = 4; // Number of right-hand-sides int nrhs = 4; // Number of non-zeros int nnz = 9; // CSR row pointers int hcsrRowPtr[5] = {0, 1, 3, 6, 9}; // CSR column indices int hcsrColInd[9] = {0, 0, 1, 0, 1, 2, 0, 2, 3}; // CSR values double hcsrVal[9] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0}; // Transposition of the matrix and rhs matrix hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE; // Solve policy hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL; // Scalar alpha and beta double alpha = 1.0; // rhs and solution matrix int ldb = n; double hB[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // Offload data to device int* dcsrRowPtr; int* dcsrColInd; double* dcsrVal; double* dB; hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)); hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz); hipMalloc((void**)&dcsrVal, sizeof(double) * nnz); hipMalloc((void**)&dB, sizeof(double) * n * nrhs); hipMemcpy(dcsrRowPtr, hcsrRowPtr, sizeof(int) * (m + 1), hipMemcpyHostToDevice); hipMemcpy(dcsrColInd, hcsrColInd, sizeof(int) * nnz, hipMemcpyHostToDevice); hipMemcpy(dcsrVal, hcsrVal, sizeof(double) * nnz, hipMemcpyHostToDevice); hipMemcpy(dB, hB, sizeof(double) * n * nrhs, hipMemcpyHostToDevice); // Matrix descriptor hipsparseMatDescr_t descr; hipsparseCreateMatDescr(&descr); // Matrix fill mode hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER); // Matrix diagonal type hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT); // Matrix info structure csrsm2Info_t info; hipsparseCreateCsrsm2Info(&info); // Obtain required buffer size size_t buffer_size; hipsparseDcsrsm2_bufferSizeExt(handle, 0, transA, transB, m, nrhs, nnz, &alpha, descr, dcsrVal, dcsrRowPtr, dcsrColInd, dB, ldb, info, solve_policy, &buffer_size); // Allocate temporary buffer void* dbuffer; hipMalloc(&dbuffer, buffer_size); // Perform analysis step hipsparseDcsrsm2_analysis(handle, 0, transA, transB, m, nrhs, nnz, &alpha, descr, dcsrVal, dcsrRowPtr, dcsrColInd, dB, ldb, info, solve_policy, dbuffer); // Call dcsrsm to perform lower triangular solve LB = B hipsparseDcsrsm2_solve(handle, 0, transA, transB, m, nrhs, nnz, &alpha, descr, dcsrVal, dcsrRowPtr, dcsrColInd, dB, ldb, info, solve_policy, dbuffer); // Check for zero pivots int pivot; hipsparseStatus_t status = hipsparseXcsrsm2_zeroPivot(handle, info, &pivot); if(status == HIPSPARSE_STATUS_ZERO_PIVOT) { std::cout << "Found zero pivot in matrix row " << pivot << std::endl; } // Copy result back to host hipMemcpy(hB, dB, sizeof(double) * m * nrhs, hipMemcpyDeviceToHost); // Clear hipSPARSE hipsparseDestroyCsrsm2Info(info); hipsparseDestroyMatDescr(descr); hipsparseDestroy(handle); // Clear device memory hipFree(dcsrRowPtr); hipFree(dcsrColInd); hipFree(dcsrVal); hipFree(dB); hipFree(dbuffer);
Note
The sparse CSR matrix has to be sorted. This can be achieved by calling hipsparseXcsrsort().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only
transA
!= HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE andtransB
!= HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE is supported.
hipsparseXgemmi()#
-
hipsparseStatus_t hipsparseSgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const float *alpha, const float *A, int lda, const float *cscValB, const int *cscColPtrB, const int *cscRowIndB, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const double *alpha, const double *A, int lda, const double *cscValB, const int *cscColPtrB, const int *cscRowIndB, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const hipComplex *alpha, const hipComplex *A, int lda, const hipComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Dense matrix sparse matrix multiplication using CSC storage format.
hipsparseXgemmi
multiplies the scalar \(\alpha\) with a dense \(m \times k\) matrix \(A\) and the sparse \(k \times n\) matrix \(B\), defined in CSC storage format and adds the result to the dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that\[ C := \alpha \cdot A \cdot B + \beta \cdot C \]- Example
// A, B, and C are m×k, k×n, and m×n int m = 3, n = 5, k = 4; int lda = m, ldc = m; int nnz_A = m * k, nnz_B = 10, nnz_C = m * n; // alpha and beta float alpha = 0.5f; float beta = 0.25f; std::vector<int> hcscColPtr = {0, 2, 5, 7, 8, 10}; std::vector<int> hcscRowInd = {0, 2, 0, 1, 3, 1, 3, 2, 0, 2}; std::vector<float> hcsc_val = {1, 6, 2, 4, 9, 5, 2, 7, 3, 8}; std::vector<float> hA(nnz_A, 1.0f); std::vector<float> hC(nnz_C, 1.0f); int *dcscColPtr; int *dcscRowInd; float *dcsc_val; hipMalloc((void**)&dcscColPtr, sizeof(int) * (n + 1)); hipMalloc((void**)&dcscRowInd, sizeof(int) * nnz_B); hipMalloc((void**)&dcsc_val, sizeof(float) * nnz_B); hipMemcpy(dcscColPtr, hcscColPtr.data(), sizeof(int) * (n + 1), hipMemcpyHostToDevice); hipMemcpy(dcscRowInd, hcscRowInd.data(), sizeof(int) * nnz_B, hipMemcpyHostToDevice); hipMemcpy(dcsc_val, hcsc_val.data(), sizeof(float) * nnz_B, hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); // Allocate memory for the matrix A float* dA; hipMalloc((void**)&dA, sizeof(float) * nnz_A); hipMemcpy(dA, hA.data(), sizeof(float) * nnz_A, hipMemcpyHostToDevice); // Allocate memory for the resulting matrix C float* dC; hipMalloc((void**)&dC, sizeof(float) * nnz_C); hipMemcpy(dC, hC.data(), sizeof(float) * nnz_C, hipMemcpyHostToDevice); // Perform operation hipsparseSgemmi(handle, m, n, k, nnz_B, &alpha, dA, lda, dcsc_val, dcscColPtr, dcscRowInd, &beta, dC, ldc); // Copy device to host hipMemcpy(hC.data(), dC, sizeof(float) * nnz_C, hipMemcpyDeviceToHost); // Destroy matrix descriptors and handles hipsparseDestroy(handle); hipFree(dcscColPtr); hipFree(dcscRowInd); hipFree(dcsc_val); hipFree(dA); hipFree(dC);
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.