Sparse Level 3 Functions#

This module holds all sparse level 3 routines.

The sparse level 3 routines describe operations between a matrix in sparse format and multiple vectors in dense format that can also be seen as a dense matrix.

hipsparseXbsrmm()#

hipsparseStatus_t hipsparseSbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const float *alpha, const hipsparseMatDescr_t descrA, const float *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const float *B, int ldb, const float *beta, float *C, int ldc)#
hipsparseStatus_t hipsparseDbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const double *alpha, const hipsparseMatDescr_t descrA, const double *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const double *B, int ldb, const double *beta, double *C, int ldc)#
hipsparseStatus_t hipsparseCbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
hipsparseStatus_t hipsparseZbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#

Sparse matrix dense matrix multiplication using BSR storage format.

hipsparseXbsrmm multiplies the scalar \(\alpha\) with a sparse \(mb \times kb\) matrix \(A\), defined in BSR storage format, and the dense \(k \times n\) matrix \(B\) (where \(k = blockDim \times kb\)) and adds the result to the dense \(m \times n\) matrix \(C\) (where \(m = blockDim \times mb\)) that is multiplied by the scalar \(\beta\), such that

\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ \end{array} \right. \end{split}\]
and
\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ \end{array} \right. \end{split}\]

Example
// Example: C = alpha * A * B + beta * C with A stored in BSR format.

// hipSPARSE handle
hipsparseHandle_t handle;
hipsparseCreate(&handle);

//     1 2 0 3 0 0
// A = 0 4 5 0 0 0
//     0 0 0 7 8 0
//     0 0 1 2 4 1

// A is split into 2 x 2 blocks: mb block rows, kb block columns,
// nnzb blocks that contain at least one non-zero entry
int blockDim = 2;
int mb   = 2;
int kb   = 3;
int nnzb = 4;

// Values inside each block are listed row by row
hipsparseDirection_t dir = HIPSPARSE_DIRECTION_ROW;

int hbsrRowPtr[2 + 1]   = {0, 2, 4};
int hbsrColInd[4]       = {0, 1, 1, 2};
float hbsrVal[4 * 2 * 2] = {1, 2, 0, 4, 0, 3, 5, 0, 0, 7, 1, 2, 8, 0, 4, 1};

// Set dimension n of B
int n = 3;
int m = mb * blockDim;
int k = kb * blockDim;

// Allocate and generate dense matrix B (k x n)
float hB[6 * 3] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
                   11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f};

// Copy the BSR matrix to the device
int* dbsrRowPtr = NULL;
int* dbsrColInd = NULL;
float* dbsrVal = NULL;
hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1));
hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb);
hipMalloc((void**)&dbsrVal, sizeof(float) * nnzb * blockDim * blockDim);
hipMemcpy(dbsrRowPtr, hbsrRowPtr, sizeof(int) * (mb + 1), hipMemcpyHostToDevice);
hipMemcpy(dbsrColInd, hbsrColInd, sizeof(int) * nnzb, hipMemcpyHostToDevice);
hipMemcpy(dbsrVal, hbsrVal, sizeof(float) * nnzb * blockDim * blockDim, hipMemcpyHostToDevice);

// Copy B to the device
float* dB = NULL;
hipMalloc((void**)&dB, sizeof(float) * k * n);
hipMemcpy(dB, hB, sizeof(float) * k * n, hipMemcpyHostToDevice);

// alpha and beta
float alpha = 1.0f;
float beta  = 0.0f;

// Allocate memory for the resulting matrix C (m x n)
float* dC = NULL;
hipMalloc((void**)&dC, sizeof(float) * m * n);

// Matrix descriptor
hipsparseMatDescr_t descr;
hipsparseCreateMatDescr(&descr);

// Perform the matrix multiplication
hipsparseSbsrmm(handle,
                dir,
                HIPSPARSE_OPERATION_NON_TRANSPOSE,
                HIPSPARSE_OPERATION_NON_TRANSPOSE,
                mb,
                n,
                kb,
                nnzb,
                &alpha,
                descr,
                dbsrVal,
                dbsrRowPtr,
                dbsrColInd,
                blockDim,
                dB,
                k, // ldb
                &beta,
                dC,
                m); // ldc

// Copy results to host; C holds m x n = 4 x 3 values
float hC[4 * 3];
hipMemcpy(hC, dC, sizeof(float) * m * n, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroyMatDescr(descr);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dbsrRowPtr);
hipFree(dbsrColInd);
hipFree(dbsrVal);
hipFree(dB);
hipFree(dC);

Note

This function is non-blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.

Note

Currently, only transA == HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.

hipsparseXcsrmm()#

hipsparseStatus_t hipsparseScsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, const float *beta, float *C, int ldc)#
hipsparseStatus_t hipsparseDcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, const double *beta, double *C, int ldc)#
hipsparseStatus_t hipsparseCcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
hipsparseStatus_t hipsparseZcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#

Sparse matrix dense matrix multiplication using CSR storage format.

hipsparseXcsrmm multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the dense \(k \times n\) matrix \(B\) and adds the result to the dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that

\[ C := \alpha \cdot op(A) \cdot B + \beta \cdot C, \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]

// Reference algorithm for op(A) == A: visit every row i of the m x n result.
// The bound is m (number of rows of A and C), not the leading dimension ldc;
// csrRowPtr only holds m + 1 entries.
for(i = 0; i < m; ++i)
{
    for(j = 0; j < n; ++j)
    {
        // Scale the existing entry of C first
        C[i][j] = beta * C[i][j];

        // Accumulate the non-zeros of row i of A (k indexes the CSR arrays here)
        for(k = csrRowPtr[i]; k < csrRowPtr[i + 1]; ++k)
        {
            C[i][j] += alpha * csrVal[k] * B[csrColInd[k]][j];
        }
    }
}

Example
// Example: C = alpha * A * B + beta * C with A stored in CSR format.

// hipSPARSE handle
hipsparseHandle_t handle;
hipsparseCreate(&handle);

//     1 2 0 3 0 0
// A = 0 4 5 0 0 0
//     0 0 0 7 8 0
//     0 0 1 2 4 1

// A has m rows, k columns and nnz non-zero entries
int m   = 4;
int k   = 6;
int nnz = 11;

int hcsrRowPtr[4 + 1] = {0, 3, 5, 7, 11};
int hcsrColInd[11]    = {0, 1, 3, 1, 2, 3, 4, 2, 3, 4, 5};
float hcsrVal[11]      = {1, 2, 3, 4, 5, 7, 8, 1, 2, 4, 1};

// Set dimension n of B
int n = 3;

// Allocate and generate dense matrix B (k x n)
float hB[6 * 3] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f,
                   11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f};

// Copy the CSR matrix to the device
int* dcsrRowPtr = NULL;
int* dcsrColInd = NULL;
float* dcsrVal = NULL;
hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1));
hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz);
hipMalloc((void**)&dcsrVal, sizeof(float) * nnz);
hipMemcpy(dcsrRowPtr, hcsrRowPtr, sizeof(int) * (m + 1), hipMemcpyHostToDevice);
hipMemcpy(dcsrColInd, hcsrColInd, sizeof(int) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dcsrVal, hcsrVal, sizeof(float) * nnz, hipMemcpyHostToDevice);

// Copy B to the device
float* dB = NULL;
hipMalloc((void**)&dB, sizeof(float) * k * n);
hipMemcpy(dB, hB, sizeof(float) * k * n, hipMemcpyHostToDevice);

// alpha and beta
float alpha = 1.0f;
float beta  = 0.0f;

// Allocate memory for the resulting matrix C (m x n)
float* dC = NULL;
hipMalloc((void**)&dC, sizeof(float) * m * n);

// Matrix descriptor
hipsparseMatDescr_t descr;
hipsparseCreateMatDescr(&descr);

// Perform the matrix multiplication
hipsparseScsrmm(handle,
                HIPSPARSE_OPERATION_NON_TRANSPOSE,
                m,
                n,
                k,
                nnz,
                &alpha,
                descr,
                dcsrVal,
                dcsrRowPtr,
                dcsrColInd,
                dB,
                k, // ldb
                &beta,
                dC,
                m); // ldc

// Copy results to host; C holds m x n = 4 x 3 values
float hC[4 * 3];
hipMemcpy(hC, dC, sizeof(float) * m * n, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroyMatDescr(descr);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dcsrRowPtr);
hipFree(dcsrColInd);
hipFree(dcsrVal);
hipFree(dB);
hipFree(dC);

Note

This function is non-blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.

hipsparseXcsrmm2()#

hipsparseStatus_t hipsparseScsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, const float *beta, float *C, int ldc)#
hipsparseStatus_t hipsparseDcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, const double *beta, double *C, int ldc)#
hipsparseStatus_t hipsparseCcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
hipsparseStatus_t hipsparseZcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#

Sparse matrix dense matrix multiplication using CSR storage format.

hipsparseXcsrmm2 multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the dense \(k \times n\) matrix \(B\) and adds the result to the dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that

\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
and
\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]

// Reference algorithm for op(A) == A and op(B) == B: visit every row i of the
// m x n result. The bound is m (number of rows of A and C), not the leading
// dimension ldc; csrRowPtr only holds m + 1 entries.
for(i = 0; i < m; ++i)
{
    for(j = 0; j < n; ++j)
    {
        // Scale the existing entry of C first
        C[i][j] = beta * C[i][j];

        // Accumulate the non-zeros of row i of A (k indexes the CSR arrays here)
        for(k = csrRowPtr[i]; k < csrRowPtr[i + 1]; ++k)
        {
            C[i][j] += alpha * csrVal[k] * B[csrColInd[k]][j];
        }
    }
}

Note

This function is non-blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.

hipsparseXbsrsm2_zeroPivot()#

hipsparseStatus_t hipsparseXbsrsm2_zeroPivot(hipsparseHandle_t handle, bsrsm2Info_t info, int *position)#

Sparse triangular system solve using BSR storage format.

hipsparseXbsrsm2_zeroPivot returns HIPSPARSE_STATUS_ZERO_PIVOT, if either a structural or numerical zero has been found during hipsparseXbsrsm2_analysis() or hipsparseXbsrsm2_solve() computation. The first zero pivot \(j\) at \(A_{j,j}\) is stored in position, using the same index base as the BSR matrix.

position can be in host or device memory. If no zero pivot has been found, position is set to -1 and HIPSPARSE_STATUS_SUCCESS is returned instead.

Note

hipsparseXbsrsm2_zeroPivot is a blocking function. It might influence performance negatively.

hipsparseXbsrsm2_bufferSize()#

hipsparseStatus_t hipsparseSbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
hipsparseStatus_t hipsparseDbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
hipsparseStatus_t hipsparseCbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
hipsparseStatus_t hipsparseZbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#

Sparse triangular system solve using BSR storage format.

hipsparseXbsrsm2_bufferSize returns the size of the temporary storage buffer in bytes that is required by hipsparseXbsrsm2_analysis() and hipsparseXbsrsm2_solve(). The temporary storage buffer must be allocated by the user.

hipsparseXbsrsm2_analysis()#

hipsparseStatus_t hipsparseSbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseDbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseCbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseZbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#

Sparse triangular system solve using BSR storage format.

hipsparseXbsrsm2_analysis performs the analysis step for hipsparseXbsrsm2_solve().

Note

If the matrix sparsity pattern changes, the gathered information will become invalid.

Note

This function is non-blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.

hipsparseXbsrsm2_solve()#

hipsparseStatus_t hipsparseSbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const float *alpha, const hipsparseMatDescr_t descrA, const float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const float *B, int ldb, float *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseDbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const double *alpha, const hipsparseMatDescr_t descrA, const double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const double *B, int ldb, double *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseCbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const hipComplex *B, int ldb, hipComplex *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseZbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const hipDoubleComplex *B, int ldb, hipDoubleComplex *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#

Sparse triangular system solve using BSR storage format.

hipsparseXbsrsm2_solve solves a sparse triangular linear system of a sparse \(m \times m\) matrix, defined in BSR storage format, a dense solution matrix \(X\) and the right-hand side matrix \(B\) that is multiplied by \(\alpha\), such that

\[ op(A) \cdot op(X) = \alpha \cdot op(B), \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
and
\[\begin{split} op(X) = \left\{ \begin{array}{ll} X, & \text{if transX == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ X^T, & \text{if transX == HIPSPARSE_OPERATION_TRANSPOSE} \\ X^H, & \text{if transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]

hipsparseXbsrsm2_solve requires a user allocated temporary buffer. Its size is returned by hipsparseXbsrsm2_bufferSize(). Furthermore, analysis meta data is required. It can be obtained by hipsparseXbsrsm2_analysis(). hipsparseXbsrsm2_solve reports the first zero pivot (either numerical or structural zero). The zero pivot status can be checked by calling hipsparseXbsrsm2_zeroPivot(). If hipsparseDiagType_t == HIPSPARSE_DIAG_TYPE_UNIT, no zero pivot will be reported, even if \(A_{j,j} = 0\) for some \(j\).

Example
// Example: lower triangular solve A * X = alpha * B with A stored in BSR format.

// hipSPARSE handle
hipsparseHandle_t handle;
hipsparseCreate(&handle);

// A = ( 1.0  0.0  0.0  0.0 )
//     ( 2.0  3.0  0.0  0.0 )
//     ( 4.0  5.0  6.0  0.0 )
//     ( 7.0  0.0  8.0  9.0 )
//
// with bsr_dim = 2
//
//      -------------------
//   = | 1.0 0.0 | 0.0 0.0 |
//     | 2.0 3.0 | 0.0 0.0 |
//      -------------------
//     | 4.0 5.0 | 6.0 0.0 |
//     | 7.0 0.0 | 8.0 9.0 |
//      -------------------

// Number of rows and columns
// NOTE(review): m is informational only; it is not referenced below
int m = 4;

// Number of block rows and block columns
int mb = 2;
int nb = 2;

// BSR block dimension
int bsr_dim = 2;

// Number of right-hand-sides
int nrhs = 4;

// Number of non-zero blocks
int nnzb = 3;

// BSR row pointers
int hbsrRowPtr[3] = {0, 1, 3};

// BSR column indices
int hbsrColInd[3] = {0, 0, 1};

// BSR values (each 2 x 2 block is listed column by column, matching dir below)
double hbsrVal[12] = {1.0, 2.0, 0.0, 3.0, 4.0, 7.0, 5.0, 0.0, 6.0, 8.0, 0.0, 9.0};

// Storage scheme of the BSR blocks
hipsparseDirection_t dir = HIPSPARSE_DIRECTION_COLUMN;

// Transposition of the matrix and rhs matrix
hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
hipsparseOperation_t transX = HIPSPARSE_OPERATION_NON_TRANSPOSE;

// Solve policy
hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL;

// Scalar alpha
double alpha = 1.0;

// rhs and solution matrix (leading dimensions of B and X)
int ldb = nb * bsr_dim;
int ldx = mb * bsr_dim;

double hB[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
double hX[16];

// Offload data to device
int* dbsrRowPtr;
int* dbsrColInd;
double*        dbsrVal;
double*        dB;
double*        dX;

hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1));
hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb);
hipMalloc((void**)&dbsrVal, sizeof(double) * nnzb * bsr_dim * bsr_dim);
hipMalloc((void**)&dB, sizeof(double) * nb * bsr_dim * nrhs);
hipMalloc((void**)&dX, sizeof(double) * mb * bsr_dim * nrhs);

hipMemcpy(dbsrRowPtr, hbsrRowPtr, sizeof(int) * (mb + 1), hipMemcpyHostToDevice);
hipMemcpy(dbsrColInd, hbsrColInd, sizeof(int) * nnzb, hipMemcpyHostToDevice);
hipMemcpy(dbsrVal, hbsrVal, sizeof(double) * nnzb * bsr_dim * bsr_dim, hipMemcpyHostToDevice);
hipMemcpy(dB, hB, sizeof(double) * nb * bsr_dim * nrhs, hipMemcpyHostToDevice);

// Matrix descriptor
hipsparseMatDescr_t descr;
hipsparseCreateMatDescr(&descr);

// Matrix fill mode
hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER);

// Matrix diagonal type
hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT);

// Matrix info structure
bsrsm2Info_t info;
hipsparseCreateBsrsm2Info(&info);

// Obtain required buffer size
int buffer_size;
hipsparseDbsrsm2_bufferSize(handle,
                            dir,
                            transA,
                            transX,
                            mb,
                            nrhs,
                            nnzb,
                            descr,
                            dbsrVal,
                            dbsrRowPtr,
                            dbsrColInd,
                            bsr_dim,
                            info,
                            &buffer_size);

// Allocate temporary buffer (shared by the analysis and solve steps below)
void* dbuffer;
hipMalloc(&dbuffer, buffer_size);

// Perform analysis step
hipsparseDbsrsm2_analysis(handle,
                          dir,
                          transA,
                          transX,
                          mb,
                          nrhs,
                          nnzb,
                          descr,
                          dbsrVal,
                          dbsrRowPtr,
                          dbsrColInd,
                          bsr_dim,
                          info,
                          solve_policy,
                          dbuffer);

// Call dbsrsm2_solve to perform the lower triangular solve L * X = alpha * B
hipsparseDbsrsm2_solve(handle,
                       dir,
                       transA,
                       transX,
                       mb,
                       nrhs,
                       nnzb,
                       &alpha,
                       descr,
                       dbsrVal,
                       dbsrRowPtr,
                       dbsrColInd,
                       bsr_dim,
                       info,
                       dB,
                       ldb,
                       dX,
                       ldx,
                       solve_policy,
                       dbuffer);

// Check for zero pivots; pivot is a host variable that receives the position
int    pivot;
hipsparseStatus_t status = hipsparseXbsrsm2_zeroPivot(handle, info, &pivot);

if(status == HIPSPARSE_STATUS_ZERO_PIVOT)
{
    std::cout << "Found zero pivot in matrix row " << pivot << std::endl;
}

// Copy result back to host
hipMemcpy(hX, dX, sizeof(double) * mb * bsr_dim * nrhs, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroyBsrsm2Info(info);
hipsparseDestroyMatDescr(descr);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dbsrRowPtr);
hipFree(dbsrColInd);
hipFree(dbsrVal);
hipFree(dB);
hipFree(dX);
hipFree(dbuffer);

Note

The sparse BSR matrix has to be sorted.

Note

The operation type of B and X must match: if \(op(B)=B\), then \(op(X)=X\).

Note

This function is non-blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.

Note

Currently, only transA != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE and transX != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE are supported.

hipsparseXcsrsm2_zeroPivot()#

hipsparseStatus_t hipsparseXcsrsm2_zeroPivot(hipsparseHandle_t handle, csrsm2Info_t info, int *position)#

Sparse triangular system solve using CSR storage format.

hipsparseXcsrsm2_zeroPivot returns HIPSPARSE_STATUS_ZERO_PIVOT, if either a structural or numerical zero has been found during hipsparseXcsrsm2_analysis() or hipsparseXcsrsm2_solve() computation. The first zero pivot \(j\) at \(A_{j,j}\) is stored in position, using the same index base as the CSR matrix.

position can be in host or device memory. If no zero pivot has been found, position is set to -1 and HIPSPARSE_STATUS_SUCCESS is returned instead.

Note

hipsparseXcsrsm2_zeroPivot is a blocking function. It might influence performance negatively.

hipsparseXcsrsm2_bufferSizeExt()#

hipsparseStatus_t hipsparseScsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
hipsparseStatus_t hipsparseDcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
hipsparseStatus_t hipsparseCcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
hipsparseStatus_t hipsparseZcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#

Sparse triangular system solve using CSR storage format.

hipsparseXcsrsm2_bufferSizeExt returns the size of the temporary storage buffer in bytes that is required by hipsparseXcsrsm2_analysis() and hipsparseXcsrsm2_solve(). The temporary storage buffer must be allocated by the user.

hipsparseXcsrsm2_analysis()#

hipsparseStatus_t hipsparseScsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseDcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseCcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseZcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#

Sparse triangular system solve using CSR storage format.

hipsparseXcsrsm2_analysis performs the analysis step for hipsparseXcsrsm2_solve().

Note

If the matrix sparsity pattern changes, the gathered information will become invalid.

Note

This function is non-blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.

hipsparseXcsrsm2_solve()#

hipsparseStatus_t hipsparseScsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseDcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseCcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseStatus_t hipsparseZcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#

Sparse triangular system solve using CSR storage format.

hipsparseXcsrsm2_solve solves a sparse triangular linear system of a sparse \(m \times m\) matrix, defined in CSR storage format, a dense solution matrix \(X\) and the right-hand side matrix \(B\) that is multiplied by \(\alpha\), such that

\[ op(A) \cdot op(X) = \alpha \cdot op(B), \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
,
\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
and
\[\begin{split} op(X) = \left\{ \begin{array}{ll} X, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ X^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ X^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]

hipsparseXcsrsm2_solve requires a user allocated temporary buffer. Its size is returned by hipsparseXcsrsm2_bufferSizeExt(). Furthermore, analysis meta data is required. It can be obtained by hipsparseXcsrsm2_analysis(). hipsparseXcsrsm2_solve reports the first zero pivot (either numerical or structural zero). The zero pivot status can be checked by calling hipsparseXcsrsm2_zeroPivot(). If hipsparseDiagType_t == HIPSPARSE_DIAG_TYPE_UNIT, no zero pivot will be reported, even if \(A_{j,j} = 0\) for some \(j\).

Example
// Example: solve the lower triangular system A * X = alpha * B for 4
// right-hand sides with hipsparseDcsrsm2_solve.

// hipSPARSE handle
hipsparseHandle_t handle;
hipsparseCreate(&handle);

// A = ( 1.0  0.0  0.0  0.0 )
//     ( 2.0  3.0  0.0  0.0 )
//     ( 4.0  5.0  6.0  0.0 )
//     ( 7.0  0.0  8.0  9.0 )

// Number of rows and columns
int m = 4;
int n = 4;

// Number of right-hand-sides
int nrhs = 4;

// Number of non-zeros
int nnz = 9;

// CSR row pointers
int hcsrRowPtr[5] = {0, 1, 3, 6, 9};

// CSR column indices
int hcsrColInd[9] = {0, 0, 1, 0, 1, 2, 0, 2, 3};

// CSR values
double hcsrVal[9] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};

// Transposition of the matrix and rhs matrix
hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE;

// Solve policy
hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL;

// Scalar alpha
double alpha = 1.0;

// Leading dimension of the rhs and solution matrix
int ldb = n;

// Right-hand side values; the same array receives the result that is copied
// back from the device at the end of the example
double hB[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};

// Offload data to device
int* dcsrRowPtr;
int* dcsrColInd;
double*        dcsrVal;
double*        dB;

hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1));
hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz);
hipMalloc((void**)&dcsrVal, sizeof(double) * nnz);
hipMalloc((void**)&dB, sizeof(double) * n * nrhs);

hipMemcpy(dcsrRowPtr, hcsrRowPtr, sizeof(int) * (m + 1), hipMemcpyHostToDevice);
hipMemcpy(dcsrColInd, hcsrColInd, sizeof(int) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dcsrVal, hcsrVal, sizeof(double) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dB, hB, sizeof(double) * n * nrhs, hipMemcpyHostToDevice);

// Matrix descriptor
hipsparseMatDescr_t descr;
hipsparseCreateMatDescr(&descr);

// Matrix fill mode
hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER);

// Matrix diagonal type
hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT);

// Matrix info structure
csrsm2Info_t info;
hipsparseCreateCsrsm2Info(&info);

// Obtain required buffer size (the second argument, 0, is the algo parameter)
size_t buffer_size;
hipsparseDcsrsm2_bufferSizeExt(handle,
                               0,
                               transA,
                               transB,
                               m,
                               nrhs,
                               nnz,
                               &alpha,
                               descr,
                               dcsrVal,
                               dcsrRowPtr,
                               dcsrColInd,
                               dB,
                               ldb,
                               info,
                               solve_policy,
                               &buffer_size);

// Allocate temporary buffer
void* dbuffer;
hipMalloc(&dbuffer, buffer_size);

// Perform analysis step (fills the info structure that the solve step requires)
hipsparseDcsrsm2_analysis(handle,
                          0,
                          transA,
                          transB,
                          m,
                          nrhs,
                          nnz,
                          &alpha,
                          descr,
                          dcsrVal,
                          dcsrRowPtr,
                          dcsrColInd,
                          dB,
                          ldb,
                          info,
                          solve_policy,
                          dbuffer);

// Call dcsrsm2_solve to perform the lower triangular solve A * X = alpha * B
hipsparseDcsrsm2_solve(handle,
                       0,
                       transA,
                       transB,
                       m,
                       nrhs,
                       nnz,
                       &alpha,
                       descr,
                       dcsrVal,
                       dcsrRowPtr,
                       dcsrColInd,
                       dB,
                       ldb,
                       info,
                       solve_policy,
                       dbuffer);

// Check for zero pivots
int    pivot;
hipsparseStatus_t status = hipsparseXcsrsm2_zeroPivot(handle, info, &pivot);

if(status == HIPSPARSE_STATUS_ZERO_PIVOT)
{
    std::cout << "Found zero pivot in matrix row " << pivot << std::endl;
}

// Copy result back to host
hipMemcpy(hB, dB, sizeof(double) * m * nrhs, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroyCsrsm2Info(info);
hipsparseDestroyMatDescr(descr);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dcsrRowPtr);
hipFree(dcsrColInd);
hipFree(dcsrVal);
hipFree(dB);
hipFree(dbuffer);

Note

The sparse CSR matrix has to be sorted. This can be achieved by calling hipsparseXcsrsort().

Note

This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.

Note

Currently, only transA != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE and transB != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE are supported.

hipsparseXgemmi()#

hipsparseStatus_t hipsparseSgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const float *alpha, const float *A, int lda, const float *cscValB, const int *cscColPtrB, const int *cscRowIndB, const float *beta, float *C, int ldc)#
hipsparseStatus_t hipsparseDgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const double *alpha, const double *A, int lda, const double *cscValB, const int *cscColPtrB, const int *cscRowIndB, const double *beta, double *C, int ldc)#
hipsparseStatus_t hipsparseCgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const hipComplex *alpha, const hipComplex *A, int lda, const hipComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, const hipComplex *beta, hipComplex *C, int ldc)#
hipsparseStatus_t hipsparseZgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#

Dense matrix sparse matrix multiplication using CSC storage format.

hipsparseXgemmi multiplies the scalar \(\alpha\) with a dense \(m \times k\) matrix \(A\) and the sparse \(k \times n\) matrix \(B\), defined in CSC storage format, and adds the result to the dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that

\[ C := \alpha \cdot A \cdot B + \beta \cdot C \]

Example
// Example: compute C := alpha * A * B + beta * C with hipsparseSgemmi, where
// A and C are dense and B is sparse in CSC format.

// A, B, and C are m×k, k×n, and m×n
int m = 3, n = 5, k = 4;
// Leading dimensions of the dense matrices A and C
int lda = m, ldc = m;
// nnz_A and nnz_C are the total element counts of the dense matrices A and C;
// nnz_B is the number of non-zeros stored for the sparse matrix B
int nnz_A = m * k, nnz_B = 10, nnz_C = m * n;

// alpha and beta
float alpha = 0.5f;
float beta  = 0.25f;

// B in CSC format: column pointers (n + 1 entries), row indices and values
std::vector<int> hcscColPtr = {0, 2, 5, 7, 8, 10};
std::vector<int> hcscRowInd = {0, 2, 0, 1, 3, 1, 3, 2, 0, 2}; 
std::vector<float> hcsc_val     = {1, 6, 2, 4, 9, 5, 2, 7, 3, 8}; 

// Dense host matrices A and C, every entry initialized to 1.0
std::vector<float> hA(nnz_A, 1.0f);
std::vector<float> hC(nnz_C, 1.0f);

// Allocate device memory for the CSC arrays of B and copy them from the host
int *dcscColPtr;
int *dcscRowInd;
float *dcsc_val;
hipMalloc((void**)&dcscColPtr, sizeof(int) * (n + 1));
hipMalloc((void**)&dcscRowInd, sizeof(int) * nnz_B);
hipMalloc((void**)&dcsc_val, sizeof(float) * nnz_B);

hipMemcpy(dcscColPtr, hcscColPtr.data(), sizeof(int) * (n + 1), hipMemcpyHostToDevice);
hipMemcpy(dcscRowInd, hcscRowInd.data(), sizeof(int) * nnz_B, hipMemcpyHostToDevice);
hipMemcpy(dcsc_val, hcsc_val.data(), sizeof(float) * nnz_B, hipMemcpyHostToDevice);

// hipSPARSE handle
hipsparseHandle_t handle;
hipsparseCreate(&handle);

// Allocate device memory for the matrix A and copy it from the host
float* dA;
hipMalloc((void**)&dA, sizeof(float) * nnz_A);
hipMemcpy(dA, hA.data(), sizeof(float) * nnz_A, hipMemcpyHostToDevice);

// Allocate device memory for the resulting matrix C and copy its initial
// values from the host (C is also an input, scaled by beta)
float* dC;
hipMalloc((void**)&dC, sizeof(float) * nnz_C);
hipMemcpy(dC, hC.data(), sizeof(float) * nnz_C, hipMemcpyHostToDevice);

// Perform operation C := alpha * A * B + beta * C
hipsparseSgemmi(handle, 
                m, 
                n, 
                k, 
                nnz_B, 
                &alpha, 
                dA, 
                lda, 
                dcsc_val, 
                dcscColPtr, 
                dcscRowInd, 
                &beta, 
                dC, 
                ldc);

// Copy device to host
hipMemcpy(hC.data(), dC, sizeof(float) * nnz_C, hipMemcpyDeviceToHost);

// Destroy the hipSPARSE handle
hipsparseDestroy(handle);

// Free device memory
hipFree(dcscColPtr);
hipFree(dcscRowInd);
hipFree(dcsc_val);
hipFree(dA);
hipFree(dC);

Note

This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.