Sparse Extra Functions#
This module holds all sparse extra routines.
The sparse extra routines describe operations that manipulate sparse matrices.
hipsparseXcsrgeamNnz()#
-
hipsparseStatus_t hipsparseXcsrgeamNnz(hipsparseHandle_t handle, int m, int n, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, int *csrRowPtrC, int *nnzTotalDevHostPtr)#
Sparse matrix sparse matrix addition using CSR storage format.
hipsparseXcsrgeamNnz
computes the total CSR non-zero elements and the CSR row offsets, that point to the start of every row of the sparse CSR matrix, of the resulting matrix C. It is assumed thatcsrRowPtrC
has been allocated with sizem+1
. The desired index base in the output CSR matrix is set in the hipsparseMatDescr_t. See hipsparseSetMatIndexBase().For full code example, see hipsparseScsrgeam().
Note
As indicated, nnzTotalDevHostPtr can point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
hipsparseXcsrgeam()#
-
hipsparseStatus_t hipsparseScsrgeam(hipsparseHandle_t handle, int m, int n, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const float *beta, const hipsparseMatDescr_t descrB, int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, float *csrValC, int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseDcsrgeam(hipsparseHandle_t handle, int m, int n, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const double *beta, const hipsparseMatDescr_t descrB, int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, double *csrValC, int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseCcsrgeam(hipsparseHandle_t handle, int m, int n, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipComplex *csrValC, int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseZcsrgeam(hipsparseHandle_t handle, int m, int n, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrValC, int *csrRowPtrC, int *csrColIndC)#
Sparse matrix sparse matrix addition using CSR storage format.
hipsparseXcsrgeam
multiplies the scalar \(\alpha\) with the sparse \(m \times n\) matrix \(A\), defined in CSR storage format, multiplies the scalar \(\beta\) with the sparse \(m \times n\) matrix \(B\), defined in CSR storage format, and adds both resulting matrices to obtain the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that\[ C := \alpha \cdot A + \beta \cdot B. \]This computation involves a multi step process. First the user must allocate
csrRowPtrC
to have sizem+1
. The user then callshipsparseXcsrgeamNnz
which fills in thecsrRowPtrC
array as well as computes the total number of nonzeros in C,nnzC
. The user then allocates both arrayscsrColIndC
andcsrValC
to have sizennzC
and callshipsparseXcsrgeam
to complete the computation. The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_tdescrC
. See hipsparseSetMatIndexBase().- Example
int m = 4; int n = 4; int nnzA = 9; int nnzB = 6; float alpha{1.0f}; float beta{1.0f}; // A, B, and C are m×n // A // 1 0 0 2 // 3 4 0 0 // 5 6 7 8 // 0 0 9 0 std::vector<int> hcsrRowPtrA = {0, 2, 4, 8, 9}; std::vector<int> hcsrColIndA = {0, 3, 0, 1, 0, 1, 2, 3, 2}; std::vector<float> hcsrValA = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; // B // 0 1 0 0 // 1 0 1 0 // 0 1 0 1 // 0 0 1 0 std::vector<int> hcsrRowPtrB = {0, 1, 3, 5, 6}; std::vector<int> hcsrColIndB = {1, 0, 2, 1, 3, 2}; std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; // Device memory management: Allocate and copy A, B int* dcsrRowPtrA; int* dcsrColIndA; float* dcsrValA; int* dcsrRowPtrB; int* dcsrColIndB; float* dcsrValB; int* dcsrRowPtrC; hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)); hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)); hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)); hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)); hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)); hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice); hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); hipsparseMatDescr_t descrA; hipsparseCreateMatDescr(&descrA); hipsparseMatDescr_t descrB; hipsparseCreateMatDescr(&descrB); hipsparseMatDescr_t descrC; hipsparseCreateMatDescr(&descrC); int nnzC; hipsparseXcsrgeamNnz(handle, m, n, descrA, nnzA, dcsrRowPtrA, dcsrColIndA, descrB, nnzB, dcsrRowPtrB, dcsrColIndB, descrC, dcsrRowPtrC, &nnzC); int* dcsrColIndC = nullptr; float* dcsrValC = nullptr; hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC); hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC); hipsparseScsrgeam(handle, m, n, &alpha, descrA, nnzA, dcsrValA, dcsrRowPtrA, dcsrColIndA, &beta, descrB, nnzB, dcsrValB, dcsrRowPtrB, dcsrColIndB, descrC, dcsrValC, dcsrRowPtrC, dcsrColIndC); hipFree(dcsrRowPtrA); hipFree(dcsrColIndA); hipFree(dcsrValA); hipFree(dcsrRowPtrB); hipFree(dcsrColIndB); hipFree(dcsrValB); hipFree(dcsrRowPtrC); hipFree(dcsrColIndC); hipFree(dcsrValC); hipsparseDestroyMatDescr(descrA); hipsparseDestroyMatDescr(descrB); hipsparseDestroyMatDescr(descrC); hipsparseDestroy(handle);
Note
Both scalars \(\alpha\) and \(beta\) have to be valid.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
hipsparseXcsrgeam2_bufferSizeExt()#
-
hipsparseStatus_t hipsparseScsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, const hipsparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const float *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDcsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *beta, const hipsparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const double *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCcsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const hipComplex *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZcsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const hipDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgeam2_bufferSizeExt
returns the size of the temporary storage buffer in bytes that is required by hipsparseXcsrgeam2Nnz() and hipsparseXcsrgeam2(). The temporary storage buffer must be allocated by the user.Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
hipsparseXcsrgeam2Nnz()#
-
hipsparseStatus_t hipsparseXcsrgeam2Nnz(hipsparseHandle_t handle, int m, int n, const hipsparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace)#
Sparse matrix sparse matrix addition using CSR storage format.
hipsparseXcsrgeam2Nnz
computes the total CSR non-zero elements and the CSR row offsets, that point to the start of every row of the sparse CSR matrix, of the resulting matrix C. It is assumed thatcsrRowPtrC
has been allocated with sizem+1
. The required buffer size can be obtained by hipsparseXcsrgeam2_bufferSizeExt(). The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_tdescrC
. See hipsparseSetMatIndexBase().Note
As indicated, nnzTotalDevHostPtr can point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
hipsparseXcsrgeam2()#
-
hipsparseStatus_t hipsparseScsrgeam2(hipsparseHandle_t handle, int m, int n, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, const hipsparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, float *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrgeam2(hipsparseHandle_t handle, int m, int n, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *beta, const hipsparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrgeam2(hipsparseHandle_t handle, int m, int n, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, hipComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrgeam2(hipsparseHandle_t handle, int m, int n, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
Sparse matrix sparse matrix addition using CSR storage format.
hipsparseXcsrgeam2
multiplies the scalar \(\alpha\) with the sparse \(m \times n\) matrix \(A\), defined in CSR storage format, multiplies the scalar \(\beta\) with the sparse \(m \times n\) matrix \(B\), defined in CSR storage format, and adds both resulting matrices to obtain the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that\[ C := \alpha \cdot A + \beta \cdot B. \]This computation involves a multi step process. First the user must call
hipsparseZcsrgeam2_bufferSizeExt
in order to determine the required user allocated temporary buffer size. The user then allocates this buffer and also allocatescsrRowPtrC
to have sizem+1
. Both the temporary storage buffer andcsrRowPtrC
array are then passed tohipsparseXcsrgeam2Nnz
which fills in thecsrRowPtrC
array as well as computes the total number of nonzeros in C,nnzC
. The user then allocates both arrayscsrColIndC
andcsrValC
to have sizennzC
and callshipsparseXcsrgeam2
to complete the computation. The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_tdescrC
. See hipsparseSetMatIndexBase().- Example
int m = 4; int n = 4; int nnzA = 9; int nnzB = 6; float alpha{1.0f}; float beta{1.0f}; // A, B, and C are m×n // A // 1 0 0 2 // 3 4 0 0 // 5 6 7 8 // 0 0 9 0 std::vector<int> hcsrRowPtrA = {0, 2, 4, 8, 9}; std::vector<int> hcsrColIndA = {0, 3, 0, 1, 0, 1, 2, 3, 2}; std::vector<float> hcsrValA = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f}; // B // 0 1 0 0 // 1 0 1 0 // 0 1 0 1 // 0 0 1 0 std::vector<int> hcsrRowPtrB = {0, 1, 3, 5, 6}; std::vector<int> hcsrColIndB = {1, 0, 2, 1, 3, 2}; std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; // Device memory management: Allocate and copy A, B int* dcsrRowPtrA; int* dcsrColIndA; float* dcsrValA; int* dcsrRowPtrB; int* dcsrColIndB; float* dcsrValB; int* dcsrRowPtrC; hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)); hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)); hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)); hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)); hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)); hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice); hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); hipsparseMatDescr_t descrA; hipsparseCreateMatDescr(&descrA); hipsparseMatDescr_t descrB; hipsparseCreateMatDescr(&descrB); hipsparseMatDescr_t descrC; hipsparseCreateMatDescr(&descrC); size_t bufferSize; hipsparseScsrgeam2_bufferSizeExt(handle, m, n, &alpha, descrA, nnzA, dcsrValA, dcsrRowPtrA, dcsrColIndA, &beta, descrB, nnzB, dcsrValB, dcsrRowPtrB, dcsrColIndB, descrC, nullptr, dcsrRowPtrC, nullptr, &bufferSize); void* dbuffer = nullptr; hipMalloc((void**)&dbuffer, bufferSize); int nnzC; hipsparseXcsrgeam2Nnz(handle, m, n, descrA, nnzA, dcsrRowPtrA, dcsrColIndA, descrB, nnzB, dcsrRowPtrB, dcsrColIndB, descrC, dcsrRowPtrC, &nnzC, dbuffer); int* dcsrColIndC = nullptr; float* dcsrValC = nullptr; hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC); hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC); hipsparseScsrgeam2(handle, m, n, &alpha, descrA, nnzA, dcsrValA, dcsrRowPtrA, dcsrColIndA, &beta, descrB, nnzB, dcsrValB, dcsrRowPtrB, dcsrColIndB, descrC, dcsrValC, dcsrRowPtrC, dcsrColIndC, dbuffer); hipFree(dcsrRowPtrA); hipFree(dcsrColIndA); hipFree(dcsrValA); hipFree(dcsrRowPtrB); hipFree(dcsrColIndB); hipFree(dcsrValB); hipFree(dcsrRowPtrC); hipFree(dcsrColIndC); hipFree(dcsrValC); hipFree(dbuffer); hipsparseDestroyMatDescr(descrA); hipsparseDestroyMatDescr(descrB); hipsparseDestroyMatDescr(descrC); hipsparseDestroy(handle);
Note
Both scalars \(\alpha\) and \(beta\) have to be valid.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
hipsparseXcsrgemmNnz()#
-
hipsparseStatus_t hipsparseXcsrgemmNnz(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, int *csrRowPtrC, int *nnzTotalDevHostPtr)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgemmNnz
computes the total CSR non-zero elements and the CSR row offsets, that point to the start of every row of the sparse CSR matrix, of the resulting multiplied matrix C. It is assumed thatcsrRowPtrC
has been allocated with sizem+1
. The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_tdescrC
. See hipsparseSetMatIndexBase().Note
As indicated, nnzTotalDevHostPtr can point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 8192 intermediate products per row, additional temporary storage buffer is allocated by the algorithm.
Note
Currently, only
transA
==transB
== HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
hipsparseXcsrgemm()#
-
hipsparseStatus_t hipsparseScsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseDcsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseCcsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipComplex *csrValC, const int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseZcsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrValC, const int *csrRowPtrC, int *csrColIndC)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgemm
multiplies the sparse \(m \times k\) matrix \(op(A)\), defined in CSR storage format with the sparse \(k \times n\) matrix \(op(B)\), defined in CSR storage format, and stores the result in the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that\[ C := op(A) \cdot op(B), \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]and\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]This computation involves a multi step process. First the user must allocate
csrRowPtrC
to have sizem+1
. The user then callshipsparseXcsrgemmNnz
which fills in thecsrRowPtrC
array as well as computes the total number of nonzeros in C,nnzC
. The user then allocates both arrayscsrColIndC
andcsrValC
to have sizennzC
and callshipsparseXcsrgemm
to complete the computation. The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_tdescrC
. See hipsparseSetMatIndexBase().- Example
int m = 4; int k = 3; int n = 2; int nnzA = 7; int nnzB = 3; hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE; hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE; // A, B, and C are mxk, kxn, and m×n // A // 1 0 0 // 3 4 0 // 5 6 7 // 0 0 9 std::vector<int> hcsrRowPtrA = {0, 1, 3, 6, 7}; std::vector<int> hcsrColIndA = {0, 0, 1, 0, 1, 2, 2}; std::vector<float> hcsrValA = {1.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 9.0f}; // B // 0 1 // 1 0 // 0 1 std::vector<int> hcsrRowPtrB = {0, 1, 2, 3}; std::vector<int> hcsrColIndB = {1, 0, 1}; std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f}; // Device memory management: Allocate and copy A, B int* dcsrRowPtrA; int* dcsrColIndA; float* dcsrValA; int* dcsrRowPtrB; int* dcsrColIndB; float* dcsrValB; int* dcsrRowPtrC; hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)); hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)); hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)); hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)); hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)); hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice); hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); hipsparseMatDescr_t descrA; hipsparseCreateMatDescr(&descrA); hipsparseMatDescr_t descrB; hipsparseCreateMatDescr(&descrB); hipsparseMatDescr_t descrC; hipsparseCreateMatDescr(&descrC); int nnzC; hipsparseXcsrgemmNnz(handle, transA, transB, m, n, k, descrA, nnzA, dcsrRowPtrA, dcsrColIndA, descrB, nnzB, dcsrRowPtrB, dcsrColIndB, descrC, dcsrRowPtrC, &nnzC); int* dcsrColIndC = nullptr; float* dcsrValC = nullptr; hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC); hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC); hipsparseScsrgemm(handle, transA, transB, m, n, k, descrA, nnzA, dcsrValA, dcsrRowPtrA, dcsrColIndA, descrB, nnzB, dcsrValB, dcsrRowPtrB, dcsrColIndB, descrC, dcsrValC, dcsrRowPtrC, dcsrColIndC); hipFree(dcsrRowPtrA); hipFree(dcsrColIndA); hipFree(dcsrValA); hipFree(dcsrRowPtrB); hipFree(dcsrColIndB); hipFree(dcsrValB); hipFree(dcsrRowPtrC); hipFree(dcsrColIndC); hipFree(dcsrValC); hipsparseDestroyMatDescr(descrA); hipsparseDestroyMatDescr(descrB); hipsparseDestroyMatDescr(descrC); hipsparseDestroy(handle);
Note
Currently, only
transA
== HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.Note
Currently, only
transB
== HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 4096 non-zero entries per row, additional temporary storage buffer is allocated by the algorithm.
hipsparseXcsrgemm2_bufferSizeExt()#
-
hipsparseStatus_t hipsparseScsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const float *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDcsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const double *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCcsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZcsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgemm2_bufferSizeExt
returns the size of the temporary storage buffer in bytes that is required by hipsparseXcsrgemm2Nnz() and hipsparseXcsrgemm2(). The temporary storage buffer must be allocated by the user.Note
Please note, that for matrix products with more than 4096 non-zero entries per row, additional temporary storage buffer is allocated by the algorithm.
Note
Please note, that for matrix products with more than 8192 intermediate products per row, additional temporary storage buffer is allocated by the algorithm.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
hipsparseXcsrgemm2Nnz()#
-
hipsparseStatus_t hipsparseXcsrgemm2Nnz(hipsparseHandle_t handle, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, int *csrRowPtrC, int *nnzTotalDevHostPtr, const csrgemm2Info_t info, void *pBuffer)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgemm2Nnz
computes the total CSR non-zero elements and the CSR row offsets, that point to the start of every row of the sparse CSR matrix, of the resulting multiplied matrix C. It is assumed thatcsrRowPtrC
has been allocated with sizem+1
. The required buffer size can be obtained by hipsparseXcsrgemm2_bufferSizeExt(). The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_tdescrC
. See hipsparseSetMatIndexBase().Note
As indicated, nnzTotalDevHostPtr can point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 8192 intermediate products per row, additional temporary storage buffer is allocated by the algorithm.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
hipsparseXcsrgemm2()#
-
hipsparseStatus_t hipsparseScsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const float *beta, const hipsparseMatDescr_t descrD, int nnzD, const float *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const double *beta, const hipsparseMatDescr_t descrD, int nnzD, const double *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const hipComplex *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, hipComplex *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const hipDoubleComplex *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgemm2
multiplies the scalar \(\alpha\) with the sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the sparse \(k \times n\) matrix \(B\), defined in CSR storage format, and adds the result to the sparse \(m \times n\) matrix \(D\) that is multiplied by \(\beta\). The final result is stored in the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that\[ C := \alpha \cdot A \cdot B + \beta \cdot D \]This computation involves a multi step process. First the user must call
in
order to determine the required user allocated temporary buffer size. The user then allocates this buffer and also allocatescsrRowPtrC
to have sizem+1
. Both the temporary storage buffer andcsrRowPtrC
array are then passed tohipsparseXcsrgemm2Nnz
which fills in thecsrRowPtrC
array as well as computes the total number of nonzeros in C,nnzC
. The user then allocates both arrayscsrColIndC
andcsrValC
to have sizennzC
and callshipsparseXcsrgemm2
to complete the computation. The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_tdescrC
. See hipsparseSetMatIndexBase().- Example
int m = 4; int k = 3; int n = 2; int nnzA = 7; int nnzB = 3; int nnzD = 6; float alpha{1.0f}; float beta{1.0f}; // A, B, and C are mxk, kxn, and m×n // A // 1 0 0 // 3 4 0 // 5 6 7 // 0 0 9 std::vector<int> hcsrRowPtrA = {0, 1, 3, 6, 7}; std::vector<int> hcsrColIndA = {0, 0, 1, 0, 1, 2, 2}; std::vector<float> hcsrValA = {1.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 9.0f}; // B // 0 1 // 1 0 // 0 1 std::vector<int> hcsrRowPtrB = {0, 1, 2, 3}; std::vector<int> hcsrColIndB = {1, 0, 1}; std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f}; // D // 0 1 // 2 3 // 4 5 // 0 6 std::vector<int> hcsrRowPtrD = {0, 1, 3, 5, 6}; std::vector<int> hcsrColIndD = {1, 0, 1, 0, 1, 1}; std::vector<float> hcsrValD = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; // Device memory management: Allocate and copy A, B int* dcsrRowPtrA; int* dcsrColIndA; float* dcsrValA; int* dcsrRowPtrB; int* dcsrColIndB; float* dcsrValB; int* dcsrRowPtrD; int* dcsrColIndD; float* dcsrValD; int* dcsrRowPtrC; hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)); hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)); hipMalloc((void**)&dcsrRowPtrB, (k + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)); hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)); hipMalloc((void**)&dcsrRowPtrD, (m + 1) * sizeof(int)); hipMalloc((void**)&dcsrColIndD, nnzD * sizeof(int)); hipMalloc((void**)&dcsrValD, nnzD * sizeof(float)); hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)); hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice); hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (k + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice); hipMemcpy(dcsrRowPtrD, hcsrRowPtrD.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrColIndD, hcsrColIndD.data(), nnzD * sizeof(int), hipMemcpyHostToDevice); hipMemcpy(dcsrValD, hcsrValD.data(), nnzD * sizeof(float), hipMemcpyHostToDevice); hipsparseHandle_t handle; hipsparseCreate(&handle); hipsparseMatDescr_t descrA; hipsparseCreateMatDescr(&descrA); hipsparseMatDescr_t descrB; hipsparseCreateMatDescr(&descrB); hipsparseMatDescr_t descrC; hipsparseCreateMatDescr(&descrC); hipsparseMatDescr_t descrD; hipsparseCreateMatDescr(&descrD); csrgemm2Info_t info; hipsparseCreateCsrgemm2Info(&info); size_t bufferSize; hipsparseScsrgemm2_bufferSizeExt(handle, m, n, k, &alpha, descrA, nnzA, dcsrRowPtrA, dcsrColIndA, descrB, nnzB, dcsrRowPtrB, dcsrColIndB, &beta, descrD, nnzD, dcsrRowPtrD, dcsrColIndD, info, &bufferSize); void* dbuffer = nullptr; hipMalloc((void**)&dbuffer, bufferSize); int nnzC; hipsparseXcsrgemm2Nnz(handle, m, n, k, descrA, nnzA, dcsrRowPtrA, dcsrColIndA, descrB, nnzB, dcsrRowPtrB, dcsrColIndB, descrD, nnzD, dcsrRowPtrD, dcsrColIndD, descrC, dcsrRowPtrC, &nnzC, info, dbuffer); int* dcsrColIndC = nullptr; float* dcsrValC = nullptr; hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC); hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC); hipsparseScsrgemm2(handle, m, n, k, &alpha, descrA, nnzA, dcsrValA, dcsrRowPtrA, dcsrColIndA, descrB, nnzB, dcsrValB, dcsrRowPtrB, dcsrColIndB, &beta, descrD, nnzD, dcsrValD, dcsrRowPtrD, dcsrColIndD, descrC, dcsrValC, dcsrRowPtrC, dcsrColIndC, info, dbuffer); hipFree(dcsrRowPtrA); hipFree(dcsrColIndA); hipFree(dcsrValA); hipFree(dcsrRowPtrB); hipFree(dcsrColIndB); hipFree(dcsrValB); hipFree(dcsrRowPtrC); hipFree(dcsrColIndC); hipFree(dcsrValC); hipFree(dcsrRowPtrD); hipFree(dcsrColIndD); hipFree(dcsrValD); hipFree(dbuffer); hipsparseDestroyMatDescr(descrA); hipsparseDestroyMatDescr(descrB); hipsparseDestroyMatDescr(descrC); hipsparseDestroyMatDescr(descrD); hipsparseDestroyCsrgemm2Info(info); hipsparseDestroy(handle);
Note
If \(\alpha == 0\), then \(C = \beta \cdot D\) will be computed.
Note
If \(\beta == 0\), then \(C = \alpha \cdot A \cdot B\) will be computed.
Note
\(\alpha == beta == 0\) is invalid.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 4096 non-zero entries per row, additional temporary storage buffer is allocated by the algorithm.