# Sparse Generic Functions#

This module holds all sparse generic routines.

The sparse generic routines describe operations that manipulate sparse matrices.

## hipsparseAxpby()#

hipsparseStatus_t hipsparseAxpby(hipsparseHandle_t handle, const void *alpha, hipsparseConstSpVecDescr_t vecX, const void *beta, hipsparseDnVecDescr_t vecY)#

Description: Scale a sparse vector and add it to a scaled dense vector.

hipsparseAxpby multiplies the sparse vector $$x$$ with scalar $$\alpha$$ and adds the result to the dense vector $$y$$ that is multiplied with scalar $$\beta$$, such that

$y := \alpha \cdot x + \beta \cdot y$

for(i = 0; i < nnz; ++i)
{
y[x_ind[i]] = alpha * x_val[i] + beta * y[x_ind[i]]
}


Example
// Number of non-zeros of the sparse vector
int nnz = 3;

// Size of sparse and dense vector
int size = 9;

// Sparse index vector
std::vector<int> hx_ind = {0, 3, 5};

// Sparse value vector
std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};

// Dense vector
std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};

// Scalar alpha
float alpha = 3.7f;

// Scalar beta
float beta = 1.2f;

int* dx_ind;
float* dx_val;
float* dy;
hipMalloc((void**)&dx_ind, sizeof(int) * nnz);
hipMalloc((void**)&dx_val, sizeof(float) * nnz);
hipMalloc((void**)&dy, sizeof(float) * size);

hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);

hipsparseHandle_t handle;
hipsparseCreate(&handle);

// Create sparse vector X
hipsparseSpVecDescr_t vecX;
hipsparseCreateSpVec(&vecX,
size,
nnz,
dx_ind,
dx_val,
HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO,
HIP_R_32F);

// Create dense vector Y
hipsparseDnVecDescr_t vecY;
hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F);

// Call axpby to perform y = beta * y + alpha * x
hipsparseAxpby(handle, &alpha, vecX, &beta, vecY);

hipsparseDnVecGetValues(vecY, (void**)&dy);

// Copy result back to host
hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroySpVec(vecX);
hipsparseDestroyDnVec(vecY);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dx_ind);
hipFree(dx_val);
hipFree(dy);


## hipsparseGather()#

hipsparseStatus_t hipsparseGather(hipsparseHandle_t handle, hipsparseConstDnVecDescr_t vecY, hipsparseSpVecDescr_t vecX)#

Description: Gather elements from a dense vector and store them into a sparse vector.

hipsparseGather gathers the elements from the dense vector $$y$$ and stores them in the sparse vector $$x$$.

for(i = 0; i < nnz; ++i)
{
x_val[i] = y[x_ind[i]];
}


Example
// Number of non-zeros of the sparse vector
int nnz = 3;

// Size of sparse and dense vector
int size = 9;

// Sparse index vector
std::vector<int> hx_ind = {0, 3, 5};

// Dense vector
std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};

int* dx_ind;
float* dx_val;
float* dy;
hipMalloc((void**)&dx_ind, sizeof(int) * nnz);
hipMalloc((void**)&dx_val, sizeof(float) * nnz);
hipMalloc((void**)&dy, sizeof(float) * size);

hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);

hipsparseHandle_t handle;
hipsparseCreate(&handle);

// Create sparse vector X
hipsparseSpVecDescr_t vecX;
hipsparseCreateSpVec(&vecX,
size,
nnz,
dx_ind,
dx_val,
HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO,
HIP_R_32F);

// Create dense vector Y
hipsparseDnVecDescr_t vecY;
hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F);

// Perform gather
hipsparseGather(handle, vecY, vecX);

hipsparseSpVecGetValues(vecX, (void**)&dx_val);

// Copy result back to host
std::vector<float> hx_val(nnz, 0.0f);
hipMemcpy(hx_val.data(), dx_val, sizeof(float) * nnz, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroySpVec(vecX);
hipsparseDestroyDnVec(vecY);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dx_ind);
hipFree(dx_val);
hipFree(dy);


## hipsparseScatter()#

hipsparseStatus_t hipsparseScatter(hipsparseHandle_t handle, hipsparseConstSpVecDescr_t vecX, hipsparseDnVecDescr_t vecY)#

Description: Scatter elements from a sparse vector into a dense vector.

hipsparseScatter scatters the elements from the sparse vector $$x$$ in the dense vector $$y$$.

for(i = 0; i < nnz; ++i)
{
y[x_ind[i]] = x_val[i];
}


Example
// Number of non-zeros of the sparse vector
int nnz = 3;

// Size of sparse and dense vector
int size = 9;

// Sparse index vector
std::vector<int> hx_ind = {0, 3, 5};

// Sparse value vector
std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};

// Dense vector
std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};

int* dx_ind;
float* dx_val;
float* dy;
hipMalloc((void**)&dx_ind, sizeof(int) * nnz);
hipMalloc((void**)&dx_val, sizeof(float) * nnz);
hipMalloc((void**)&dy, sizeof(float) * size);

hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);

hipsparseHandle_t handle;
hipsparseCreate(&handle);

// Create sparse vector X
hipsparseSpVecDescr_t vecX;
hipsparseCreateSpVec(&vecX,
size,
nnz,
dx_ind,
dx_val,
HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO,
HIP_R_32F);

// Create dense vector Y
hipsparseDnVecDescr_t vecY;
hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F);

// Perform scatter
hipsparseScatter(handle, vecX, vecY);

hipsparseDnVecGetValues(vecY, (void**)&dy);

// Copy result back to host
hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroySpVec(vecX);
hipsparseDestroyDnVec(vecY);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dx_ind);
hipFree(dx_val);
hipFree(dy);


## hipsparseRot()#

hipsparseStatus_t hipsparseRot(hipsparseHandle_t handle, const void *c_coeff, const void *s_coeff, hipsparseSpVecDescr_t vecX, hipsparseDnVecDescr_t vecY)#

Description: Apply Givens rotation to a dense and a sparse vector.

hipsparseRot applies the Givens rotation matrix $$G$$ to the sparse vector $$x$$ and the dense vector $$y$$, where

$\begin{split} G = \begin{pmatrix} c & s \\ -s & c \end{pmatrix} \end{split}$

for(i = 0; i < nnz; ++i)
{
x_tmp = x_val[i];
y_tmp = y[x_ind[i]];

x_val[i]    = c * x_tmp + s * y_tmp;
y[x_ind[i]] = c * y_tmp - s * x_tmp;
}


Example
// Number of non-zeros of the sparse vector
int nnz = 3;

// Size of sparse and dense vector
int size = 9;

// Sparse index vector
std::vector<int> hx_ind = {0, 3, 5};

// Sparse value vector
std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};

// Dense vector
std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};

// Scalar c
float c = 3.7f;

// Scalar s
float s = 1.2f;

int* dx_ind;
float* dx_val;
float* dy;
hipMalloc((void**)&dx_ind, sizeof(int) * nnz);
hipMalloc((void**)&dx_val, sizeof(float) * nnz);
hipMalloc((void**)&dy, sizeof(float) * size);

hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);

hipsparseHandle_t handle;
hipsparseCreate(&handle);

// Create sparse vector X
hipsparseSpVecDescr_t vecX;
hipsparseCreateSpVec(&vecX,
size,
nnz,
dx_ind,
dx_val,
HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO,
HIP_R_32F);

// Create dense vector Y
hipsparseDnVecDescr_t vecY;
hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F);

// Call rot
hipsparseRot(handle, (void*)&c, (void*)&s, vecX, vecY);

hipsparseSpVecGetValues(vecX, (void**)&dx_val);
hipsparseDnVecGetValues(vecY, (void**)&dy);

// Copy result back to host
hipMemcpy(hx_val.data(), dx_val, sizeof(float) * nnz, hipMemcpyDeviceToHost);
hipMemcpy(hy.data(), dy, sizeof(float) * size, hipMemcpyDeviceToHost);

// Clear hipSPARSE
hipsparseDestroySpVec(vecX);
hipsparseDestroyDnVec(vecY);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dx_ind);
hipFree(dx_val);
hipFree(dy);


## hipsparseSparseToDense_bufferSize()#

hipsparseStatus_t hipsparseSparseToDense_bufferSize(hipsparseHandle_t handle, hipsparseConstSpMatDescr_t matA, hipsparseDnMatDescr_t matB, hipsparseSparseToDenseAlg_t alg, size_t *bufferSize)#

Description: Sparse matrix to dense matrix conversion.

hipsparseSparseToDense_bufferSize computes the required user allocated buffer size needed when converting a sparse matrix to a dense matrix.

## hipsparseSparseToDense()#

hipsparseStatus_t hipsparseSparseToDense(hipsparseHandle_t handle, hipsparseConstSpMatDescr_t matA, hipsparseDnMatDescr_t matB, hipsparseSparseToDenseAlg_t alg, void *externalBuffer)#

Description: Sparse matrix to dense matrix conversion.

hipsparseSparseToDense converts a sparse matrix to a dense matrix. This routine takes a user allocated buffer whose size must first be computed by calling hipsparseSparseToDense_bufferSize

## hipsparseDenseToSparse_bufferSize()#

hipsparseStatus_t hipsparseDenseToSparse_bufferSize(hipsparseHandle_t handle, hipsparseConstDnMatDescr_t matA, hipsparseSpMatDescr_t matB, hipsparseDenseToSparseAlg_t alg, size_t *bufferSize)#

Description: Dense matrix to sparse matrix conversion.

hipsparseDenseToSparse_bufferSize computes the required user allocated buffer size needed when converting a dense matrix to a sparse matrix.

## hipsparseDenseToSparse_analysis()#

hipsparseStatus_t hipsparseDenseToSparse_analysis(hipsparseHandle_t handle, hipsparseConstDnMatDescr_t matA, hipsparseSpMatDescr_t matB, hipsparseDenseToSparseAlg_t alg, void *externalBuffer)#

Description: Dense matrix to sparse matrix conversion.

hipsparseDenseToSparse_analysis performs analysis that is later used in hipsparseDenseToSparse_convert when converting a dense matrix to sparse matrix. This routine takes a user allocated buffer whose size must first be computed using hipsparseDenseToSparse_bufferSize.

## hipsparseDenseToSparse_convert()#

hipsparseStatus_t hipsparseDenseToSparse_convert(hipsparseHandle_t handle, hipsparseConstDnMatDescr_t matA, hipsparseSpMatDescr_t matB, hipsparseDenseToSparseAlg_t alg, void *externalBuffer)#

Description: Dense matrix to sparse matrix conversion.

hipsparseDenseToSparse_convert converts a dense matrix to a sparse matrix. This routine requires a user allocated buffer whose size must be determined by first calling hipsparseDenseToSparse_bufferSize.

## hipsparseSpVV_bufferSize()#

hipsparseStatus_t hipsparseSpVV_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opX, hipsparseConstSpVecDescr_t vecX, hipsparseConstDnVecDescr_t vecY, void *result, hipDataType computeType, size_t *bufferSize)#

Description: Compute the inner dot product of a sparse vector with a dense vector.

hipsparseSpVV_bufferSize computes the required user allocated buffer size needed when computing the inner dot product of a sparse vector with a dense vector

See full example below

## hipsparseSpVV()#

hipsparseStatus_t hipsparseSpVV(hipsparseHandle_t handle, hipsparseOperation_t opX, hipsparseConstSpVecDescr_t vecX, hipsparseConstDnVecDescr_t vecY, void *result, hipDataType computeType, void *externalBuffer)#

Description: Compute the inner dot product of a sparse vector with a dense vector.

hipsparseSpVV computes the inner dot product of a sparse vector with a dense vector. This routine takes a user allocated buffer whose size must first be computed by calling hipsparseSpVV_bufferSize

Example
// Number of non-zeros of the sparse vector
int nnz = 3;

// Size of sparse and dense vector
int size = 9;

// Sparse index vector
std::vector<int> hx_ind = {0, 3, 5};

// Sparse value vector
std::vector<float> hx_val = {1.0f, 2.0f, 3.0f};

// Dense vector
std::vector<float> hy = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};

int* dx_ind;
float* dx_val;
float* dy;
hipMalloc((void**)&dx_ind, sizeof(int) * nnz);
hipMalloc((void**)&dx_val, sizeof(float) * nnz);
hipMalloc((void**)&dy, sizeof(float) * size);

hipMemcpy(dx_ind, hx_ind.data(), sizeof(int) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dx_val, hx_val.data(), sizeof(float) * nnz, hipMemcpyHostToDevice);
hipMemcpy(dy, hy.data(), sizeof(float) * size, hipMemcpyHostToDevice);

hipsparseHandle_t handle;
hipsparseCreate(&handle);

// Create sparse vector X
hipsparseSpVecDescr_t vecX;
hipsparseCreateSpVec(&vecX,
size,
nnz,
dx_ind,
dx_val,
HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO,
HIP_R_32F);

// Create dense vector Y
hipsparseDnVecDescr_t vecY;
hipsparseCreateDnVec(&vecY, size, dy, HIP_R_32F);

// Obtain buffer size
float hresult = 0.0f;
size_t buffer_size;
hipsparseSpVV_bufferSize(handle,
HIPSPARSE_OPERATION_NON_TRANSPOSE,
vecX,
vecY,
&hresult,
HIP_R_32F,
&buffer_size);

void* temp_buffer;
hipMalloc(&temp_buffer, buffer_size);

// SpVV
hipsparseSpVV(handle,
HIPSPARSE_OPERATION_NON_TRANSPOSE,
vecX,
vecY,
&hresult,
HIP_R_32F,
temp_buffer);

hipDeviceSynchronize();

std::cout << "hresult: " << hresult << std::endl;

// Clear hipSPARSE
hipsparseDestroySpVec(vecX);
hipsparseDestroyDnVec(vecY);
hipsparseDestroy(handle);

// Clear device memory
hipFree(dx_ind);
hipFree(dx_val);
hipFree(dy);
hipFree(temp_buffer);


## hipsparseSpMV_bufferSize()#

hipsparseStatus_t hipsparseSpMV_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t vecX, const void *beta, const hipsparseDnVecDescr_t vecY, hipDataType computeType, hipsparseSpMVAlg_t alg, size_t *bufferSize)#

Description: Buffer size step of the sparse matrix multiplication with a dense vector.

hipsparseSpMV_bufferSize computes the required user allocated buffer size needed when computing the sparse matrix multiplication with a dense vector

See full example below

## hipsparseSpMV()#

hipsparseStatus_t hipsparseSpMV(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t vecX, const void *beta, const hipsparseDnVecDescr_t vecY, hipDataType computeType, hipsparseSpMVAlg_t alg, void *externalBuffer)#

Description: Compute the sparse matrix multiplication with a dense vector.

hipsparseSpMV computes sparse matrix multiplication with a dense vector

Example
// A, x, and y are m×k, k×1, and m×1
int m = 3, k = 4;
int nnz_A = 8;
hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;

// alpha and beta
float alpha = 0.5f;
float beta  = 0.25f;

std::vector<int> hcsr_row_ptr = {0, 3, 5, 8};
std::vector<int> hcsr_col_ind = {0, 1, 3, 1, 2, 0, 2, 3};
std::vector<float> hcsr_val     = {1, 2, 3, 4, 5, 6, 7, 8};

std::vector<float> hx(k, 1.0f);
std::vector<float> hy(m, 1.0f);

int *dcsr_row_ptr;
int *dcsr_col_ind;
float *dcsr_val;
hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1));
hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz_A);
hipMalloc((void**)&dcsr_val, sizeof(float) * nnz_A);

hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice);
hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz_A, hipMemcpyHostToDevice);
hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * nnz_A, hipMemcpyHostToDevice);

hipsparseHandle_t handle;
hipsparseCreate(&handle);

hipsparseSpMatDescr_t matA;
hipsparseCreateCsr(&matA, m, k, nnz_A,
dcsr_row_ptr, dcsr_col_ind, dcsr_val,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);

// Allocate memory for the vector x
float* dx;
hipMalloc((void**)&dx, sizeof(float) * k);
hipMemcpy(dx, hx.data(), sizeof(float) * k, hipMemcpyHostToDevice);

hipsparseDnVecDescr_t vecX;
hipsparseCreateDnVec(&vecX, k, dx, HIP_R_32F);

// Allocate memory for the resulting vector y
float* dy;
hipMalloc((void**)&dy, sizeof(float) * m);
hipMemcpy(dy, hy.data(), sizeof(float) * m, hipMemcpyHostToDevice);

hipsparseDnMatDescr_t vecY;
hipsparseCreateDnVec(&vecY, m, dy, HIP_R_32F);

// Compute buffersize
size_t bufferSize;
hipsparseSpMV_bufferSize(handle,
transA,
&alpha,
matA,
vecX,
&beta,
vecY,
HIP_R_32F,
HIPSPARSE_MV_ALG_DEFAULT,
&bufferSize);

void* buffer;
hipMalloc(&buffer, bufferSize);

// Preprocess operation (Optional)
hipsparseSpMV_preprocess(handle,
transA,
&alpha,
matA,
vecX,
&beta,
vecY,
HIP_R_32F,
HIPSPARSE_MV_ALG_DEFAULT,
&buffer);

// Perform operation
hipsparseSpMV(handle,
transA,
&alpha,
matA,
vecX,
&beta,
vecY,
HIP_R_32F,
HIPSPARSE_MV_ALG_DEFAULT,
&buffer);

// Copy device to host
hipMemcpy(hy.data(), dy, sizeof(float) * m, hipMemcpyDeviceToHost);

// Destroy matrix descriptors and handles
hipsparseDestroySpMat(matA);
hipsparseDestroyDnVec(vecX);
hipsparseDestroyDnVec(vecY);
hipsparseDestroy(handle);

hipFree(buffer);
hipFree(dcsr_row_ptr);
hipFree(dcsr_col_ind);
hipFree(dcsr_val);
hipFree(dx);
hipFree(dy);


## hipsparseSpMM_bufferSize()#

hipsparseStatus_t hipsparseSpMM_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const void *beta, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpMMAlg_t alg, size_t *bufferSize)#

Description: Calculate the buffer size required for the sparse matrix multiplication with a dense matrix.

hipsparseSpMM_bufferSize computes the required user allocated buffer size needed when computing the sparse matrix multiplication with a dense matrix

See full example below

## hipsparseSpMM_preprocess()#

hipsparseStatus_t hipsparseSpMM_preprocess(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const void *beta, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpMMAlg_t alg, void *externalBuffer)#

Description: Preprocess step of the sparse matrix multiplication with a dense matrix.

hipsparseSpMM_preprocess performs the required preprocessing used when computing the sparse matrix multiplication with a dense matrix

See full example below

## hipsparseSpMM()#

hipsparseStatus_t hipsparseSpMM(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const void *beta, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpMMAlg_t alg, void *externalBuffer)#

Description: Compute the sparse matrix multiplication with a dense matrix.

hipsparseSpMM computes sparse matrix multiplication with a dense matrix

Example
// A, B, and C are m×k, k×n, and m×n
int m = 3, n = 5, k = 4;
int ldb = n, ldc = n;
int nnz_A = 8, nnz_B = 20, nnz_C = 15;
hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE;
hipsparseOperation_t transC = HIPSPARSE_OPERATION_NON_TRANSPOSE;
hipsparseOrder_t order = HIPSPARSE_ORDER_ROW;

// alpha and beta
float alpha = 0.5f;
float beta  = 0.25f;

std::vector<int> hcsr_row_ptr = {0, 3, 5, 8};
std::vector<int> hcsr_col_ind = {0, 1, 3, 1, 2, 0, 2, 3};
std::vector<float> hcsr_val     = {1, 2, 3, 4, 5, 6, 7, 8};

std::vector<float> hB(nnz_B, 1.0f);
std::vector<float> hC(nnz_C, 1.0f);

int *dcsr_row_ptr;
int *dcsr_col_ind;
float *dcsr_val;
hipMalloc((void**)&dcsr_row_ptr, sizeof(int) * (m + 1));
hipMalloc((void**)&dcsr_col_ind, sizeof(int) * nnz_A);
hipMalloc((void**)&dcsr_val, sizeof(float) * nnz_A);

hipMemcpy(dcsr_row_ptr, hcsr_row_ptr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice);
hipMemcpy(dcsr_col_ind, hcsr_col_ind.data(), sizeof(int) * nnz_A, hipMemcpyHostToDevice);
hipMemcpy(dcsr_val, hcsr_val.data(), sizeof(float) * nnz_A, hipMemcpyHostToDevice);

hipsparseHandle_t handle;
hipsparseCreate(&handle);

hipsparseSpMatDescr_t matA;
hipsparseCreateCsr(&matA, m, k, nnz_A,
dcsr_row_ptr, dcsr_col_ind, dcsr_val,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);

// Allocate memory for the matrix B
float* dB;
hipMalloc((void**)&dB, sizeof(float) * nnz_B);
hipMemcpy(dB, hB.data(), sizeof(float) * nnz_B, hipMemcpyHostToDevice);

hipsparseDnMatDescr_t matB;
hipsparseCreateDnMat(&matB, k, n, ldb, dB, HIP_R_32F, order);

// Allocate memory for the resulting matrix C
float* dC;
hipMalloc((void**)&dC, sizeof(float) * nnz_C);
hipMemcpy(dC, hC.data(), sizeof(float) * nnz_C, hipMemcpyHostToDevice);

hipsparseDnMatDescr_t matC;
hipsparseCreateDnMat(&matC, m, n, ldc, dC, HIP_R_32F, HIPSPARSE_ORDER_ROW);

// Compute buffersize
size_t bufferSize;
hipsparseSpMM_bufferSize(handle,
transA,
transB,
&alpha,
matA,
matB,
&beta,
matC,
HIP_R_32F,
HIPSPARSE_MM_ALG_DEFAULT,
&bufferSize);

void* buffer;
hipMalloc(&buffer, bufferSize);

// Preprocess operation (Optional)
hipsparseSpMM_preprocess(handle,
transA,
transB,
&alpha,
matA,
matB,
&beta,
matC,
HIP_R_32F,
HIPSPARSE_MM_ALG_DEFAULT,
&buffer);

// Perform operation
hipsparseSpMM(handle,
transA,
transB,
&alpha,
matA,
matB,
&beta,
matC,
HIP_R_32F,
HIPSPARSE_MM_ALG_DEFAULT,
&buffer);

// Copy device to host
hipMemcpy(hC.data(), dC, sizeof(float) * nnz_C, hipMemcpyDeviceToHost);

// Destroy matrix descriptors and handles
hipsparseDestroySpMat(matA);
hipsparseDestroyDnMat(matB);
hipsparseDestroyDnMat(matC);
hipsparseDestroy(handle);

hipFree(buffer);
hipFree(dcsr_row_ptr);
hipFree(dcsr_col_ind);
hipFree(dcsr_val);
hipFree(dB);
hipFree(dC);


## hipsparseSpGEMM_createDescr()#

hipsparseStatus_t hipsparseSpGEMM_createDescr(hipsparseSpGEMMDescr_t *descr)#

Description: Create sparse matrix sparse matrix product descriptor.

hipsparseSpGEMM_createDescr creates a sparse matrix sparse matrix product descriptor. It should be destroyed at the end using hipsparseSpGEMM_destroyDescr().

## hipsparseSpGEMM_destroyDescr()#

hipsparseStatus_t hipsparseSpGEMM_destroyDescr(hipsparseSpGEMMDescr_t descr)#

Description: Destroy sparse matrix sparse matrix product descriptor.

hipsparseSpGEMM_destroyDescr destroys a sparse matrix sparse matrix product descriptor and releases all resources used by the descriptor.

## hipsparseSpGEMM_workEstimation()#

hipsparseStatus_t hipsparseSpGEMM_workEstimation(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, void *externalBuffer1)#

Description: Work estimation step of the sparse matrix sparse matrix product C’ = alpha * A * B + beta * C where C’, A, B, C are sparse matrices and C’ and C have the same sparsity pattern.

hipsparseSpGEMM_workEstimation is called twice. We call it to compute the size of the first required user allocated buffer. After this buffer size is determined, the user allocates it and calls hipsparseSpGEMM_workEstimation a second time with the newly allocated buffer passed in. This second call inspects the matrices A and B to determine the number of intermediate products that will result from multipltying A and B together.

Example (See full example below)
void*  dBuffer1  = NULL;
size_t bufferSize1 = 0;

hipsparseSpGEMMDescr_t spgemmDesc;
hipsparseSpGEMM_createDescr(&spgemmDesc);

size_t bufferSize1 = 0;
hipsparseSpGEMM_workEstimation(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize1, NULL);
hipMalloc((void**) &dBuffer1, bufferSize1);

// Determine number of intermediate product when computing A * B
hipsparseSpGEMM_workEstimation(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize1, dBuffer1);


## hipsparseSpGEMM_compute()#

hipsparseStatus_t hipsparseSpGEMM_compute(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, void *externalBuffer2)#

Description: Compute step of the sparse matrix sparse matrix product C’ = alpha * A * B + beta * C where C’, A, B, C are sparse matrices and C’ and C have the same sparsity pattern.

hipsparseSpGEMM_compute is called twice. First to compute the size of the second required user allocated buffer. After this buffer size is determined, the user allocates it and calls hipsparseSpGEMM_compute a second time with the newly allocated buffer passed in. This second call performs the actual computation of C’ = alpha * A * B (the result is stored in the temporary buffers).

Example (See full example below)
void*  dBuffer2  = NULL;
size_t bufferSize2 = 0;

size_t bufferSize2 = 0;
hipsparseSpGEMM_compute(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize2, NULL);
hipMalloc((void**) &dBuffer2, bufferSize2);

// compute the intermediate product of A * B
hipsparseSpGEMM_compute(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize2, dBuffer2);


## hipsparseSpGEMM_copy()#

hipsparseStatus_t hipsparseSpGEMM_copy(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr)#

Description: Copy step of the sparse matrix sparse matrix product C’ = alpha * A * B + beta * C where C’, A, B, C are sparse matrices and C’ and C have the same sparsity pattern.

hipsparseSpGEMM_copy is called once to copy the results (that are currently stored in the temporary arrays) to the output sparse matrix. If beta != 0, then the beta * C portion of the computation: C’ = alpha * A * B + beta * C is handled. This is possible because C’ and C must have the same sparsity pattern.

Example (Full example)
hipsparseHandle_t     handle = NULL;
hipsparseSpMatDescr_t matA, matB, matC;
void*  dBuffer1  = NULL;
void*  dBuffer2  = NULL;
size_t bufferSize1 = 0;
size_t bufferSize2 = 0;

hipsparseCreate(&handle);

// Create sparse matrix A in CSR format
hipsparseCreateCsr(&matA, m, k, nnzA,
dcsr_row_ptrA, dcsr_col_indA, dcsr_valA,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);
hipsparseCreateCsr(&matB, k, n, nnzB,
dcsr_row_ptrB, dcsr_col_indB, dcsr_valB,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);
hipsparseCreateCsr(&matC, m, n, 0,
dcsr_row_ptrC, NULL, NULL,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);

hipsparseSpGEMMDescr_t spgemmDesc;
hipsparseSpGEMM_createDescr(&spgemmDesc);

// Determine size of first user allocated buffer
hipsparseSpGEMM_workEstimation(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize1, NULL);
hipMalloc((void**) &dBuffer1, bufferSize1);

// Inspect the matrices A and B to determine the number of intermediate product in
// C = alpha * A * B
hipsparseSpGEMM_workEstimation(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize1, dBuffer1);

// Determine size of second user allocated buffer
hipsparseSpGEMM_compute(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize2, NULL);
hipMalloc((void**) &dBuffer2, bufferSize2);

// Compute C = alpha * A * B and store result in temporary buffers
hipsparseSpGEMM_compute(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc, &bufferSize2, dBuffer2);

// Get matrix C non-zero entries C_nnz1
int64_t C_num_rows1, C_num_cols1, C_nnz1;
hipsparseSpMatGetSize(matC, &C_num_rows1, &C_num_cols1, &C_nnz1);

// Allocate the CSR structures for the matrix C
hipMalloc((void**) &dcsr_col_indC, C_nnz1 * sizeof(int));
hipMalloc((void**) &dcsr_valC,  C_nnz1 * sizeof(float));

// Update matC with the new pointers
hipsparseCsrSetPointers(matC, dcsr_row_ptrC, dcsr_col_indC, dcsr_valC);

// Copy the final products to the matrix C
hipsparseSpGEMM_copy(handle, opA, opB,
&alpha, matA, matB, &beta, matC,
computeType, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc);

// Destroy matrix descriptors and handles
hipsparseSpGEMM_destroyDescr(spgemmDesc);
hipsparseDestroySpMat(matA);
hipsparseDestroySpMat(matB);
hipsparseDestroySpMat(matC);
hipsparseDestroy(handle);

// Free device memory
hipFree(dBuffer1);
hipFree(dBuffer2);


Note

The two user allocated temporary buffers can only be freed after the call to hipsparseSpGEMM_copy

## hipsparseSpGEMMreuse_workEstimation()#

hipsparseStatus_t hipsparseSpGEMMreuse_workEstimation(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, hipsparseSpMatDescr_t matC, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, void *externalBuffer1)#

Description: Work estimation step of the sparse matrix sparse matrix product C’ = alpha * A * B + beta * C where C’, A, B, C are sparse matrices and C’ and C have the same sparsity pattern.

hipsparseSpGEMMreuse_workEstimation is called twice. We call it to compute the size of the first required user allocated buffer. After this buffer size is determined, the user allocates it and calls hipsparseSpGEMMreuse_workEstimation a second time with the newly allocated buffer passed in. This second call inspects the matrices A and B to determine the number of intermediate products that will result from multipltying A and B together.

Example (See full example below)
void*  dBuffer1  = NULL;
size_t bufferSize1 = 0;

hipsparseSpGEMMDescr_t spgemmDesc;
hipsparseSpGEMM_createDescr(&spgemmDesc);

size_t bufferSize1 = 0;
hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize1, NULL);
hipMalloc((void**) &dBuffer1, bufferSize1);

// Determine number of intermediate product when computing A * B
hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize1, dBuffer1);


## hipsparseSpGEMMreuse_nnz()#

hipsparseStatus_t hipsparseSpGEMMreuse_nnz(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, hipsparseSpMatDescr_t matC, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, void *externalBuffer2, size_t *bufferSize3, void *externalBuffer3, size_t *bufferSize4, void *externalBuffer4)#

Description: Nnz calculation step of the sparse matrix sparse matrix product C’ = alpha * A * B + beta * C where C’, A, B, C are sparse matrices and C’ and C have the same sparsity pattern.

Example (See full example below)
// Determine size of second, third, and fourth user allocated buffer
hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB,
matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize2, NULL, &bufferSize3, NULL,
&bufferSize4, NULL);

hipMalloc((void**) &dBuffer2, bufferSize2);
hipMalloc((void**) &dBuffer3, bufferSize3);
hipMalloc((void**) &dBuffer4, bufferSize4);

// COmpute sparsity pattern of C matrix and store in temporary buffers
hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB,
matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize2, dBuffer2, &bufferSize3, dBuffer3,
&bufferSize4, dBuffer4);

// We can now free buffer 1 and 2
hipFree(dBuffer1);
hipFree(dBuffer2);


## hipsparseSpGEMMreuse_copy()#

hipsparseStatus_t hipsparseSpGEMMreuse_copy(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, hipsparseSpMatDescr_t matC, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize5, void *externalBuffer5)#

Description: Copy step of the sparse matrix sparse matrix product C’ = alpha * A * B + beta * C where C’, A, B, C are sparse matrices and C’ and C have the same sparsity pattern.

Example (See full example below)
// Get matrix C non-zero entries nnzC
int64_t rowsC, colsC, nnzC;
hipsparseSpMatGetSize(matC, &rowsC, &colsC, &nnzC);

// Allocate matrix C
hipMalloc((void**) &dcsr_col_indC, sizeof(int) * nnzC);
hipMalloc((void**) &dcsr_valC,  sizeof(float) * nnzC);

// Update matC with the new pointers. The C values array can be filled with data here
// which is used if beta != 0.
hipsparseCsrSetPointers(matC, dcsr_row_ptrC, dcsr_col_indC, dcsr_valC);

// Determine size of fifth user allocated buffer
hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize5, NULL);

hipMalloc((void**) &dBuffer5, bufferSize5);

// Copy data from temporary buffers to the newly allocated C matrix
hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize5, dBuffer5);

// We can now free buffer 3
hipFree(dBuffer3);


## hipsparseSpGEMMreuse_compute()#

hipsparseStatus_t hipsparseSpGEMMreuse_compute(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstSpMatDescr_t matB, const void *beta, hipsparseSpMatDescr_t matC, hipDataType computeType, hipsparseSpGEMMAlg_t alg, hipsparseSpGEMMDescr_t spgemmDescr)#

Description: Compute step of the sparse matrix sparse matrix product.

Full example
int m = 2;
int k = 2;
int n = 3;
int nnzA = 4;
int nnzB = 4;

float alpha{1.0f};
float beta{0.0f};

hipsparseOperation_t opA        = HIPSPARSE_OPERATION_NON_TRANSPOSE;
hipsparseOperation_t opB        = HIPSPARSE_OPERATION_NON_TRANSPOSE;
hipDataType computeType         = HIP_R_32F;

// A, B, and C are m×k, k×n, and m×n

// A
std::vector<int> hcsr_row_ptrA = {0, 2, 4};
std::vector<int> hcsr_col_indA = {0, 1, 0, 1};
std::vector<float> hcsr_valA = {1.0f, 2.0f, 3.0f, 4.0f};

// B
std::vector<int> hcsr_row_ptrB = {0, 2, 4};
std::vector<int> hcsr_col_indB = {1, 2, 0, 2};
std::vector<float> hcsr_valB = {5.0f , 6.0f, 7.0f, 8.0f};

// Device memory management: Allocate and copy A, B
int* dcsr_row_ptrA;
int* dcsr_col_indA;
float* dcsr_valA;
int* dcsr_row_ptrB;
int* dcsr_col_indB;
float* dcsr_valB;
int* dcsr_row_ptrC;
int* dcsr_col_indC;
float* dcsr_valC;
hipMalloc((void**)&dcsr_row_ptrA, (m + 1) * sizeof(int));
hipMalloc((void**)&dcsr_col_indA, nnzA * sizeof(int));
hipMalloc((void**)&dcsr_valA, nnzA * sizeof(float));
hipMalloc((void**)&dcsr_row_ptrB, (k + 1) * sizeof(int));
hipMalloc((void**)&dcsr_col_indB, nnzB * sizeof(int));
hipMalloc((void**)&dcsr_valB, nnzB * sizeof(float));
hipMalloc((void**)&dcsr_row_ptrC, (m + 1) * sizeof(int));

hipMemcpy(dcsr_row_ptrA, hcsr_row_ptrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice);
hipMemcpy(dcsr_col_indA, hcsr_col_indA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice);
hipMemcpy(dcsr_valA, hcsr_valA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice);
hipMemcpy(dcsr_row_ptrB, hcsr_row_ptrB.data(), (k + 1) * sizeof(int), hipMemcpyHostToDevice);
hipMemcpy(dcsr_col_indB, hcsr_col_indB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice);
hipMemcpy(dcsr_valB, hcsr_valB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice);

hipsparseHandle_t     handle = NULL;
hipsparseSpMatDescr_t matA, matB, matC;
void*  dBuffer1  = NULL;
void*  dBuffer2  = NULL;
void*  dBuffer3  = NULL;
void*  dBuffer4  = NULL;
void*  dBuffer5  = NULL;
size_t bufferSize1 = 0;
size_t bufferSize2 = 0;
size_t bufferSize3 = 0;
size_t bufferSize4 = 0;
size_t bufferSize5 = 0;

hipsparseCreate(&handle);

// Create sparse matrix A in CSR format
hipsparseCreateCsr(&matA, m, k, nnzA,
dcsr_row_ptrA, dcsr_col_indA, dcsr_valA,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);
hipsparseCreateCsr(&matB, k, n, nnzB,
dcsr_row_ptrB, dcsr_col_indB, dcsr_valB,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);
hipsparseCreateCsr(&matC, m, n, 0,
dcsr_row_ptrC, NULL, NULL,
HIPSPARSE_INDEX_32I, HIPSPARSE_INDEX_32I,
HIPSPARSE_INDEX_BASE_ZERO, HIP_R_32F);

hipsparseSpGEMMDescr_t spgemmDesc;
hipsparseSpGEMM_createDescr(&spgemmDesc);

// Determine size of first user allocated buffer
hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize1, NULL);

hipMalloc((void**) &dBuffer1, bufferSize1);

// Inspect the matrices A and B to determine the number of intermediate product in
// C = alpha * A * B
hipsparseSpGEMMreuse_workEstimation(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize1, dBuffer1);

// Determine size of second, third, and fourth user allocated buffer
hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB,
matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize2, NULL, &bufferSize3, NULL,
&bufferSize4, NULL);

hipMalloc((void**) &dBuffer2, bufferSize2);
hipMalloc((void**) &dBuffer3, bufferSize3);
hipMalloc((void**) &dBuffer4, bufferSize4);

// COmpute sparsity pattern of C matrix and store in temporary buffers
hipsparseSpGEMMreuse_nnz(handle, opA, opB, matA, matB,
matC, HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize2, dBuffer2, &bufferSize3, dBuffer3,
&bufferSize4, dBuffer4);

// We can now free buffer 1 and 2
hipFree(dBuffer1);
hipFree(dBuffer2);

// Get matrix C non-zero entries nnzC
int64_t rowsC, colsC, nnzC;
hipsparseSpMatGetSize(matC, &rowsC, &colsC, &nnzC);

// Allocate matrix C
hipMalloc((void**) &dcsr_col_indC, sizeof(int) * nnzC);
hipMalloc((void**) &dcsr_valC,  sizeof(float) * nnzC);

// Update matC with the new pointers. The C values array can be filled with data here
// which is used if beta != 0.
hipsparseCsrSetPointers(matC, dcsr_row_ptrC, dcsr_col_indC, dcsr_valC);

// Determine size of fifth user allocated buffer
hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize5, NULL);

hipMalloc((void**) &dBuffer5, bufferSize5);

// Copy data from temporary buffers to the newly allocated C matrix
hipsparseSpGEMMreuse_copy(handle, opA, opB, matA, matB, matC,
HIPSPARSE_SPGEMM_DEFAULT, spgemmDesc,
&bufferSize5, dBuffer5);

// We can now free buffer 3
hipFree(dBuffer3);

// Compute C' = alpha * A * B + beta * C
hipsparseSpGEMMreuse_compute(handle, opA, opB, &alpha, matA, matB, &beta,
matC, computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc);

// Copy results back to host if required using hipsparseCsrGet...

// Update dcsr_valA, dcsr_valB with new values
for(size_t i = 0; i < hcsr_valA.size(); i++){ hcsr_valA[i] = 1.0f; }
for(size_t i = 0; i < hcsr_valB.size(); i++){ hcsr_valB[i] = 2.0f; }

hipMemcpy(dcsr_valA, hcsr_valA.data(), sizeof(float) * nnzA, hipMemcpyHostToDevice);
hipMemcpy(dcsr_valB, hcsr_valB.data(), sizeof(float) * nnzB, hipMemcpyHostToDevice);

// Compute C' = alpha * A * B + beta * C again with the new A and B values
hipsparseSpGEMMreuse_compute(handle, opA, opB, &alpha, matA, matB, &beta,
matC, computeType, HIPSPARSE_SPGEMM_DEFAULT,
spgemmDesc);

// Copy results back to host if required using hipsparseCsrGet...

// Destroy matrix descriptors and handles
hipsparseSpGEMM_destroyDescr(spgemmDesc);
hipsparseDestroySpMat(matA);
hipsparseDestroySpMat(matB);
hipsparseDestroySpMat(matC);
hipsparseDestroy(handle);

// Free device memory
hipFree(dBuffer4);
hipFree(dBuffer5);
hipFree(dcsr_row_ptrA);
hipFree(dcsr_col_indA);
hipFree(dcsr_valA);
hipFree(dcsr_row_ptrB);
hipFree(dcsr_col_indB);
hipFree(dcsr_valB);
hipFree(dcsr_row_ptrC);
hipFree(dcsr_col_indC);
hipFree(dcsr_valC);


## hipsparseSDDMM_bufferSize()#

hipsparseStatus_t hipsparseSDDMM_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstDnMatDescr_t A, hipsparseConstDnMatDescr_t B, const void *beta, hipsparseSpMatDescr_t C, hipDataType computeType, hipsparseSDDMMAlg_t alg, size_t *bufferSize)#

Description: Calculate the buffer size required for the sampled dense dense matrix multiplication.

hipsparseSDDMM_bufferSize computes the required user allocated buffer size needed when computing the sampled dense dense matrix multiplication

## hipsparseSDDMM_preprocess()#

hipsparseStatus_t hipsparseSDDMM_preprocess(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstDnMatDescr_t A, hipsparseConstDnMatDescr_t B, const void *beta, hipsparseSpMatDescr_t C, hipDataType computeType, hipsparseSDDMMAlg_t alg, void *tempBuffer)#

Description: Preprocess step of the sampled dense dense matrix multiplication.

hipsparseSDDMM_preprocess performs the required preprocessing used when computing the sampled dense dense matrix multiplication

## hipsparseSDDMM()#

hipsparseStatus_t hipsparseSDDMM(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstDnMatDescr_t A, hipsparseConstDnMatDescr_t B, const void *beta, hipsparseSpMatDescr_t C, hipDataType computeType, hipsparseSDDMMAlg_t alg, void *tempBuffer)#

Description: Sampled Dense-Dense Matrix Multiplication.

hipsparseSDDMM multiplies the scalar $$\alpha$$ with the dense $$m \times k$$ matrix $$A$$, the dense $$k \times n$$ matrix $$B$$, filtered by the sparsity pattern of the $$m \times n$$ sparse matrix $$C$$ and adds the result to $$C$$ scaled by $$\beta$$. The final result is stored in the sparse $$m \times n$$ matrix $$C$$, such that

$C := \alpha ( opA(A) \cdot opB(B) ) \cdot spy(C) + \beta C,$
with
$\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if opA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if opA == HIPSPARSE_OPERATION_TRANSPOSE} \\ \end{array} \right. \end{split}$
,
$\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if opB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if opB == HIPSPARSE_OPERATION_TRANSPOSE} \\ \end{array} \right. \end{split}$
and
$\begin{split} spy(C)_ij = \left\{ \begin{array}{ll} 1 \text{if i == j}, & 0 \text{if i != j} \\ \end{array} \right. \end{split}$

## hipsparseSpSV_createDescr()#

hipsparseStatus_t hipsparseSpSV_createDescr(hipsparseSpSVDescr_t *descr)#

Description: Create sparse matrix triangular solve descriptor.

hipsparseSpGEMM_createDescr creates a sparse matrix triangular solve descriptor. It should be destroyed at the end using hipsparseSpSV_destroyDescr().

## hipsparseSpSV_destroyDescr()#

hipsparseStatus_t hipsparseSpSV_destroyDescr(hipsparseSpSVDescr_t descr)#

Description: Destroy sparse matrix triangular solve descriptor.

hipsparseSpSV_destroyDescr destroys a sparse matrix triangular solve descriptor and releases all resources used by the descriptor.

## hipsparseSpSV_bufferSize()#

hipsparseStatus_t hipsparseSpSV_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t x, const hipsparseDnVecDescr_t y, hipDataType computeType, hipsparseSpSVAlg_t alg, hipsparseSpSVDescr_t spsvDescr, size_t *bufferSize)#

Description: Buffer size step of solution of triangular linear system op(A) * Y = alpha * X, where A is a sparse matrix in CSR storage format, x and Y are dense vectors.

hipsparseSpSV_bufferSize computes the required user allocated buffer size needed when computing the solution of triangular linear system op(A) * Y = alpha * X, where A is a sparse matrix in CSR storage format, x and Y are dense vectors.

## hipsparseSpSV_analysis()#

hipsparseStatus_t hipsparseSpSV_analysis(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t x, const hipsparseDnVecDescr_t y, hipDataType computeType, hipsparseSpSVAlg_t alg, hipsparseSpSVDescr_t spsvDescr, void *externalBuffer)#

Description: Analysis step of solution of triangular linear system op(A) * Y = alpha * X, where A is a sparse matrix in CSR storage format, x and Y are dense vectors.

hipsparseSpSV_analysis performs the required analysis used when computing the solution of triangular linear system op(A) * Y = alpha * X, where A is a sparse matrix in CSR storage format, x and Y are dense vectors.

## hipsparseSpSV_solve()#

hipsparseStatus_t hipsparseSpSV_solve(hipsparseHandle_t handle, hipsparseOperation_t opA, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnVecDescr_t x, const hipsparseDnVecDescr_t y, hipDataType computeType, hipsparseSpSVAlg_t alg, hipsparseSpSVDescr_t spsvDescr)#

Description: Sparse triangular solve.

hipsparseSpSV_solve solves a sparse triangular linear system of a sparse $$m \times m$$ matrix, defined in CSR or COO storage format, a dense solution vector $$y$$ and the right-hand side $$x$$ that is multiplied by $$\alpha$$, such that

$op(A) \cdot y = \alpha \cdot x,$
with
$\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if trans == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if trans == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if trans == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}$

## hipsparseSpSM_createDescr()#

hipsparseStatus_t hipsparseSpSM_createDescr(hipsparseSpSMDescr_t *descr)#

Description: Create sparse matrix triangular solve with multiple rhs descriptor.

hipsparseSpSM_createDescr creates a sparse matrix triangular solve with multiple rhs descriptor. It should be destroyed at the end using hipsparseSpSM_destroyDescr().

## hipsparseSpSM_destroyDescr()#

hipsparseStatus_t hipsparseSpSM_destroyDescr(hipsparseSpSMDescr_t descr)#

Description: Destroy sparse matrix triangular solve with multiple rhs descriptor.

hipsparseSpSM_destroyDescr destroys a sparse matrix triangular solve with multiple rhs descriptor and releases all resources used by the descriptor.

## hipsparseSpSM_bufferSize()#

hipsparseStatus_t hipsparseSpSM_bufferSize(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpSMAlg_t alg, hipsparseSpSMDescr_t spsmDescr, size_t *bufferSize)#

Description: Buffer size step of solution of triangular linear system op(A) * C = alpha * op(B), where A is a sparse matrix in CSR storage format, B and C are dense matrices.

hipsparseSpSV_bufferSize computes the required user allocated buffer size needed when computing the solution of triangular linear system op(A) * C = alpha * op(B), where A is a sparse matrix in CSR storage format, B and C are dense matrices.

## hipsparseSpSM_analysis()#

hipsparseStatus_t hipsparseSpSM_analysis(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpSMAlg_t alg, hipsparseSpSMDescr_t spsmDescr, void *externalBuffer)#

Description: Analysis step of solution of triangular linear system op(A) * C = alpha * op(B), where A is a sparse matrix in CSR storage format, B and C are dense vectors.

hipsparseSpSV_analysis performs the required analysis used when computing the solution of triangular linear system op(A) * C = alpha * op(B), where A is a sparse matrix in CSR storage format, B and C are dense vectors.

## hipsparseSpSM_solve()#

hipsparseStatus_t hipsparseSpSM_solve(hipsparseHandle_t handle, hipsparseOperation_t opA, hipsparseOperation_t opB, const void *alpha, hipsparseConstSpMatDescr_t matA, hipsparseConstDnMatDescr_t matB, const hipsparseDnMatDescr_t matC, hipDataType computeType, hipsparseSpSMAlg_t alg, hipsparseSpSMDescr_t spsmDescr, void *externalBuffer)#

Description: Sparse triangular system solve.

hipsparseSpSM_solve solves a sparse triangular linear system of a sparse $$m \times m$$ matrix, defined in CSR or COO storage format, a dense solution matrix $$C$$ and the right-hand side $$B$$ that is multiplied by $$\alpha$$, such that

$op(A) \cdot C = \alpha \cdot op(B),$
with
$\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if trans == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if trans == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if trans == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}$
and
$\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if trans_B == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if trans_B == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if trans_B == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}$