Sparse Linear Algebra#

2025-10-17

14 min read time

Applies to Linux

template<typename T>
size_t raft::sparse::linalg::csr_add_calc_inds(
const int *a_ind,
const int *a_indptr,
const T *a_val,
int nnz1,
const int *b_ind,
const int *b_indptr,
const T *b_val,
int nnz2,
int m,
int *out_ind,
cudaStream_t stream
)#

Calculate the CSR row_ind array that would result from summing together two CSR matrices.

Parameters:
  • a_ind – left hand row_ind array

  • a_indptr – left hand index_ptr array

  • a_val – left hand data array

  • nnz1 – size of left hand index_ptr and val arrays

  • b_ind – right hand row_ind array

  • b_indptr – right hand index_ptr array

  • b_val – right hand data array

  • nnz2 – size of right hand index_ptr and val arrays

  • m – size of output array (number of rows in final matrix)

  • out_ind – output row_ind array

  • stream – cuda stream to use

template<typename T>
void raft::sparse::linalg::csr_add_finalize(
const int *a_ind,
const int *a_indptr,
const T *a_val,
int nnz1,
const int *b_ind,
const int *b_indptr,
const T *b_val,
int nnz2,
int m,
int *c_ind,
int *c_indptr,
T *c_val,
cudaStream_t stream
)#

Calculate the CSR row_ind array that would result from summing together two CSR matrices.

Parameters:
  • a_ind – left hand row_ind array

  • a_indptr – left hand index_ptr array

  • a_val – left hand data array

  • nnz1 – size of left hand index_ptr and val arrays

  • b_ind – right hand row_ind array

  • b_indptr – right hand index_ptr array

  • b_val – right hand data array

  • nnz2 – size of right hand index_ptr and val arrays

  • m – size of output array (number of rows in final matrix)

  • c_ind – output row_ind array

  • c_indptr – output ind_ptr array

  • c_val – output data array

  • stream – cuda stream to use

template<typename T = int>
void raft::sparse::linalg::coo_degree(
const T *rows,
int nnz,
T *results,
cudaStream_t stream
)#

Count the number of values for each row.

Template Parameters:

TPB_X – number of threads to use per block

Parameters:
  • rows – rows array of the COO matrix

  • nnz – size of the rows array

  • results – output result array

  • stream – cuda stream to use

template<typename T>
void raft::sparse::linalg::coo_degree(
COO<T> *in,
int *results,
cudaStream_t stream
)#

Count the number of values for each row.

Template Parameters:
  • TPB_X – number of threads to use per block

  • T – type name of underlying values array

Parameters:
  • in – input COO object for counting rows

  • results – output array with row counts (size=in->n_rows)

  • stream – cuda stream to use

template<typename T>
void raft::sparse::linalg::coo_degree_scalar(
const int *rows,
const T *vals,
int nnz,
T scalar,
int *results,
cudaStream_t stream = 0
)#

Count the number of values for each row that doesn’t match a particular scalar.

Template Parameters:
  • TPB_X – number of threads to use per block

  • T – the type name of the underlying value arrays

Parameters:
  • rows – Input COO row array

  • vals – Input COO val arrays

  • nnz – size of input COO arrays

  • scalar – scalar to match for counting rows

  • results – output row counts

  • stream – cuda stream to use

template<typename T>
void raft::sparse::linalg::coo_degree_scalar(
COO<T> *in,
T scalar,
int *results,
cudaStream_t stream
)#

Count the number of values for each row that doesn’t match a particular scalar.

Template Parameters:
  • TPB_X – number of threads to use per block

  • T – the type name of the underlying value arrays

Parameters:
  • in – Input COO array

  • scalar – scalar to match for counting rows

  • results – output row counts

  • stream – cuda stream to use

template<typename T>
void raft::sparse::linalg::coo_degree_nz(
const int *rows,
const T *vals,
int nnz,
int *results,
cudaStream_t stream
)#

Count the number of nonzeros for each row.

Template Parameters:
  • TPB_X – number of threads to use per block

  • T – the type name of the underlying value arrays

Parameters:
  • rows – Input COO row array

  • vals – Input COO val arrays

  • nnz – size of input COO arrays

  • results – output row counts

  • stream – cuda stream to use

template<typename T>
void raft::sparse::linalg::coo_degree_nz(
COO<T> *in,
int *results,
cudaStream_t stream
)#

Count the number of nonzero values for each row.

Template Parameters:
  • TPB_X – number of threads to use per block

  • T – the type name of the underlying value arrays

Parameters:
  • in – Input COO array

  • results – output row counts

  • stream – cuda stream to use

template<typename value_t, typename output_t, typename index_t, typename nnz_t, typename bitmap_t>
void raft::sparse::linalg::masked_matmul(
raft::resources const &handle,
raft::device_matrix_view<const value_t, index_t, raft::row_major> A,
raft::device_matrix_view<const value_t, index_t, raft::row_major> B,
raft::core::bitmap_view<bitmap_t, index_t> mask,
raft::device_csr_matrix_view<output_t, index_t, index_t, nnz_t> C,
std::optional<raft::host_scalar_view<output_t>> alpha = std::nullopt,
std::optional<raft::host_scalar_view<output_t>> beta = std::nullopt
)#

Performs a masked multiplication of dense matrices A and B, followed by an element-wise multiplication with the sparsity pattern defined by the mask, resulting in the computation C = alpha * ((A * B) ∘ spy(mask)) + beta * C.

This function multiplies two dense matrices A and B, and then applies an element-wise multiplication using the sparsity pattern provided by the mask. The result is scaled by alpha and added to beta times the original matrix C.

Template Parameters:
  • value_t – Data type of elements in the input matrices (e.g., half, float, double)

  • output_t – Data type of elements in the output matrices (e.g., float, double)

  • index_t – Type used for matrix indices

  • nnz_t – Type used for the number of non-zero entries in CSR format

  • bitmap_t – Type of the bitmap used for the mask

Parameters:
  • handle[in] RAFT handle for resource management

  • A[in] Input dense matrix (device_matrix_view) with shape [m, k]

  • B[in] Input dense matrix (device_matrix_view) with shape [n, k]

  • mask[in] Bitmap view representing the sparsity pattern (bitmap_view) with logical shape [m, n]. Each bit in the mask indicates whether the corresponding element pair in A and B is included (1) or masked out (0).

  • C[inout] Output sparse matrix in CSR format (device_csr_matrix_view) with dense shape [m, n]

  • alpha[in] Optional scalar multiplier for the product of A and B (default: 1.0 if std::nullopt)

  • beta[in] Optional scalar multiplier for the original matrix C (default: 0 if std::nullopt)

template<typename value_t, typename output_t, typename index_t, typename nnz_t, typename bitset_t>
void raft::sparse::linalg::masked_matmul(
raft::resources const &handle,
raft::device_matrix_view<const value_t, index_t, raft::row_major> A,
raft::device_matrix_view<const value_t, index_t, raft::row_major> B,
raft::core::bitset_view<bitset_t, index_t> mask,
raft::device_csr_matrix_view<output_t, index_t, index_t, nnz_t> C,
std::optional<raft::host_scalar_view<output_t>> alpha = std::nullopt,
std::optional<raft::host_scalar_view<output_t>> beta = std::nullopt
)#

Computes a sparse matrix product with a masked sparsity pattern and scaling.

This function computes the result of: C = alpha * ((A * B) ∘ spy(mask)) + beta * C where:

  • A and B are dense input matrices.

  • ”mask” defines the sparsity pattern for element-wise multiplication.

  • The result is scaled by alpha and added to beta times the original C.

Special behavior of the mask:

  • The bitset mask represents a single row of data, with its bits indicating whether each corresponding element in (A * B) is included (1) or masked out (0).

  • If the output CSR matrix C has multiple rows, the bitset is logically repeated across all rows of C. For example, if C has n_rows rows, the same bitset pattern is applied to all rows.

Template Parameters:
  • value_t – Data type of input matrix elements (e.g., half, float, double).

  • output_t – Data type of output matrix elements (e.g., float, double).

  • index_t – Type for matrix indices.

  • nnz_t – Type for non-zero entries in CSR format.

  • bitset_t – Type for the bitset mask.

Parameters:
  • handle[in] RAFT handle for managing resources.

  • A[in] Dense input matrix [m, k] (row-major).

  • B[in] Dense input matrix [n, k] (row-major).

  • mask[in] Bitmap view representing a single row [1, n], where each bit indicates if the corresponding element in (A * B) is included (1) or masked out (0). The pattern is repeated for all rows of C.

  • C[inout] Output sparse matrix in CSR format [m, n].

  • alpha[in] Scalar multiplier for (A * B) (default: 1.0 if std::nullopt).

  • beta[in] Scalar multiplier for the initial C (default: 0 if std::nullopt).

template<typename T>
void raft::sparse::linalg::csr_row_normalize_l1(
const int *ia,
const T *vals,
int nnz,
int m,
T *result,
cudaStream_t stream
)#

Perform L1 normalization on the rows of a given CSR-formatted sparse matrix.

Parameters:
  • ia – row_ind array

  • vals – data array

  • nnz – size of data array

  • m – size of row_ind array

  • result – l1 normalized data array

  • stream – cuda stream to use

template<typename T>
void raft::sparse::linalg::csr_row_normalize_max(
const int *ia,
const T *vals,
int nnz,
int m,
T *result,
cudaStream_t stream
)#

Perform L_inf normalization on a given CSR-formatted sparse matrix.

Parameters:
  • ia – row_ind array

  • vals – data array

  • nnz – size of data array

  • m – size of row_ind array

  • result – l1 normalized data array

  • stream – cuda stream to use

template<typename Type, typename IdxType = int, typename Lambda = raft::identity_op>
void raft::sparse::linalg::rowNormCsr(
raft::resources const &handle,
const IdxType *ia,
const Type *data,
const IdxType nnz,
const IdxType N,
Type *norm,
raft::linalg::NormType type,
Lambda fin_op = raft::identity_op()
)#

Compute row-wise norm of the input matrix and perform fin_op lambda.

Row-wise norm is useful while computing pairwise distance matrix, for example. This is used in many clustering algos like knn, kmeans, dbscan, etc…

Template Parameters:
  • Type – the data type

  • Lambda – device final lambda

  • IdxType – Integer type used to for addressing

Parameters:
  • handle – raft handle

  • ia – the input matrix row index array

  • data – the input matrix nnz data

  • nnz – number of elements in data

  • N – number of rows

  • norm – the output vector of row-wise norm, size [N]

  • type – the type of norm to be applied

  • fin_op – the final lambda op

template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyA, typename LayoutPolicyB, typename OutputType>
void raft::sparse::linalg::sddmm(
raft::resources const &handle,
raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyA> A,
raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyB> B,
raft::device_csr_matrix_view<OutputType, IndexType, IndexType, NZType> C,
const raft::linalg::Operation opA,
const raft::linalg::Operation opB,
raft::host_scalar_view<OutputType> alpha,
raft::host_scalar_view<OutputType> beta
)#

This function performs the multiplication of dense matrix A and dense matrix B, followed by an element-wise multiplication with the sparsity pattern of C. It computes the following equation: C = alpha · (opA(A) * opB(B) ∘ spy(C)) + beta · C where A,B are device matrix views and C is a CSR device matrix view.

Template Parameters:
  • ValueType – Data type of input/output matrices (float/double/half)

  • IndexType – Type of C

  • NZType – Type of C

  • LayoutPolicyA – layout of A

  • LayoutPolicyB – layout of B

  • OutputType – output type, equal to ValueType by default

Parameters:
template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyY, typename LayoutPolicyZ>
void raft::sparse::linalg::spmm(
raft::resources const &handle,
const bool trans_x,
const bool trans_y,
const ValueType *alpha,
raft::device_csr_matrix_view<const ValueType, int, int, NZType> x,
raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyY> y,
const ValueType *beta,
raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ> z
)#

SPMM function designed for handling all CSR * DENSE combinations of operand layouts for cuSparse. It computes the following equation: Z = alpha . X * Y + beta . Z where X is a CSR device matrix view and Y,Z are device matrix views.

Template Parameters:
  • ValueType – Data type of input/output matrices (float/double)

  • IndexType – Type of Y and Z

  • NZType – Type of X

  • LayoutPolicyY – layout of Y

  • LayoutPolicyZ – layout of Z

Parameters:
template<typename T, typename Lambda>
void raft::sparse::linalg::coo_symmetrize(
COO<T> *in,
COO<T> *out,
Lambda reduction_op,
cudaStream_t stream
)#

takes a COO matrix which may not be symmetric and symmetrizes it, running a custom reduction function against the each value and its transposed value.

Parameters:
  • in – Input COO matrix

  • out – Output symmetrized COO matrix

  • reduction_op – a custom reduction function

  • stream – cuda stream to use

template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_find_size (const value_t __restrict__ *data, const value_idx __restrict__ *indices, const value_idx n, const int k, value_idx __restrict__ *row_sizes, value_idx __restrict__ *row_sizes2)

Find how much space needed in each row. We look through all datapoints and increment the count for each row.

TODO: This isn’t generalized. Remove in place of symmetrize()

Parameters:
  • data – Input knn distances(n, k)

  • indices – Input knn indices(n, k)

  • n – Number of rows

  • k – Number of n_neighbors

  • row_sizes – Input empty row sum 1 array(n)

  • row_sizes2 – Input empty row sum 2 array(n) for faster reduction

template<typename value_idx> RAFT_KERNEL raft::sparse::linalg::reduce_find_size (const value_idx n, const int k, value_idx __restrict__ *row_sizes, const value_idx __restrict__ *row_sizes2)

Reduce sum(row_sizes) + k Reduction for symmetric_find_size kernel. Allows algo to be faster.

TODO: This isn’t generalized. Remove in place of symmetrize()

Parameters:
  • n – Number of rows

  • k – Number of n_neighbors

  • row_sizes – Input row sum 1 array(n)

  • row_sizes2 – Input row sum 2 array(n) for faster reduction

template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_sum (value_idx *__restrict__ edges, const value_t *__restrict__ data, const value_idx *__restrict__ indices, value_t *__restrict__ VAL, value_idx *__restrict__ COL, value_idx *__restrict__ ROW, const value_idx n, const int k)

Perform data + data.T operation. Can only run once row_sizes from the CSR matrix of data + data.T has been determined.

TODO: This isn’t generalized. Remove in place of symmetrize()

Parameters:
  • edges – Input row sum array(n) after reduction

  • data – Input knn distances(n, k)

  • indices – Input knn indices(n, k)

  • VAL – Output values for data + data.T

  • COL – Output column indices for data + data.T

  • ROW – Output row indices for data + data.T

  • n – Number of rows

  • k – Number of n_neighbors

template<typename value_idx = int64_t, typename value_t = float, int TPB_X = 32, int TPB_Y = 32> void raft::sparse::linalg::from_knn_symmetrize_matrix (const value_idx *__restrict__ knn_indices, const value_t *__restrict__ knn_dists, const value_idx n, const int k, COO< value_t, value_idx > *out, cudaStream_t stream)

Perform data + data.T on raw KNN data. The following steps are invoked: (1) Find how much space needed in each row (2) Compute final space needed (n*k + sum(row_sizes)) == 2*n*k (3) Allocate new space (4) Prepare edges for each new row (5) Perform final data + data.T operation (6) Return summed up VAL, COL, ROW.

TODO: This isn’t generalized. Remove in place of symmetrize()

Parameters:
  • knn_indices – Input knn distances(n, k)

  • knn_dists – Input knn indices(n, k)

  • n – Number of rows

  • k – Number of n_neighbors

  • out – Output COO Matrix class

  • stream – Input cuda stream

template<typename value_idx, typename value_t>
void raft::sparse::linalg::symmetrize(
raft::resources const &handle,
const value_idx *rows,
const value_idx *cols,
const value_t *vals,
size_t m,
size_t n,
size_t nnz,
raft::sparse::COO<value_t, value_idx> &out
)#

Symmetrizes a COO matrix

template<typename value_idx, typename value_t>
void raft::sparse::linalg::csr_transpose(
raft::resources const &handle,
const value_idx *csr_indptr,
const value_idx *csr_indices,
const value_t *csr_data,
value_idx *csc_indptr,
value_idx *csc_indices,
value_t *csc_data,
value_idx csr_nrows,
value_idx csr_ncols,
value_idx nnz,
cudaStream_t stream
)#

Transpose a set of CSR arrays into a set of CSC arrays.

Template Parameters:
  • value_idx – : data type of the CSR index arrays

  • value_t – : data type of the CSR data array

Parameters:
  • handle[in] : used for invoking cusparse

  • csr_indptr[in] : CSR row index array

  • csr_indices[in] : CSR column indices array

  • csr_data[in] : CSR data array

  • csc_indptr[out] : CSC row index array

  • csc_indices[out] : CSC column indices array

  • csc_data[out] : CSC data array

  • csr_nrows[in] : Number of rows in CSR

  • csr_ncols[in] : Number of columns in CSR

  • nnz[in] : Number of nonzeros of CSR

  • stream[in] : Cuda stream for ordering events