Sparse Linear Algebra#
2025-10-17
14 min read time
-
template<typename T>
size_t raft::sparse::linalg::csr_add_calc_inds( - const int *a_ind,
- const int *a_indptr,
- const T *a_val,
- int nnz1,
- const int *b_ind,
- const int *b_indptr,
- const T *b_val,
- int nnz2,
- int m,
- int *out_ind,
- cudaStream_t stream
Calculate the CSR row_ind array that would result from summing together two CSR matrices.
- Parameters:
a_ind – left hand row_ind array
a_indptr – left hand index_ptr array
a_val – left hand data array
nnz1 – size of left hand index_ptr and val arrays
b_ind – right hand row_ind array
b_indptr – right hand index_ptr array
b_val – right hand data array
nnz2 – size of right hand index_ptr and val arrays
m – size of output array (number of rows in final matrix)
out_ind – output row_ind array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_add_finalize( - const int *a_ind,
- const int *a_indptr,
- const T *a_val,
- int nnz1,
- const int *b_ind,
- const int *b_indptr,
- const T *b_val,
- int nnz2,
- int m,
- int *c_ind,
- int *c_indptr,
- T *c_val,
- cudaStream_t stream
Calculate the CSR row_ind array that would result from summing together two CSR matrices.
- Parameters:
a_ind – left hand row_ind array
a_indptr – left hand index_ptr array
a_val – left hand data array
nnz1 – size of left hand index_ptr and val arrays
b_ind – right hand row_ind array
b_indptr – right hand index_ptr array
b_val – right hand data array
nnz2 – size of right hand index_ptr and val arrays
m – size of output array (number of rows in final matrix)
c_ind – output row_ind array
c_indptr – output ind_ptr array
c_val – output data array
stream – cuda stream to use
-
template<typename T = int>
void raft::sparse::linalg::coo_degree(
)# Count the number of values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
- Parameters:
rows – rows array of the COO matrix
nnz – size of the rows array
results – output result array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree( - COO<T> *in,
- int *results,
- cudaStream_t stream
Count the number of values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – type name of underlying values array
- Parameters:
in – input COO object for counting rows
results – output array with row counts (size=in->n_rows)
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_scalar(
)# Count the number of values for each row that doesn’t match a particular scalar.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
rows – Input COO row array
vals – Input COO val arrays
nnz – size of input COO arrays
scalar – scalar to match for counting rows
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_scalar(
)# Count the number of values for each row that doesn’t match a particular scalar.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
in – Input COO array
scalar – scalar to match for counting rows
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_nz( - const int *rows,
- const T *vals,
- int nnz,
- int *results,
- cudaStream_t stream
Count the number of nonzeros for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
rows – Input COO row array
vals – Input COO val arrays
nnz – size of input COO arrays
results – output row counts
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::coo_degree_nz( - COO<T> *in,
- int *results,
- cudaStream_t stream
Count the number of nonzero values for each row.
- Template Parameters:
TPB_X – number of threads to use per block
T – the type name of the underlying value arrays
- Parameters:
in – Input COO array
results – output row counts
stream – cuda stream to use
-
template<typename value_t, typename output_t, typename index_t, typename nnz_t, typename bitmap_t>
void raft::sparse::linalg::masked_matmul( - raft::resources const &handle,
- raft::device_matrix_view<const value_t, index_t, raft::row_major> A,
- raft::device_matrix_view<const value_t, index_t, raft::row_major> B,
- raft::core::bitmap_view<bitmap_t, index_t> mask,
- raft::device_csr_matrix_view<output_t, index_t, index_t, nnz_t> C,
- std::optional<raft::host_scalar_view<output_t>> alpha = std::nullopt,
- std::optional<raft::host_scalar_view<output_t>> beta = std::nullopt
Performs a masked multiplication of dense matrices A and B, followed by an element-wise multiplication with the sparsity pattern defined by the mask, resulting in the computation C = alpha * ((A * B) ∘ spy(mask)) + beta * C.
This function multiplies two dense matrices A and B, and then applies an element-wise multiplication using the sparsity pattern provided by the mask. The result is scaled by alpha and added to beta times the original matrix C.
- Template Parameters:
value_t – Data type of elements in the input matrices (e.g., half, float, double)
output_t – Data type of elements in the output matrices (e.g., float, double)
index_t – Type used for matrix indices
nnz_t – Type used for the number of non-zero entries in CSR format
bitmap_t – Type of the bitmap used for the mask
- Parameters:
handle – [in] RAFT handle for resource management
A – [in] Input dense matrix (device_matrix_view) with shape [m, k]
B – [in] Input dense matrix (device_matrix_view) with shape [n, k]
mask – [in] Bitmap view representing the sparsity pattern (bitmap_view) with logical shape [m, n]. Each bit in the mask indicates whether the corresponding element pair in A and B is included (1) or masked out (0).
C – [inout] Output sparse matrix in CSR format (device_csr_matrix_view) with dense shape [m, n]
alpha – [in] Optional scalar multiplier for the product of A and B (default: 1.0 if std::nullopt)
beta – [in] Optional scalar multiplier for the original matrix C (default: 0 if std::nullopt)
-
template<typename value_t, typename output_t, typename index_t, typename nnz_t, typename bitset_t>
void raft::sparse::linalg::masked_matmul( - raft::resources const &handle,
- raft::device_matrix_view<const value_t, index_t, raft::row_major> A,
- raft::device_matrix_view<const value_t, index_t, raft::row_major> B,
- raft::core::bitset_view<bitset_t, index_t> mask,
- raft::device_csr_matrix_view<output_t, index_t, index_t, nnz_t> C,
- std::optional<raft::host_scalar_view<output_t>> alpha = std::nullopt,
- std::optional<raft::host_scalar_view<output_t>> beta = std::nullopt
Computes a sparse matrix product with a masked sparsity pattern and scaling.
This function computes the result of: C = alpha * ((A * B) ∘ spy(mask)) + beta * C where:
A and B are dense input matrices.
”mask” defines the sparsity pattern for element-wise multiplication.
The result is scaled by alpha and added to beta times the original C.
Special behavior of the mask:
The
bitsetmask represents a single row of data, with its bits indicating whether each corresponding element in (A * B) is included (1) or masked out (0).If the output CSR matrix
Chas multiple rows, thebitsetis logically repeated across all rows ofC. For example, ifChasn_rowsrows, the samebitsetpattern is applied to all rows.
- Template Parameters:
value_t – Data type of input matrix elements (e.g., half, float, double).
output_t – Data type of output matrix elements (e.g., float, double).
index_t – Type for matrix indices.
nnz_t – Type for non-zero entries in CSR format.
bitset_t – Type for the bitset mask.
- Parameters:
handle – [in] RAFT handle for managing resources.
A – [in] Dense input matrix [m, k] (row-major).
B – [in] Dense input matrix [n, k] (row-major).
mask – [in] Bitmap view representing a single row [1, n], where each bit indicates if the corresponding element in (A * B) is included (1) or masked out (0). The pattern is repeated for all rows of
C.C – [inout] Output sparse matrix in CSR format [m, n].
alpha – [in] Scalar multiplier for (A * B) (default: 1.0 if std::nullopt).
beta – [in] Scalar multiplier for the initial C (default: 0 if std::nullopt).
-
template<typename T>
void raft::sparse::linalg::csr_row_normalize_l1(
)# Perform L1 normalization on the rows of a given CSR-formatted sparse matrix.
- Parameters:
ia – row_ind array
vals – data array
nnz – size of data array
m – size of row_ind array
result – l1 normalized data array
stream – cuda stream to use
-
template<typename T>
void raft::sparse::linalg::csr_row_normalize_max(
)# Perform L_inf normalization on a given CSR-formatted sparse matrix.
- Parameters:
ia – row_ind array
vals – data array
nnz – size of data array
m – size of row_ind array
result – l1 normalized data array
stream – cuda stream to use
-
template<typename Type, typename IdxType = int, typename Lambda = raft::identity_op>
void raft::sparse::linalg::rowNormCsr( - raft::resources const &handle,
- const IdxType *ia,
- const Type *data,
- const IdxType nnz,
- const IdxType N,
- Type *norm,
- raft::linalg::NormType type,
- Lambda fin_op = raft::identity_op()
Compute row-wise norm of the input matrix and perform fin_op lambda.
Row-wise norm is useful while computing pairwise distance matrix, for example. This is used in many clustering algos like knn, kmeans, dbscan, etc…
- Template Parameters:
Type – the data type
Lambda – device final lambda
IdxType – Integer type used to for addressing
- Parameters:
handle – raft handle
ia – the input matrix row index array
data – the input matrix nnz data
nnz – number of elements in data
N – number of rows
norm – the output vector of row-wise norm, size [N]
type – the type of norm to be applied
fin_op – the final lambda op
-
template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyA, typename LayoutPolicyB, typename OutputType>
void raft::sparse::linalg::sddmm( - raft::resources const &handle,
- raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyA> A,
- raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyB> B,
- raft::device_csr_matrix_view<OutputType, IndexType, IndexType, NZType> C,
- const raft::linalg::Operation opA,
- const raft::linalg::Operation opB,
- raft::host_scalar_view<OutputType> alpha,
- raft::host_scalar_view<OutputType> beta
This function performs the multiplication of dense matrix A and dense matrix B, followed by an element-wise multiplication with the sparsity pattern of C. It computes the following equation: C = alpha · (opA(A) * opB(B) ∘ spy(C)) + beta · C where A,B are device matrix views and C is a CSR device matrix view.
- Template Parameters:
ValueType – Data type of input/output matrices (float/double/half)
IndexType – Type of C
NZType – Type of C
LayoutPolicyA – layout of A
LayoutPolicyB – layout of B
OutputType – output type, equal to ValueType by default
- Parameters:
handle – [in] raft handle
A – [in] input raft::device_matrix_view
B – [in] input raft::device_matrix_view
C – [inout] output raft::device_csr_matrix_view
opA – [in] input Operation op(A)
opB – [in] input Operation op(B)
alpha – [in] input raft::host_scalar_view
beta – [in] input raft::host_scalar_view
-
template<typename ValueType, typename IndexType, typename NZType, typename LayoutPolicyY, typename LayoutPolicyZ>
void raft::sparse::linalg::spmm( - raft::resources const &handle,
- const bool trans_x,
- const bool trans_y,
- const ValueType *alpha,
- raft::device_csr_matrix_view<const ValueType, int, int, NZType> x,
- raft::device_matrix_view<const ValueType, IndexType, LayoutPolicyY> y,
- const ValueType *beta,
- raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ> z
SPMM function designed for handling all CSR * DENSE combinations of operand layouts for cuSparse. It computes the following equation: Z = alpha . X * Y + beta . Z where X is a CSR device matrix view and Y,Z are device matrix views.
- Template Parameters:
ValueType – Data type of input/output matrices (float/double)
IndexType – Type of Y and Z
NZType – Type of X
LayoutPolicyY – layout of Y
LayoutPolicyZ – layout of Z
- Parameters:
handle – [in] raft handle
trans_x – [in] transpose operation for X
trans_y – [in] transpose operation for Y
alpha – [in] scalar
x – [in] input raft::device_csr_matrix_view
y – [in] input raft::device_matrix_view
beta – [in] scalar
z – [inout] input-output raft::device_matrix_view
-
template<typename T, typename Lambda>
void raft::sparse::linalg::coo_symmetrize(
)# takes a COO matrix which may not be symmetric and symmetrizes it, running a custom reduction function against the each value and its transposed value.
- Parameters:
in – Input COO matrix
out – Output symmetrized COO matrix
reduction_op – a custom reduction function
stream – cuda stream to use
- template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_find_size (const value_t __restrict__ *data, const value_idx __restrict__ *indices, const value_idx n, const int k, value_idx __restrict__ *row_sizes, value_idx __restrict__ *row_sizes2)
Find how much space needed in each row. We look through all datapoints and increment the count for each row.
TODO: This isn’t generalized. Remove in place of
symmetrize()- Parameters:
data – Input knn distances(n, k)
indices – Input knn indices(n, k)
n – Number of rows
k – Number of n_neighbors
row_sizes – Input empty row sum 1 array(n)
row_sizes2 – Input empty row sum 2 array(n) for faster reduction
- template<typename value_idx> RAFT_KERNEL raft::sparse::linalg::reduce_find_size (const value_idx n, const int k, value_idx __restrict__ *row_sizes, const value_idx __restrict__ *row_sizes2)
Reduce sum(row_sizes) + k Reduction for symmetric_find_size kernel. Allows algo to be faster.
TODO: This isn’t generalized. Remove in place of
symmetrize()- Parameters:
n – Number of rows
k – Number of n_neighbors
row_sizes – Input row sum 1 array(n)
row_sizes2 – Input row sum 2 array(n) for faster reduction
- template<typename value_idx = int64_t, typename value_t = float> RAFT_KERNEL raft::sparse::linalg::symmetric_sum (value_idx *__restrict__ edges, const value_t *__restrict__ data, const value_idx *__restrict__ indices, value_t *__restrict__ VAL, value_idx *__restrict__ COL, value_idx *__restrict__ ROW, const value_idx n, const int k)
Perform data + data.T operation. Can only run once row_sizes from the CSR matrix of data + data.T has been determined.
TODO: This isn’t generalized. Remove in place of
symmetrize()- Parameters:
edges – Input row sum array(n) after reduction
data – Input knn distances(n, k)
indices – Input knn indices(n, k)
VAL – Output values for data + data.T
COL – Output column indices for data + data.T
ROW – Output row indices for data + data.T
n – Number of rows
k – Number of n_neighbors
- template<typename value_idx = int64_t, typename value_t = float, int TPB_X = 32, int TPB_Y = 32> void raft::sparse::linalg::from_knn_symmetrize_matrix (const value_idx *__restrict__ knn_indices, const value_t *__restrict__ knn_dists, const value_idx n, const int k, COO< value_t, value_idx > *out, cudaStream_t stream)
Perform data + data.T on raw KNN data. The following steps are invoked: (1) Find how much space needed in each row (2) Compute final space needed (n*k + sum(row_sizes)) == 2*n*k (3) Allocate new space (4) Prepare edges for each new row (5) Perform final data + data.T operation (6) Return summed up VAL, COL, ROW.
TODO: This isn’t generalized. Remove in place of
symmetrize()- Parameters:
knn_indices – Input knn distances(n, k)
knn_dists – Input knn indices(n, k)
n – Number of rows
k – Number of n_neighbors
out – Output COO Matrix class
stream – Input cuda stream
-
template<typename value_idx, typename value_t>
void raft::sparse::linalg::symmetrize( - raft::resources const &handle,
- const value_idx *rows,
- const value_idx *cols,
- const value_t *vals,
- size_t m,
- size_t n,
- size_t nnz,
- raft::sparse::COO<value_t, value_idx> &out
Symmetrizes a COO matrix
-
template<typename value_idx, typename value_t>
void raft::sparse::linalg::csr_transpose( - raft::resources const &handle,
- const value_idx *csr_indptr,
- const value_idx *csr_indices,
- const value_t *csr_data,
- value_idx *csc_indptr,
- value_idx *csc_indices,
- value_t *csc_data,
- value_idx csr_nrows,
- value_idx csr_ncols,
- value_idx nnz,
- cudaStream_t stream
Transpose a set of CSR arrays into a set of CSC arrays.
- Template Parameters:
value_idx – : data type of the CSR index arrays
value_t – : data type of the CSR data array
- Parameters:
handle – [in] : used for invoking cusparse
csr_indptr – [in] : CSR row index array
csr_indices – [in] : CSR column indices array
csr_data – [in] : CSR data array
csc_indptr – [out] : CSC row index array
csc_indices – [out] : CSC column indices array
csc_data – [out] : CSC data array
csr_nrows – [in] : Number of rows in CSR
csr_ncols – [in] : Number of columns in CSR
nnz – [in] : Number of nonzeros of CSR
stream – [in] : Cuda stream for ordering events