BLAS Routines#

2025-10-17

6 min read time

Applies to Linux

axpy#

#include <raft/linalg/axpy.cuh>

namespace raft::linalg

template<typename ElementType, typename IndexType, typename InLayoutPolicy, typename OutLayoutPolicy, typename ScalarIdxType>
void axpy(
raft::resources const &handle,
raft::device_scalar_view<const ElementType, ScalarIdxType> alpha,
raft::device_vector_view<const ElementType, IndexType, InLayoutPolicy> x,
raft::device_vector_view<ElementType, IndexType, OutLayoutPolicy> y
)#

axpy function. It computes the following equation: y = alpha * x + y.

Parameters:
  • handle[in] raft::resources

  • alpha[in] raft::device_scalar_view

  • x[in] Input vector

  • y[inout] Output vector

dot#

#include <raft/linalg/dot.cuh>

namespace raft::linalg

template<typename T>
cublasStatus_t cublasdot(
cublasHandle_t handle,
int n,
const T *x,
int incx,
const T *y,
int incy,
T *result,
cudaStream_t stream
)#
template<>
inline cublasStatus_t cublasdot(
cublasHandle_t handle,
int n,
const float *x,
int incx,
const float *y,
int incy,
float *result,
cudaStream_t stream
)#
template<>
inline cublasStatus_t cublasdot(
cublasHandle_t handle,
int n,
const double *x,
int incx,
const double *y,
int incy,
double *result,
cudaStream_t stream
)#
template<typename ElementType, typename IndexType, typename ScalarIndexType, typename LayoutPolicy1, typename LayoutPolicy2>
void dot(
raft::resources const &handle,
raft::device_vector_view<const ElementType, IndexType, LayoutPolicy1> x,
raft::device_vector_view<const ElementType, IndexType, LayoutPolicy2> y,
raft::device_scalar_view<ElementType, ScalarIndexType> out
)#

Computes the dot product of two vectors.

Parameters:
  • handle[in] raft::resources

  • x[in] First input vector

  • y[in] Second input vector

  • out[out] The output dot product between the x and y vectors.

gemm#

#include <raft/linalg/gemm.hpp>

namespace raft::linalg

template<typename T>
cublasStatus_t cublasgemm(
cublasHandle_t handle,
cublasOperation_t transA,
cublasOperation_t transB,
int m,
int n,
int k,
const T *alfa,
const T *A,
int lda,
const T *B,
int ldb,
const T *beta,
T *C,
int ldc,
cudaStream_t stream
)#
template<>
inline cublasStatus_t cublasgemm(
cublasHandle_t handle,
cublasOperation_t transA,
cublasOperation_t transB,
int m,
int n,
int k,
const float *alfa,
const float *A,
int lda,
const float *B,
int ldb,
const float *beta,
float *C,
int ldc,
cudaStream_t stream
)#
template<>
inline cublasStatus_t cublasgemm(
cublasHandle_t handle,
cublasOperation_t transA,
cublasOperation_t transB,
int m,
int n,
int k,
const double *alfa,
const double *A,
int lda,
const double *B,
int ldb,
const double *beta,
double *C,
int ldc,
cudaStream_t stream
)#
template<typename ValueType, typename IndexType, typename LayoutPolicyX, typename LayoutPolicyY, typename LayoutPolicyZ, typename ScalarIdxType = std::uint32_t, typename ScalarViewType = raft::host_scalar_view<ValueType, ScalarIdxType>, typename = std::enable_if_t<std::disjunction_v<std::is_same<ScalarViewType, raft::host_scalar_view<ValueType, ScalarIdxType>>, std::is_same<ScalarViewType, raft::device_scalar_view<ValueType, ScalarIdxType>>>>>
void gemm(
raft::resources const &res,
raft::device_matrix_view<ValueType, IndexType, LayoutPolicyX> x,
raft::device_matrix_view<ValueType, IndexType, LayoutPolicyY> y,
raft::device_matrix_view<ValueType, IndexType, LayoutPolicyZ> z,
std::optional<ScalarViewType> alpha = std::nullopt,
std::optional<ScalarViewType> beta = std::nullopt
)#

GEMM function designed for handling all possible combinations of operand layouts (raft::row_major or raft::col_major), with the scalars alpha and beta residing on either the host or the device. It computes the following equation: Z = alpha * (X * Y) + beta * Z, where X * Y is the matrix product. If alpha is not provided, it is assumed to be 1.0. If beta is not provided, it is assumed to be 0.0.

Template Parameters:
  • ValueType – Data type of input/output matrices (float/double)

  • IndexType – Type of index

  • LayoutPolicyX – layout of X

  • LayoutPolicyY – layout of Y

  • LayoutPolicyZ – layout of Z

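Parameters:
  • res[in] raft::resources

  • x[in] Input matrix X (raft::device_matrix_view)

  • y[in] Input matrix Y (raft::device_matrix_view)

  • z[inout] Output matrix Z (raft::device_matrix_view); its existing contents are scaled by beta

  • alpha[in] Optional raft::host_scalar_view or raft::device_scalar_view; assumed to be 1.0 if not provided

  • beta[in] Optional raft::host_scalar_view or raft::device_scalar_view; assumed to be 0.0 if not provided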

gemv#

#include <raft/linalg/gemv.cuh>

namespace raft::linalg

template<typename T>
cublasStatus_t cublasgemv(
cublasHandle_t handle,
cublasOperation_t transA,
int m,
int n,
const T *alfa,
const T *A,
int lda,
const T *x,
int incx,
const T *beta,
T *y,
int incy,
cudaStream_t stream
)#
template<>
inline cublasStatus_t cublasgemv(
cublasHandle_t handle,
cublasOperation_t transA,
int m,
int n,
const float *alfa,
const float *A,
int lda,
const float *x,
int incx,
const float *beta,
float *y,
int incy,
cudaStream_t stream
)#
template<>
inline cublasStatus_t cublasgemv(
cublasHandle_t handle,
cublasOperation_t transA,
int m,
int n,
const double *alfa,
const double *A,
int lda,
const double *x,
int incx,
const double *beta,
double *y,
int incy,
cudaStream_t stream
)#
template<typename ValueType, typename IndexType, typename LayoutPolicy, typename ScalarIdxType = std::uint32_t, typename ScalarViewType = raft::host_scalar_view<ValueType, ScalarIdxType>, typename = std::enable_if_t<std::disjunction_v<std::is_same<ScalarViewType, raft::host_scalar_view<ValueType, ScalarIdxType>>, std::is_same<ScalarViewType, raft::device_scalar_view<ValueType, ScalarIdxType>>>>>
void gemv(
raft::resources const &handle,
raft::device_matrix_view<const ValueType, IndexType, LayoutPolicy> A,
raft::device_vector_view<const ValueType, IndexType> x,
raft::device_vector_view<ValueType, IndexType> y,
std::optional<ScalarViewType> alpha = std::nullopt,
std::optional<ScalarViewType> beta = std::nullopt
)#

GEMV function designed for raft::col_major layout for A. It computes y = alpha * op(A) * x + beta * y, where the length of y is the number of rows in A and the length of x is the number of columns in A. If the layout for A is provided as raft::row_major, then a transpose of A is used in the computation, where the length of y is the number of columns in A and the length of x is the number of rows in A. If alpha is not provided, it is assumed to be 1.0. If beta is not provided, it is assumed to be 0.0.

Template Parameters:
  • ValueType – Data type of input/output matrices (float/double)

  • IndexType – Type of index

  • LayoutPolicy – layout of A (raft::col_major or raft::row_major)

Parameters: