Sparse level 3 functions#
This module contains all sparse level 3 routines.
The sparse level 3 routines describe operations between a matrix in sparse format and multiple vectors in dense format that can also be seen as a dense matrix.
hipsparseXbsrmm()#
-
hipsparseStatus_t hipsparseSbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const float *alpha, const hipsparseMatDescr_t descrA, const float *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const float *B, int ldb, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const double *alpha, const hipsparseMatDescr_t descrA, const double *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const double *B, int ldb, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZbsrmm(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transB, int mb, int n, int kb, int nnzb, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Sparse matrix dense matrix multiplication using BSR storage format.
hipsparseXbsrmm multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(A\), defined in BSR storage format, and the column-oriented dense \(k \times n\) matrix \(B\) and adds the result to the column-oriented dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that
\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ \end{array} \right. \end{split}\]
and
\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ \end{array} \right. \end{split}\]
and where \(k = blockDim \times kb\) and \(m = blockDim \times mb\).
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only transA == HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
dirA – [in] the storage format of the blocks. Can be HIPSPARSE_DIRECTION_ROW or HIPSPARSE_DIRECTION_COLUMN.
transA – [in] matrix \(A\) operation type. Currently, only HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.
transB – [in] matrix \(B\) operation type. Currently, only HIPSPARSE_OPERATION_NON_TRANSPOSE and HIPSPARSE_OPERATION_TRANSPOSE are supported.
mb – [in] number of block rows of the sparse BSR matrix \(A\). Must be non-negative.
n – [in] number of columns of the dense matrix \(op(B)\) and \(C\). Must be non-negative.
kb – [in] number of block columns of the sparse BSR matrix \(A\). Must be non-negative.
nnzb – [in] number of non-zero blocks of the sparse BSR matrix \(A\). Must be non-negative.
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse BSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
bsrValA – [in] array of nnzb*blockDim*blockDim elements of the sparse BSR matrix \(A\).
bsrRowPtrA – [in] array of mb+1 elements that point to the start of every block row of the sparse BSR matrix \(A\).
bsrColIndA – [in] array of nnzb elements containing the block column indices of the sparse BSR matrix \(A\).
blockDim – [in] size of the blocks in the sparse BSR matrix. Must be positive.
B – [in] array of dimension ldb*n ( \(op(B) == B\)), ldb*k otherwise.
ldb – [in] leading dimension of \(B\), must be at least \(\max{(1, k)}\) ( \(op(B) == B\)) where k=blockDim*kb, \(\max{(1, n)}\) otherwise.
beta – [in] scalar \(\beta\).
C – [inout] array of dimension ldc*n.
ldc – [in] leading dimension of \(C\), must be at least \(\max{(1, m)}\) ( \(op(A) == A\)) where m=blockDim*mb, \(\max{(1, k)}\) where k=blockDim*kb otherwise.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_NOT_INITIALIZED – handle is not initialized.
HIPSPARSE_STATUS_INVALID_VALUE – handle, descrA, alpha or beta is nullptr, mb, n, kb or nnzb is negative, ldb or ldc is invalid, blockDim is less than or equal to zero, or bsrValA, bsrRowPtrA, bsrColIndA, B or C is nullptr.
HIPSPARSE_STATUS_ARCH_MISMATCH – the device is not supported.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA is not HIPSPARSE_OPERATION_NON_TRANSPOSE, transB is HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE, or hipsparseMatrixType_t is not HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 // hipSPARSE handle
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // 1 2 0 3 0 0
8 // A = 0 4 5 0 0 0
9 // 0 0 0 7 8 0
10 // 0 0 1 2 4 1
11
12 const int blockDim = 2;
13 const int mb = 2;
14 const int kb = 3;
15 const int nnzb = 4;
16 const hipsparseDirection_t dir = HIPSPARSE_DIRECTION_ROW;
17
18 std::vector<int> hbsrRowPtr = {0, 2, 4};
19 std::vector<int> hbsrColInd = {0, 1, 1, 2};
20 std::vector<float> hbsrVal = {1, 2, 0, 4, 0, 3, 5, 0, 0, 7, 1, 2, 8, 0, 4, 1};
21
22 // Set dimension n of B
23 const int n = 3;
24 const int m = mb * blockDim;
25 const int k = kb * blockDim;
26
27 // Allocate and generate dense matrix B (k x n)
28 std::vector<float> hB = {1.0f,
29 2.0f,
30 3.0f,
31 4.0f,
32 5.0f,
33 6.0f,
34 7.0f,
35 8.0f,
36 9.0f,
37 10.0f,
38 11.0f,
39 12.0f,
40 13.0f,
41 14.0f,
42 15.0f,
43 16.0f,
44 17.0f,
45 18.0f};
46
47 int* dbsrRowPtr = NULL;
48 int* dbsrColInd = NULL;
49 float* dbsrVal = NULL;
50 HIP_CHECK(hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1)));
51 HIP_CHECK(hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb));
52 HIP_CHECK(hipMalloc((void**)&dbsrVal, sizeof(float) * nnzb * blockDim * blockDim));
53 HIP_CHECK(
54 hipMemcpy(dbsrRowPtr, hbsrRowPtr.data(), sizeof(int) * (mb + 1), hipMemcpyHostToDevice));
55 HIP_CHECK(hipMemcpy(dbsrColInd, hbsrColInd.data(), sizeof(int) * nnzb, hipMemcpyHostToDevice));
56 HIP_CHECK(hipMemcpy(dbsrVal,
57 hbsrVal.data(),
58 sizeof(float) * nnzb * blockDim * blockDim,
59 hipMemcpyHostToDevice));
60
61 // Copy B to the device
62 float* dB;
63 HIP_CHECK(hipMalloc((void**)&dB, sizeof(float) * k * n));
64 HIP_CHECK(hipMemcpy(dB, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice));
65
66 // alpha and beta
67 float alpha = 1.0f;
68 float beta = 0.0f;
69
70 // Allocate memory for the resulting matrix C
71 float* dC;
72 HIP_CHECK(hipMalloc((void**)&dC, sizeof(float) * m * n));
73
74 // Matrix descriptor
75 hipsparseMatDescr_t descr;
76 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
77
78 // Perform the matrix multiplication
79 HIPSPARSE_CHECK(hipsparseSbsrmm(handle,
80 dir,
81 HIPSPARSE_OPERATION_NON_TRANSPOSE,
82 HIPSPARSE_OPERATION_NON_TRANSPOSE,
83 mb,
84 n,
85 kb,
86 nnzb,
87 &alpha,
88 descr,
89 dbsrVal,
90 dbsrRowPtr,
91 dbsrColInd,
92 blockDim,
93 dB,
94 k,
95 &beta,
96 dC,
97 m));
98
99 // Copy results to host
100 std::vector<float> hC(m * n);
101 HIP_CHECK(hipMemcpy(hC.data(), dC, sizeof(float) * m * n, hipMemcpyDeviceToHost));
102
103 std::cout << "hC" << std::endl;
104 for(int i = 0; i < m * n; i++)
105 {
106 std::cout << hC[i] << " ";
107 }
108 std::cout << std::endl;
109
110 HIP_CHECK(hipFree(dbsrRowPtr));
111 HIP_CHECK(hipFree(dbsrColInd));
112 HIP_CHECK(hipFree(dbsrVal));
113 HIP_CHECK(hipFree(dB));
114 HIP_CHECK(hipFree(dC));
115
116 return 0;
117}
1int main(int argc, char* argv[])
2{
3 // hipSPARSE handle
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // 1 2 0 3 0 0
8 // A = 0 4 5 0 0 0
9 // 0 0 0 7 8 0
10 // 0 0 1 2 4 1
11
12 const int blockDim = 2;
13 const int mb = 2;
14 const int kb = 3;
15 const int nnzb = 4;
16 const hipsparseDirection_t dir = HIPSPARSE_DIRECTION_ROW;
17
18 int hbsrRowPtr[] = {0, 2, 4};
19 int hbsrColInd[] = {0, 1, 1, 2};
20 float hbsrVal[] = {1, 2, 0, 4, 0, 3, 5, 0, 0, 7, 1, 2, 8, 0, 4, 1};
21
22 // Set dimension n of B
23 const int n = 3;
24 const int m = mb * blockDim;
25 const int k = kb * blockDim;
26
27 // Allocate and generate dense matrix B (k x n)
28 float hB[] = {1.0,
29 2.0,
30 3.0,
31 4.0,
32 5.0,
33 6.0,
34 7.0,
35 8.0,
36 9.0,
37 10.0,
38 11.0,
39 12.0,
40 13.0,
41 14.0,
42 15.0,
43 16.0,
44 17.0,
45 18.0};
46
47 int* dbsrRowPtr = NULL;
48 int* dbsrColInd = NULL;
49 float* dbsrVal = NULL;
50 HIP_CHECK(hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1)));
51 HIP_CHECK(hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb));
52 HIP_CHECK(hipMalloc((void**)&dbsrVal, sizeof(float) * nnzb * blockDim * blockDim));
53 HIP_CHECK(hipMemcpy(dbsrRowPtr, hbsrRowPtr, sizeof(int) * (mb + 1), hipMemcpyHostToDevice));
54 HIP_CHECK(hipMemcpy(dbsrColInd, hbsrColInd, sizeof(int) * nnzb, hipMemcpyHostToDevice));
55 HIP_CHECK(hipMemcpy(
56 dbsrVal, hbsrVal, sizeof(float) * nnzb * blockDim * blockDim, hipMemcpyHostToDevice));
57
58 // Copy B to the device
59 float* dB;
60 HIP_CHECK(hipMalloc((void**)&dB, sizeof(float) * k * n));
61 HIP_CHECK(hipMemcpy(dB, hB, sizeof(float) * k * n, hipMemcpyHostToDevice));
62
63 // alpha and beta
64 float alpha = 1.0;
65 float beta = 0.0;
66
67 // Allocate memory for the resulting matrix C
68 float* dC;
69 HIP_CHECK(hipMalloc((void**)&dC, sizeof(float) * m * n));
70
71 // Matrix descriptor
72 hipsparseMatDescr_t descr;
73 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
74
75 // Perform the matrix multiplication
76 HIPSPARSE_CHECK(hipsparseSbsrmm(handle,
77 dir,
78 HIPSPARSE_OPERATION_NON_TRANSPOSE,
79 HIPSPARSE_OPERATION_NON_TRANSPOSE,
80 mb,
81 n,
82 kb,
83 nnzb,
84 &alpha,
85 descr,
86 dbsrVal,
87 dbsrRowPtr,
88 dbsrColInd,
89 blockDim,
90 dB,
91 k,
92 &beta,
93 dC,
94 m));
95
96 // Copy results to host
97 float hC[m * n];
98 HIP_CHECK(hipMemcpy(hC, dC, sizeof(float) * m * n, hipMemcpyDeviceToHost));
99
100 printf("hC\n");
101 for(int i = 0; i < m * n; i++)
102 {
103 printf("%f ", hC[i]);
104 }
105 printf("\n");
106
107 HIP_CHECK(hipFree(dbsrRowPtr));
108 HIP_CHECK(hipFree(dbsrColInd));
109 HIP_CHECK(hipFree(dbsrVal));
110 HIP_CHECK(hipFree(dB));
111 HIP_CHECK(hipFree(dC));
112
113 return 0;
114}
! Example: C = alpha * A * B + beta * C with hipsparseSbsrmm, where A is a
! sparse matrix in BSR format and B, C are dense column-major matrices.
!
! All integers that cross the C interface are declared integer(c_int) so the
! program does not depend on the compiler's default integer kind.
program example_hipsparse_bsrmm
    use iso_c_binding
    implicit none

    ! HIP
    interface
        function hipMalloc(ptr, size) &
                bind(c, name = 'hipMalloc')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMalloc
            type(c_ptr) :: ptr
            integer(c_size_t), value :: size
        end function hipMalloc

        function hipFree(ptr) &
                bind(c, name = 'hipFree')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipFree
            type(c_ptr), value :: ptr
        end function hipFree

        function hipMemcpy(dst, src, size, kind) &
                bind(c, name = 'hipMemcpy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMemcpy
            type(c_ptr), value :: dst
            type(c_ptr), intent(in), value :: src
            integer(c_size_t), value :: size
            integer(c_int), value :: kind
        end function hipMemcpy
    end interface

    integer(c_int), parameter :: hipMemcpyHostToDevice = 1
    integer(c_int), parameter :: hipMemcpyDeviceToHost = 2

    ! hipSPARSE
    interface
        function hipsparseCreate(handle) &
                bind(c, name = 'hipsparseCreate')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreate
            type(c_ptr) :: handle
        end function hipsparseCreate

        function hipsparseDestroy(handle) &
                bind(c, name = 'hipsparseDestroy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroy
            type(c_ptr), value :: handle
        end function hipsparseDestroy

        function hipsparseCreateMatDescr(descr) &
                bind(c, name = 'hipsparseCreateMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreateMatDescr
            type(c_ptr) :: descr
        end function hipsparseCreateMatDescr

        function hipsparseDestroyMatDescr(descr) &
                bind(c, name = 'hipsparseDestroyMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroyMatDescr
            type(c_ptr), value :: descr
        end function hipsparseDestroyMatDescr

        function hipsparseSbsrmm(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA, &
                bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, &
                B, ldb, beta, C, ldc) &
                bind(c, name = 'hipsparseSbsrmm')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseSbsrmm
            type(c_ptr), value :: handle
            integer(c_int), value :: dirA
            integer(c_int), value :: transA
            integer(c_int), value :: transB
            integer(c_int), value :: mb
            integer(c_int), value :: n
            integer(c_int), value :: kb
            integer(c_int), value :: nnzb
            type(c_ptr), intent(in), value :: alpha
            type(c_ptr), value :: descrA
            type(c_ptr), intent(in), value :: bsrSortedValA
            type(c_ptr), intent(in), value :: bsrSortedRowPtrA
            type(c_ptr), intent(in), value :: bsrSortedColIndA
            integer(c_int), value :: blockDim
            type(c_ptr), intent(in), value :: B
            integer(c_int), value :: ldb
            type(c_ptr), intent(in), value :: beta
            type(c_ptr), value :: C
            integer(c_int), value :: ldc
        end function hipsparseSbsrmm
    end interface

    integer(c_int), parameter :: HIPSPARSE_DIRECTION_ROW = 0
    integer(c_int), parameter :: HIPSPARSE_OPERATION_NON_TRANSPOSE = 0

    ! Variables
    type(c_ptr) :: handle
    type(c_ptr) :: descr
    integer :: i
    integer(c_int) :: stat

    ! Block sparse matrix A (2x3 blocks of size 2x2)
    integer(c_int), parameter :: blockDim = 2
    integer(c_int), parameter :: mb = 2
    integer(c_int), parameter :: kb = 3
    integer(c_int), parameter :: nnzb = 4
    integer(c_int), parameter :: m = mb * blockDim  ! 4
    integer(c_int), parameter :: k = kb * blockDim  ! 6

    ! Zero-based BSR index arrays; copied to the device as 4-byte C ints
    integer(c_int), dimension(mb+1), target :: hbsrRowPtr = (/0, 2, 4/)
    integer(c_int), dimension(nnzb), target :: hbsrColInd = (/0, 1, 1, 2/)
    real(c_float), dimension(nnzb*blockDim*blockDim), target :: hbsrVal = (/ &
        1.0, 2.0, 0.0, 4.0, &  ! block (0,0)
        0.0, 3.0, 5.0, 0.0, &  ! block (0,1)
        0.0, 7.0, 1.0, 2.0, &  ! block (1,1)
        8.0, 0.0, 4.0, 1.0 /)  ! block (1,2)

    ! Dense matrix B (6x3) - column major
    integer(c_int), parameter :: n = 3
    real(c_float), dimension(k*n), target :: hB = (/ &
        1.0, 2.0, 3.0, 4.0, 5.0, 6.0, &
        7.0, 8.0, 9.0, 10.0, 11.0, 12.0, &
        13.0, 14.0, 15.0, 16.0, 17.0, 18.0 /)

    ! Result matrix C (4x3)
    real(c_float), dimension(m*n), target :: hC

    ! Scalar values
    real(c_float), target :: alpha = 1.0
    real(c_float), target :: beta = 0.0

    ! Device pointers
    type(c_ptr) :: dbsrRowPtr
    type(c_ptr) :: dbsrColInd
    type(c_ptr) :: dbsrVal
    type(c_ptr) :: dB
    type(c_ptr) :: dC

    ! Create hipSPARSE handle
    stat = hipsparseCreate(handle)
    if (stat /= 0) then
        write(*,*) 'Error: hipsparseCreate failed'
        stop
    end if

    ! Create matrix descriptor
    stat = hipsparseCreateMatDescr(descr)
    if (stat /= 0) then
        write(*,*) 'Error: hipsparseCreateMatDescr failed'
        stop
    end if

    ! Allocate device memory for BSR matrix A
    ! (the factor 4 below is the byte size assumed for c_int and c_float)
    stat = hipMalloc(dbsrRowPtr, int((mb + 1) * 4, c_size_t))
    if (stat /= 0) then
        write(*,*) 'Error: hipMalloc dbsrRowPtr failed'
        stop
    end if

    stat = hipMalloc(dbsrColInd, int(nnzb * 4, c_size_t))
    if (stat /= 0) then
        write(*,*) 'Error: hipMalloc dbsrColInd failed'
        stop
    end if

    stat = hipMalloc(dbsrVal, int(nnzb * blockDim * blockDim * 4, c_size_t))
    if (stat /= 0) then
        write(*,*) 'Error: hipMalloc dbsrVal failed'
        stop
    end if

    ! Allocate device memory for dense matrices B and C
    stat = hipMalloc(dB, int(k * n * 4, c_size_t))
    if (stat /= 0) then
        write(*,*) 'Error: hipMalloc dB failed'
        stop
    end if

    stat = hipMalloc(dC, int(m * n * 4, c_size_t))
    if (stat /= 0) then
        write(*,*) 'Error: hipMalloc dC failed'
        stop
    end if

    ! Copy data to device
    stat = hipMemcpy(dbsrRowPtr, c_loc(hbsrRowPtr), int((mb + 1) * 4, c_size_t), hipMemcpyHostToDevice)
    if (stat /= 0) then
        write(*,*) 'Error: hipMemcpy dbsrRowPtr failed'
        stop
    end if

    stat = hipMemcpy(dbsrColInd, c_loc(hbsrColInd), int(nnzb * 4, c_size_t), hipMemcpyHostToDevice)
    if (stat /= 0) then
        write(*,*) 'Error: hipMemcpy dbsrColInd failed'
        stop
    end if

    stat = hipMemcpy(dbsrVal, c_loc(hbsrVal), int(nnzb * blockDim * blockDim * 4, c_size_t), hipMemcpyHostToDevice)
    if (stat /= 0) then
        write(*,*) 'Error: hipMemcpy dbsrVal failed'
        stop
    end if

    stat = hipMemcpy(dB, c_loc(hB), int(k * n * 4, c_size_t), hipMemcpyHostToDevice)
    if (stat /= 0) then
        write(*,*) 'Error: hipMemcpy dB failed'
        stop
    end if

    ! Perform block sparse matrix-matrix multiplication: C = alpha * A * B + beta * C
    ! (ldb = k, ldc = m)
    stat = hipsparseSbsrmm(handle, &
                           HIPSPARSE_DIRECTION_ROW, &
                           HIPSPARSE_OPERATION_NON_TRANSPOSE, &
                           HIPSPARSE_OPERATION_NON_TRANSPOSE, &
                           mb, &
                           n, &
                           kb, &
                           nnzb, &
                           c_loc(alpha), &
                           descr, &
                           dbsrVal, &
                           dbsrRowPtr, &
                           dbsrColInd, &
                           blockDim, &
                           dB, &
                           k, &
                           c_loc(beta), &
                           dC, &
                           m)
    if (stat /= 0) then
        write(*,*) 'Error: hipsparseSbsrmm failed'
        stop
    end if

    ! Copy result back to host
    stat = hipMemcpy(c_loc(hC), dC, int(m * n * 4, c_size_t), hipMemcpyDeviceToHost)
    if (stat /= 0) then
        write(*,*) 'Error: hipMemcpy hC failed'
        stop
    end if

    ! Print result
    write(*,*) 'hC:'
    do i = 1, m * n
        write(*,*) hC(i)
    end do

    ! Clean up (return codes are intentionally not checked here)
    stat = hipFree(dbsrRowPtr)
    stat = hipFree(dbsrColInd)
    stat = hipFree(dbsrVal)
    stat = hipFree(dB)
    stat = hipFree(dC)

    stat = hipsparseDestroyMatDescr(descr)
    stat = hipsparseDestroy(handle)

end program example_hipsparse_bsrmm
hipsparseXcsrmm()#
-
hipsparseStatus_t hipsparseScsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZcsrmm(hipsparseHandle_t handle, hipsparseOperation_t transA, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Sparse matrix dense matrix multiplication using CSR storage format.
hipsparseXcsrmm multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the column-oriented dense \(k \times n\) matrix \(B\) and adds the result to the column-oriented dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that
\[ C := \alpha \cdot op(A) \cdot B + \beta \cdot C, \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
for(i = 0; i < ldc; ++i)
{
    for(j = 0; j < n; ++j)
    {
        C[i][j] = beta * C[i][j];
        for(k = csrRowPtr[i]; k < csrRowPtr[i + 1]; ++k)
        {
            C[i][j] += alpha * csrVal[k] * B[csrColInd[k]][j];
        }
    }
}
- Deprecated:
This function is deprecated when using the CUDA backend (CUDA 10.0+) and will be removed in CUDA 11.0. This deprecation does not apply to the ROCm backend.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
transA – [in] matrix \(A\) operation type.
m – [in] number of rows of the sparse CSR matrix \(A\). Must be non-negative.
n – [in] number of columns of the dense matrix \(op(B)\) and \(C\). Must be non-negative.
k – [in] number of columns of the sparse CSR matrix \(A\). Must be non-negative.
nnz – [in] number of non-zero entries of the sparse CSR matrix \(A\). Must be non-negative.
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrSortedValA – [in] array of nnz elements of the sparse CSR matrix \(A\).
csrSortedRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrSortedColIndA – [in] array of nnz elements containing the column indices of the sparse CSR matrix \(A\).
B – [in] array of dimension ldb*n ( \(op(B) == B\)), ldb*k otherwise.
ldb – [in] leading dimension of \(B\), must be at least \(\max{(1, k)}\) ( \(op(B) == B\)), \(\max{(1, n)}\) otherwise.
beta – [in] scalar \(\beta\).
C – [inout] array of dimension ldc*n.
ldc – [in] leading dimension of \(C\), must be at least \(\max{(1, m)}\) ( \(op(A) == A\)), \(\max{(1, k)}\) otherwise.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_NOT_INITIALIZED – handle is not initialized.
HIPSPARSE_STATUS_INVALID_VALUE – handle, descrA, alpha or beta is nullptr, m, n, k or nnz is negative, ldb or ldc is invalid, or csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B or C is nullptr.
HIPSPARSE_STATUS_ARCH_MISMATCH – the device is not supported.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t is not HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 // hipSPARSE handle
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // 1 2 0 3 0 0
8 // A = 0 4 5 0 0 0
9 // 0 0 0 7 8 0
10 // 0 0 1 2 4 1
11
12 const int m = 4;
13 const int k = 6;
14 const int nnz = 11;
15 const hipsparseDirection_t dir = HIPSPARSE_DIRECTION_ROW;
16
17 std::vector<int> hcsrRowPtr = {0, 3, 5, 7, 11};
18 std::vector<int> hcsrColInd = {0, 1, 3, 1, 2, 3, 4, 2, 3, 4, 5};
19 std::vector<float> hcsrVal = {1, 2, 3, 4, 5, 7, 8, 1, 2, 4, 1};
20
21 // Set dimension n of B
22 const int n = 3;
23
24 // Allocate and generate dense matrix B (k x n)
25 std::vector<float> hB = {1.0f,
26 2.0f,
27 3.0f,
28 4.0f,
29 5.0f,
30 6.0f,
31 7.0f,
32 8.0f,
33 9.0f,
34 10.0f,
35 11.0f,
36 12.0f,
37 13.0f,
38 14.0f,
39 15.0f,
40 16.0f,
41 17.0f,
42 18.0f};
43
44 int* dcsrRowPtr = NULL;
45 int* dcsrColInd = NULL;
46 float* dcsrVal = NULL;
47 HIP_CHECK(hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)));
48 HIP_CHECK(hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz));
49 HIP_CHECK(hipMalloc((void**)&dcsrVal, sizeof(float) * nnz));
50 HIP_CHECK(
51 hipMemcpy(dcsrRowPtr, hcsrRowPtr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice));
52 HIP_CHECK(hipMemcpy(dcsrColInd, hcsrColInd.data(), sizeof(int) * nnz, hipMemcpyHostToDevice));
53 HIP_CHECK(hipMemcpy(dcsrVal, hcsrVal.data(), sizeof(float) * nnz, hipMemcpyHostToDevice));
54
55 // Copy B to the device
56 float* dB;
57 HIP_CHECK(hipMalloc((void**)&dB, sizeof(float) * k * n));
58 HIP_CHECK(hipMemcpy(dB, hB.data(), sizeof(float) * k * n, hipMemcpyHostToDevice));
59
60 // alpha and beta
61 float alpha = 1.0f;
62 float beta = 0.0f;
63
64 // Allocate memory for the resulting matrix C
65 float* dC;
66 HIP_CHECK(hipMalloc((void**)&dC, sizeof(float) * m * n));
67
68 // Matrix descriptor
69 hipsparseMatDescr_t descr;
70 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
71
72 // Perform the matrix multiplication
73 HIPSPARSE_CHECK(hipsparseScsrmm(handle,
74 HIPSPARSE_OPERATION_NON_TRANSPOSE,
75 m,
76 n,
77 k,
78 nnz,
79 &alpha,
80 descr,
81 dcsrVal,
82 dcsrRowPtr,
83 dcsrColInd,
84 dB,
85 k,
86 &beta,
87 dC,
88 m));
89
90 // Copy results to host
91 std::vector<float> hC(6 * 3);
92 HIP_CHECK(hipMemcpy(hC.data(), dC, sizeof(float) * m * n, hipMemcpyDeviceToHost));
93
94 std::cout << "hC" << std::endl;
95 for(int i = 0; i < m * n; i++)
96 {
97 std::cout << hC[i] << " ";
98 }
99 std::cout << std::endl;
100
101 HIP_CHECK(hipFree(dcsrRowPtr));
102 HIP_CHECK(hipFree(dcsrColInd));
103 HIP_CHECK(hipFree(dcsrVal));
104 HIP_CHECK(hipFree(dB));
105 HIP_CHECK(hipFree(dC));
106
107 HIPSPARSE_CHECK(hipsparseDestroy(handle));
108
109 return 0;
110}
1int main(int argc, char* argv[])
2{
3 // hipSPARSE handle
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // 1 2 0 3 0 0
8 // A = 0 4 5 0 0 0
9 // 0 0 0 7 8 0
10 // 0 0 1 2 4 1
11
12 const int m = 4;
13 const int k = 6;
14 const int nnz = 11;
15 const hipsparseDirection_t dir = HIPSPARSE_DIRECTION_ROW;
16
17 int hcsrRowPtr[] = {0, 3, 5, 7, 11};
18 int hcsrColInd[] = {0, 1, 3, 1, 2, 3, 4, 2, 3, 4, 5};
19 float hcsrVal[] = {1, 2, 3, 4, 5, 7, 8, 1, 2, 4, 1};
20
21 // Set dimension n of B
22 const int n = 3;
23
24 // Allocate and generate dense matrix B (k x n)
25 float hB[] = {1.0,
26 2.0,
27 3.0,
28 4.0,
29 5.0,
30 6.0,
31 7.0,
32 8.0,
33 9.0,
34 10.0,
35 11.0,
36 12.0,
37 13.0,
38 14.0,
39 15.0,
40 16.0,
41 17.0,
42 18.0};
43
44 int* dcsrRowPtr = NULL;
45 int* dcsrColInd = NULL;
46 float* dcsrVal = NULL;
47 HIP_CHECK(hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)));
48 HIP_CHECK(hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz));
49 HIP_CHECK(hipMalloc((void**)&dcsrVal, sizeof(float) * nnz));
50 HIP_CHECK(hipMemcpy(dcsrRowPtr, hcsrRowPtr, sizeof(int) * (m + 1), hipMemcpyHostToDevice));
51 HIP_CHECK(hipMemcpy(dcsrColInd, hcsrColInd, sizeof(int) * nnz, hipMemcpyHostToDevice));
52 HIP_CHECK(hipMemcpy(dcsrVal, hcsrVal, sizeof(float) * nnz, hipMemcpyHostToDevice));
53
54 // Copy B to the device
55 float* dB;
56 HIP_CHECK(hipMalloc((void**)&dB, sizeof(float) * k * n));
57 HIP_CHECK(hipMemcpy(dB, hB, sizeof(float) * k * n, hipMemcpyHostToDevice));
58
59 // alpha and beta
60 float alpha = 1.0;
61 float beta = 0.0;
62
63 // Allocate memory for the resulting matrix C
64 float* dC;
65 HIP_CHECK(hipMalloc((void**)&dC, sizeof(float) * m * n));
66
67 // Matrix descriptor
68 hipsparseMatDescr_t descr;
69 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
70
71 // Perform the matrix multiplication
72 HIPSPARSE_CHECK(hipsparseScsrmm(handle,
73 HIPSPARSE_OPERATION_NON_TRANSPOSE,
74 m,
75 n,
76 k,
77 nnz,
78 &alpha,
79 descr,
80 dcsrVal,
81 dcsrRowPtr,
82 dcsrColInd,
83 dB,
84 k,
85 &beta,
86 dC,
87 m));
88
89 // Copy results to host
90 float hC[6 * 3];
91 HIP_CHECK(hipMemcpy(hC, dC, sizeof(float) * m * n, hipMemcpyDeviceToHost));
92
93 printf("hC\n");
94 for(int i = 0; i < m * n; i++)
95 {
96 printf("%f ", hC[i]);
97 }
98 printf("\n");
99
100 HIP_CHECK(hipFree(dcsrRowPtr));
101 HIP_CHECK(hipFree(dcsrColInd));
102 HIP_CHECK(hipFree(dcsrVal));
103 HIP_CHECK(hipFree(dB));
104 HIP_CHECK(hipFree(dC));
105
106 HIPSPARSE_CHECK(hipsparseDestroy(handle));
107
108 return 0;
109}
1program example_hipsparse_csrmm
2 use iso_c_binding
3 implicit none
4
5 ! HIP
6 interface
7 function hipMalloc(ptr, size) &
8 bind(c, name = 'hipMalloc')
9 use iso_c_binding
10 implicit none
11 integer(c_int) :: hipMalloc
12 type(c_ptr) :: ptr
13 integer(c_size_t), value :: size
14 end function hipMalloc
15
16 function hipFree(ptr) &
17 bind(c, name = 'hipFree')
18 use iso_c_binding
19 implicit none
20 integer(c_int) :: hipFree
21 type(c_ptr), value :: ptr
22 end function hipFree
23
24 function hipMemcpy(dst, src, size, kind) &
25 bind(c, name = 'hipMemcpy')
26 use iso_c_binding
27 implicit none
28 integer(c_int) :: hipMemcpy
29 type(c_ptr), value :: dst
30 type(c_ptr), intent(in), value :: src
31 integer(c_size_t), value :: size
32 integer(c_int), value :: kind
33 end function hipMemcpy
34
35 function hipMemset(dst, val, size) &
36 bind(c, name = 'hipMemset')
37 use iso_c_binding
38 implicit none
39 integer(c_int) :: hipMemset
40 type(c_ptr), value :: dst
41 integer(c_int), value :: val
42 integer(c_size_t), value :: size
43 end function hipMemset
44
45 function hipDeviceSynchronize() &
46 bind(c, name = 'hipDeviceSynchronize')
47 use iso_c_binding
48 implicit none
49 integer(c_int) :: hipDeviceSynchronize
50 end function hipDeviceSynchronize
51 end interface
52
53 integer, parameter :: hipMemcpyHostToDevice = 1
54 integer, parameter :: hipMemcpyDeviceToHost = 2
55
56 ! hipSPARSE
57 interface
58 function hipsparseCreate(handle) &
59 bind(c, name = 'hipsparseCreate')
60 use iso_c_binding
61 implicit none
62 integer(c_int) :: hipsparseCreate
63 type(c_ptr) :: handle
64 end function hipsparseCreate
65
66 function hipsparseDestroy(handle) &
67 bind(c, name = 'hipsparseDestroy')
68 use iso_c_binding
69 implicit none
70 integer(c_int) :: hipsparseDestroy
71 type(c_ptr), value :: handle
72 end function hipsparseDestroy
73
74 function hipsparseCreateMatDescr(descr) &
75 bind(c, name = 'hipsparseCreateMatDescr')
76 use iso_c_binding
77 implicit none
78 integer(c_int) :: hipsparseCreateMatDescr
79 type(c_ptr) :: descr
80 end function hipsparseCreateMatDescr
81
82 function hipsparseDestroyMatDescr(descr) &
83 bind(c, name = 'hipsparseDestroyMatDescr')
84 use iso_c_binding
85 implicit none
86 integer(c_int) :: hipsparseDestroyMatDescr
87 type(c_ptr), value :: descr
88 end function hipsparseDestroyMatDescr
89
90 function hipsparseScsrmm(handle, transA, m, n, k, nnz, alpha, descrA, csrSortedValA, &
91 csrSortedRowPtrA, csrSortedColIndA, B, ldb, beta, C, ldc) &
92 bind(c, name = 'hipsparseScsrmm')
93 use iso_c_binding
94 implicit none
95 integer(c_int) :: hipsparseScsrmm
96 type(c_ptr), value :: handle
97 integer(c_int), value :: transA
98 integer(c_int), value :: m
99 integer(c_int), value :: n
100 integer(c_int), value :: k
101 integer(c_int), value :: nnz
102 type(c_ptr), intent(in), value :: alpha
103 type(c_ptr), value :: descrA
104 type(c_ptr), intent(in), value :: csrSortedValA
105 type(c_ptr), intent(in), value :: csrSortedRowPtrA
106 type(c_ptr), intent(in), value :: csrSortedColIndA
107 type(c_ptr), intent(in), value :: B
108 integer(c_int), value :: ldb
109 type(c_ptr), intent(in), value :: beta
110 type(c_ptr), value :: C
111 integer(c_int), value :: ldc
112 end function hipsparseScsrmm
113 end interface
114
115 integer, parameter :: HIPSPARSE_OPERATION_NON_TRANSPOSE = 0
116
117 ! Variables
118 type(c_ptr) :: handle
119 type(c_ptr) :: descr
120 integer :: i, j, stat
121
122 ! Matrix A (4x6) in CSR format
123 ! 1 2 0 3 0 0
124 ! A = 0 4 5 0 0 0
125 ! 0 0 0 7 8 0
126 ! 0 0 1 2 4 1
127 integer, parameter :: m = 4
128 integer, parameter :: k = 6
129 integer, parameter :: nnz = 11
130
131 integer, dimension(m+1), target :: hcsrRowPtr = (/0, 3, 5, 7, 11/)
132 integer, dimension(nnz), target :: hcsrColInd = (/0, 1, 3, 1, 2, 3, 4, 2, 3, 4, 5/)
133 real(c_float), dimension(nnz), target :: hcsrVal = (/1.0, 2.0, 3.0, 4.0, 5.0, 7.0, 8.0, 1.0, 2.0, 4.0, 1.0/)
134
135 ! Dense matrix B (6x3) - column major
136 integer, parameter :: n = 3
137 real(c_float), dimension(k*n), target :: hB = (/ &
138 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, &
139 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, &
140 13.0, 14.0, 15.0, 16.0, 17.0, 18.0 /)
141
142 ! Result matrix C (4x3)
143 real(c_float), dimension(m*n), target :: hC
144
145 ! Scalar values
146 real(c_float), target :: alpha = 1.0
147 real(c_float), target :: beta = 0.0
148
149 ! Device pointers
150 type(c_ptr) :: dcsrRowPtr
151 type(c_ptr) :: dcsrColInd
152 type(c_ptr) :: dcsrVal
153 type(c_ptr) :: dB
154 type(c_ptr) :: dC
155
156 ! Create hipSPARSE handle
157 stat = hipsparseCreate(handle)
158 if (stat /= 0) then
159 write(*,*) 'Error: hipsparseCreate failed'
160 stop
161 end if
162
163 ! Create matrix descriptor
164 stat = hipsparseCreateMatDescr(descr)
165 if (stat /= 0) then
166 write(*,*) 'Error: hipsparseCreateMatDescr failed'
167 stop
168 end if
169
170 ! Allocate device memory for CSR matrix A
171 stat = hipMalloc(dcsrRowPtr, int((m + 1) * 4, c_size_t))
172 if (stat /= 0) then
173 write(*,*) 'Error: hipMalloc dcsrRowPtr failed'
174 stop
175 end if
176
177 stat = hipMalloc(dcsrColInd, int(nnz * 4, c_size_t))
178 if (stat /= 0) then
179 write(*,*) 'Error: hipMalloc dcsrColInd failed'
180 stop
181 end if
182
183 stat = hipMalloc(dcsrVal, int(nnz * 4, c_size_t))
184 if (stat /= 0) then
185 write(*,*) 'Error: hipMalloc dcsrVal failed'
186 stop
187 end if
188
189 ! Allocate device memory for dense matrices B and C
190 stat = hipMalloc(dB, int(k * n * 4, c_size_t))
191 if (stat /= 0) then
192 write(*,*) 'Error: hipMalloc dB failed'
193 stop
194 end if
195
196 stat = hipMalloc(dC, int(m * n * 4, c_size_t))
197 if (stat /= 0) then
198 write(*,*) 'Error: hipMalloc dC failed'
199 stop
200 end if
201
202 ! Copy data to device
203 stat = hipMemcpy(dcsrRowPtr, c_loc(hcsrRowPtr), int((m + 1) * 4, c_size_t), hipMemcpyHostToDevice)
204 if (stat /= 0) then
205 write(*,*) 'Error: hipMemcpy dcsrRowPtr failed'
206 stop
207 end if
208
209 stat = hipMemcpy(dcsrColInd, c_loc(hcsrColInd), int(nnz * 4, c_size_t), hipMemcpyHostToDevice)
210 if (stat /= 0) then
211 write(*,*) 'Error: hipMemcpy dcsrColInd failed'
212 stop
213 end if
214
215 stat = hipMemcpy(dcsrVal, c_loc(hcsrVal), int(nnz * 4, c_size_t), hipMemcpyHostToDevice)
216 if (stat /= 0) then
217 write(*,*) 'Error: hipMemcpy dcsrVal failed'
218 stop
219 end if
220
221 stat = hipMemcpy(dB, c_loc(hB), int(k * n * 4, c_size_t), hipMemcpyHostToDevice)
222 if (stat /= 0) then
223 write(*,*) 'Error: hipMemcpy dB failed'
224 stop
225 end if
226
227 ! Perform matrix-matrix multiplication: C = alpha * A * B + beta * C
228 stat = hipsparseScsrmm(handle, &
229 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
230 m, &
231 n, &
232 k, &
233 nnz, &
234 c_loc(alpha), &
235 descr, &
236 dcsrVal, &
237 dcsrRowPtr, &
238 dcsrColInd, &
239 dB, &
240 k, &
241 c_loc(beta), &
242 dC, &
243 m)
244 if (stat /= 0) then
245 write(*,*) 'Error: hipsparseScsrmm failed'
246 stop
247 end if
248
249 ! Copy result back to host
250 stat = hipMemcpy(c_loc(hC), dC, int(m * n * 4, c_size_t), hipMemcpyDeviceToHost)
251 if (stat /= 0) then
252 write(*,*) 'Error: hipMemcpy hC failed'
253 stop
254 end if
255
256 ! Print result
257 write(*,*) 'hC:'
258 do i = 1, m * n
259 write(*,*) hC(i)
260 end do
261
262 ! Clean up
263 stat = hipFree(dcsrRowPtr)
264 stat = hipFree(dcsrColInd)
265 stat = hipFree(dcsrVal)
266 stat = hipFree(dB)
267 stat = hipFree(dC)
268
269 stat = hipsparseDestroyMatDescr(descr)
270 stat = hipsparseDestroy(handle)
271
272end program example_hipsparse_csrmm
hipsparseXcsrmm2()#
-
hipsparseStatus_t hipsparseScsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZcsrmm2(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Sparse matrix dense matrix multiplication using CSR storage format.
hipsparseXcsrmm2 multiplies the scalar \(\alpha\) with a sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the column-oriented dense \(k \times n\) matrix \(B\) and adds the result to the column-oriented dense \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that
\[ C := \alpha \cdot op(A) \cdot op(B) + \beta \cdot C, \]
with
\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
and
\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
for(i = 0; i < m; ++i)
{
    for(j = 0; j < n; ++j)
    {
        C[i][j] = beta * C[i][j];
        for(k = csrRowPtr[i]; k < csrRowPtr[i + 1]; ++k)
        {
            C[i][j] += alpha * csrVal[k] * B[csrColInd[k]][j];
        }
    }
}
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
transA – [in] matrix \(A\) operation type.
transB – [in] matrix \(B\) operation type.
m – [in] number of rows of the sparse CSR matrix \(A\).
n – [in] number of columns of the dense matrix \(op(B)\) and \(C\).
k – [in] number of columns of the sparse CSR matrix \(A\).
nnz – [in] number of non-zero entries of the sparse CSR matrix \(A\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrSortedValA – [in] array of nnz elements of the sparse CSR matrix \(A\).
csrSortedRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrSortedColIndA – [in] array of nnz elements containing the column indices of the sparse CSR matrix \(A\).
B – [in] array of dimension ldb*n ( \(op(B) == B\)), ldb*k otherwise.
ldb – [in] leading dimension of \(B\), must be at least \(\max{(1, k)}\) ( \(op(B) == B\)), \(\max{(1, n)}\) otherwise.
beta – [in] scalar \(\beta\).
C – [inout] array of dimension ldc*n.
ldc – [in] leading dimension of \(C\), must be at least \(\max{(1, m)}\) ( \(op(A) == A\)), \(\max{(1, k)}\) otherwise.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, n, k, nnz, ldb, ldc, descrA, alpha, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, beta or C is invalid.
HIPSPARSE_STATUS_ARCH_MISMATCH – the device is not supported.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXbsrsm2_zeroPivot()#
-
hipsparseStatus_t hipsparseXbsrsm2_zeroPivot(hipsparseHandle_t handle, bsrsm2Info_t info, int *position)#
hipsparseXbsrsm2_zeroPivot returns HIPSPARSE_STATUS_ZERO_PIVOT, if either a structural or numerical zero has been found during hipsparseXbsrsm2_analysis() or hipsparseXbsrsm2_solve() computation. The first zero pivot \(j\) at \(A_{j,j}\) is stored in position, using the same index base as the BSR matrix. position can be in host or device memory. If no zero pivot has been found, position is set to -1 and HIPSPARSE_STATUS_SUCCESS is returned instead.
- Deprecated:
This function is deprecated when using the CUDA backend (CUDA 12.0+) and will be removed in CUDA 13.0. This deprecation does not apply to the ROCm backend.
Note
hipsparseXbsrsm2_zeroPivot is a blocking function. It might influence performance negatively.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
info – [in] structure that holds the information collected during the analysis step.
position – [inout] pointer to zero pivot \(j\), can be in host or device memory.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_NOT_INITIALIZED – handle is not initialized.
HIPSPARSE_STATUS_INVALID_VALUE – handle, info or position is nullptr.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_ZERO_PIVOT – zero pivot has been found.
hipsparseXbsrsm2_bufferSize()#
-
hipsparseStatus_t hipsparseSbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZbsrsm2_bufferSize(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, int *pBufferSizeInBytes)#
hipsparseXbsrsm2_bufferSize returns the size of the temporary storage buffer in bytes that is required by hipsparseXbsrsm2_analysis() and hipsparseXbsrsm2_solve(). The temporary storage buffer must be allocated by the user.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
dirA – [in] matrix storage of BSR blocks.
transA – [in] matrix \(A\) operation type.
transX – [in] matrix \(X\) operation type.
mb – [in] number of block rows of the sparse BSR matrix \(A\).
nrhs – [in] number of columns of the dense matrix \(op(X)\).
nnzb – [in] number of non-zero blocks of the sparse BSR matrix \(A\).
descrA – [in] descriptor of the sparse BSR matrix \(A\).
bsrSortedValA – [in] array of nnzb blocks of the sparse BSR matrix.
bsrSortedRowPtrA – [in] array of mb+1 elements that point to the start of every block row of the sparse BSR matrix.
bsrSortedColIndA – [in] array of nnzb elements containing the block column indices of the sparse BSR matrix.
blockDim – [in] block dimension of the sparse BSR matrix.
info – [in] structure that holds the information collected during the analysis step.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer required by hipsparseXbsrsm2_analysis() and hipsparseXbsrsm2_solve().
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, mb, nrhs, nnzb, blockDim, descrA, bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, info or pBufferSizeInBytes is invalid.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE, transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE or hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXbsrsm2_analysis()#
-
hipsparseStatus_t hipsparseSbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZbsrsm2_analysis(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
Sparse triangular system solve using BSR storage format.
hipsparseXbsrsm2_analysis performs the analysis step for hipsparseXbsrsm2_solve(). It is expected that this function will be executed only once for a given matrix and particular operation type.
Note
If the matrix sparsity pattern changes, the gathered information will become invalid.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
dirA – [in] matrix storage of BSR blocks.
transA – [in] matrix \(A\) operation type.
transX – [in] matrix \(X\) operation type.
mb – [in] number of block rows of the sparse BSR matrix \(A\).
nrhs – [in] number of columns of the dense matrix \(op(X)\).
nnzb – [in] number of non-zero blocks of the sparse BSR matrix \(A\).
descrA – [in] descriptor of the sparse BSR matrix \(A\).
bsrSortedValA – [in] array of nnzb blocks of the sparse BSR matrix \(A\).
bsrSortedRowPtrA – [in] array of mb+1 elements that point to the start of every block row of the sparse BSR matrix \(A\).
bsrSortedColIndA – [in] array of nnzb elements containing the block column indices of the sparse BSR matrix \(A\).
blockDim – [in] block dimension of the sparse BSR matrix \(A\).
info – [out] structure that holds the information collected during the analysis step.
policy – [in] HIPSPARSE_SOLVE_POLICY_NO_LEVEL or HIPSPARSE_SOLVE_POLICY_USE_LEVEL.
pBuffer – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, mb, nrhs, nnzb, blockDim, descrA, bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, info or pBuffer is invalid.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE, transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE or hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXbsrsm2_solve()#
-
hipsparseStatus_t hipsparseSbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const float *alpha, const hipsparseMatDescr_t descrA, const float *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const float *B, int ldb, float *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const double *alpha, const hipsparseMatDescr_t descrA, const double *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const double *B, int ldb, double *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const hipComplex *B, int ldb, hipComplex *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZbsrsm2_solve(hipsparseHandle_t handle, hipsparseDirection_t dirA, hipsparseOperation_t transA, hipsparseOperation_t transX, int mb, int nrhs, int nnzb, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, bsrsm2Info_t info, const hipDoubleComplex *B, int ldb, hipDoubleComplex *X, int ldx, hipsparseSolvePolicy_t policy, void *pBuffer)#
Sparse triangular system solve using BSR storage format.
hipsparseXbsrsm2_solvesolves a sparse triangular linear system of a sparse \(m \times m\) matrix, defined in BSR storage format, a column-oriented dense solution matrix \(X\) and the column-oriented dense right-hand side matrix \(B\) that is multiplied by \(\alpha\), such that\[ op(A) \cdot op(X) = \alpha \cdot op(B), \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\],\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transX == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transX == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]and\[\begin{split} op(X) = \left\{ \begin{array}{ll} X, & \text{if transX == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ X^T, & \text{if transX == HIPSPARSE_OPERATION_TRANSPOSE} \\ X^H, & \text{if transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]and where \(m = blockDim \times mb\).Note that as indicated above, the operation type of both \(op(B)\) and \(op(X)\) is specified by the
transX parameter and that the operation type of \(B\) and \(X\) must match. For example, if \(op(B)=B\) then \(op(X)=X\). Likewise, if \(op(B)=B^T\) then \(op(X)=X^T\).
Given that the sparse matrix \(A\) is a square matrix, its size is \(m \times m\) regardless of whether \(A\) is transposed or not. The column-oriented dense matrices \(B\) and \(X\) have a size that depends on the value of
transX:
\[\begin{split} op(B) = \left\{ \begin{array}{ll} ldb \times nrhs, \text{ } ldb \ge m, & \text{if transX == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ ldb \times m, \text{ } ldb \ge nrhs, & \text{if transX == HIPSPARSE_OPERATION_TRANSPOSE} \\ ldb \times m, \text{ } ldb \ge nrhs, & \text{if transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
and
\[\begin{split} op(X) = \left\{ \begin{array}{ll} ldx \times nrhs, \text{ } ldx \ge m, & \text{if transX == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ ldx \times m, \text{ } ldx \ge nrhs, & \text{if transX == HIPSPARSE_OPERATION_TRANSPOSE} \\ ldx \times m, \text{ } ldx \ge nrhs, & \text{if transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]
hipsparseXbsrsm2_solve requires a user allocated temporary buffer. Its size is returned by hipsparseXbsrsm2_bufferSize(). The size of the required buffer is larger when transA equals HIPSPARSE_OPERATION_TRANSPOSE or HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE and when transX is HIPSPARSE_OPERATION_NON_TRANSPOSE. The subsequent solve will also be faster when \(A\) is non-transposed and \(B\) is transposed (or conjugate transposed). For example, instead of solving:
\[\begin{split} \left[ \begin{array}{c | c} \begin{array}{c c} a_{00} & a_{01} \\ a_{10} & a_{11} \end{array} & \begin{array}{c c} 0 & 0 \\ 0 & 0 \end{array} \\ \hline \begin{array}{c c} a_{20} & a_{21} \\ a_{30} & a_{31} \end{array} & \begin{array}{c c} a_{22} & a_{23} \\ a_{32} & a_{33} \end{array} \\ \end{array} \right] \cdot \begin{bmatrix} x_{00} & x_{01} \\ x_{10} & x_{11} \\ x_{20} & x_{21} \\ x_{30} & x_{31} \\ \end{bmatrix} = \begin{bmatrix} b_{00} & b_{01} \\ b_{10} & b_{11} \\ b_{20} & b_{21} \\ b_{30} & b_{31} \\ \end{bmatrix} \end{split}\]
consider solving:
\[\begin{split} \left[ \begin{array}{c | c} \begin{array}{c c} a_{00} & a_{01} \\ a_{10} & a_{11} \end{array} & \begin{array}{c c} 0 & 0 \\ 0 & 0 \end{array} \\ \hline \begin{array}{c c} a_{20} & a_{21} \\ a_{30} & a_{31} \end{array} & \begin{array}{c c} a_{22} & a_{23} \\ a_{32} & a_{33} \end{array} \\ \end{array} \right] \cdot \begin{bmatrix} x_{00} & x_{10} & x_{20} & x_{30} \\ x_{01} & x_{11} & x_{21} & x_{31} \end{bmatrix}^{T} = \begin{bmatrix} b_{00} & b_{10} & b_{20} & b_{30} \\ b_{01} & b_{11} & b_{21} & b_{31} \end{bmatrix}^{T} \end{split}\]
Once the temporary storage buffer has been allocated, analysis meta data is required. It can be obtained by hipsparseXbsrsm2_analysis(). The triangular solve is completed by calling
hipsparseXbsrsm2_solve and once all solves are performed, the temporary storage buffer allocated by the user can be freed.
Solving a triangular system involves inverting the diagonal blocks. This means that if the sparse matrix is missing the diagonal block (referred to as a structural zero) or the diagonal block is not invertible (referred to as a numerical zero) then a solution is not possible. hipsparseXbsrsm2_solve tracks the location of the first zero pivot (either numerical or structural zero). The zero pivot status can be checked by calling hipsparseXbsrsm2_zeroPivot(). If hipsparseXbsrsm2_zeroPivot() returns HIPSPARSE_STATUS_SUCCESS, then no zero pivot was found and therefore the matrix does not have a structural or numerical zero.
The user can specify that the sparse matrix should be interpreted as having identity blocks on the diagonal by setting the diagonal type on the descriptor descrA to HIPSPARSE_DIAG_TYPE_UNIT using hipsparseSetMatDiagType. If hipsparseDiagType_t == HIPSPARSE_DIAG_TYPE_UNIT, no zero pivot will be reported, even if the diagonal block \(A_{j,j}\) for some \(j\) is not invertible.
The sparse BSR matrix passed to hipsparseXbsrsm2_solve does not actually have to be a triangular matrix. Instead the triangular upper or lower part of the sparse matrix is solved based on hipsparseFillMode_t set on the descriptor descrA. If the fill mode is set to HIPSPARSE_FILL_MODE_LOWER, then the lower triangular matrix is solved. If the fill mode is set to HIPSPARSE_FILL_MODE_UPPER then the upper triangular matrix is solved.
Note
The sparse BSR matrix has to be sorted.
Note
The operation type of B and X must match: if \(op(B)=B\), then \(op(X)=X\).
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only transA != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE and transX != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
dirA – [in] matrix storage of BSR blocks.
transA – [in] matrix \(A\) operation type.
transX – [in] matrix \(X\) operation type.
mb – [in] number of block rows of the sparse BSR matrix \(A\).
nrhs – [in] number of columns of the dense matrix \(op(X)\).
nnzb – [in] number of non-zero blocks of the sparse BSR matrix \(A\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse BSR matrix \(A\).
bsrSortedValA – [in] array of nnzb blocks of the sparse BSR matrix.
bsrSortedRowPtrA – [in] array of mb+1 elements that point to the start of every block row of the sparse BSR matrix.
bsrSortedColIndA – [in] array of nnzb elements containing the block column indices of the sparse BSR matrix.
blockDim – [in] block dimension of the sparse BSR matrix.
info – [in] structure that holds the information collected during the analysis step.
B – [in] rhs matrix B with leading dimension ldb.
ldb – [in] leading dimension of rhs matrix \(B\).
X – [out] solution matrix X with leading dimension ldx.
ldx – [in] leading dimension of solution matrix \(X\).
policy – [in] HIPSPARSE_SOLVE_POLICY_NO_LEVEL or HIPSPARSE_SOLVE_POLICY_USE_LEVEL.
pBuffer – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, mb, nrhs, nnzb, blockDim, alpha, descrA, bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, B, X, info or pBuffer is invalid.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE, transX == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE or hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 // Create the hipSPARSE library handle used by the hipSPARSE calls below
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // A = ( 1.0 0.0 0.0 0.0 )
8 // ( 2.0 3.0 0.0 0.0 )
9 // ( 4.0 5.0 6.0 0.0 )
10 // ( 7.0 0.0 8.0 9.0 )
11 //
12 // with bsr_dim = 2
13 //
14 // -------------------
15 // = | 1.0 0.0 | 0.0 0.0 |
16 // | 2.0 3.0 | 0.0 0.0 |
17 // -------------------
18 // | 4.0 5.0 | 6.0 0.0 |
19 // | 7.0 0.0 | 8.0 9.0 |
20 // -------------------
21
22 // Number of rows and columns of the square matrix A
23 const int m = 4; // not referenced again in this example
24
25 // Number of block rows and block columns
26 const int mb = 2;
27 const int nb = 2;
28
29 // BSR block dimension
30 const int bsr_dim = 2;
31
32 // Number of right-hand-sides
33 const int nrhs = 4;
34
35 // Number of non-zero blocks
36 const int nnzb = 3;
37
38 // BSR row pointers (mb + 1 entries)
39 std::vector<int> hbsrRowPtr = {0, 1, 3};
40
41 // BSR column indices (one block column index per non-zero block)
42 std::vector<int> hbsrColInd = {0, 0, 1};
43
44 // BSR values: three 2x2 blocks, each listed column by column (matches dir below)
45 std::vector<double> hbsrVal = {1.0, 2.0, 0.0, 3.0, 4.0, 7.0, 5.0, 0.0, 6.0, 8.0, 0.0, 9.0};
46
47 // Storage scheme of the BSR blocks
48 hipsparseDirection_t dir = HIPSPARSE_DIRECTION_COLUMN;
49
50 // Transposition of the matrix and rhs matrix
51 hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
52 hipsparseOperation_t transX = HIPSPARSE_OPERATION_NON_TRANSPOSE;
53
54 // Solve policy
55 hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL;
56
57 // Scalar alpha (no beta is used in this example)
58 double alpha = 1.0;
59
60 // Leading dimensions of the rhs matrix B and the solution matrix X
61 const int ldb = nb * bsr_dim;
62 const int ldx = mb * bsr_dim;
63
64 std::vector<double> hB = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; // ldb * nrhs = 16 entries
65 std::vector<double> hX(ldx * nrhs);
66
67 // Device pointers; the data is offloaded to the device below
68 int* dbsrRowPtr;
69 int* dbsrColInd;
70 double* dbsrVal;
71 double* dB;
72 double* dX;
73
74 HIP_CHECK(hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1)));
75 HIP_CHECK(hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb));
76 HIP_CHECK(hipMalloc((void**)&dbsrVal, sizeof(double) * nnzb * bsr_dim * bsr_dim));
77 HIP_CHECK(hipMalloc((void**)&dB, sizeof(double) * nb * bsr_dim * nrhs));
78 HIP_CHECK(hipMalloc((void**)&dX, sizeof(double) * mb * bsr_dim * nrhs));
79
80 HIP_CHECK(
81 hipMemcpy(dbsrRowPtr, hbsrRowPtr.data(), sizeof(int) * (mb + 1), hipMemcpyHostToDevice));
82 HIP_CHECK(hipMemcpy(dbsrColInd, hbsrColInd.data(), sizeof(int) * nnzb, hipMemcpyHostToDevice));
83 HIP_CHECK(hipMemcpy(
84 dbsrVal, hbsrVal.data(), sizeof(double) * nnzb * bsr_dim * bsr_dim, hipMemcpyHostToDevice));
85 HIP_CHECK(
86 hipMemcpy(dB, hB.data(), sizeof(double) * nb * bsr_dim * nrhs, hipMemcpyHostToDevice));
87
88 // Matrix descriptor
89 hipsparseMatDescr_t descr;
90 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
91
92 // Matrix fill mode: solve with the lower triangular part
93 HIPSPARSE_CHECK(hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER));
94
95 // Matrix diagonal type: use the stored diagonal blocks
96 HIPSPARSE_CHECK(hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT));
97
98 // Matrix info structure shared by the bufferSize, analysis, solve and zeroPivot calls
99 bsrsm2Info_t info;
100 HIPSPARSE_CHECK(hipsparseCreateBsrsm2Info(&info));
101
102 // Obtain required temporary buffer size
103 int buffer_size;
104 HIPSPARSE_CHECK(hipsparseDbsrsm2_bufferSize(handle,
105 dir,
106 transA,
107 transX,
108 mb,
109 nrhs,
110 nnzb,
111 descr,
112 dbsrVal,
113 dbsrRowPtr,
114 dbsrColInd,
115 bsr_dim,
116 info,
117 &buffer_size));
118
119 // Allocate temporary buffer of the size reported above
120 void* dbuffer;
121 HIP_CHECK(hipMalloc(&dbuffer, buffer_size));
122
123 // Perform analysis step
124 HIPSPARSE_CHECK(hipsparseDbsrsm2_analysis(handle,
125 dir,
126 transA,
127 transX,
128 mb,
129 nrhs,
130 nnzb,
131 descr,
132 dbsrVal,
133 dbsrRowPtr,
134 dbsrColInd,
135 bsr_dim,
136 info,
137 solve_policy,
138 dbuffer));
139
140 // Call dbsrsm2_solve to perform the lower triangular solve L * X = alpha * B
141 HIPSPARSE_CHECK(hipsparseDbsrsm2_solve(handle,
142 dir,
143 transA,
144 transX,
145 mb,
146 nrhs,
147 nnzb,
148 &alpha,
149 descr,
150 dbsrVal,
151 dbsrRowPtr,
152 dbsrColInd,
153 bsr_dim,
154 info,
155 dB,
156 ldb,
157 dX,
158 ldx,
159 solve_policy,
160 dbuffer));
161
162 // Check for zero pivots (pivot is a host variable; only the ZERO_PIVOT status is acted on)
163 int pivot;
164 hipsparseStatus_t status = hipsparseXbsrsm2_zeroPivot(handle, info, &pivot);
165
166 if(status == HIPSPARSE_STATUS_ZERO_PIVOT)
167 {
168 std::cout << "Found zero pivot in matrix row " << pivot << std::endl;
169 }
170
171 // Copy result back to host
172 HIP_CHECK(
173 hipMemcpy(hX.data(), dX, sizeof(double) * mb * bsr_dim * nrhs, hipMemcpyDeviceToHost));
174
175 std::cout << "hX" << std::endl;
176 for(int i = 0; i < ldx * nrhs; i++)
177 {
178 std::cout << hX[i] << " ";
179 }
180 std::cout << std::endl;
181
182 // Destroy the hipSPARSE info structure, descriptor and handle
183 HIPSPARSE_CHECK(hipsparseDestroyBsrsm2Info(info));
184 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descr));
185 HIPSPARSE_CHECK(hipsparseDestroy(handle));
186
187 // Free device memory
188 HIP_CHECK(hipFree(dbsrRowPtr));
189 HIP_CHECK(hipFree(dbsrColInd));
190 HIP_CHECK(hipFree(dbsrVal));
191 HIP_CHECK(hipFree(dB));
192 HIP_CHECK(hipFree(dX));
193 HIP_CHECK(hipFree(dbuffer));
194
195 return 0;
196}
1int main(int argc, char* argv[])
2{
3 // hipSPARSE handle
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // A = ( 1.0 0.0 0.0 0.0 )
8 // ( 2.0 3.0 0.0 0.0 )
9 // ( 4.0 5.0 6.0 0.0 )
10 // ( 7.0 0.0 8.0 9.0 )
11 //
12 // with bsr_dim = 2
13 //
14 // -------------------
15 // = | 1.0 0.0 | 0.0 0.0 |
16 // | 2.0 3.0 | 0.0 0.0 |
17 // -------------------
18 // | 4.0 5.0 | 6.0 0.0 |
19 // | 7.0 0.0 | 8.0 9.0 |
20 // -------------------
21
22 // Number of rows and columns
23 const int m = 4;
24
25 // Number of block rows and block columns
26 const int mb = 2;
27 const int nb = 2;
28
29 // BSR block dimension
30 const int bsr_dim = 2;
31
32 // Number of right-hand-sides
33 const int nrhs = 4;
34
35 // Number of non-zero blocks
36 const int nnzb = 3;
37
38 // BSR row pointers
39 int hbsrRowPtr[] = {0, 1, 3};
40
41 // BSR column indices
42 int hbsrColInd[] = {0, 0, 1};
43
44 // BSR values
45 double hbsrVal[] = {1.0, 2.0, 0.0, 3.0, 4.0, 7.0, 5.0, 0.0, 6.0, 8.0, 0.0, 9.0};
46
47 // Storage scheme of the BSR blocks
48 hipsparseDirection_t dir = HIPSPARSE_DIRECTION_COLUMN;
49
50 // Transposition of the matrix and rhs matrix
51 hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
52 hipsparseOperation_t transX = HIPSPARSE_OPERATION_NON_TRANSPOSE;
53
54 // Solve policy
55 hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL;
56
57 // Scalar alpha and beta
58 double alpha = 1.0;
59
60 // rhs and solution matrix
61 const int ldb = nb * bsr_dim;
62 const int ldx = mb * bsr_dim;
63
64 double hB[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
65 double hX[ldx * nrhs];
66
67 // Offload data to device
68 int* dbsrRowPtr;
69 int* dbsrColInd;
70 double* dbsrVal;
71 double* dB;
72 double* dX;
73
74 HIP_CHECK(hipMalloc((void**)&dbsrRowPtr, sizeof(int) * (mb + 1)));
75 HIP_CHECK(hipMalloc((void**)&dbsrColInd, sizeof(int) * nnzb));
76 HIP_CHECK(hipMalloc((void**)&dbsrVal, sizeof(double) * nnzb * bsr_dim * bsr_dim));
77 HIP_CHECK(hipMalloc((void**)&dB, sizeof(double) * nb * bsr_dim * nrhs));
78 HIP_CHECK(hipMalloc((void**)&dX, sizeof(double) * mb * bsr_dim * nrhs));
79
80 HIP_CHECK(hipMemcpy(dbsrRowPtr, hbsrRowPtr, sizeof(int) * (mb + 1), hipMemcpyHostToDevice));
81 HIP_CHECK(hipMemcpy(dbsrColInd, hbsrColInd, sizeof(int) * nnzb, hipMemcpyHostToDevice));
82 HIP_CHECK(hipMemcpy(
83 dbsrVal, hbsrVal, sizeof(double) * nnzb * bsr_dim * bsr_dim, hipMemcpyHostToDevice));
84 HIP_CHECK(hipMemcpy(dB, hB, sizeof(double) * nb * bsr_dim * nrhs, hipMemcpyHostToDevice));
85
86 // Matrix descriptor
87 hipsparseMatDescr_t descr;
88 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
89
90 // Matrix fill mode
91 HIPSPARSE_CHECK(hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER));
92
93 // Matrix diagonal type
94 HIPSPARSE_CHECK(hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT));
95
96 // Matrix info structure
97 bsrsm2Info_t info;
98 HIPSPARSE_CHECK(hipsparseCreateBsrsm2Info(&info));
99
100 // Obtain required buffer size
101 int buffer_size;
102 HIPSPARSE_CHECK(hipsparseDbsrsm2_bufferSize(handle,
103 dir,
104 transA,
105 transX,
106 mb,
107 nrhs,
108 nnzb,
109 descr,
110 dbsrVal,
111 dbsrRowPtr,
112 dbsrColInd,
113 bsr_dim,
114 info,
115 &buffer_size));
116
117 // Allocate temporary buffer
118 void* dbuffer;
119 HIP_CHECK(hipMalloc(&dbuffer, buffer_size));
120
121 // Perform analysis step
122 HIPSPARSE_CHECK(hipsparseDbsrsm2_analysis(handle,
123 dir,
124 transA,
125 transX,
126 mb,
127 nrhs,
128 nnzb,
129 descr,
130 dbsrVal,
131 dbsrRowPtr,
132 dbsrColInd,
133 bsr_dim,
134 info,
135 solve_policy,
136 dbuffer));
137
138 // Call dbsrsm to perform lower triangular solve LX = B
139 HIPSPARSE_CHECK(hipsparseDbsrsm2_solve(handle,
140 dir,
141 transA,
142 transX,
143 mb,
144 nrhs,
145 nnzb,
146 &alpha,
147 descr,
148 dbsrVal,
149 dbsrRowPtr,
150 dbsrColInd,
151 bsr_dim,
152 info,
153 dB,
154 ldb,
155 dX,
156 ldx,
157 solve_policy,
158 dbuffer));
159
160 // Check for zero pivots
161 int pivot;
162 hipsparseStatus_t status = hipsparseXbsrsm2_zeroPivot(handle, info, &pivot);
163
164 if(status == HIPSPARSE_STATUS_ZERO_PIVOT)
165 {
166 printf("Found zero pivot in matrix row %d\n", pivot);
167 }
168
169 // Copy result back to host
170 HIP_CHECK(hipMemcpy(hX, dX, sizeof(double) * mb * bsr_dim * nrhs, hipMemcpyDeviceToHost));
171
172 printf("hX\n");
173 for(int i = 0; i < ldx * nrhs; i++)
174 {
175 printf("%f ", hX[i]);
176 }
177 printf("\n");
178
179 // Clear hipSPARSE
180 HIPSPARSE_CHECK(hipsparseDestroyBsrsm2Info(info));
181 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descr));
182 HIPSPARSE_CHECK(hipsparseDestroy(handle));
183
184 // Clear device memory
185 HIP_CHECK(hipFree(dbsrRowPtr));
186 HIP_CHECK(hipFree(dbsrColInd));
187 HIP_CHECK(hipFree(dbsrVal));
188 HIP_CHECK(hipFree(dB));
189 HIP_CHECK(hipFree(dX));
190 HIP_CHECK(hipFree(dbuffer));
191
192 return 0;
193}
hipsparseXcsrsm2_zeroPivot()#
-
hipsparseStatus_t hipsparseXcsrsm2_zeroPivot(hipsparseHandle_t handle, csrsm2Info_t info, int *position)#
hipsparseXcsrsm2_zeroPivot returns HIPSPARSE_STATUS_ZERO_PIVOT, if either a structural or numerical zero has been found during hipsparseXcsrsm2_analysis() or hipsparseXcsrsm2_solve() computation. The first zero pivot \(j\) at \(A_{j,j}\) is stored in position, using the same index base as the CSR matrix. position can be in host or device memory. If no zero pivot has been found, position is set to -1 and HIPSPARSE_STATUS_SUCCESS is returned instead.
- Deprecated:
This function is deprecated when using the CUDA backend (CUDA 11.0+) and will be removed in CUDA 12.0. This deprecation does not apply to the ROCm backend.
Note
hipsparseXcsrsm2_zeroPivot is a blocking function. It might influence performance negatively.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
info – [in] structure that holds the information collected during the analysis step.
position – [inout] pointer to zero pivot \(j\), can be in host or device memory.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_NOT_INITIALIZED – handle is not initialized.
HIPSPARSE_STATUS_INVALID_VALUE – handle, info or position is nullptr.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_ZERO_PIVOT – zero pivot has been found.
hipsparseXcsrsm2_bufferSizeExt()#
-
hipsparseStatus_t hipsparseScsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZcsrsm2_bufferSizeExt(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, size_t *pBufferSizeInBytes)#
hipsparseXcsrsm2_bufferSizeExt returns the size of the temporary storage buffer in bytes that is required by hipsparseXcsrsm2_analysis() and hipsparseXcsrsm2_solve(). The temporary storage buffer must be allocated by the user.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
algo – [in] algorithm to use.
transA – [in] matrix \(A\) operation type.
transB – [in] matrix \(B\) operation type.
m – [in] number of rows of the sparse CSR matrix \(A\).
nrhs – [in] number of columns of the dense matrix \(op(B)\).
nnz – [in] number of non-zero entries of the sparse CSR matrix \(A\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\).
csrSortedValA – [in] array of nnz elements of the sparse CSR matrix \(A\).
csrSortedRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrSortedColIndA – [in] array of nnz elements containing the column indices of the sparse CSR matrix \(A\).
B – [in] array of m \(\times\) nrhs elements of the rhs matrix \(B\).
ldb – [in] leading dimension of rhs matrix \(B\).
info – [in] structure that holds the information collected during the analysis step.
policy – [in] HIPSPARSE_SOLVE_POLICY_NO_LEVEL or HIPSPARSE_SOLVE_POLICY_USE_LEVEL.
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer required by hipsparseXcsrsm2_analysis() and hipsparseXcsrsm2_solve().
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, nrhs, nnz, alpha, descrA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, info or pBufferSizeInBytes is invalid.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE, transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE or hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrsm2_analysis()#
-
hipsparseStatus_t hipsparseScsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrsm2_analysis(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
hipsparseXcsrsm2_analysis performs the analysis step for hipsparseXcsrsm2_solve(). It is expected that this function will be executed only once for a given matrix and particular operation type.
Note
If the matrix sparsity pattern changes, the gathered information will become invalid.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
algo – [in] algorithm to use.
transA – [in] matrix \(A\) operation type.
transB – [in] matrix \(B\) operation type.
m – [in] number of rows of the sparse CSR matrix \(A\).
nrhs – [in] number of columns of the dense matrix \(op(B)\).
nnz – [in] number of non-zero entries of the sparse CSR matrix \(A\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\).
csrSortedValA – [in] array of nnz elements of the sparse CSR matrix \(A\).
csrSortedRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrSortedColIndA – [in] array of nnz elements containing the column indices of the sparse CSR matrix \(A\).
B – [in] array of m \(\times\) nrhs elements of the rhs matrix \(B\).
ldb – [in] leading dimension of rhs matrix \(B\).
info – [out] structure that holds the information collected during the analysis step.
policy – [in] HIPSPARSE_SOLVE_POLICY_NO_LEVEL or HIPSPARSE_SOLVE_POLICY_USE_LEVEL.
pBuffer – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, nrhs, nnz, alpha, descrA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, info or pBuffer is invalid.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE, transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE or hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrsm2_solve()#
-
hipsparseStatus_t hipsparseScsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const float *alpha, const hipsparseMatDescr_t descrA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, float *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const double *alpha, const hipsparseMatDescr_t descrA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, double *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipComplex *alpha, const hipsparseMatDescr_t descrA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, hipComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrsm2_solve(hipsparseHandle_t handle, int algo, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int nrhs, int nnz, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, hipDoubleComplex *B, int ldb, csrsm2Info_t info, hipsparseSolvePolicy_t policy, void *pBuffer)#
Sparse triangular system solve using CSR storage format.
hipsparseXcsrsm2_solve solves a sparse triangular linear system of a sparse \(m \times m\) matrix, defined in CSR storage format, a column-oriented dense solution matrix \(X\) and the column-oriented dense right-hand side matrix \(B\) that is multiplied by \(\alpha\), such that \[ op(A) \cdot op(X) = \alpha \cdot op(B), \] with \[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\] \[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\] and \[\begin{split} op(X) = \left\{ \begin{array}{ll} X, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ X^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ X^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\] The solution is performed in place, meaning that the matrix \(B\) is overwritten with the solution \(X\) after calling
hipsparseXcsrsm2_solve. Given that the sparse matrix \(A\) is a square matrix, its size is \(m \times m\) regardless of whether \(A\) is transposed or not. The column-oriented dense matrices \(B\) and \(X\) have a size that depends on the value of transB: \[\begin{split} op(B)/op(X) = \left\{ \begin{array}{ll} ldb \times nrhs, \text{ } ldb \ge m, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ ldb \times m, \text{ } ldb \ge nrhs, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ ldb \times m, \text{ } ldb \ge nrhs, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\] hipsparseXcsrsm2_solve requires a user allocated temporary buffer. Its size is returned by hipsparseXcsrsm2_bufferSizeExt(). The size of the required buffer is larger when transA equals HIPSPARSE_OPERATION_TRANSPOSE or HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE and when transB is HIPSPARSE_OPERATION_NON_TRANSPOSE. The subsequent solve will also be faster when \(A\) is non-transposed and \(B\) is transposed (or conjugate transposed). For example, instead of solving: \[\begin{split} \begin{bmatrix} a_{00} & 0 & 0 \\ a_{10} & a_{11} & 0 \\ a_{20} & a_{21} & a_{22} \\ \end{bmatrix} \cdot \begin{bmatrix} x_{00} & x_{01} \\ x_{10} & x_{11} \\ x_{20} & x_{21} \\ \end{bmatrix} = \begin{bmatrix} b_{00} & b_{01} \\ b_{10} & b_{11} \\ b_{20} & b_{21} \\ \end{bmatrix} \end{split}\] Consider solving:
\[\begin{split} \begin{bmatrix} a_{00} & 0 & 0 \\ a_{10} & a_{11} & 0 \\ a_{20} & a_{21} & a_{22} \end{bmatrix} \cdot \begin{bmatrix} x_{00} & x_{10} & x_{20} \\ x_{01} & x_{11} & x_{21} \end{bmatrix}^{T} = \begin{bmatrix} b_{00} & b_{10} & b_{20} \\ b_{01} & b_{11} & b_{21} \end{bmatrix}^{T} \end{split}\]Once the temporary storage buffer has been allocated, analysis meta data is required. It can be obtained by hipsparseXcsrsm2_analysis(). The triangular solve is completed by calling
hipsparseXcsrsm2_solve and once all solves are performed, the temporary storage buffer allocated by the user can be freed.
Solving a triangular system involves division by the diagonal elements. This means that if the sparse matrix is missing the diagonal entry (referred to as a structural zero) or the diagonal entry is zero (referred to as a numerical zero) then a division by zero would occur. hipsparseXcsrsm2_solve tracks the location of the first zero pivot (either numerical or structural zero). The zero pivot status can be checked by calling hipsparseXcsrsm2_zeroPivot(). If hipsparseXcsrsm2_zeroPivot() returns HIPSPARSE_STATUS_SUCCESS, then no zero pivot was found and therefore the matrix does not have a structural or numerical zero.
The user can specify that the sparse matrix should be interpreted as having ones on the diagonal by setting the diagonal type on the descriptor descrA to HIPSPARSE_DIAG_TYPE_UNIT using hipsparseSetMatDiagType. If hipsparseDiagType_t == HIPSPARSE_DIAG_TYPE_UNIT, no zero pivot will be reported, even if \(A_{j,j} = 0\) for some \(j\).
The sparse CSR matrix passed to hipsparseXcsrsm2_solve does not actually have to be a triangular matrix. Instead the triangular upper or lower part of the sparse matrix is solved based on hipsparseFillMode_t set on the descriptor descrA. If the fill mode is set to HIPSPARSE_FILL_MODE_LOWER, then the lower triangular matrix is solved. If the fill mode is set to HIPSPARSE_FILL_MODE_UPPER then the upper triangular matrix is solved.
Note
The sparse CSR matrix has to be sorted. This can be achieved by calling hipsparseXcsrsort().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only transA != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE and transB != HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE are supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
algo – [in] algorithm to use.
transA – [in] matrix \(A\) operation type.
transB – [in] matrix \(B\) operation type.
m – [in] number of rows of the sparse CSR matrix \(A\).
nrhs – [in] number of columns of the dense matrix \(op(B)\).
nnz – [in] number of non-zero entries of the sparse CSR matrix \(A\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\).
csrSortedValA – [in] array of nnz elements of the sparse CSR matrix \(A\).
csrSortedRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrSortedColIndA – [in] array of nnz elements containing the column indices of the sparse CSR matrix \(A\).
B – [inout] array of m \(\times\) nrhs elements of the rhs matrix \(B\).
ldb – [in] leading dimension of rhs matrix \(B\).
info – [in] structure that holds the information collected during the analysis step.
policy – [in] HIPSPARSE_SOLVE_POLICY_NO_LEVEL or HIPSPARSE_SOLVE_POLICY_USE_LEVEL.
pBuffer – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, nrhs, nnz, alpha, descrA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, info or pBuffer is invalid.
HIPSPARSE_STATUS_INTERNAL_ERROR – an internal error occurred.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE, transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE or hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 // hipSPARSE handle
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // A = ( 1.0 0.0 0.0 0.0 )
8 // ( 2.0 3.0 0.0 0.0 )
9 // ( 4.0 5.0 6.0 0.0 )
10 // ( 7.0 0.0 8.0 9.0 )
11
12 // Number of rows and columns
13 int m = 4;
14 int n = 4;
15
16 // Number of right-hand-sides
17 int nrhs = 4;
18
19 // Number of non-zeros
20 int nnz = 9;
21
22 // CSR row pointers
23 std::vector<int> hcsrRowPtr = {0, 1, 3, 6, 9};
24
25 // CSR column indices
26 std::vector<int> hcsrColInd = {0, 0, 1, 0, 1, 2, 0, 2, 3};
27
28 // CSR values
29 std::vector<double> hcsrVal = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
30
31 // Transposition of the matrix and rhs matrix
32 hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
33 hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE;
34
35 // Solve policy
36 hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL;
37
38 // Scalar alpha and beta
39 double alpha = 1.0;
40
41 // rhs and solution matrix
42 int ldb = n;
43
44 std::vector<double> hB = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
45
46 // Offload data to device
47 int* dcsrRowPtr;
48 int* dcsrColInd;
49 double* dcsrVal;
50 double* dB;
51
52 HIP_CHECK(hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)));
53 HIP_CHECK(hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz));
54 HIP_CHECK(hipMalloc((void**)&dcsrVal, sizeof(double) * nnz));
55 HIP_CHECK(hipMalloc((void**)&dB, sizeof(double) * n * nrhs));
56
57 HIP_CHECK(
58 hipMemcpy(dcsrRowPtr, hcsrRowPtr.data(), sizeof(int) * (m + 1), hipMemcpyHostToDevice));
59 HIP_CHECK(hipMemcpy(dcsrColInd, hcsrColInd.data(), sizeof(int) * nnz, hipMemcpyHostToDevice));
60 HIP_CHECK(hipMemcpy(dcsrVal, hcsrVal.data(), sizeof(double) * nnz, hipMemcpyHostToDevice));
61 HIP_CHECK(hipMemcpy(dB, hB.data(), sizeof(double) * n * nrhs, hipMemcpyHostToDevice));
62
63 // Matrix descriptor
64 hipsparseMatDescr_t descr;
65 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
66
67 // Matrix fill mode
68 HIPSPARSE_CHECK(hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER));
69
70 // Matrix diagonal type
71 HIPSPARSE_CHECK(hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT));
72
73 // Matrix info structure
74 csrsm2Info_t info;
75 HIPSPARSE_CHECK(hipsparseCreateCsrsm2Info(&info));
76
77 // Obtain required buffer size
78 size_t buffer_size;
79 HIPSPARSE_CHECK(hipsparseDcsrsm2_bufferSizeExt(handle,
80 0,
81 transA,
82 transB,
83 m,
84 nrhs,
85 nnz,
86 &alpha,
87 descr,
88 dcsrVal,
89 dcsrRowPtr,
90 dcsrColInd,
91 dB,
92 ldb,
93 info,
94 solve_policy,
95 &buffer_size));
96
97 // Allocate temporary buffer
98 void* dbuffer;
99 HIP_CHECK(hipMalloc(&dbuffer, buffer_size));
100
101 // Perform analysis step
102 HIPSPARSE_CHECK(hipsparseDcsrsm2_analysis(handle,
103 0,
104 transA,
105 transB,
106 m,
107 nrhs,
108 nnz,
109 &alpha,
110 descr,
111 dcsrVal,
112 dcsrRowPtr,
113 dcsrColInd,
114 dB,
115 ldb,
116 info,
117 solve_policy,
118 dbuffer));
119
120 // Call dcsrsm to perform lower triangular solve LB = B
121 HIPSPARSE_CHECK(hipsparseDcsrsm2_solve(handle,
122 0,
123 transA,
124 transB,
125 m,
126 nrhs,
127 nnz,
128 &alpha,
129 descr,
130 dcsrVal,
131 dcsrRowPtr,
132 dcsrColInd,
133 dB,
134 ldb,
135 info,
136 solve_policy,
137 dbuffer));
138
139 // Check for zero pivots
140 int pivot;
141 hipsparseStatus_t status = hipsparseXcsrsm2_zeroPivot(handle, info, &pivot);
142
143 if(status == HIPSPARSE_STATUS_ZERO_PIVOT)
144 {
145 std::cout << "Found zero pivot in matrix row " << pivot << std::endl;
146 }
147
148 // Copy result back to host
149 HIP_CHECK(hipMemcpy(hB.data(), dB, sizeof(double) * m * nrhs, hipMemcpyDeviceToHost));
150
151 std::cout << "hB" << std::endl;
152 for(size_t i = 0; i < hB.size(); i++)
153 {
154 std::cout << hB[i] << " ";
155 }
156 std::cout << "" << std::endl;
157
158 // Clear hipSPARSE
159 HIPSPARSE_CHECK(hipsparseDestroyCsrsm2Info(info));
160 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descr));
161 HIPSPARSE_CHECK(hipsparseDestroy(handle));
162
163 // Clear device memory
164 HIP_CHECK(hipFree(dcsrRowPtr));
165 HIP_CHECK(hipFree(dcsrColInd));
166 HIP_CHECK(hipFree(dcsrVal));
167 HIP_CHECK(hipFree(dB));
168 HIP_CHECK(hipFree(dbuffer));
169
170 return 0;
171}
1int main(int argc, char* argv[])
2{
3 // hipSPARSE handle
4 hipsparseHandle_t handle;
5 HIPSPARSE_CHECK(hipsparseCreate(&handle));
6
7 // A = ( 1.0 0.0 0.0 0.0 )
8 // ( 2.0 3.0 0.0 0.0 )
9 // ( 4.0 5.0 6.0 0.0 )
10 // ( 7.0 0.0 8.0 9.0 )
11
12 // Number of rows and columns
13 int m = 4;
14 int n = 4;
15
16 // Number of right-hand-sides
17 int nrhs = 4;
18
19 // Number of non-zeros
20 int nnz = 9;
21
22 // CSR row pointers
23 int hcsrRowPtr[] = {0, 1, 3, 6, 9};
24
25 // CSR column indices
26 int hcsrColInd[] = {0, 0, 1, 0, 1, 2, 0, 2, 3};
27
28 // CSR values
29 double hcsrVal[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
30
31 // Transposition of the matrix and rhs matrix
32 hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
33 hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE;
34
35 // Solve policy
36 hipsparseSolvePolicy_t solve_policy = HIPSPARSE_SOLVE_POLICY_NO_LEVEL;
37
38 // Scalar alpha and beta
39 double alpha = 1.0;
40
41 // rhs and solution matrix
42 int ldb = n;
43
44 double hB[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
45
46 // Offload data to device
47 int* dcsrRowPtr;
48 int* dcsrColInd;
49 double* dcsrVal;
50 double* dB;
51
52 HIP_CHECK(hipMalloc((void**)&dcsrRowPtr, sizeof(int) * (m + 1)));
53 HIP_CHECK(hipMalloc((void**)&dcsrColInd, sizeof(int) * nnz));
54 HIP_CHECK(hipMalloc((void**)&dcsrVal, sizeof(double) * nnz));
55 HIP_CHECK(hipMalloc((void**)&dB, sizeof(double) * n * nrhs));
56
57 HIP_CHECK(hipMemcpy(dcsrRowPtr, hcsrRowPtr, sizeof(int) * (m + 1), hipMemcpyHostToDevice));
58 HIP_CHECK(hipMemcpy(dcsrColInd, hcsrColInd, sizeof(int) * nnz, hipMemcpyHostToDevice));
59 HIP_CHECK(hipMemcpy(dcsrVal, hcsrVal, sizeof(double) * nnz, hipMemcpyHostToDevice));
60 HIP_CHECK(hipMemcpy(dB, hB, sizeof(double) * n * nrhs, hipMemcpyHostToDevice));
61
62 // Matrix descriptor
63 hipsparseMatDescr_t descr;
64 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descr));
65
66 // Matrix fill mode
67 HIPSPARSE_CHECK(hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER));
68
69 // Matrix diagonal type
70 HIPSPARSE_CHECK(hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT));
71
72 // Matrix info structure
73 csrsm2Info_t info;
74 HIPSPARSE_CHECK(hipsparseCreateCsrsm2Info(&info));
75
76 // Obtain required buffer size
77 size_t buffer_size;
78 HIPSPARSE_CHECK(hipsparseDcsrsm2_bufferSizeExt(handle,
79 0,
80 transA,
81 transB,
82 m,
83 nrhs,
84 nnz,
85 &alpha,
86 descr,
87 dcsrVal,
88 dcsrRowPtr,
89 dcsrColInd,
90 dB,
91 ldb,
92 info,
93 solve_policy,
94 &buffer_size));
95
96 // Allocate temporary buffer
97 void* dbuffer;
98 HIP_CHECK(hipMalloc(&dbuffer, buffer_size));
99
100 // Perform analysis step
101 HIPSPARSE_CHECK(hipsparseDcsrsm2_analysis(handle,
102 0,
103 transA,
104 transB,
105 m,
106 nrhs,
107 nnz,
108 &alpha,
109 descr,
110 dcsrVal,
111 dcsrRowPtr,
112 dcsrColInd,
113 dB,
114 ldb,
115 info,
116 solve_policy,
117 dbuffer));
118
119 // Call dcsrsm to perform lower triangular solve LB = B
120 HIPSPARSE_CHECK(hipsparseDcsrsm2_solve(handle,
121 0,
122 transA,
123 transB,
124 m,
125 nrhs,
126 nnz,
127 &alpha,
128 descr,
129 dcsrVal,
130 dcsrRowPtr,
131 dcsrColInd,
132 dB,
133 ldb,
134 info,
135 solve_policy,
136 dbuffer));
137
138 // Check for zero pivots
139 int pivot;
140 hipsparseStatus_t status = hipsparseXcsrsm2_zeroPivot(handle, info, &pivot);
141
142 if(status == HIPSPARSE_STATUS_ZERO_PIVOT)
143 {
144 printf("Found zero pivot in matrix row %d\n", pivot);
145 }
146
147 // Copy result back to host
148 HIP_CHECK(hipMemcpy(hB, dB, sizeof(double) * m * nrhs, hipMemcpyDeviceToHost));
149
150 printf("hB\n");
151 for(int i = 0; i < m * nrhs; i++)
152 {
153 printf("%f ", hB[i]);
154 }
155 printf("\n");
156
157 // Clear hipSPARSE
158 HIPSPARSE_CHECK(hipsparseDestroyCsrsm2Info(info));
159 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descr));
160 HIPSPARSE_CHECK(hipsparseDestroy(handle));
161
162 // Clear device memory
163 HIP_CHECK(hipFree(dcsrRowPtr));
164 HIP_CHECK(hipFree(dcsrColInd));
165 HIP_CHECK(hipFree(dcsrVal));
166 HIP_CHECK(hipFree(dB));
167 HIP_CHECK(hipFree(dbuffer));
168
169 return 0;
170}
1program example_hipsparse_csrsm2
2 use iso_c_binding
3 implicit none
4
5 ! HIP
6 interface
7 function hipMalloc(ptr, size) &
8 bind(c, name = 'hipMalloc')
9 use iso_c_binding
10 implicit none
11 integer(c_int) :: hipMalloc
12 type(c_ptr) :: ptr
13 integer(c_size_t), value :: size
14 end function hipMalloc
15
16 function hipFree(ptr) &
17 bind(c, name = 'hipFree')
18 use iso_c_binding
19 implicit none
20 integer(c_int) :: hipFree
21 type(c_ptr), value :: ptr
22 end function hipFree
23
24 function hipMemcpy(dst, src, size, kind) &
25 bind(c, name = 'hipMemcpy')
26 use iso_c_binding
27 implicit none
28 integer(c_int) :: hipMemcpy
29 type(c_ptr), value :: dst
30 type(c_ptr), intent(in), value :: src
31 integer(c_size_t), value :: size
32 integer(c_int), value :: kind
33 end function hipMemcpy
34 end interface
35
36 integer, parameter :: hipMemcpyHostToDevice = 1
37 integer, parameter :: hipMemcpyDeviceToHost = 2
38
39 ! hipSPARSE
40 interface
41 function hipsparseCreate(handle) &
42 bind(c, name = 'hipsparseCreate')
43 use iso_c_binding
44 implicit none
45 integer(c_int) :: hipsparseCreate
46 type(c_ptr) :: handle
47 end function hipsparseCreate
48
49 function hipsparseDestroy(handle) &
50 bind(c, name = 'hipsparseDestroy')
51 use iso_c_binding
52 implicit none
53 integer(c_int) :: hipsparseDestroy
54 type(c_ptr), value :: handle
55 end function hipsparseDestroy
56
57 function hipsparseCreateMatDescr(descr) &
58 bind(c, name = 'hipsparseCreateMatDescr')
59 use iso_c_binding
60 implicit none
61 integer(c_int) :: hipsparseCreateMatDescr
62 type(c_ptr) :: descr
63 end function hipsparseCreateMatDescr
64
65 function hipsparseDestroyMatDescr(descr) &
66 bind(c, name = 'hipsparseDestroyMatDescr')
67 use iso_c_binding
68 implicit none
69 integer(c_int) :: hipsparseDestroyMatDescr
70 type(c_ptr), value :: descr
71 end function hipsparseDestroyMatDescr
72
73 function hipsparseSetMatFillMode(descr, fillMode) &
74 bind(c, name = 'hipsparseSetMatFillMode')
75 use iso_c_binding
76 implicit none
77 integer(c_int) :: hipsparseSetMatFillMode
78 type(c_ptr), value :: descr
79 integer(c_int), value :: fillMode
80 end function hipsparseSetMatFillMode
81
82 function hipsparseSetMatDiagType(descr, diagType) &
83 bind(c, name = 'hipsparseSetMatDiagType')
84 use iso_c_binding
85 implicit none
86 integer(c_int) :: hipsparseSetMatDiagType
87 type(c_ptr), value :: descr
88 integer(c_int), value :: diagType
89 end function hipsparseSetMatDiagType
90
91 function hipsparseCreateCsrsm2Info(info) &
92 bind(c, name = 'hipsparseCreateCsrsm2Info')
93 use iso_c_binding
94 implicit none
95 integer(c_int) :: hipsparseCreateCsrsm2Info
96 type(c_ptr) :: info
97 end function hipsparseCreateCsrsm2Info
98
99 function hipsparseDestroyCsrsm2Info(info) &
100 bind(c, name = 'hipsparseDestroyCsrsm2Info')
101 use iso_c_binding
102 implicit none
103 integer(c_int) :: hipsparseDestroyCsrsm2Info
104 type(c_ptr), value :: info
105 end function hipsparseDestroyCsrsm2Info
106
107 function hipsparseDcsrsm2_bufferSizeExt(handle, algo, transA, transB, m, nrhs, nnz, &
108 alpha, descr, csrSortedValA, csrSortedRowPtrA, &
109 csrSortedColIndA, B, ldb, info, policy, pBufferSize) &
110 bind(c, name = 'hipsparseDcsrsm2_bufferSizeExt')
111 use iso_c_binding
112 implicit none
113 integer(c_int) :: hipsparseDcsrsm2_bufferSizeExt
114 type(c_ptr), value :: handle
115 integer(c_int), value :: algo
116 integer(c_int), value :: transA
117 integer(c_int), value :: transB
118 integer(c_int), value :: m
119 integer(c_int), value :: nrhs
120 integer(c_int), value :: nnz
121 type(c_ptr), intent(in), value :: alpha
122 type(c_ptr), value :: descr
123 type(c_ptr), intent(in), value :: csrSortedValA
124 type(c_ptr), intent(in), value :: csrSortedRowPtrA
125 type(c_ptr), intent(in), value :: csrSortedColIndA
126 type(c_ptr), value :: B
127 integer(c_int), value :: ldb
128 type(c_ptr), value :: info
129 integer(c_int), value :: policy
130 type(c_ptr), value :: pBufferSize
131 end function hipsparseDcsrsm2_bufferSizeExt
132
133 function hipsparseDcsrsm2_analysis(handle, algo, transA, transB, m, nrhs, nnz, alpha, &
134 descr, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, &
135 B, ldb, info, policy, pBuffer) &
136 bind(c, name = 'hipsparseDcsrsm2_analysis')
137 use iso_c_binding
138 implicit none
139 integer(c_int) :: hipsparseDcsrsm2_analysis
140 type(c_ptr), value :: handle
141 integer(c_int), value :: algo
142 integer(c_int), value :: transA
143 integer(c_int), value :: transB
144 integer(c_int), value :: m
145 integer(c_int), value :: nrhs
146 integer(c_int), value :: nnz
147 type(c_ptr), intent(in), value :: alpha
148 type(c_ptr), value :: descr
149 type(c_ptr), intent(in), value :: csrSortedValA
150 type(c_ptr), intent(in), value :: csrSortedRowPtrA
151 type(c_ptr), intent(in), value :: csrSortedColIndA
152 type(c_ptr), value :: B
153 integer(c_int), value :: ldb
154 type(c_ptr), value :: info
155 integer(c_int), value :: policy
156 type(c_ptr), value :: pBuffer
157 end function hipsparseDcsrsm2_analysis
158
159 function hipsparseDcsrsm2_solve(handle, algo, transA, transB, m, nrhs, nnz, alpha, descr, &
160 csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, B, ldb, &
161 info, policy, pBuffer) &
162 bind(c, name = 'hipsparseDcsrsm2_solve')
163 use iso_c_binding
164 implicit none
165 integer(c_int) :: hipsparseDcsrsm2_solve
166 type(c_ptr), value :: handle
167 integer(c_int), value :: algo
168 integer(c_int), value :: transA
169 integer(c_int), value :: transB
170 integer(c_int), value :: m
171 integer(c_int), value :: nrhs
172 integer(c_int), value :: nnz
173 type(c_ptr), intent(in), value :: alpha
174 type(c_ptr), value :: descr
175 type(c_ptr), intent(in), value :: csrSortedValA
176 type(c_ptr), intent(in), value :: csrSortedRowPtrA
177 type(c_ptr), intent(in), value :: csrSortedColIndA
178 type(c_ptr), value :: B
179 integer(c_int), value :: ldb
180 type(c_ptr), value :: info
181 integer(c_int), value :: policy
182 type(c_ptr), value :: pBuffer
183 end function hipsparseDcsrsm2_solve
184
185 function hipsparseXcsrsm2_zeroPivot(handle, info, position) &
186 bind(c, name = 'hipsparseXcsrsm2_zeroPivot')
187 use iso_c_binding
188 implicit none
189 integer(c_int) :: hipsparseXcsrsm2_zeroPivot
190 type(c_ptr), value :: handle
191 type(c_ptr), value :: info
192 type(c_ptr), value :: position
193 end function hipsparseXcsrsm2_zeroPivot
194 end interface
195
196 integer, parameter :: HIPSPARSE_OPERATION_NON_TRANSPOSE = 0
197 integer, parameter :: HIPSPARSE_FILL_MODE_LOWER = 0
198 integer, parameter :: HIPSPARSE_DIAG_TYPE_NON_UNIT = 0
199 integer, parameter :: HIPSPARSE_SOLVE_POLICY_NO_LEVEL = 0
200 integer, parameter :: HIPSPARSE_STATUS_ZERO_PIVOT = 6
201
202 ! Variables
203 type(c_ptr) :: handle
204 type(c_ptr) :: descr
205 type(c_ptr) :: info
206 integer :: i, stat
207 integer, target :: pivot
208 integer(c_size_t), target :: buffer_size
209
210 ! Lower triangular matrix A (4x4) in CSR format
211 ! A = ( 1.0 0.0 0.0 0.0 )
212 ! ( 2.0 3.0 0.0 0.0 )
213 ! ( 4.0 5.0 6.0 0.0 )
214 ! ( 7.0 0.0 8.0 9.0 )
215 integer, parameter :: m = 4
216 integer, parameter :: n = 4
217 integer, parameter :: nrhs = 4
218 integer, parameter :: nnz = 9
219
220 integer, dimension(m+1), target :: hcsrRowPtr = (/0, 1, 3, 6, 9/)
221 integer, dimension(nnz), target :: hcsrColInd = (/0, 0, 1, 0, 1, 2, 0, 2, 3/)
222 real(c_double), dimension(nnz), target :: hcsrVal = (/1.0d0, 2.0d0, 3.0d0, 4.0d0, 5.0d0, 6.0d0, 7.0d0, 8.0d0, 9.0d0/)
223
224 ! Right-hand side matrix B (4x4) - column major
225 integer, parameter :: ldb = n
226 real(c_double), dimension(n*nrhs), target :: hB = (/ &
227 1.0d0, 2.0d0, 3.0d0, 4.0d0, &
228 5.0d0, 6.0d0, 7.0d0, 8.0d0, &
229 9.0d0, 10.0d0, 11.0d0, 12.0d0, &
230 13.0d0, 14.0d0, 15.0d0, 16.0d0 /)
231
232 ! Scalar values
233 real(c_double), target :: alpha = 1.0d0
234
235 ! Device pointers
236 type(c_ptr) :: dcsrRowPtr
237 type(c_ptr) :: dcsrColInd
238 type(c_ptr) :: dcsrVal
239 type(c_ptr) :: dB
240 type(c_ptr) :: dbuffer
241
242 ! Create hipSPARSE handle
243 stat = hipsparseCreate(handle)
244 if (stat /= 0) then
245 write(*,*) 'Error: hipsparseCreate failed'
246 stop
247 end if
248
249 ! Create matrix descriptor
250 stat = hipsparseCreateMatDescr(descr)
251 if (stat /= 0) then
252 write(*,*) 'Error: hipsparseCreateMatDescr failed'
253 stop
254 end if
255
256 ! Set matrix fill mode (lower triangular)
257 stat = hipsparseSetMatFillMode(descr, HIPSPARSE_FILL_MODE_LOWER)
258 if (stat /= 0) then
259 write(*,*) 'Error: hipsparseSetMatFillMode failed'
260 stop
261 end if
262
263 ! Set matrix diagonal type
264 stat = hipsparseSetMatDiagType(descr, HIPSPARSE_DIAG_TYPE_NON_UNIT)
265 if (stat /= 0) then
266 write(*,*) 'Error: hipsparseSetMatDiagType failed'
267 stop
268 end if
269
270 ! Create csrsm2 info
271 stat = hipsparseCreateCsrsm2Info(info)
272 if (stat /= 0) then
273 write(*,*) 'Error: hipsparseCreateCsrsm2Info failed'
274 stop
275 end if
276
277 ! Allocate device memory
278 stat = hipMalloc(dcsrRowPtr, int((m + 1) * 4, c_size_t))
279 if (stat /= 0) then
280 write(*,*) 'Error: hipMalloc dcsrRowPtr failed'
281 stop
282 end if
283
284 stat = hipMalloc(dcsrColInd, int(nnz * 4, c_size_t))
285 if (stat /= 0) then
286 write(*,*) 'Error: hipMalloc dcsrColInd failed'
287 stop
288 end if
289
290 stat = hipMalloc(dcsrVal, int(nnz * 8, c_size_t))
291 if (stat /= 0) then
292 write(*,*) 'Error: hipMalloc dcsrVal failed'
293 stop
294 end if
295
296 stat = hipMalloc(dB, int(n * nrhs * 8, c_size_t))
297 if (stat /= 0) then
298 write(*,*) 'Error: hipMalloc dB failed'
299 stop
300 end if
301
302 ! Copy data to device
303 stat = hipMemcpy(dcsrRowPtr, c_loc(hcsrRowPtr), int((m + 1) * 4, c_size_t), hipMemcpyHostToDevice)
304 if (stat /= 0) then
305 write(*,*) 'Error: hipMemcpy dcsrRowPtr failed'
306 stop
307 end if
308
309 stat = hipMemcpy(dcsrColInd, c_loc(hcsrColInd), int(nnz * 4, c_size_t), hipMemcpyHostToDevice)
310 if (stat /= 0) then
311 write(*,*) 'Error: hipMemcpy dcsrColInd failed'
312 stop
313 end if
314
315 stat = hipMemcpy(dcsrVal, c_loc(hcsrVal), int(nnz * 8, c_size_t), hipMemcpyHostToDevice)
316 if (stat /= 0) then
317 write(*,*) 'Error: hipMemcpy dcsrVal failed'
318 stop
319 end if
320
321 stat = hipMemcpy(dB, c_loc(hB), int(n * nrhs * 8, c_size_t), hipMemcpyHostToDevice)
322 if (stat /= 0) then
323 write(*,*) 'Error: hipMemcpy dB failed'
324 stop
325 end if
326
327 ! Obtain required buffer size
328 stat = hipsparseDcsrsm2_bufferSizeExt(handle, &
329 0, &
330 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
331 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
332 m, &
333 nrhs, &
334 nnz, &
335 c_loc(alpha), &
336 descr, &
337 dcsrVal, &
338 dcsrRowPtr, &
339 dcsrColInd, &
340 dB, &
341 ldb, &
342 info, &
343 HIPSPARSE_SOLVE_POLICY_NO_LEVEL, &
344 c_loc(buffer_size))
345 if (stat /= 0) then
346 write(*,*) 'Error: hipsparseDcsrsm2_bufferSizeExt failed'
347 stop
348 end if
349
350 ! Allocate temporary buffer
351 stat = hipMalloc(dbuffer, buffer_size)
352 if (stat /= 0) then
353 write(*,*) 'Error: hipMalloc dbuffer failed'
354 stop
355 end if
356
357 ! Perform analysis step
358 stat = hipsparseDcsrsm2_analysis(handle, &
359 0, &
360 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
361 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
362 m, &
363 nrhs, &
364 nnz, &
365 c_loc(alpha), &
366 descr, &
367 dcsrVal, &
368 dcsrRowPtr, &
369 dcsrColInd, &
370 dB, &
371 ldb, &
372 info, &
373 HIPSPARSE_SOLVE_POLICY_NO_LEVEL, &
374 dbuffer)
375 if (stat /= 0) then
376 write(*,*) 'Error: hipsparseDcsrsm2_analysis failed'
377 stop
378 end if
379
380 ! Perform solve: solve Ax = B for x
381 stat = hipsparseDcsrsm2_solve(handle, &
382 0, &
383 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
384 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
385 m, &
386 nrhs, &
387 nnz, &
388 c_loc(alpha), &
389 descr, &
390 dcsrVal, &
391 dcsrRowPtr, &
392 dcsrColInd, &
393 dB, &
394 ldb, &
395 info, &
396 HIPSPARSE_SOLVE_POLICY_NO_LEVEL, &
397 dbuffer)
398 if (stat /= 0) then
399 write(*,*) 'Error: hipsparseDcsrsm2_solve failed'
400 stop
401 end if
402
403 ! Check for zero pivots
404 stat = hipsparseXcsrsm2_zeroPivot(handle, info, c_loc(pivot))
405 if (stat == HIPSPARSE_STATUS_ZERO_PIVOT) then
406 write(*,*) 'Found zero pivot in matrix row ', pivot
407 end if
408
409 ! Copy result back to host
410 stat = hipMemcpy(c_loc(hB), dB, int(m * nrhs * 8, c_size_t), hipMemcpyDeviceToHost)
411 if (stat /= 0) then
412 write(*,*) 'Error: hipMemcpy hB failed'
413 stop
414 end if
415
416 ! Print result
417 write(*,*) 'Solution:'
418 do i = 1, m * nrhs
419 write(*,*) hB(i)
420 end do
421
422 ! Clean up
423 stat = hipFree(dcsrRowPtr)
424 stat = hipFree(dcsrColInd)
425 stat = hipFree(dcsrVal)
426 stat = hipFree(dB)
427 stat = hipFree(dbuffer)
428
429 stat = hipsparseDestroyCsrsm2Info(info)
430 stat = hipsparseDestroyMatDescr(descr)
431 stat = hipsparseDestroy(handle)
432
433end program example_hipsparse_csrsm2
hipsparseXgemmi()#
-
hipsparseStatus_t hipsparseSgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const float *alpha, const float *A, int lda, const float *cscValB, const int *cscColPtrB, const int *cscRowIndB, const float *beta, float *C, int ldc)#
-
hipsparseStatus_t hipsparseDgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const double *alpha, const double *A, int lda, const double *cscValB, const int *cscColPtrB, const int *cscRowIndB, const double *beta, double *C, int ldc)#
-
hipsparseStatus_t hipsparseCgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const hipComplex *alpha, const hipComplex *A, int lda, const hipComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, const hipComplex *beta, hipComplex *C, int ldc)#
-
hipsparseStatus_t hipsparseZgemmi(hipsparseHandle_t handle, int m, int n, int k, int nnz, const hipDoubleComplex *alpha, const hipDoubleComplex *A, int lda, const hipDoubleComplex *cscValB, const int *cscColPtrB, const int *cscRowIndB, const hipDoubleComplex *beta, hipDoubleComplex *C, int ldc)#
Dense matrix sparse matrix multiplication using CSC storage format.
hipsparseXgemmi multiplies the scalar \(\alpha\) with a dense column-oriented \(m \times k\) matrix \(A\) and the sparse \(k \times n\) matrix \(B\), defined in CSC storage format, and adds the result to the dense column-oriented \(m \times n\) matrix \(C\) that is multiplied by the scalar \(\beta\), such that
\[ C := \alpha \cdot A \cdot B + \beta \cdot C \]
- Deprecated:
This function is deprecated when using the CUDA backend (CUDA 11.0+) and will be removed in CUDA 12.0. This deprecation does not apply to the ROCm backend.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the dense matrix \(A\). Must be non-negative.
n – [in] number of columns of the sparse CSC matrix \(B\) and of the dense matrix \(C\). Must be non-negative.
k – [in] number of columns of the dense matrix \(A\). Must be non-negative.
nnz – [in] number of non-zero entries of the sparse CSC matrix \(B\). Must be non-negative.
alpha – [in] scalar \(\alpha\).
A – [in] array of dimension \(lda \times k\) ( \(op(A) == A\)) or \(lda \times m\) ( \(op(A) == A^T\) or \(op(A) == A^H\)).
lda – [in] leading dimension of \(A\), must be at least \(m\) ( \(op(A) == A\)) or \(k\) ( \(op(A) == A^T\) or \(op(A) == A^H\)).
cscValB – [in] array of nnz elements of the sparse CSC matrix \(B\).
cscColPtrB – [in] array of n+1 elements that point to the start of every column of the sparse CSC matrix \(B\).
cscRowIndB – [in] array of nnz elements containing the row indices of the sparse CSC matrix \(B\).
beta – [in] scalar \(\beta\).
C – [inout] array of dimension \(ldc \times n\) that holds the values of \(C\).
ldc – [in] leading dimension of \(C\), must be at least \(m\).
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_NOT_INITIALIZED – handle is not initialized.
HIPSPARSE_STATUS_INVALID_VALUE – handle, alpha or beta is nullptr; m, n, k or nnz is negative; lda or ldc is invalid; or A, cscValB, cscColPtrB, cscRowIndB or C is nullptr.
1int main(int argc, char* argv[])
2{
3 // A, B, and C are m×k, k×n, and m×n
4 const int m = 3, n = 5, k = 4;
5 const int lda = m, ldc = m;
6 const int nnz_A = m * k, nnz_B = 10, nnz_C = m * n;
7
8 // alpha and beta
9 float alpha = 0.5f;
10 float beta = 0.25f;
11
12 std::vector<int> hcscColPtr = {0, 2, 5, 7, 8, 10};
13 std::vector<int> hcscRowInd = {0, 2, 0, 1, 3, 1, 3, 2, 0, 2};
14 std::vector<float> hcsc_val = {1, 6, 2, 4, 9, 5, 2, 7, 3, 8};
15
16 std::vector<float> hA(nnz_A, 1.0f);
17 std::vector<float> hC(nnz_C, 1.0f);
18
19 int* dcscColPtr;
20 int* dcscRowInd;
21 float* dcsc_val;
22 HIP_CHECK(hipMalloc((void**)&dcscColPtr, sizeof(int) * (n + 1)));
23 HIP_CHECK(hipMalloc((void**)&dcscRowInd, sizeof(int) * nnz_B));
24 HIP_CHECK(hipMalloc((void**)&dcsc_val, sizeof(float) * nnz_B));
25
26 HIP_CHECK(
27 hipMemcpy(dcscColPtr, hcscColPtr.data(), sizeof(int) * (n + 1), hipMemcpyHostToDevice));
28 HIP_CHECK(hipMemcpy(dcscRowInd, hcscRowInd.data(), sizeof(int) * nnz_B, hipMemcpyHostToDevice));
29 HIP_CHECK(hipMemcpy(dcsc_val, hcsc_val.data(), sizeof(float) * nnz_B, hipMemcpyHostToDevice));
30
31 hipsparseHandle_t handle;
32 HIPSPARSE_CHECK(hipsparseCreate(&handle));
33
34 // Allocate memory for the matrix A
35 float* dA;
36 HIP_CHECK(hipMalloc((void**)&dA, sizeof(float) * nnz_A));
37 HIP_CHECK(hipMemcpy(dA, hA.data(), sizeof(float) * nnz_A, hipMemcpyHostToDevice));
38
39 // Allocate memory for the resulting matrix C
40 float* dC;
41 HIP_CHECK(hipMalloc((void**)&dC, sizeof(float) * nnz_C));
42 HIP_CHECK(hipMemcpy(dC, hC.data(), sizeof(float) * nnz_C, hipMemcpyHostToDevice));
43
44 // Perform operation
45 HIPSPARSE_CHECK(hipsparseSgemmi(
46 handle, m, n, k, nnz_B, &alpha, dA, lda, dcsc_val, dcscColPtr, dcscRowInd, &beta, dC, ldc));
47
48 // Copy device to host
49 HIP_CHECK(hipMemcpy(hC.data(), dC, sizeof(float) * nnz_C, hipMemcpyDeviceToHost));
50
51 std::cout << "hC" << std::endl;
52 for(int i = 0; i < nnz_C; i++)
53 {
54 std::cout << hC[i] << " ";
55 }
56 std::cout << std::endl;
57
58 // Destroy matrix descriptors and handles
59 HIPSPARSE_CHECK(hipsparseDestroy(handle));
60
61 HIP_CHECK(hipFree(dcscColPtr));
62 HIP_CHECK(hipFree(dcscRowInd));
63 HIP_CHECK(hipFree(dcsc_val));
64 HIP_CHECK(hipFree(dA));
65 HIP_CHECK(hipFree(dC));
66
67 return 0;
68}
1int main(int argc, char* argv[])
2{
3 // A, B, and C are m×k, k×n, and m×n
4 const int m = 3, n = 5, k = 4;
5 const int lda = m, ldc = m;
6 const int nnz_A = m * k, nnz_B = 10, nnz_C = m * n;
7
8 // alpha and beta
9 float alpha = 0.5;
10 float beta = 0.25;
11
12 int hcscColPtr[] = {0, 2, 5, 7, 8, 10};
13 int hcscRowInd[] = {0, 2, 0, 1, 3, 1, 3, 2, 0, 2};
14 float hcsc_val[] = {1, 6, 2, 4, 9, 5, 2, 7, 3, 8};
15
16 float* hA = (float*)malloc(nnz_A * sizeof(float));
17 float* hC = (float*)malloc(nnz_C * sizeof(float));
18
19 // Initialize A and C to 1.0
20 for(int i = 0; i < nnz_A; i++)
21 {
22 hA[i] = 1.0f;
23 }
24 for(int i = 0; i < nnz_C; i++)
25 {
26 hC[i] = 1.0f;
27 }
28
29 int* dcscColPtr;
30 int* dcscRowInd;
31 float* dcsc_val;
32 HIP_CHECK(hipMalloc((void**)&dcscColPtr, sizeof(int) * (n + 1)));
33 HIP_CHECK(hipMalloc((void**)&dcscRowInd, sizeof(int) * nnz_B));
34 HIP_CHECK(hipMalloc((void**)&dcsc_val, sizeof(float) * nnz_B));
35
36 HIP_CHECK(hipMemcpy(dcscColPtr, hcscColPtr, sizeof(int) * (n + 1), hipMemcpyHostToDevice));
37 HIP_CHECK(hipMemcpy(dcscRowInd, hcscRowInd, sizeof(int) * nnz_B, hipMemcpyHostToDevice));
38 HIP_CHECK(hipMemcpy(dcsc_val, hcsc_val, sizeof(float) * nnz_B, hipMemcpyHostToDevice));
39
40 hipsparseHandle_t handle;
41 HIPSPARSE_CHECK(hipsparseCreate(&handle));
42
43 // Allocate memory for the matrix A
44 float* dA;
45 HIP_CHECK(hipMalloc((void**)&dA, sizeof(float) * nnz_A));
46 HIP_CHECK(hipMemcpy(dA, hA, sizeof(float) * nnz_A, hipMemcpyHostToDevice));
47
48 // Allocate memory for the resulting matrix C
49 float* dC;
50 HIP_CHECK(hipMalloc((void**)&dC, sizeof(float) * nnz_C));
51 HIP_CHECK(hipMemcpy(dC, hC, sizeof(float) * nnz_C, hipMemcpyHostToDevice));
52
53 // Perform operation
54 HIPSPARSE_CHECK(hipsparseSgemmi(
55 handle, m, n, k, nnz_B, &alpha, dA, lda, dcsc_val, dcscColPtr, dcscRowInd, &beta, dC, ldc));
56
57 // Copy device to host
58 HIP_CHECK(hipMemcpy(hC, dC, sizeof(float) * nnz_C, hipMemcpyDeviceToHost));
59
60 printf("hC\n");
61 for(int i = 0; i < nnz_C; i++)
62 {
63 printf("%f ", hC[i]);
64 }
65 printf("\n");
66
67 // Destroy matrix descriptors and handles
68 HIPSPARSE_CHECK(hipsparseDestroy(handle));
69
70 HIP_CHECK(hipFree(dcscColPtr));
71 HIP_CHECK(hipFree(dcscRowInd));
72 HIP_CHECK(hipFree(dcsc_val));
73 HIP_CHECK(hipFree(dA));
74 HIP_CHECK(hipFree(dC));
75
76 return 0;
77}
! Example: compute C := alpha * A * B + beta * C with hipsparseSgemmi, where A and C
! are dense matrices and B is a sparse matrix stored in CSC format.
! Every value handed to the C API is declared with a C-interoperable kind, and
! byte counts are taken from c_sizeof instead of a hard-coded element size.
program example_hipsparse_gemmi
    use iso_c_binding
    implicit none

    ! HIP runtime entry points used by this example
    interface
        function hipMalloc(ptr, size) &
                bind(c, name = 'hipMalloc')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMalloc
            type(c_ptr) :: ptr
            integer(c_size_t), value :: size
        end function hipMalloc

        function hipFree(ptr) &
                bind(c, name = 'hipFree')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipFree
            type(c_ptr), value :: ptr
        end function hipFree

        function hipMemcpy(dst, src, size, kind) &
                bind(c, name = 'hipMemcpy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMemcpy
            type(c_ptr), value :: dst
            type(c_ptr), intent(in), value :: src
            integer(c_size_t), value :: size
            integer(c_int), value :: kind
        end function hipMemcpy
    end interface

    ! Copy-direction codes passed as the kind argument of hipMemcpy
    integer(c_int), parameter :: hipMemcpyHostToDevice = 1
    integer(c_int), parameter :: hipMemcpyDeviceToHost = 2

    ! hipSPARSE entry points used by this example
    interface
        function hipsparseCreate(handle) &
                bind(c, name = 'hipsparseCreate')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreate
            type(c_ptr) :: handle
        end function hipsparseCreate

        function hipsparseDestroy(handle) &
                bind(c, name = 'hipsparseDestroy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroy
            type(c_ptr), value :: handle
        end function hipsparseDestroy

        function hipsparseSgemmi(handle, m, n, k, nnz, alpha, A, lda, cscValB, cscColPtrB, &
                cscRowIndB, beta, C, ldc) &
                bind(c, name = 'hipsparseSgemmi')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseSgemmi
            type(c_ptr), value :: handle
            integer(c_int), value :: m
            integer(c_int), value :: n
            integer(c_int), value :: k
            integer(c_int), value :: nnz
            type(c_ptr), intent(in), value :: alpha
            type(c_ptr), intent(in), value :: A
            integer(c_int), value :: lda
            type(c_ptr), intent(in), value :: cscValB
            type(c_ptr), intent(in), value :: cscColPtrB
            type(c_ptr), intent(in), value :: cscRowIndB
            type(c_ptr), intent(in), value :: beta
            type(c_ptr), value :: C
            integer(c_int), value :: ldc
        end function hipsparseSgemmi
    end interface

    ! Variables
    type(c_ptr) :: handle
    integer(c_int) :: stat
    integer :: i

    ! C = alpha * A * B + beta * C
    ! A is dense (m x k), B is sparse CSC (k x n), C is dense (m x n)
    integer(c_int), parameter :: m = 3
    integer(c_int), parameter :: n = 5
    integer(c_int), parameter :: k = 4
    integer(c_int), parameter :: lda = m
    integer(c_int), parameter :: ldc = m
    ! Element counts of dense A, of the stored entries of B, and of dense C
    integer(c_int), parameter :: nnz_A = m * k
    integer(c_int), parameter :: nnz_B = 10
    integer(c_int), parameter :: nnz_C = m * n

    ! Sparse matrix B in CSC format (zero-based column pointers and row indices)
    integer(c_int), dimension(n+1), target :: hcscColPtr = (/0, 2, 5, 7, 8, 10/)
    integer(c_int), dimension(nnz_B), target :: hcscRowInd = (/0, 2, 0, 1, 3, 1, 3, 2, 0, 2/)
    real(c_float), dimension(nnz_B), target :: hcsc_val = &
        (/1.0_c_float, 6.0_c_float, 2.0_c_float, 4.0_c_float, 9.0_c_float, &
          5.0_c_float, 2.0_c_float, 7.0_c_float, 3.0_c_float, 8.0_c_float/)

    ! Dense matrices A and C
    real(c_float), dimension(nnz_A), target :: hA
    real(c_float), dimension(nnz_C), target :: hC

    ! Scalar values
    real(c_float), target :: alpha = 0.5_c_float
    real(c_float), target :: beta = 0.25_c_float

    ! Device pointers
    type(c_ptr) :: dcscColPtr
    type(c_ptr) :: dcscRowInd
    type(c_ptr) :: dcsc_val
    type(c_ptr) :: dA
    type(c_ptr) :: dC

    ! Initialize dense matrices with 1.0
    hA = 1.0_c_float
    hC = 1.0_c_float

    ! Create hipSPARSE handle
    call check(hipsparseCreate(handle), 'hipsparseCreate')

    ! Allocate device memory for CSC matrix B; c_sizeof of an array is its total size in bytes
    call check(hipMalloc(dcscColPtr, c_sizeof(hcscColPtr)), 'hipMalloc dcscColPtr')
    call check(hipMalloc(dcscRowInd, c_sizeof(hcscRowInd)), 'hipMalloc dcscRowInd')
    call check(hipMalloc(dcsc_val, c_sizeof(hcsc_val)), 'hipMalloc dcsc_val')

    ! Allocate device memory for dense matrices A and C
    call check(hipMalloc(dA, c_sizeof(hA)), 'hipMalloc dA')
    call check(hipMalloc(dC, c_sizeof(hC)), 'hipMalloc dC')

    ! Copy data to device
    call check(hipMemcpy(dcscColPtr, c_loc(hcscColPtr), c_sizeof(hcscColPtr), hipMemcpyHostToDevice), &
               'hipMemcpy dcscColPtr')
    call check(hipMemcpy(dcscRowInd, c_loc(hcscRowInd), c_sizeof(hcscRowInd), hipMemcpyHostToDevice), &
               'hipMemcpy dcscRowInd')
    call check(hipMemcpy(dcsc_val, c_loc(hcsc_val), c_sizeof(hcsc_val), hipMemcpyHostToDevice), &
               'hipMemcpy dcsc_val')
    call check(hipMemcpy(dA, c_loc(hA), c_sizeof(hA), hipMemcpyHostToDevice), 'hipMemcpy dA')
    call check(hipMemcpy(dC, c_loc(hC), c_sizeof(hC), hipMemcpyHostToDevice), 'hipMemcpy dC')

    ! Perform operation: C = alpha * A * B + beta * C
    stat = hipsparseSgemmi(handle, &
                           m, &
                           n, &
                           k, &
                           nnz_B, &
                           c_loc(alpha), &
                           dA, &
                           lda, &
                           dcsc_val, &
                           dcscColPtr, &
                           dcscRowInd, &
                           c_loc(beta), &
                           dC, &
                           ldc)
    call check(stat, 'hipsparseSgemmi')

    ! Copy result back to host
    call check(hipMemcpy(c_loc(hC), dC, c_sizeof(hC), hipMemcpyDeviceToHost), 'hipMemcpy hC')

    ! Print result, one entry of C per line in storage order
    write(*,*) 'hC:'
    do i = 1, nnz_C
        write(*,*) hC(i)
    end do

    ! Clean up (best effort: the returned statuses are not checked)
    stat = hipFree(dcscColPtr)
    stat = hipFree(dcscRowInd)
    stat = hipFree(dcsc_val)
    stat = hipFree(dA)
    stat = hipFree(dC)

    stat = hipsparseDestroy(handle)

contains

    ! Print 'Error: <what> failed' and stop with a non-zero exit code when a HIP
    ! or hipSPARSE call returned a status other than 0.
    subroutine check(status, what)
        integer(c_int), intent(in) :: status
        character(len=*), intent(in) :: what

        if (status /= 0) then
            write(*,*) 'Error: ' // what // ' failed'
            stop 1
        end if
    end subroutine check

end program example_hipsparse_gemmi