Sparse extra functions#
This module contains all sparse extra routines.
The sparse extra routines describe operations that manipulate sparse matrices.
hipsparseXcsrgeamNnz()#
-
hipsparseStatus_t hipsparseXcsrgeamNnz(hipsparseHandle_t handle, int m, int n, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, int *csrRowPtrC, int *nnzTotalDevHostPtr)#
hipsparseXcsrgeamNnz computes the total number of CSR non-zero elements and the CSR row offsets, which point to the start of every row of the sparse CSR matrix, of the resulting matrix \(C\). It is assumed that csrRowPtrC has been allocated with size m+1. The desired index base in the output CSR matrix is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase(). For a full code example, see hipsparseScsrgeam().
- Deprecated:
This function is deprecated when using the CUDA backend (CUDA 10.0+) and will be removed in CUDA 11.0. This deprecation does not apply to the ROCm backend.
Note
As indicated, nnzTotalDevHostPtr can point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(A\), \(B\) and \(C\). Must be non-negative.
n – [in] number of columns of the sparse CSR matrix \(A\), \(B\) and \(C\). Must be non-negative.
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\). Must be non-negative.
csrRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrColIndA – [in] array of nnzA elements containing the column indices of the sparse CSR matrix \(A\).
descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\). Must be non-negative.
csrRowPtrB – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(B\).
csrColIndB – [in] array of nnzB elements containing the column indices of the sparse CSR matrix \(B\).
descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrRowPtrC – [out] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(C\).
nnzTotalDevHostPtr – [out] pointer to the number of non-zero entries of the sparse CSR matrix \(C\). nnzTotalDevHostPtr can be a host or device pointer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_NOT_INITIALIZED – handle is not initialized.
HIPSPARSE_STATUS_INVALID_VALUE – handle, descrA, descrB or descrC is nullptr, m, n, nnzA or nnzB is negative, or csrRowPtrA, csrColIndA, csrRowPtrB, csrColIndB, csrRowPtrC or nnzTotalDevHostPtr is nullptr.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t is not HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrgeam()#
-
hipsparseStatus_t hipsparseScsrgeam(hipsparseHandle_t handle, int m, int n, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const float *beta, const hipsparseMatDescr_t descrB, int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, float *csrValC, int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseDcsrgeam(hipsparseHandle_t handle, int m, int n, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const double *beta, const hipsparseMatDescr_t descrB, int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, double *csrValC, int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseCcsrgeam(hipsparseHandle_t handle, int m, int n, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipComplex *csrValC, int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseZcsrgeam(hipsparseHandle_t handle, int m, int n, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrValC, int *csrRowPtrC, int *csrColIndC)#
Sparse matrix sparse matrix addition using CSR storage format.
hipsparseXcsrgeam multiplies the scalar \(\alpha\) with the sparse \(m \times n\) matrix \(A\), defined in CSR storage format, multiplies the scalar \(\beta\) with the sparse \(m \times n\) matrix \(B\), defined in CSR storage format, and adds both resulting matrices to obtain the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that
\[ C := \alpha \cdot A + \beta \cdot B. \]
This computation involves a multi-step process. First the user must allocate csrRowPtrC to have size m+1. The user then calls hipsparseXcsrgeamNnz, which fills in the csrRowPtrC array and computes the total number of nonzeros in \(C\), nnzC. The user then allocates both arrays csrColIndC and csrValC to have size nnzC and calls hipsparseXcsrgeam to complete the computation. The desired index base in the output CSR matrix \(C\) is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase().
Note
Both scalars \(\alpha\) and \(\beta\) have to be valid.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(A\), \(B\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(A\), \(B\) and \(C\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrValA – [in] array of nnzA elements of the sparse CSR matrix \(A\).
csrRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrColIndA – [in] array of nnzA elements containing the column indices of the sparse CSR matrix \(A\).
beta – [in] scalar \(\beta\).
descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrValB – [in] array of nnzB elements of the sparse CSR matrix \(B\).
csrRowPtrB – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(B\).
csrColIndB – [in] array of nnzB elements containing the column indices of the sparse CSR matrix \(B\).
descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrValC – [out] array of nnzC elements of the sparse CSR matrix \(C\).
csrRowPtrC – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(C\).
csrColIndC – [out] array of nnzC elements containing the column indices of the sparse CSR matrix \(C\).
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, n, nnzA, nnzB, alpha, descrA, csrValA, csrRowPtrA, csrColIndA, beta, descrB, csrValB, csrRowPtrB, csrColIndB, descrC, csrValC, csrRowPtrC or csrColIndC is invalid.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 const int m = 4;
4 const int n = 4;
5 const int nnzA = 9;
6 const int nnzB = 6;
7
8 float alpha{1.0f};
9 float beta{1.0f};
10
11 // A, B, and C are m×n
12
13 // A
14 // 1 0 0 2
15 // 3 4 0 0
16 // 5 6 7 8
17 // 0 0 9 0
18 std::vector<int> hcsrRowPtrA = {0, 2, 4, 8, 9};
19 std::vector<int> hcsrColIndA = {0, 3, 0, 1, 0, 1, 2, 3, 2};
20 std::vector<float> hcsrValA = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
21
22 // B
23 // 0 1 0 0
24 // 1 0 1 0
25 // 0 1 0 1
26 // 0 0 1 0
27 std::vector<int> hcsrRowPtrB = {0, 1, 3, 5, 6};
28 std::vector<int> hcsrColIndB = {1, 0, 2, 1, 3, 2};
29 std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
30
31 // Device memory management: Allocate and copy A, B
32 int* dcsrRowPtrA;
33 int* dcsrColIndA;
34 float* dcsrValA;
35 int* dcsrRowPtrB;
36 int* dcsrColIndB;
37 float* dcsrValB;
38 int* dcsrRowPtrC;
39 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
40 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
41 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
42 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)));
43 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
44 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
45 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
46
47 HIP_CHECK(
48 hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
49 HIP_CHECK(
50 hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice));
51 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice));
52 HIP_CHECK(
53 hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
54 HIP_CHECK(
55 hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice));
56 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice));
57
58 hipsparseHandle_t handle;
59 HIPSPARSE_CHECK(hipsparseCreate(&handle));
60
61 hipsparseMatDescr_t descrA;
62 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
63
64 hipsparseMatDescr_t descrB;
65 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
66
67 hipsparseMatDescr_t descrC;
68 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
69
70 int nnzC;
71 HIPSPARSE_CHECK(hipsparseXcsrgeamNnz(handle,
72 m,
73 n,
74 descrA,
75 nnzA,
76 dcsrRowPtrA,
77 dcsrColIndA,
78 descrB,
79 nnzB,
80 dcsrRowPtrB,
81 dcsrColIndB,
82 descrC,
83 dcsrRowPtrC,
84 &nnzC));
85
86 int* dcsrColIndC = nullptr;
87 float* dcsrValC = nullptr;
88 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
89 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
90
91 HIPSPARSE_CHECK(hipsparseScsrgeam(handle,
92 m,
93 n,
94 &alpha,
95 descrA,
96 nnzA,
97 dcsrValA,
98 dcsrRowPtrA,
99 dcsrColIndA,
100 &beta,
101 descrB,
102 nnzB,
103 dcsrValB,
104 dcsrRowPtrB,
105 dcsrColIndB,
106 descrC,
107 dcsrValC,
108 dcsrRowPtrC,
109 dcsrColIndC));
110
111 std::vector<int> hcsrRowPtrC(m + 1);
112 std::vector<int> hcsrColIndC(nnzC);
113 std::vector<float> hcsrValC(nnzC);
114
115 // Copy back to the host
116 HIP_CHECK(
117 hipMemcpy(hcsrRowPtrC.data(), dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
118 HIP_CHECK(
119 hipMemcpy(hcsrColIndC.data(), dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
120 HIP_CHECK(hipMemcpy(hcsrValC.data(), dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
121
122 std::cout << "C" << std::endl;
123 for(int i = 0; i < m; i++)
124 {
125 int start = hcsrRowPtrC[i];
126 int end = hcsrRowPtrC[i + 1];
127
128 std::vector<float> temp(n, 0.0f);
129 for(int j = start; j < end; j++)
130 {
131 temp[hcsrColIndC[j]] = hcsrValC[j];
132 }
133
134 for(int j = 0; j < n; j++)
135 {
136 std::cout << temp[j] << " ";
137 }
138 std::cout << std::endl;
139 }
140 std::cout << std::endl;
141
142 HIP_CHECK(hipFree(dcsrRowPtrA));
143 HIP_CHECK(hipFree(dcsrColIndA));
144 HIP_CHECK(hipFree(dcsrValA));
145 HIP_CHECK(hipFree(dcsrRowPtrB));
146 HIP_CHECK(hipFree(dcsrColIndB));
147 HIP_CHECK(hipFree(dcsrValB));
148 HIP_CHECK(hipFree(dcsrRowPtrC));
149 HIP_CHECK(hipFree(dcsrColIndC));
150 HIP_CHECK(hipFree(dcsrValC));
151
152 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
153 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
154 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
155 HIPSPARSE_CHECK(hipsparseDestroy(handle));
156
157 return 0;
158}
1int main(int argc, char* argv[])
2{
3 const int m = 4;
4 const int n = 4;
5 const int nnzA = 9;
6 const int nnzB = 6;
7
8 float alpha = 1.0;
9 float beta = 1.0;
10
11 // A, B, and C are m×n
12
13 // A
14 // 1 0 0 2
15 // 3 4 0 0
16 // 5 6 7 8
17 // 0 0 9 0
18 int hcsrRowPtrA[] = {0, 2, 4, 8, 9};
19 int hcsrColIndA[] = {0, 3, 0, 1, 0, 1, 2, 3, 2};
20 float hcsrValA[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
21
22 // B
23 // 0 1 0 0
24 // 1 0 1 0
25 // 0 1 0 1
26 // 0 0 1 0
27 int hcsrRowPtrB[] = {0, 1, 3, 5, 6};
28 int hcsrColIndB[] = {1, 0, 2, 1, 3, 2};
29 float hcsrValB[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
30
31 // Device memory management: Allocate and copy A, B
32 int* dcsrRowPtrA;
33 int* dcsrColIndA;
34 float* dcsrValA;
35 int* dcsrRowPtrB;
36 int* dcsrColIndB;
37 float* dcsrValB;
38 int* dcsrRowPtrC;
39 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
40 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
41 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
42 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)));
43 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
44 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
45 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
46
47 HIP_CHECK(hipMemcpy(dcsrRowPtrA, hcsrRowPtrA, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
48 HIP_CHECK(hipMemcpy(dcsrColIndA, hcsrColIndA, nnzA * sizeof(int), hipMemcpyHostToDevice));
49 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA, nnzA * sizeof(float), hipMemcpyHostToDevice));
50 HIP_CHECK(hipMemcpy(dcsrRowPtrB, hcsrRowPtrB, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
51 HIP_CHECK(hipMemcpy(dcsrColIndB, hcsrColIndB, nnzB * sizeof(int), hipMemcpyHostToDevice));
52 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB, nnzB * sizeof(float), hipMemcpyHostToDevice));
53
54 hipsparseHandle_t handle;
55 HIPSPARSE_CHECK(hipsparseCreate(&handle));
56
57 hipsparseMatDescr_t descrA;
58 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
59
60 hipsparseMatDescr_t descrB;
61 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
62
63 hipsparseMatDescr_t descrC;
64 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
65
66 int nnzC;
67 HIPSPARSE_CHECK(hipsparseXcsrgeamNnz(handle,
68 m,
69 n,
70 descrA,
71 nnzA,
72 dcsrRowPtrA,
73 dcsrColIndA,
74 descrB,
75 nnzB,
76 dcsrRowPtrB,
77 dcsrColIndB,
78 descrC,
79 dcsrRowPtrC,
80 &nnzC));
81
82 int* dcsrColIndC = NULL;
83 float* dcsrValC = NULL;
84 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
85 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
86
87 HIPSPARSE_CHECK(hipsparseScsrgeam(handle,
88 m,
89 n,
90 &alpha,
91 descrA,
92 nnzA,
93 dcsrValA,
94 dcsrRowPtrA,
95 dcsrColIndA,
96 &beta,
97 descrB,
98 nnzB,
99 dcsrValB,
100 dcsrRowPtrB,
101 dcsrColIndB,
102 descrC,
103 dcsrValC,
104 dcsrRowPtrC,
105 dcsrColIndC));
106
107 int* hcsrRowPtrC = (int*)malloc((m + 1) * sizeof(int));
108 int* hcsrColIndC = (int*)malloc((nnzC) * sizeof(int));
109 float hcsrValC[nnzC];
110
111 // Copy back to the host
112 HIP_CHECK(hipMemcpy(hcsrRowPtrC, dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
113 HIP_CHECK(hipMemcpy(hcsrColIndC, dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
114 HIP_CHECK(hipMemcpy(hcsrValC, dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
115
116 printf("C\n");
117 for(int i = 0; i < m; i++)
118 {
119 int start = hcsrRowPtrC[i];
120 int end = hcsrRowPtrC[i + 1];
121
122 float* temp = (float*)malloc(n * sizeof(float));
123 for(int j = start; j < end; j++)
124 {
125 temp[hcsrColIndC[j]] = hcsrValC[j];
126 }
127
128 for(int j = 0; j < n; j++)
129 {
130 printf("%f ", temp[j]);
131 }
132 printf("\n");
133 }
134 printf("\n");
135
136 HIP_CHECK(hipFree(dcsrRowPtrA));
137 HIP_CHECK(hipFree(dcsrColIndA));
138 HIP_CHECK(hipFree(dcsrValA));
139 HIP_CHECK(hipFree(dcsrRowPtrB));
140 HIP_CHECK(hipFree(dcsrColIndB));
141 HIP_CHECK(hipFree(dcsrValB));
142 HIP_CHECK(hipFree(dcsrRowPtrC));
143 HIP_CHECK(hipFree(dcsrColIndC));
144 HIP_CHECK(hipFree(dcsrValC));
145
146 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
147 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
148 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
149 HIPSPARSE_CHECK(hipsparseDestroy(handle));
150
151 return 0;
152}
! Example: computes C = alpha * A + beta * B for two 4x4 CSR matrices using
! hipsparseXcsrgeamNnz followed by hipsparseScsrgeam, then prints the CSR
! arrays of C. Any failing HIP or hipSPARSE call prints a message and stops.
program example_hipsparse_csrgeam
    use iso_c_binding
    implicit none

    ! HIP
    interface
        function hipMalloc(ptr, size) &
                bind(c, name = 'hipMalloc')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMalloc
            type(c_ptr) :: ptr
            integer(c_size_t), value :: size
        end function hipMalloc

        function hipFree(ptr) &
                bind(c, name = 'hipFree')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipFree
            type(c_ptr), value :: ptr
        end function hipFree

        function hipMemcpy(dst, src, size, kind) &
                bind(c, name = 'hipMemcpy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMemcpy
            type(c_ptr), value :: dst
            type(c_ptr), intent(in), value :: src
            integer(c_size_t), value :: size
            integer(c_int), value :: kind
        end function hipMemcpy
    end interface

    ! Passed by value to a C int parameter, hence the c_int kind.
    integer(c_int), parameter :: hipMemcpyHostToDevice = 1
    integer(c_int), parameter :: hipMemcpyDeviceToHost = 2

    ! hipSPARSE
    interface
        function hipsparseCreate(handle) &
                bind(c, name = 'hipsparseCreate')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreate
            type(c_ptr) :: handle
        end function hipsparseCreate

        function hipsparseDestroy(handle) &
                bind(c, name = 'hipsparseDestroy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroy
            type(c_ptr), value :: handle
        end function hipsparseDestroy

        function hipsparseCreateMatDescr(descr) &
                bind(c, name = 'hipsparseCreateMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreateMatDescr
            type(c_ptr) :: descr
        end function hipsparseCreateMatDescr

        function hipsparseDestroyMatDescr(descr) &
                bind(c, name = 'hipsparseDestroyMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroyMatDescr
            type(c_ptr), value :: descr
        end function hipsparseDestroyMatDescr

        function hipsparseXcsrgeamNnz(handle, m, n, descrA, nnzA, csrRowPtrA, csrColIndA, descrB, &
                nnzB, csrRowPtrB, csrColIndB, descrC, csrRowPtrC, nnzTotalDevHostPtr) &
                bind(c, name = 'hipsparseXcsrgeamNnz')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseXcsrgeamNnz
            type(c_ptr), value :: handle
            integer(c_int), value :: m
            integer(c_int), value :: n
            type(c_ptr), value :: descrA
            integer(c_int), value :: nnzA
            type(c_ptr), intent(in), value :: csrRowPtrA
            type(c_ptr), intent(in), value :: csrColIndA
            type(c_ptr), value :: descrB
            integer(c_int), value :: nnzB
            type(c_ptr), intent(in), value :: csrRowPtrB
            type(c_ptr), intent(in), value :: csrColIndB
            type(c_ptr), value :: descrC
            type(c_ptr), value :: csrRowPtrC
            type(c_ptr), value :: nnzTotalDevHostPtr
        end function hipsparseXcsrgeamNnz

        function hipsparseScsrgeam(handle, m, n, alpha, descrA, nnzA, csrSortedValA, csrSortedRowPtrA, &
                csrSortedColIndA, beta, descrB, nnzB, csrSortedValB, csrSortedRowPtrB, &
                csrSortedColIndB, descrC, csrSortedValC, csrSortedRowPtrC, csrSortedColIndC) &
                bind(c, name = 'hipsparseScsrgeam')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseScsrgeam
            type(c_ptr), value :: handle
            integer(c_int), value :: m
            integer(c_int), value :: n
            type(c_ptr), intent(in), value :: alpha
            type(c_ptr), value :: descrA
            integer(c_int), value :: nnzA
            type(c_ptr), intent(in), value :: csrSortedValA
            type(c_ptr), intent(in), value :: csrSortedRowPtrA
            type(c_ptr), intent(in), value :: csrSortedColIndA
            type(c_ptr), intent(in), value :: beta
            type(c_ptr), value :: descrB
            integer(c_int), value :: nnzB
            type(c_ptr), intent(in), value :: csrSortedValB
            type(c_ptr), intent(in), value :: csrSortedRowPtrB
            type(c_ptr), intent(in), value :: csrSortedColIndB
            type(c_ptr), value :: descrC
            type(c_ptr), value :: csrSortedValC
            type(c_ptr), value :: csrSortedRowPtrC
            type(c_ptr), value :: csrSortedColIndC
        end function hipsparseScsrgeam
    end interface

    ! Variables
    type(c_ptr) :: handle
    type(c_ptr) :: descrA, descrB, descrC
    integer :: i

    ! Matrix addition: C = alpha * A + beta * B (no transpose!)
    ! Everything handed to the C API as an int uses the interoperable c_int kind.
    integer(c_int), parameter :: m = 4
    integer(c_int), parameter :: n = 4
    integer(c_int), parameter :: nnzA = 9
    integer(c_int), parameter :: nnzB = 6
    integer(c_int), target :: nnzC

    ! Size in bytes of one index (C int) and one value (C float) element.
    integer(c_size_t), parameter :: indexBytes = 4_c_size_t
    integer(c_size_t), parameter :: valueBytes = 4_c_size_t

    ! Element counts widened to c_size_t before multiplying, so the byte
    ! counts cannot overflow default-integer arithmetic.
    integer(c_size_t) :: rowPtrBytes
    integer(c_size_t) :: colIndBytesA, valBytesA
    integer(c_size_t) :: colIndBytesB, valBytesB
    integer(c_size_t) :: colIndBytesC, valBytesC

    ! Matrix A (4x4)
    integer(c_int), dimension(m+1), target :: hcsrRowPtrA = (/0, 2, 4, 8, 9/)
    integer(c_int), dimension(nnzA), target :: hcsrColIndA = (/0, 3, 0, 1, 0, 1, 2, 3, 2/)
    real(c_float), dimension(nnzA), target :: hcsrValA = (/1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0/)

    ! Matrix B (4x4)
    integer(c_int), dimension(m+1), target :: hcsrRowPtrB = (/0, 1, 3, 5, 6/)
    integer(c_int), dimension(nnzB), target :: hcsrColIndB = (/1, 0, 2, 1, 3, 2/)
    real(c_float), dimension(nnzB), target :: hcsrValB = (/1.0, 1.0, 1.0, 1.0, 1.0, 1.0/)

    ! Matrix C (will be allocated after nnzC is determined)
    integer(c_int), dimension(:), allocatable, target :: hcsrRowPtrC
    integer(c_int), dimension(:), allocatable, target :: hcsrColIndC
    real(c_float), dimension(:), allocatable, target :: hcsrValC

    ! Scalar values
    real(c_float), target :: alpha = 1.0
    real(c_float), target :: beta = 1.0

    ! Device pointers
    type(c_ptr) :: dcsrRowPtrA, dcsrColIndA, dcsrValA
    type(c_ptr) :: dcsrRowPtrB, dcsrColIndB, dcsrValB
    type(c_ptr) :: dcsrRowPtrC, dcsrColIndC, dcsrValC

    rowPtrBytes  = int(m + 1, c_size_t) * indexBytes
    colIndBytesA = int(nnzA, c_size_t) * indexBytes
    valBytesA    = int(nnzA, c_size_t) * valueBytes
    colIndBytesB = int(nnzB, c_size_t) * indexBytes
    valBytesB    = int(nnzB, c_size_t) * valueBytes

    ! Create hipSPARSE handle
    call check(hipsparseCreate(handle), 'hipsparseCreate')

    ! Create matrix descriptors
    call check(hipsparseCreateMatDescr(descrA), 'hipsparseCreateMatDescr descrA')
    call check(hipsparseCreateMatDescr(descrB), 'hipsparseCreateMatDescr descrB')
    call check(hipsparseCreateMatDescr(descrC), 'hipsparseCreateMatDescr descrC')

    ! Allocate device memory for A and B, and for the row pointers of C
    call check(hipMalloc(dcsrRowPtrA, rowPtrBytes), 'hipMalloc dcsrRowPtrA')
    call check(hipMalloc(dcsrColIndA, colIndBytesA), 'hipMalloc dcsrColIndA')
    call check(hipMalloc(dcsrValA, valBytesA), 'hipMalloc dcsrValA')

    call check(hipMalloc(dcsrRowPtrB, rowPtrBytes), 'hipMalloc dcsrRowPtrB')
    call check(hipMalloc(dcsrColIndB, colIndBytesB), 'hipMalloc dcsrColIndB')
    call check(hipMalloc(dcsrValB, valBytesB), 'hipMalloc dcsrValB')

    call check(hipMalloc(dcsrRowPtrC, rowPtrBytes), 'hipMalloc dcsrRowPtrC')

    ! Copy A and B to device
    call check(hipMemcpy(dcsrRowPtrA, c_loc(hcsrRowPtrA), rowPtrBytes, hipMemcpyHostToDevice), &
               'hipMemcpy hcsrRowPtrA')
    call check(hipMemcpy(dcsrColIndA, c_loc(hcsrColIndA), colIndBytesA, hipMemcpyHostToDevice), &
               'hipMemcpy hcsrColIndA')
    call check(hipMemcpy(dcsrValA, c_loc(hcsrValA), valBytesA, hipMemcpyHostToDevice), &
               'hipMemcpy hcsrValA')

    call check(hipMemcpy(dcsrRowPtrB, c_loc(hcsrRowPtrB), rowPtrBytes, hipMemcpyHostToDevice), &
               'hipMemcpy hcsrRowPtrB')
    call check(hipMemcpy(dcsrColIndB, c_loc(hcsrColIndB), colIndBytesB, hipMemcpyHostToDevice), &
               'hipMemcpy hcsrColIndB')
    call check(hipMemcpy(dcsrValB, c_loc(hcsrValB), valBytesB, hipMemcpyHostToDevice), &
               'hipMemcpy hcsrValB')

    ! Determine nnzC (c_loc(nnzC) is a host address)
    nnzC = 0
    call check(hipsparseXcsrgeamNnz(handle, &
                                    m, &
                                    n, &
                                    descrA, &
                                    nnzA, &
                                    dcsrRowPtrA, &
                                    dcsrColIndA, &
                                    descrB, &
                                    nnzB, &
                                    dcsrRowPtrB, &
                                    dcsrColIndB, &
                                    descrC, &
                                    dcsrRowPtrC, &
                                    c_loc(nnzC)), &
               'hipsparseXcsrgeamNnz')

    ! Allocate device memory for C
    colIndBytesC = int(nnzC, c_size_t) * indexBytes
    valBytesC    = int(nnzC, c_size_t) * valueBytes
    call check(hipMalloc(dcsrColIndC, colIndBytesC), 'hipMalloc dcsrColIndC')
    call check(hipMalloc(dcsrValC, valBytesC), 'hipMalloc dcsrValC')

    ! Perform matrix addition: C = alpha * A + beta * B
    call check(hipsparseScsrgeam(handle, &
                                 m, &
                                 n, &
                                 c_loc(alpha), &
                                 descrA, &
                                 nnzA, &
                                 dcsrValA, &
                                 dcsrRowPtrA, &
                                 dcsrColIndA, &
                                 c_loc(beta), &
                                 descrB, &
                                 nnzB, &
                                 dcsrValB, &
                                 dcsrRowPtrB, &
                                 dcsrColIndB, &
                                 descrC, &
                                 dcsrValC, &
                                 dcsrRowPtrC, &
                                 dcsrColIndC), &
               'hipsparseScsrgeam')

    ! Allocate host memory for C
    allocate(hcsrRowPtrC(m+1))
    allocate(hcsrColIndC(nnzC))
    allocate(hcsrValC(nnzC))

    ! Copy result back to host
    call check(hipMemcpy(c_loc(hcsrRowPtrC), dcsrRowPtrC, rowPtrBytes, hipMemcpyDeviceToHost), &
               'hipMemcpy dcsrRowPtrC')
    call check(hipMemcpy(c_loc(hcsrColIndC), dcsrColIndC, colIndBytesC, hipMemcpyDeviceToHost), &
               'hipMemcpy dcsrColIndC')
    call check(hipMemcpy(c_loc(hcsrValC), dcsrValC, valBytesC, hipMemcpyDeviceToHost), &
               'hipMemcpy dcsrValC')

    ! Print result
    write(*,*) 'Matrix C (result of A + B):'
    write(*,*) 'nnzC =', nnzC
    write(*,*) 'csrRowPtrC:'
    do i = 1, m + 1
        write(*,*) hcsrRowPtrC(i)
    end do
    write(*,*) 'csrColIndC:'
    do i = 1, nnzC
        write(*,*) hcsrColIndC(i)
    end do
    write(*,*) 'csrValC:'
    do i = 1, nnzC
        write(*,*) hcsrValC(i)
    end do

    ! Clean up
    deallocate(hcsrRowPtrC)
    deallocate(hcsrColIndC)
    deallocate(hcsrValC)

    call check(hipFree(dcsrRowPtrA), 'hipFree dcsrRowPtrA')
    call check(hipFree(dcsrColIndA), 'hipFree dcsrColIndA')
    call check(hipFree(dcsrValA), 'hipFree dcsrValA')
    call check(hipFree(dcsrRowPtrB), 'hipFree dcsrRowPtrB')
    call check(hipFree(dcsrColIndB), 'hipFree dcsrColIndB')
    call check(hipFree(dcsrValB), 'hipFree dcsrValB')
    call check(hipFree(dcsrRowPtrC), 'hipFree dcsrRowPtrC')
    call check(hipFree(dcsrColIndC), 'hipFree dcsrColIndC')
    call check(hipFree(dcsrValC), 'hipFree dcsrValC')

    call check(hipsparseDestroyMatDescr(descrA), 'hipsparseDestroyMatDescr descrA')
    call check(hipsparseDestroyMatDescr(descrB), 'hipsparseDestroyMatDescr descrB')
    call check(hipsparseDestroyMatDescr(descrC), 'hipsparseDestroyMatDescr descrC')
    call check(hipsparseDestroy(handle), 'hipsparseDestroy')

contains

    ! Stops the program with a diagnostic when a HIP or hipSPARSE call
    ! returned a non-zero status. `what` names the failing call.
    subroutine check(stat, what)
        integer(c_int), intent(in) :: stat
        character(len=*), intent(in) :: what

        if (stat /= 0) then
            write(*,*) 'Error: ' // what // ' failed'
            stop 1
        end if
    end subroutine check

end program example_hipsparse_csrgeam
hipsparseXcsrgeam2_bufferSizeExt()#
-
hipsparseStatus_t hipsparseScsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, const hipsparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const float *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDcsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *beta, const hipsparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const double *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCcsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const hipComplex *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZcsrgeam2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, const hipDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, const int *csrSortedColIndC, size_t *pBufferSizeInBytes)#
hipsparseXcsrgeam2_bufferSizeExt returns the size of the temporary storage buffer in bytes that is required by hipsparseXcsrgeam2Nnz() and hipsparseXcsrgeam2(). The temporary storage buffer must be allocated by the user.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(A\), \(B\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(A\), \(B\) and \(C\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrSortedValA – [in] array of nnzA elements of the sparse CSR matrix \(A\).
csrSortedRowPtrA – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(A\).
csrSortedColIndA – [in] array of nnzA elements containing the column indices of the sparse CSR matrix \(A\).
beta – [in] scalar \(\beta\).
descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrSortedValB – [in] array of nnzB elements of the sparse CSR matrix \(B\).
csrSortedRowPtrB – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(B\).
csrSortedColIndB – [in] array of nnzB elements containing the column indices of the sparse CSR matrix \(B\).
descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrSortedValC – [in] array of elements of the sparse CSR matrix \(C\).
csrSortedRowPtrC – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(C\).
csrSortedColIndC – [in] array of elements containing the column indices of the sparse CSR matrix \(C\).
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer required by hipsparseXcsrgeam2Nnz() and hipsparseXcsrgeam2().
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, n, nnzA, nnzB, alpha, descrA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, beta, descrB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, or pBufferSizeInBytes is invalid.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrgeam2Nnz()#
-
hipsparseStatus_t hipsparseXcsrgeam2Nnz(hipsparseHandle_t handle, int m, int n, const hipsparseMatDescr_t descrA, int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace)#
hipsparseXcsrgeam2Nnz computes the total CSR non-zero elements and the CSR row offsets, that point to the start of every row of the sparse CSR matrix, of the resulting matrix \(C\). It is assumed that csrSortedRowPtrC has been allocated with size m+1. The required buffer size can be obtained by hipsparseXcsrgeam2_bufferSizeExt(). The desired index base in the output CSR matrix \(C\) is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase().
Note
As indicated,
nnzTotalDevHostPtrcan point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(A\), \(B\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(A\), \(B\) and \(C\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrSortedRowPtrA – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(A\).csrSortedColIndA – [in] array of
nnzAelements containing the column indices of the sparse CSR matrix \(A\).descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrSortedRowPtrB – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(B\).csrSortedColIndB – [in] array of
nnzBelements containing the column indices of the sparse CSR matrix \(B\).descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrSortedRowPtrC – [out] array of
m+1 elements that point to the start of every row of the sparse CSR matrix \(C\).
nnzTotalDevHostPtr – [out] pointer to the number of non-zero entries of the sparse CSR matrix \(C\).
nnzTotalDevHostPtr can be a host or device pointer.
workspace – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle, m, n, nnzA, nnzB, descrA, csrSortedRowPtrA, csrSortedColIndA, descrB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedRowPtrC or nnzTotalDevHostPtr is invalid.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrgeam2()#
-
hipsparseStatus_t hipsparseScsrgeam2(hipsparseHandle_t handle, int m, int n, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, const hipsparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, float *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrgeam2(hipsparseHandle_t handle, int m, int n, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const double *beta, const hipsparseMatDescr_t descrB, int nnzB, const double *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrgeam2(hipsparseHandle_t handle, int m, int n, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, hipComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrgeam2(hipsparseHandle_t handle, int m, int n, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, const int *csrSortedColIndB, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer)#
Sparse matrix sparse matrix addition using CSR storage format.
hipsparseXcsrgeam2 multiplies the scalar \(\alpha\) with the sparse \(m \times n\) matrix \(A\), defined in CSR storage format, multiplies the scalar \(\beta\) with the sparse \(m \times n\) matrix \(B\), defined in CSR storage format, and adds both resulting matrices to obtain the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that \[ C := \alpha \cdot A + \beta \cdot B. \] This computation involves a multi step process. First the user must call hipsparseXcsrgeam2_bufferSizeExt() in order to determine the required user allocated temporary buffer size. The user then allocates this buffer and also allocates
csrSortedRowPtrC to have size m+1. Both the temporary storage buffer and the csrSortedRowPtrC array are then passed to hipsparseXcsrgeam2Nnz() which fills in the csrSortedRowPtrC array as well as computes the total number of nonzeros in C, nnzC. The user then allocates both arrays csrSortedColIndC and csrSortedValC to have size nnzC and calls hipsparseXcsrgeam2 to complete the computation. The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase().
Note
Both scalars \(\alpha\) and \(\beta\) have to be valid.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(A\), \(B\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(A\), \(B\) and \(C\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrSortedValA – [in] array of
nnzAelements of the sparse CSR matrix \(A\).csrSortedRowPtrA – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(A\).csrSortedColIndA – [in] array of
nnzAelements containing the column indices of the sparse CSR matrix \(A\).beta – [in] scalar \(\beta\).
descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrSortedValB – [in] array of
nnzBelements of the sparse CSR matrix \(B\).csrSortedRowPtrB – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(B\).csrSortedColIndB – [in] array of
nnzBelements containing the column indices of the sparse CSR matrix \(B\).descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrSortedValC – [out] array of elements of the sparse CSR matrix \(C\).
csrSortedRowPtrC – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(C\).csrSortedColIndC – [out] array of elements containing the column indices of the sparse CSR matrix \(C\).
pBuffer – [in] temporary storage buffer allocated by the user.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE –
handle, m, n, nnzA, nnzB, alpha, descrA, csrSortedValA, csrSortedRowPtrA, csrSortedColIndA, beta, descrB, csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedValC, csrSortedRowPtrC, csrSortedColIndC or pBuffer is invalid.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 const int m = 4;
4 const int n = 4;
5 const int nnzA = 9;
6 const int nnzB = 6;
7
8 float alpha{1.0f};
9 float beta{1.0f};
10
11 // A, B, and C are m×n
12
13 // A
14 // 1 0 0 2
15 // 3 4 0 0
16 // 5 6 7 8
17 // 0 0 9 0
18 std::vector<int> hcsrRowPtrA = {0, 2, 4, 8, 9};
19 std::vector<int> hcsrColIndA = {0, 3, 0, 1, 0, 1, 2, 3, 2};
20 std::vector<float> hcsrValA = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f};
21
22 // B
23 // 0 1 0 0
24 // 1 0 1 0
25 // 0 1 0 1
26 // 0 0 1 0
27 std::vector<int> hcsrRowPtrB = {0, 1, 3, 5, 6};
28 std::vector<int> hcsrColIndB = {1, 0, 2, 1, 3, 2};
29 std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
30
31 // Device memory management: Allocate and copy A, B
32 int* dcsrRowPtrA;
33 int* dcsrColIndA;
34 float* dcsrValA;
35 int* dcsrRowPtrB;
36 int* dcsrColIndB;
37 float* dcsrValB;
38 int* dcsrRowPtrC;
39 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
40 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
41 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
42 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)));
43 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
44 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
45 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
46
47 HIP_CHECK(
48 hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
49 HIP_CHECK(
50 hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice));
51 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice));
52 HIP_CHECK(
53 hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
54 HIP_CHECK(
55 hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice));
56 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice));
57
58 hipsparseHandle_t handle;
59 HIPSPARSE_CHECK(hipsparseCreate(&handle));
60
61 hipsparseMatDescr_t descrA;
62 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
63
64 hipsparseMatDescr_t descrB;
65 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
66
67 hipsparseMatDescr_t descrC;
68 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
69
70 size_t bufferSize;
71 HIPSPARSE_CHECK(hipsparseScsrgeam2_bufferSizeExt(handle,
72 m,
73 n,
74 &alpha,
75 descrA,
76 nnzA,
77 dcsrValA,
78 dcsrRowPtrA,
79 dcsrColIndA,
80 &beta,
81 descrB,
82 nnzB,
83 dcsrValB,
84 dcsrRowPtrB,
85 dcsrColIndB,
86 descrC,
87 nullptr,
88 dcsrRowPtrC,
89 nullptr,
90 &bufferSize));
91
92 void* dbuffer = nullptr;
93 HIP_CHECK(hipMalloc((void**)&dbuffer, bufferSize));
94
95 int nnzC;
96 HIPSPARSE_CHECK(hipsparseXcsrgeam2Nnz(handle,
97 m,
98 n,
99 descrA,
100 nnzA,
101 dcsrRowPtrA,
102 dcsrColIndA,
103 descrB,
104 nnzB,
105 dcsrRowPtrB,
106 dcsrColIndB,
107 descrC,
108 dcsrRowPtrC,
109 &nnzC,
110 dbuffer));
111
112 int* dcsrColIndC = nullptr;
113 float* dcsrValC = nullptr;
114 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
115 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
116
117 HIPSPARSE_CHECK(hipsparseScsrgeam2(handle,
118 m,
119 n,
120 &alpha,
121 descrA,
122 nnzA,
123 dcsrValA,
124 dcsrRowPtrA,
125 dcsrColIndA,
126 &beta,
127 descrB,
128 nnzB,
129 dcsrValB,
130 dcsrRowPtrB,
131 dcsrColIndB,
132 descrC,
133 dcsrValC,
134 dcsrRowPtrC,
135 dcsrColIndC,
136 dbuffer));
137
138 std::vector<int> hcsrRowPtrC(m + 1);
139 std::vector<int> hcsrColIndC(nnzC);
140 std::vector<float> hcsrValC(nnzC);
141
142 // Copy back to the host
143 HIP_CHECK(
144 hipMemcpy(hcsrRowPtrC.data(), dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
145 HIP_CHECK(
146 hipMemcpy(hcsrColIndC.data(), dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
147 HIP_CHECK(hipMemcpy(hcsrValC.data(), dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
148
149 std::cout << "C" << std::endl;
150 for(int i = 0; i < m; i++)
151 {
152 int start = hcsrRowPtrC[i];
153 int end = hcsrRowPtrC[i + 1];
154
155 std::vector<float> temp(n, 0.0f);
156 for(int j = start; j < end; j++)
157 {
158 temp[hcsrColIndC[j]] = hcsrValC[j];
159 }
160
161 for(int j = 0; j < n; j++)
162 {
163 std::cout << temp[j] << " ";
164 }
165 std::cout << std::endl;
166 }
167 std::cout << std::endl;
168
169 HIP_CHECK(hipFree(dcsrRowPtrA));
170 HIP_CHECK(hipFree(dcsrColIndA));
171 HIP_CHECK(hipFree(dcsrValA));
172 HIP_CHECK(hipFree(dcsrRowPtrB));
173 HIP_CHECK(hipFree(dcsrColIndB));
174 HIP_CHECK(hipFree(dcsrValB));
175 HIP_CHECK(hipFree(dcsrRowPtrC));
176 HIP_CHECK(hipFree(dcsrColIndC));
177 HIP_CHECK(hipFree(dcsrValC));
178
179 HIP_CHECK(hipFree(dbuffer));
180
181 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
182 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
183 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
184 HIPSPARSE_CHECK(hipsparseDestroy(handle));
185
186 return 0;
187}
1int main(int argc, char* argv[])
2{
3 const int m = 4;
4 const int n = 4;
5 const int nnzA = 9;
6 const int nnzB = 6;
7
8 float alpha = 1.0;
9 float beta = 1.0;
10
11 // A, B, and C are m×n
12
13 // A
14 // 1 0 0 2
15 // 3 4 0 0
16 // 5 6 7 8
17 // 0 0 9 0
18 int hcsrRowPtrA[] = {0, 2, 4, 8, 9};
19 int hcsrColIndA[] = {0, 3, 0, 1, 0, 1, 2, 3, 2};
20 float hcsrValA[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0};
21
22 // B
23 // 0 1 0 0
24 // 1 0 1 0
25 // 0 1 0 1
26 // 0 0 1 0
27 int hcsrRowPtrB[] = {0, 1, 3, 5, 6};
28 int hcsrColIndB[] = {1, 0, 2, 1, 3, 2};
29 float hcsrValB[] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0};
30
31 // Device memory management: Allocate and copy A, B
32 int* dcsrRowPtrA;
33 int* dcsrColIndA;
34 float* dcsrValA;
35 int* dcsrRowPtrB;
36 int* dcsrColIndB;
37 float* dcsrValB;
38 int* dcsrRowPtrC;
39 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
40 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
41 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
42 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)));
43 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
44 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
45 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
46
47 HIP_CHECK(hipMemcpy(dcsrRowPtrA, hcsrRowPtrA, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
48 HIP_CHECK(hipMemcpy(dcsrColIndA, hcsrColIndA, nnzA * sizeof(int), hipMemcpyHostToDevice));
49 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA, nnzA * sizeof(float), hipMemcpyHostToDevice));
50 HIP_CHECK(hipMemcpy(dcsrRowPtrB, hcsrRowPtrB, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
51 HIP_CHECK(hipMemcpy(dcsrColIndB, hcsrColIndB, nnzB * sizeof(int), hipMemcpyHostToDevice));
52 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB, nnzB * sizeof(float), hipMemcpyHostToDevice));
53
54 hipsparseHandle_t handle;
55 HIPSPARSE_CHECK(hipsparseCreate(&handle));
56
57 hipsparseMatDescr_t descrA;
58 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
59
60 hipsparseMatDescr_t descrB;
61 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
62
63 hipsparseMatDescr_t descrC;
64 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
65
66 size_t bufferSize;
67 HIPSPARSE_CHECK(hipsparseScsrgeam2_bufferSizeExt(handle,
68 m,
69 n,
70 &alpha,
71 descrA,
72 nnzA,
73 dcsrValA,
74 dcsrRowPtrA,
75 dcsrColIndA,
76 &beta,
77 descrB,
78 nnzB,
79 dcsrValB,
80 dcsrRowPtrB,
81 dcsrColIndB,
82 descrC,
83 NULL,
84 dcsrRowPtrC,
85 NULL,
86 &bufferSize));
87
88 void* dbuffer = NULL;
89 HIP_CHECK(hipMalloc((void**)&dbuffer, bufferSize));
90
91 int nnzC;
92 HIPSPARSE_CHECK(hipsparseXcsrgeam2Nnz(handle,
93 m,
94 n,
95 descrA,
96 nnzA,
97 dcsrRowPtrA,
98 dcsrColIndA,
99 descrB,
100 nnzB,
101 dcsrRowPtrB,
102 dcsrColIndB,
103 descrC,
104 dcsrRowPtrC,
105 &nnzC,
106 dbuffer));
107
108 int* dcsrColIndC = NULL;
109 float* dcsrValC = NULL;
110 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
111 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
112
113 HIPSPARSE_CHECK(hipsparseScsrgeam2(handle,
114 m,
115 n,
116 &alpha,
117 descrA,
118 nnzA,
119 dcsrValA,
120 dcsrRowPtrA,
121 dcsrColIndA,
122 &beta,
123 descrB,
124 nnzB,
125 dcsrValB,
126 dcsrRowPtrB,
127 dcsrColIndB,
128 descrC,
129 dcsrValC,
130 dcsrRowPtrC,
131 dcsrColIndC,
132 dbuffer));
133
134 int* hcsrRowPtrC = (int*)malloc((m + 1) * sizeof(int));
135 int* hcsrColIndC = (int*)malloc((nnzC) * sizeof(int));
136 float hcsrValC[nnzC];
137
138 // Copy back to the host
139 HIP_CHECK(hipMemcpy(hcsrRowPtrC, dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
140 HIP_CHECK(hipMemcpy(hcsrColIndC, dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
141 HIP_CHECK(hipMemcpy(hcsrValC, dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
142
143 printf("C\n");
144 for(int i = 0; i < m; i++)
145 {
146 int start = hcsrRowPtrC[i];
147 int end = hcsrRowPtrC[i + 1];
148
149 float* temp = (float*)malloc(n * sizeof(float));
150 for(int j = start; j < end; j++)
151 {
152 temp[hcsrColIndC[j]] = hcsrValC[j];
153 }
154
155 for(int j = 0; j < n; j++)
156 {
157 printf("%f ", temp[j]);
158 }
159 printf("\n");
160 }
161 printf("\n");
162
163 HIP_CHECK(hipFree(dcsrRowPtrA));
164 HIP_CHECK(hipFree(dcsrColIndA));
165 HIP_CHECK(hipFree(dcsrValA));
166 HIP_CHECK(hipFree(dcsrRowPtrB));
167 HIP_CHECK(hipFree(dcsrColIndB));
168 HIP_CHECK(hipFree(dcsrValB));
169 HIP_CHECK(hipFree(dcsrRowPtrC));
170 HIP_CHECK(hipFree(dcsrColIndC));
171 HIP_CHECK(hipFree(dcsrValC));
172
173 HIP_CHECK(hipFree(dbuffer));
174
175 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
176 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
177 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
178 HIPSPARSE_CHECK(hipsparseDestroy(handle));
179
180 return 0;
181}
! Example program: computes C = alpha * A + beta * B for two 4x4 CSR matrices
! with hipsparseScsrgeam2 and prints C in dense form.
! NOTE(review): HIP_CHECK and HIPSPARSE_CHECK are called below but are not
! defined in this listing; presumably they are error-checking subroutines
! provided elsewhere -- confirm.
1program example_fortran_csrgeam2
2 use iso_c_binding
3 use hipsparse
4
5 implicit none
6
! Explicit interfaces binding to the C functions hipMalloc, hipFree and
! hipMemcpy. hipMalloc takes its pointer argument by reference (no VALUE
! attribute) so the callee can store the allocated address in it.
7 interface
8 function hipMalloc(ptr, size) &
9 bind(c, name = 'hipMalloc')
10 use iso_c_binding
11 implicit none
12 integer :: hipMalloc
13 type(c_ptr) :: ptr
14 integer(c_size_t), value :: size
15 end function hipMalloc
16
17 function hipFree(ptr) &
18 bind(c, name = 'hipFree')
19 use iso_c_binding
20 implicit none
21 integer :: hipFree
22 type(c_ptr), value :: ptr
23 end function hipFree
24
25 function hipMemcpy(dst, src, size, kind) &
26 bind(c, name = 'hipMemcpy')
27 use iso_c_binding
28 implicit none
29 integer :: hipMemcpy
30 type(c_ptr), value :: dst
31 type(c_ptr), intent(in), value :: src
32 integer(c_size_t), value :: size
33 integer(c_int), value :: kind
34 end function hipMemcpy
35 end interface
36
! Host-side CSR arrays. The arrays of C that depend on its non-zero count are
! allocatable because that count is only known after hipsparseXcsrgeam2Nnz.
! temp holds one dense row of C for printing.
37 integer, target :: h_csr_row_ptr_a(5), h_csr_col_ind_a(9)
38 integer, target :: h_csr_row_ptr_b(5), h_csr_col_ind_b(6)
39 integer, target :: h_csr_row_ptr_c(5)
40 integer, allocatable, target :: h_csr_col_ind_c(:)
41 real(4), target :: h_csr_val_a(9), h_csr_val_b(6)
42 real(4), allocatable, target :: h_csr_val_c(:)
43 real(4) :: temp(4)
44
! Device pointers, held as opaque C pointers.
45 type(c_ptr) :: d_csr_row_ptr_a, d_csr_col_ind_a, d_csr_val_a
46 type(c_ptr) :: d_csr_row_ptr_b, d_csr_col_ind_b, d_csr_val_b
47 type(c_ptr) :: d_csr_row_ptr_c, d_csr_col_ind_c, d_csr_val_c
48 type(c_ptr) :: d_buffer
49
50 integer :: i, j, row_start, row_end
51 integer(c_int) :: m, n, nnz_a, nnz_b
52 integer(c_int), target :: nnz_c
53 integer(c_size_t), target :: buffer_size
54
55 real(c_float), target :: alpha, beta
56
57 type(c_ptr) :: handle
58 type(c_ptr) :: descr_a, descr_b, descr_c
59
60! Input data
61 m = 4
62 n = 4
63 nnz_a = 9
64 nnz_b = 6
65
! Row pointers and column indices below are zero-based.
66! Matrix A (4x4)
67 h_csr_row_ptr_a = (/0, 2, 4, 8, 9/)
68 h_csr_col_ind_a = (/0, 3, 0, 1, 0, 1, 2, 3, 2/)
69 h_csr_val_a = (/1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0/)
70
71! Matrix B (4x4)
72 h_csr_row_ptr_b = (/0, 1, 3, 5, 6/)
73 h_csr_col_ind_b = (/1, 0, 2, 1, 3, 2/)
74 h_csr_val_b = (/1.0, 1.0, 1.0, 1.0, 1.0, 1.0/)
75
76! Scalar alpha and beta
77 alpha = 1.0
78 beta = 1.0
79
! Sizes passed to hipMalloc/hipMemcpy are element count times 4.
! NOTE(review): the factor 4 is presumably the byte size of one default
! integer / real(4) element -- confirm for the target compiler.
80! Allocate device memory for A and B
81 call HIP_CHECK(hipMalloc(d_csr_row_ptr_a, int(m + 1, c_size_t) * 4))
82 call HIP_CHECK(hipMalloc(d_csr_col_ind_a, int(nnz_a, c_size_t) * 4))
83 call HIP_CHECK(hipMalloc(d_csr_val_a, int(nnz_a, c_size_t) * 4))
84 call HIP_CHECK(hipMalloc(d_csr_row_ptr_b, int(m + 1, c_size_t) * 4))
85 call HIP_CHECK(hipMalloc(d_csr_col_ind_b, int(nnz_b, c_size_t) * 4))
86 call HIP_CHECK(hipMalloc(d_csr_val_b, int(nnz_b, c_size_t) * 4))
87 call HIP_CHECK(hipMalloc(d_csr_row_ptr_c, int(m + 1, c_size_t) * 4))
88
! NOTE(review): the final argument 1 is the copy kind; presumably the numeric
! value of hipMemcpyHostToDevice, since the source is a host array -- confirm.
89! Copy host data to device
90 call HIP_CHECK(hipMemcpy(d_csr_row_ptr_a, c_loc(h_csr_row_ptr_a), int(m + 1, c_size_t) * 4, 1))
91 call HIP_CHECK(hipMemcpy(d_csr_col_ind_a, c_loc(h_csr_col_ind_a), int(nnz_a, c_size_t) * 4, 1))
92 call HIP_CHECK(hipMemcpy(d_csr_val_a, c_loc(h_csr_val_a), int(nnz_a, c_size_t) * 4, 1))
93 call HIP_CHECK(hipMemcpy(d_csr_row_ptr_b, c_loc(h_csr_row_ptr_b), int(m + 1, c_size_t) * 4, 1))
94 call HIP_CHECK(hipMemcpy(d_csr_col_ind_b, c_loc(h_csr_col_ind_b), int(nnz_b, c_size_t) * 4, 1))
95 call HIP_CHECK(hipMemcpy(d_csr_val_b, c_loc(h_csr_val_b), int(nnz_b, c_size_t) * 4, 1))
96
97! Create hipSPARSE handle
98 call HIPSPARSE_CHECK(hipsparseCreate(handle))
99
100! Create matrix descriptors
101 call HIPSPARSE_CHECK(hipsparseCreateMatDescr(descr_a))
102 call HIPSPARSE_CHECK(hipsparseCreateMatDescr(descr_b))
103 call HIPSPARSE_CHECK(hipsparseCreateMatDescr(descr_c))
104
! The value and column index arrays of C are not allocated yet, so null
! pointers are passed in their positions.
105! Get buffer size
106 call HIPSPARSE_CHECK(hipsparseScsrgeam2_bufferSizeExt(handle, &
107 m, &
108 n, &
109 c_loc(alpha), &
110 descr_a, &
111 nnz_a, &
112 d_csr_val_a, &
113 d_csr_row_ptr_a, &
114 d_csr_col_ind_a, &
115 c_loc(beta), &
116 descr_b, &
117 nnz_b, &
118 d_csr_val_b, &
119 d_csr_row_ptr_b, &
120 d_csr_col_ind_b, &
121 descr_c, &
122 c_null_ptr, &
123 d_csr_row_ptr_c, &
124 c_null_ptr, &
125 c_loc(buffer_size)))
126
127! Allocate buffer
128 call HIP_CHECK(hipMalloc(d_buffer, buffer_size))
129
! nnz_c is a host variable passed by address; the call stores the non-zero
! count of C in it and fills d_csr_row_ptr_c.
130! Get nnz of C
131 call HIPSPARSE_CHECK(hipsparseXcsrgeam2Nnz(handle, &
132 m, &
133 n, &
134 descr_a, &
135 nnz_a, &
136 d_csr_row_ptr_a, &
137 d_csr_col_ind_a, &
138 descr_b, &
139 nnz_b, &
140 d_csr_row_ptr_b, &
141 d_csr_col_ind_b, &
142 descr_c, &
143 d_csr_row_ptr_c, &
144 c_loc(nnz_c), &
145 d_buffer))
146
147! Allocate device memory for C
148 call HIP_CHECK(hipMalloc(d_csr_col_ind_c, int(nnz_c, c_size_t) * 4))
149 call HIP_CHECK(hipMalloc(d_csr_val_c, int(nnz_c, c_size_t) * 4))
150
151! Call scsrgeam2 to compute C = alpha * A + beta * B
152 call HIPSPARSE_CHECK(hipsparseScsrgeam2(handle, &
153 m, &
154 n, &
155 c_loc(alpha), &
156 descr_a, &
157 nnz_a, &
158 d_csr_val_a, &
159 d_csr_row_ptr_a, &
160 d_csr_col_ind_a, &
161 c_loc(beta), &
162 descr_b, &
163 nnz_b, &
164 d_csr_val_b, &
165 d_csr_row_ptr_b, &
166 d_csr_col_ind_b, &
167 descr_c, &
168 d_csr_val_c, &
169 d_csr_row_ptr_c, &
170 d_csr_col_ind_c, &
171 d_buffer))
172
173! Allocate host memory for C
174 allocate(h_csr_col_ind_c(nnz_c))
175 allocate(h_csr_val_c(nnz_c))
176
! NOTE(review): the final argument 2 is the copy kind; presumably the numeric
! value of hipMemcpyDeviceToHost, since the destination is a host array --
! confirm.
177! Copy result back to host
178 call HIP_CHECK(hipMemcpy(c_loc(h_csr_row_ptr_c), d_csr_row_ptr_c, int(m + 1, c_size_t) * 4, 2))
179 call HIP_CHECK(hipMemcpy(c_loc(h_csr_col_ind_c), d_csr_col_ind_c, int(nnz_c, c_size_t) * 4, 2))
180 call HIP_CHECK(hipMemcpy(c_loc(h_csr_val_c), d_csr_val_c, int(nnz_c, c_size_t) * 4, 2))
181
! The "+ 1" adjustments below shift row offsets and column indices into
! one-based Fortran array subscripts.
! NOTE(review): this assumes the returned CSR data is zero-based, like the
! inputs -- confirm the index base of descr_c.
182! Print result in dense format
183 write(*,'(A)') 'C'
184 do i = 1, m
185 row_start = h_csr_row_ptr_c(i) + 1
186 row_end = h_csr_row_ptr_c(i + 1)
187
188 ! Initialize temp array to zeros
189 temp = 0.0
190
191 ! Fill in non-zero values
192 do j = row_start, row_end
193 temp(h_csr_col_ind_c(j) + 1) = h_csr_val_c(j)
194 end do
195
! Values are converted with int() and printed as integers.
196 ! Print the row
197 do j = 1, n
198 write(*,'(I0,1X)',advance='no') int(temp(j))
199 end do
200 write(*,*)
201 end do
202 write(*,*)
203
204! Deallocate host arrays
205 deallocate(h_csr_col_ind_c)
206 deallocate(h_csr_val_c)
207
208! Clear hipSPARSE
209 call HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descr_a))
210 call HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descr_b))
211 call HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descr_c))
212 call HIPSPARSE_CHECK(hipsparseDestroy(handle))
213
214! Clear device memory
215 call HIP_CHECK(hipFree(d_csr_row_ptr_a))
216 call HIP_CHECK(hipFree(d_csr_col_ind_a))
217 call HIP_CHECK(hipFree(d_csr_val_a))
218 call HIP_CHECK(hipFree(d_csr_row_ptr_b))
219 call HIP_CHECK(hipFree(d_csr_col_ind_b))
220 call HIP_CHECK(hipFree(d_csr_val_b))
221 call HIP_CHECK(hipFree(d_csr_row_ptr_c))
222 call HIP_CHECK(hipFree(d_csr_col_ind_c))
223 call HIP_CHECK(hipFree(d_csr_val_c))
224 call HIP_CHECK(hipFree(d_buffer))
225
226end program example_fortran_csrgeam2
hipsparseXcsrgemmNnz()#
-
hipsparseStatus_t hipsparseXcsrgemmNnz(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, int *csrRowPtrC, int *nnzTotalDevHostPtr)#
hipsparseXcsrgemmNnz computes the total CSR non-zero elements and the CSR row offsets, that point to the start of every row of the sparse CSR matrix, of the resulting multiplied matrix \(C\). It is assumed that csrRowPtrC has been allocated with size m+1. The desired index base in the output CSR matrix \(C\) is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase().
- Deprecated:
This function is deprecated when using the CUDA backend (CUDA 10.0+) and will be removed in CUDA 11.0. This deprecation does not apply to the ROCm backend.
Note
As indicated,
nnzTotalDevHostPtrcan point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 8192 intermediate products per row, additional temporary storage buffer is allocated by the algorithm.
Note
Currently, only
transA==transB== HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
transA – [in] matrix \(A\) operation type.
transB – [in] matrix \(B\) operation type.
m – [in] number of rows of the sparse CSR matrix \(op(A)\) and \(C\). Must be non-negative.
n – [in] number of columns of the sparse CSR matrix \(op(B)\) and \(C\). Must be non-negative.
k – [in] number of columns of the sparse CSR matrix \(op(A)\) and number of rows of the sparse CSR matrix \(op(B)\). Must be non-negative.
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\). Must be non-negative.
csrRowPtrA – [in] array of
m+1elements ( \(op(A) == A\),k+1otherwise) that point to the start of every row of the sparse CSR matrix \(op(A)\).csrColIndA – [in] array of
nnzAelements containing the column indices of the sparse CSR matrix \(A\).descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\). Must be non-negative.
csrRowPtrB – [in] array of
k+1 elements ( \(op(B) == B\), n+1 otherwise) that point to the start of every row of the sparse CSR matrix \(op(B)\).
csrColIndB – [in] array of
nnzBelements containing the column indices of the sparse CSR matrix \(B\).descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrRowPtrC – [out] array of
m+1 elements that point to the start of every row of the sparse CSR matrix \(C\).
nnzTotalDevHostPtr – [inout] pointer to the number of non-zero entries of the sparse CSR matrix \(C\).
nnzTotalDevHostPtr can be a host or device pointer.
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_NOT_INITIALIZED –
handleis not initialized.HIPSPARSE_STATUS_INVALID_VALUE –
handle,descrA,descrBordescrCis nullptr,m,n,k,nnzAornnzBis negative, orcsrRowPtrA,csrColIndA,csrRowPtrB,csrColIndB,csrRowPtrCornnzTotalDevHostPtris nullptr.HIPSPARSE_STATUS_NOT_SUPPORTED –
transAis not HIPSPARSE_OPERATION_NON_TRANSPOSE,transBis not HIPSPARSE_OPERATION_NON_TRANSPOSE, or hipsparseMatrixType_t is not HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrgemm()#
-
hipsparseStatus_t hipsparseScsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseDcsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseCcsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipComplex *csrValC, const int *csrRowPtrC, int *csrColIndC)#
-
hipsparseStatus_t hipsparseZcsrgemm(hipsparseHandle_t handle, hipsparseOperation_t transA, hipsparseOperation_t transB, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrValC, const int *csrRowPtrC, int *csrColIndC)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgemmmultiplies the sparse \(m \times k\) matrix \(op(A)\), defined in CSR storage format with the sparse \(k \times n\) matrix \(op(B)\), defined in CSR storage format, and stores the result in the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that\[ C := op(A) \cdot op(B), \]with\[\begin{split} op(A) = \left\{ \begin{array}{ll} A, & \text{if transA == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ A^T, & \text{if transA == HIPSPARSE_OPERATION_TRANSPOSE} \\ A^H, & \text{if transA == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]and\[\begin{split} op(B) = \left\{ \begin{array}{ll} B, & \text{if transB == HIPSPARSE_OPERATION_NON_TRANSPOSE} \\ B^T, & \text{if transB == HIPSPARSE_OPERATION_TRANSPOSE} \\ B^H, & \text{if transB == HIPSPARSE_OPERATION_CONJUGATE_TRANSPOSE} \end{array} \right. \end{split}\]This computation involves a multi step process. First the user must allocate
csrRowPtrC to have size m+1. The user then calls hipsparseXcsrgemmNnz, which fills in the csrRowPtrC array and computes the total number of non-zeros in C, nnzC. The user then allocates the arrays csrColIndC and csrValC, each of size nnzC, and calls hipsparseXcsrgemm to complete the computation. The desired index base of the output CSR matrix C is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase().
Note
Currently, only transA == HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.
Note
Currently, only transB == HIPSPARSE_OPERATION_NON_TRANSPOSE is supported.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 4096 non-zero entries per row, additional temporary storage buffer is allocated by the algorithm.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
transA – [in] matrix \(A\) operation type.
transB – [in] matrix \(B\) operation type.
m – [in] number of rows of the sparse CSR matrix \(op(A)\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(op(B)\) and \(C\).
k – [in] number of columns of the sparse CSR matrix \(op(A)\) and number of rows of the sparse CSR matrix \(op(B)\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrValA – [in] array of
nnzAelements of the sparse CSR matrix \(A\).csrRowPtrA – [in] array of
m+1elements ( \(op(A) == A\),k+1otherwise) that point to the start of every row of the sparse CSR matrix \(op(A)\).csrColIndA – [in] array of
nnzAelements containing the column indices of the sparse CSR matrix \(A\).descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrValB – [in] array of
nnzBelements of the sparse CSR matrix \(B\).csrRowPtrB – [in] array of
k+1elements ( \(op(B) == B\),m+1otherwise) that point to the start of every row of the sparse CSR matrix \(op(B)\).csrColIndB – [in] array of
nnzBelements containing the column indices of the sparse CSR matrix \(B\).descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrValC – [out] array of
nnzCelements of the sparse CSR matrix \(C\).csrRowPtrC – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(C\).csrColIndC – [out] array of
nnzCelements containing the column indices of the sparse CSR matrix \(C\).
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, n, k, nnzA, nnzB, descrA, csrValA, csrRowPtrA, csrColIndA, descrB, csrValB, csrRowPtrB, csrColIndB, descrC, csrValC, csrRowPtrC or csrColIndC is invalid.
HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED – transA != HIPSPARSE_OPERATION_NON_TRANSPOSE, transB != HIPSPARSE_OPERATION_NON_TRANSPOSE, or hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 const int m = 4;
4 const int k = 3;
5 const int n = 2;
6 const int nnzA = 7;
7 const int nnzB = 3;
8
9 hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
10 hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE;
11
12 // A, B, and C are mxk, kxn, and m×n
13
14 // A
15 // 1 0 0
16 // 3 4 0
17 // 5 6 7
18 // 0 0 9
19 std::vector<int> hcsrRowPtrA = {0, 1, 3, 6, 7};
20 std::vector<int> hcsrColIndA = {0, 0, 1, 0, 1, 2, 2};
21 std::vector<float> hcsrValA = {1.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 9.0f};
22
23 // B
24 // 0 1
25 // 1 0
26 // 0 1
27 std::vector<int> hcsrRowPtrB = {0, 1, 2, 3};
28 std::vector<int> hcsrColIndB = {1, 0, 1};
29 std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f};
30
31 // Device memory management: Allocate and copy A, B
32 int* dcsrRowPtrA;
33 int* dcsrColIndA;
34 float* dcsrValA;
35 int* dcsrRowPtrB;
36 int* dcsrColIndB;
37 float* dcsrValB;
38 int* dcsrRowPtrC;
39 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
40 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
41 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
42 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)));
43 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
44 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
45 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
46
47 HIP_CHECK(
48 hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
49 HIP_CHECK(
50 hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice));
51 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice));
52 HIP_CHECK(
53 hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
54 HIP_CHECK(
55 hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice));
56 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice));
57
58 hipsparseHandle_t handle;
59 HIPSPARSE_CHECK(hipsparseCreate(&handle));
60
61 hipsparseMatDescr_t descrA;
62 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
63
64 hipsparseMatDescr_t descrB;
65 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
66
67 hipsparseMatDescr_t descrC;
68 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
69
70 int nnzC;
71 HIPSPARSE_CHECK(hipsparseXcsrgemmNnz(handle,
72 transA,
73 transB,
74 m,
75 n,
76 k,
77 descrA,
78 nnzA,
79 dcsrRowPtrA,
80 dcsrColIndA,
81 descrB,
82 nnzB,
83 dcsrRowPtrB,
84 dcsrColIndB,
85 descrC,
86 dcsrRowPtrC,
87 &nnzC));
88
89 int* dcsrColIndC = nullptr;
90 float* dcsrValC = nullptr;
91 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
92 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
93
94 HIPSPARSE_CHECK(hipsparseScsrgemm(handle,
95 transA,
96 transB,
97 m,
98 n,
99 k,
100 descrA,
101 nnzA,
102 dcsrValA,
103 dcsrRowPtrA,
104 dcsrColIndA,
105 descrB,
106 nnzB,
107 dcsrValB,
108 dcsrRowPtrB,
109 dcsrColIndB,
110 descrC,
111 dcsrValC,
112 dcsrRowPtrC,
113 dcsrColIndC));
114
115 std::vector<int> hcsrRowPtrC(m + 1);
116 std::vector<int> hcsrColIndC(nnzC);
117 std::vector<float> hcsrValC(nnzC);
118
119 // Copy back to the host
120 HIP_CHECK(
121 hipMemcpy(hcsrRowPtrC.data(), dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
122 HIP_CHECK(
123 hipMemcpy(hcsrColIndC.data(), dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
124 HIP_CHECK(hipMemcpy(hcsrValC.data(), dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
125
126 std::cout << "C" << std::endl;
127 for(int i = 0; i < m; i++)
128 {
129 int start = hcsrRowPtrC[i];
130 int end = hcsrRowPtrC[i + 1];
131
132 std::vector<float> temp(n, 0.0f);
133 for(int j = start; j < end; j++)
134 {
135 temp[hcsrColIndC[j]] = hcsrValC[j];
136 }
137
138 for(int j = 0; j < n; j++)
139 {
140 std::cout << temp[j] << " ";
141 }
142 std::cout << std::endl;
143 }
144 std::cout << std::endl;
145
146 HIP_CHECK(hipFree(dcsrRowPtrA));
147 HIP_CHECK(hipFree(dcsrColIndA));
148 HIP_CHECK(hipFree(dcsrValA));
149 HIP_CHECK(hipFree(dcsrRowPtrB));
150 HIP_CHECK(hipFree(dcsrColIndB));
151 HIP_CHECK(hipFree(dcsrValB));
152 HIP_CHECK(hipFree(dcsrRowPtrC));
153 HIP_CHECK(hipFree(dcsrColIndC));
154 HIP_CHECK(hipFree(dcsrValC));
155
156 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
157 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
158 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
159 HIPSPARSE_CHECK(hipsparseDestroy(handle));
160
161 return 0;
162}
1int main(int argc, char* argv[])
2{
3 const int m = 4;
4 const int k = 3;
5 const int n = 2;
6 const int nnzA = 7;
7 const int nnzB = 3;
8
9 hipsparseOperation_t transA = HIPSPARSE_OPERATION_NON_TRANSPOSE;
10 hipsparseOperation_t transB = HIPSPARSE_OPERATION_NON_TRANSPOSE;
11
12 // A, B, and C are mxk, kxn, and m×n
13
14 // A
15 // 1 0 0
16 // 3 4 0
17 // 5 6 7
18 // 0 0 9
19 int hcsrRowPtrA[] = {0, 1, 3, 6, 7};
20 int hcsrColIndA[] = {0, 0, 1, 0, 1, 2, 2};
21 float hcsrValA[] = {1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0};
22
23 // B
24 // 0 1
25 // 1 0
26 // 0 1
27 int hcsrRowPtrB[] = {0, 1, 2, 3};
28 int hcsrColIndB[] = {1, 0, 1};
29 float hcsrValB[] = {1.0, 1.0, 1.0};
30
31 // Device memory management: Allocate and copy A, B
32 int* dcsrRowPtrA;
33 int* dcsrColIndA;
34 float* dcsrValA;
35 int* dcsrRowPtrB;
36 int* dcsrColIndB;
37 float* dcsrValB;
38 int* dcsrRowPtrC;
39 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
40 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
41 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
42 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (m + 1) * sizeof(int)));
43 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
44 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
45 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
46
47 HIP_CHECK(hipMemcpy(dcsrRowPtrA, hcsrRowPtrA, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
48 HIP_CHECK(hipMemcpy(dcsrColIndA, hcsrColIndA, nnzA * sizeof(int), hipMemcpyHostToDevice));
49 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA, nnzA * sizeof(float), hipMemcpyHostToDevice));
50 HIP_CHECK(hipMemcpy(dcsrRowPtrB, hcsrRowPtrB, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
51 HIP_CHECK(hipMemcpy(dcsrColIndB, hcsrColIndB, nnzB * sizeof(int), hipMemcpyHostToDevice));
52 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB, nnzB * sizeof(float), hipMemcpyHostToDevice));
53
54 hipsparseHandle_t handle;
55 HIPSPARSE_CHECK(hipsparseCreate(&handle));
56
57 hipsparseMatDescr_t descrA;
58 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
59
60 hipsparseMatDescr_t descrB;
61 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
62
63 hipsparseMatDescr_t descrC;
64 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
65
66 int nnzC;
67 HIPSPARSE_CHECK(hipsparseXcsrgemmNnz(handle,
68 transA,
69 transB,
70 m,
71 n,
72 k,
73 descrA,
74 nnzA,
75 dcsrRowPtrA,
76 dcsrColIndA,
77 descrB,
78 nnzB,
79 dcsrRowPtrB,
80 dcsrColIndB,
81 descrC,
82 dcsrRowPtrC,
83 &nnzC));
84
85 int* dcsrColIndC = NULL;
86 float* dcsrValC = NULL;
87 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
88 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
89
90 HIPSPARSE_CHECK(hipsparseScsrgemm(handle,
91 transA,
92 transB,
93 m,
94 n,
95 k,
96 descrA,
97 nnzA,
98 dcsrValA,
99 dcsrRowPtrA,
100 dcsrColIndA,
101 descrB,
102 nnzB,
103 dcsrValB,
104 dcsrRowPtrB,
105 dcsrColIndB,
106 descrC,
107 dcsrValC,
108 dcsrRowPtrC,
109 dcsrColIndC));
110
111 int* hcsrRowPtrC = (int*)malloc((m + 1) * sizeof(int));
112 int* hcsrColIndC = (int*)malloc((nnzC) * sizeof(int));
113 float hcsrValC[nnzC];
114
115 // Copy back to the host
116 HIP_CHECK(hipMemcpy(hcsrRowPtrC, dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
117 HIP_CHECK(hipMemcpy(hcsrColIndC, dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
118 HIP_CHECK(hipMemcpy(hcsrValC, dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
119
120 printf("C\n");
121 for(int i = 0; i < m; i++)
122 {
123 int start = hcsrRowPtrC[i];
124 int end = hcsrRowPtrC[i + 1];
125
126 float* temp = (float*)calloc(n, sizeof(float));
127 for(int j = start; j < end; j++)
128 {
129 temp[hcsrColIndC[j]] = hcsrValC[j];
130 }
131
132 for(int j = 0; j < n; j++)
133 {
134 printf("%f ", temp[j]);
135 }
136 printf("\n");
137 free(temp);
138 }
139 printf("\n");
140
141 HIP_CHECK(hipFree(dcsrRowPtrA));
142 HIP_CHECK(hipFree(dcsrColIndA));
143 HIP_CHECK(hipFree(dcsrValA));
144 HIP_CHECK(hipFree(dcsrRowPtrB));
145 HIP_CHECK(hipFree(dcsrColIndB));
146 HIP_CHECK(hipFree(dcsrValB));
147 HIP_CHECK(hipFree(dcsrRowPtrC));
148 HIP_CHECK(hipFree(dcsrColIndC));
149 HIP_CHECK(hipFree(dcsrValC));
150
151 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
152 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
153 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
154 HIPSPARSE_CHECK(hipsparseDestroy(handle));
155
156 return 0;
157}
! Example: sparse matrix-matrix product C = A * B in CSR format.
!
! The HIP and hipSPARSE C functions are called through iso_c_binding interfaces.
! hipsparseXcsrgemmNnz fills csrRowPtrC and reports nnzC; hipsparseScsrgemm then
! fills csrColIndC and csrValC. The result is copied back to the host and printed.
! Every HIP / hipSPARSE status is checked; a non-zero status prints a message and
! stops the program with stop code 1.
program example_hipsparse_csrgemm
    use iso_c_binding
    implicit none

    ! HIP
    interface
        function hipMalloc(ptr, size) &
                bind(c, name = 'hipMalloc')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMalloc
            type(c_ptr) :: ptr
            integer(c_size_t), value :: size
        end function hipMalloc

        function hipFree(ptr) &
                bind(c, name = 'hipFree')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipFree
            type(c_ptr), value :: ptr
        end function hipFree

        function hipMemcpy(dst, src, size, kind) &
                bind(c, name = 'hipMemcpy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMemcpy
            type(c_ptr), value :: dst
            type(c_ptr), intent(in), value :: src
            integer(c_size_t), value :: size
            integer(c_int), value :: kind
        end function hipMemcpy
    end interface

    ! Values passed as the 'kind' argument of hipMemcpy
    integer(c_int), parameter :: hipMemcpyHostToDevice = 1
    integer(c_int), parameter :: hipMemcpyDeviceToHost = 2

    ! hipSPARSE
    interface
        function hipsparseCreate(handle) &
                bind(c, name = 'hipsparseCreate')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreate
            type(c_ptr) :: handle
        end function hipsparseCreate

        function hipsparseDestroy(handle) &
                bind(c, name = 'hipsparseDestroy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroy
            type(c_ptr), value :: handle
        end function hipsparseDestroy

        function hipsparseCreateMatDescr(descr) &
                bind(c, name = 'hipsparseCreateMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreateMatDescr
            type(c_ptr) :: descr
        end function hipsparseCreateMatDescr

        function hipsparseDestroyMatDescr(descr) &
                bind(c, name = 'hipsparseDestroyMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroyMatDescr
            type(c_ptr), value :: descr
        end function hipsparseDestroyMatDescr

        function hipsparseXcsrgemmNnz(handle, transA, transB, m, n, k, descrA, nnzA, csrRowPtrA, &
                csrColIndA, descrB, nnzB, csrRowPtrB, csrColIndB, descrC, &
                csrRowPtrC, nnzTotalDevHostPtr) &
                bind(c, name = 'hipsparseXcsrgemmNnz')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseXcsrgemmNnz
            type(c_ptr), value :: handle
            integer(c_int), value :: transA
            integer(c_int), value :: transB
            integer(c_int), value :: m
            integer(c_int), value :: n
            integer(c_int), value :: k
            type(c_ptr), value :: descrA
            integer(c_int), value :: nnzA
            type(c_ptr), intent(in), value :: csrRowPtrA
            type(c_ptr), intent(in), value :: csrColIndA
            type(c_ptr), value :: descrB
            integer(c_int), value :: nnzB
            type(c_ptr), intent(in), value :: csrRowPtrB
            type(c_ptr), intent(in), value :: csrColIndB
            type(c_ptr), value :: descrC
            type(c_ptr), value :: csrRowPtrC
            type(c_ptr), value :: nnzTotalDevHostPtr
        end function hipsparseXcsrgemmNnz

        function hipsparseScsrgemm(handle, transA, transB, m, n, k, descrA, nnzA, csrSortedValA, &
                csrSortedRowPtrA, csrSortedColIndA, descrB, nnzB, csrSortedValB, &
                csrSortedRowPtrB, csrSortedColIndB, descrC, csrSortedValC, &
                csrSortedRowPtrC, csrSortedColIndC) &
                bind(c, name = 'hipsparseScsrgemm')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseScsrgemm
            type(c_ptr), value :: handle
            integer(c_int), value :: transA
            integer(c_int), value :: transB
            integer(c_int), value :: m
            integer(c_int), value :: n
            integer(c_int), value :: k
            type(c_ptr), value :: descrA
            integer(c_int), value :: nnzA
            type(c_ptr), intent(in), value :: csrSortedValA
            type(c_ptr), intent(in), value :: csrSortedRowPtrA
            type(c_ptr), intent(in), value :: csrSortedColIndA
            type(c_ptr), value :: descrB
            integer(c_int), value :: nnzB
            type(c_ptr), intent(in), value :: csrSortedValB
            type(c_ptr), intent(in), value :: csrSortedRowPtrB
            type(c_ptr), intent(in), value :: csrSortedColIndB
            type(c_ptr), value :: descrC
            type(c_ptr), value :: csrSortedValC
            type(c_ptr), value :: csrSortedRowPtrC
            type(c_ptr), value :: csrSortedColIndC
        end function hipsparseScsrgemm
    end interface

    integer(c_int), parameter :: HIPSPARSE_OPERATION_NON_TRANSPOSE = 0

    ! Size in bytes of one array element. Both C int and C float are assumed to be
    ! 4 bytes wide, as in the byte counts this example has always used. Keeping the
    ! constant in kind c_size_t makes every "count * elem_bytes" product be evaluated
    ! in c_size_t, so it cannot overflow a default integer for large counts.
    integer(c_size_t), parameter :: elem_bytes = 4_c_size_t

    ! Variables
    type(c_ptr) :: handle
    type(c_ptr) :: descrA, descrB, descrC
    integer :: i

    ! Sparse matrix-matrix multiply: C = op(A) * op(B)
    ! Sizes and index data are declared with kind c_int because they are handed to C
    ! routines that expect 'int' values and 'int' arrays.
    integer(c_int), parameter :: m = 4
    integer(c_int), parameter :: k = 3
    integer(c_int), parameter :: n = 2
    integer(c_int), parameter :: nnzA = 7
    integer(c_int), parameter :: nnzB = 3
    integer(c_int), target :: nnzC

    ! Matrix A (4x3)
    integer(c_int), dimension(m+1), target :: hcsrRowPtrA = (/0, 1, 3, 6, 7/)
    integer(c_int), dimension(nnzA), target :: hcsrColIndA = (/0, 0, 1, 0, 1, 2, 2/)
    real(c_float), dimension(nnzA), target :: hcsrValA = (/1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0/)

    ! Matrix B (3x2)
    integer(c_int), dimension(k+1), target :: hcsrRowPtrB = (/0, 1, 2, 3/)
    integer(c_int), dimension(nnzB), target :: hcsrColIndB = (/1, 0, 1/)
    real(c_float), dimension(nnzB), target :: hcsrValB = (/1.0, 1.0, 1.0/)

    ! Matrix C (will be allocated after nnzC is determined)
    integer(c_int), dimension(:), allocatable, target :: hcsrRowPtrC
    integer(c_int), dimension(:), allocatable, target :: hcsrColIndC
    real(c_float), dimension(:), allocatable, target :: hcsrValC

    ! Device pointers
    type(c_ptr) :: dcsrRowPtrA, dcsrColIndA, dcsrValA
    type(c_ptr) :: dcsrRowPtrB, dcsrColIndB, dcsrValB
    type(c_ptr) :: dcsrRowPtrC, dcsrColIndC, dcsrValC

    nnzC = 0

    ! Create hipSPARSE handle
    call check(hipsparseCreate(handle), 'hipsparseCreate')

    ! Create matrix descriptors
    call check(hipsparseCreateMatDescr(descrA), 'hipsparseCreateMatDescr')
    call check(hipsparseCreateMatDescr(descrB), 'hipsparseCreateMatDescr')
    call check(hipsparseCreateMatDescr(descrC), 'hipsparseCreateMatDescr')

    ! Allocate device memory for A and B
    call check(hipMalloc(dcsrRowPtrA, (m + 1) * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrColIndA, nnzA * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrValA, nnzA * elem_bytes), 'hipMalloc')

    call check(hipMalloc(dcsrRowPtrB, (k + 1) * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrColIndB, nnzB * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrValB, nnzB * elem_bytes), 'hipMalloc')

    call check(hipMalloc(dcsrRowPtrC, (m + 1) * elem_bytes), 'hipMalloc')

    ! Copy A and B to device
    call check(hipMemcpy(dcsrRowPtrA, c_loc(hcsrRowPtrA), (m + 1) * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrColIndA, c_loc(hcsrColIndA), nnzA * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrValA, c_loc(hcsrValA), nnzA * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')

    call check(hipMemcpy(dcsrRowPtrB, c_loc(hcsrRowPtrB), (k + 1) * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrColIndB, c_loc(hcsrColIndB), nnzB * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrValB, c_loc(hcsrValB), nnzB * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')

    ! Determine nnzC (written through a host pointer) and fill csrRowPtrC
    call check(hipsparseXcsrgemmNnz(handle, &
                                    HIPSPARSE_OPERATION_NON_TRANSPOSE, &
                                    HIPSPARSE_OPERATION_NON_TRANSPOSE, &
                                    m, &
                                    n, &
                                    k, &
                                    descrA, &
                                    nnzA, &
                                    dcsrRowPtrA, &
                                    dcsrColIndA, &
                                    descrB, &
                                    nnzB, &
                                    dcsrRowPtrB, &
                                    dcsrColIndB, &
                                    descrC, &
                                    dcsrRowPtrC, &
                                    c_loc(nnzC)), 'hipsparseXcsrgemmNnz')

    ! Allocate device memory for C
    call check(hipMalloc(dcsrColIndC, nnzC * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrValC, nnzC * elem_bytes), 'hipMalloc')

    ! Perform matrix multiplication: C = A * B
    call check(hipsparseScsrgemm(handle, &
                                 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
                                 HIPSPARSE_OPERATION_NON_TRANSPOSE, &
                                 m, &
                                 n, &
                                 k, &
                                 descrA, &
                                 nnzA, &
                                 dcsrValA, &
                                 dcsrRowPtrA, &
                                 dcsrColIndA, &
                                 descrB, &
                                 nnzB, &
                                 dcsrValB, &
                                 dcsrRowPtrB, &
                                 dcsrColIndB, &
                                 descrC, &
                                 dcsrValC, &
                                 dcsrRowPtrC, &
                                 dcsrColIndC), 'hipsparseScsrgemm')

    ! Allocate host memory for C
    allocate(hcsrRowPtrC(m+1))
    allocate(hcsrColIndC(nnzC))
    allocate(hcsrValC(nnzC))

    ! Copy result back to host
    call check(hipMemcpy(c_loc(hcsrRowPtrC), dcsrRowPtrC, (m + 1) * elem_bytes, &
                         hipMemcpyDeviceToHost), 'hipMemcpy')
    ! Skip the nnzC-sized copies for an empty result so that c_loc is never applied
    ! to a zero-size array.
    if (nnzC > 0) then
        call check(hipMemcpy(c_loc(hcsrColIndC), dcsrColIndC, nnzC * elem_bytes, &
                             hipMemcpyDeviceToHost), 'hipMemcpy')
        call check(hipMemcpy(c_loc(hcsrValC), dcsrValC, nnzC * elem_bytes, &
                             hipMemcpyDeviceToHost), 'hipMemcpy')
    end if

    ! Print result
    write(*,*) 'Matrix C (result of A * B):'
    write(*,*) 'nnzC =', nnzC
    write(*,*) 'csrRowPtrC:'
    do i = 1, m + 1
        write(*,*) hcsrRowPtrC(i)
    end do
    write(*,*) 'csrColIndC:'
    do i = 1, nnzC
        write(*,*) hcsrColIndC(i)
    end do
    write(*,*) 'csrValC:'
    do i = 1, nnzC
        write(*,*) hcsrValC(i)
    end do

    ! Clean up
    deallocate(hcsrRowPtrC)
    deallocate(hcsrColIndC)
    deallocate(hcsrValC)

    call check(hipFree(dcsrRowPtrA), 'hipFree')
    call check(hipFree(dcsrColIndA), 'hipFree')
    call check(hipFree(dcsrValA), 'hipFree')
    call check(hipFree(dcsrRowPtrB), 'hipFree')
    call check(hipFree(dcsrColIndB), 'hipFree')
    call check(hipFree(dcsrValB), 'hipFree')
    call check(hipFree(dcsrRowPtrC), 'hipFree')
    call check(hipFree(dcsrColIndC), 'hipFree')
    call check(hipFree(dcsrValC), 'hipFree')

    call check(hipsparseDestroyMatDescr(descrA), 'hipsparseDestroyMatDescr')
    call check(hipsparseDestroyMatDescr(descrB), 'hipsparseDestroyMatDescr')
    call check(hipsparseDestroyMatDescr(descrC), 'hipsparseDestroyMatDescr')
    call check(hipsparseDestroy(handle), 'hipsparseDestroy')

contains

    ! Stop the program with a message and stop code 1 when a HIP or hipSPARSE call
    ! returned a non-zero status.
    !   status - value returned by the call
    !   what   - name of the call, used in the error message
    subroutine check(status, what)
        integer(c_int), intent(in) :: status
        character(len=*), intent(in) :: what

        if (status /= 0) then
            write(*,*) 'Error: ', what, ' failed, status =', status
            stop 1
        end if
    end subroutine check

end program example_hipsparse_csrgemm
hipsparseXcsrgemm2_bufferSizeExt()#
-
hipsparseStatus_t hipsparseScsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const float *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseDcsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const double *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseCcsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
-
hipsparseStatus_t hipsparseZcsrgemm2_bufferSizeExt(hipsparseHandle_t handle, int m, int n, int k, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, csrgemm2Info_t info, size_t *pBufferSizeInBytes)#
hipsparseXcsrgemm2_bufferSizeExt returns the size of the temporary storage buffer in bytes that is required by hipsparseXcsrgemm2Nnz() and hipsparseXcsrgemm2(). The temporary storage buffer must be allocated by the user.
Note
Please note, that for matrix products with more than 4096 non-zero entries per row, additional temporary storage buffer is allocated by the algorithm.
Note
Please note, that for matrix products with more than 8192 intermediate products per row, additional temporary storage buffer is allocated by the algorithm.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(op(A)\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(op(B)\) and \(C\).
k – [in] number of columns of the sparse CSR matrix \(op(A)\) and number of rows of the sparse CSR matrix \(op(B)\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrRowPtrA – [in] array of
m+1elements ( \(op(A) == A\),k+1otherwise) that point to the start of every row of the sparse CSR matrix \(op(A)\).csrColIndA – [in] array of
nnzAelements containing the column indices of the sparse CSR matrix \(A\).descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrRowPtrB – [in] array of
k+1elements ( \(op(B) == B\),m+1otherwise) that point to the start of every row of the sparse CSR matrix \(op(B)\).csrColIndB – [in] array of
nnzBelements containing the column indices of the sparse CSR matrix \(B\).beta – [in] scalar \(\beta\).
descrD – [in] descriptor of the sparse CSR matrix \(D\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzD – [in] number of non-zero entries of the sparse CSR matrix \(D\).
csrRowPtrD – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(D\).csrColIndD – [in] array of
nnzDelements containing the column indices of the sparse CSR matrix \(D\).info – [inout] structure that holds meta data for the sparse CSR matrix \(C\).
pBufferSizeInBytes – [out] number of bytes of the temporary storage buffer required by hipsparseXcsrgemm2Nnz(), hipsparseScsrgemm2(), hipsparseDcsrgemm2(), hipsparseCcsrgemm2() and hipsparseZcsrgemm2().
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, n, k, nnzA, nnzB, nnzD, alpha, beta, descrA, csrRowPtrA, csrColIndA, descrB, csrRowPtrB, csrColIndB, descrD, csrRowPtrD, csrColIndD, info or pBufferSizeInBytes is invalid.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrgemm2Nnz()#
-
hipsparseStatus_t hipsparseXcsrgemm2Nnz(hipsparseHandle_t handle, int m, int n, int k, const hipsparseMatDescr_t descrA, int nnzA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const int *csrRowPtrB, const int *csrColIndB, const hipsparseMatDescr_t descrD, int nnzD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, int *csrRowPtrC, int *nnzTotalDevHostPtr, const csrgemm2Info_t info, void *pBuffer)#
hipsparseXcsrgemm2Nnz computes the total number of CSR non-zero elements and the CSR row offsets, which point to the start of every row of the sparse CSR matrix, of the resulting multiplied matrix \(C\). It is assumed that csrRowPtrC has been allocated with size m+1. The required buffer size can be obtained by hipsparseXcsrgemm2_bufferSizeExt(). The desired index base in the output CSR matrix \(C\) is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase().
Note
As indicated, nnzTotalDevHostPtr can point either to host or device memory. This is controlled by setting the pointer mode. See hipsparseSetPointerMode().
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 8192 intermediate products per row, additional temporary storage buffer is allocated by the algorithm.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(op(A)\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(op(B)\) and \(C\).
k – [in] number of columns of the sparse CSR matrix \(op(A)\) and number of rows of the sparse CSR matrix \(op(B)\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrRowPtrA – [in] array of
m+1elements ( \(op(A) == A\),k+1otherwise) that point to the start of every row of the sparse CSR matrix \(op(A)\).csrColIndA – [in] array of
nnzAelements containing the column indices of the sparse CSR matrix \(A\).descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrRowPtrB – [in] array of
k+1elements ( \(op(B) == B\),m+1otherwise) that point to the start of every row of the sparse CSR matrix \(op(B)\).csrColIndB – [in] array of
nnzBelements containing the column indices of the sparse CSR matrix \(B\).descrD – [in] descriptor of the sparse CSR matrix \(D\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzD – [in] number of non-zero entries of the sparse CSR matrix \(D\).
csrRowPtrD – [in] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(D\).csrColIndD – [in] array of
nnzDelements containing the column indices of the sparse CSR matrix \(D\).descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrRowPtrC – [out] array of
m+1elements that point to the start of every row of the sparse CSR matrix \(C\).nnzTotalDevHostPtr – [out] pointer to the number of non-zero entries of the sparse CSR matrix \(C\).
info – [in] structure that holds meta data for the sparse CSR matrix \(C\).
pBuffer – [in] temporary storage buffer allocated by the user, size is returned by hipsparseScsrgemm2_bufferSizeExt(), hipsparseDcsrgemm2_bufferSizeExt(), hipsparseCcsrgemm2_bufferSizeExt() or hipsparseZcsrgemm2_bufferSizeExt().
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, n, k, nnzA, nnzB, nnzD, descrA, csrRowPtrA, csrColIndA, descrB, csrRowPtrB, csrColIndB, descrD, csrRowPtrD, csrColIndD, descrC, csrRowPtrC, nnzTotalDevHostPtr, info or pBuffer is invalid.
HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
hipsparseXcsrgemm2()#
-
hipsparseStatus_t hipsparseScsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const float *alpha, const hipsparseMatDescr_t descrA, int nnzA, const float *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const float *csrValB, const int *csrRowPtrB, const int *csrColIndB, const float *beta, const hipsparseMatDescr_t descrD, int nnzD, const float *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, float *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
-
hipsparseStatus_t hipsparseDcsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const double *alpha, const hipsparseMatDescr_t descrA, int nnzA, const double *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const double *csrValB, const int *csrRowPtrB, const int *csrColIndB, const double *beta, const hipsparseMatDescr_t descrD, int nnzD, const double *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, double *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
-
hipsparseStatus_t hipsparseCcsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const hipComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const hipComplex *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, hipComplex *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
-
hipsparseStatus_t hipsparseZcsrgemm2(hipsparseHandle_t handle, int m, int n, int k, const hipDoubleComplex *alpha, const hipsparseMatDescr_t descrA, int nnzA, const hipDoubleComplex *csrValA, const int *csrRowPtrA, const int *csrColIndA, const hipsparseMatDescr_t descrB, int nnzB, const hipDoubleComplex *csrValB, const int *csrRowPtrB, const int *csrColIndB, const hipDoubleComplex *beta, const hipsparseMatDescr_t descrD, int nnzD, const hipDoubleComplex *csrValD, const int *csrRowPtrD, const int *csrColIndD, const hipsparseMatDescr_t descrC, hipDoubleComplex *csrValC, const int *csrRowPtrC, int *csrColIndC, const csrgemm2Info_t info, void *pBuffer)#
Sparse matrix sparse matrix multiplication using CSR storage format.
hipsparseXcsrgemm2 multiplies the scalar \(\alpha\) with the sparse \(m \times k\) matrix \(A\), defined in CSR storage format, and the sparse \(k \times n\) matrix \(B\), defined in CSR storage format, and adds the result to the sparse \(m \times n\) matrix \(D\) that is multiplied by \(\beta\). The final result is stored in the sparse \(m \times n\) matrix \(C\), defined in CSR storage format, such that \[ C := \alpha \cdot A \cdot B + \beta \cdot D \] This computation involves a multi step process. First the user must call hipsparseXcsrgemm2_bufferSizeExt() in order to determine the required user allocated temporary buffer size. The user then allocates this buffer and also allocates csrRowPtrC to have size m+1. Both the temporary storage buffer and the csrRowPtrC array are then passed to hipsparseXcsrgemm2Nnz() which fills in the csrRowPtrC array as well as computes the total number of nonzeros in C, nnzC. The user then allocates both arrays csrColIndC and csrValC to have size nnzC and calls hipsparseXcsrgemm2 to complete the computation. The desired index base in the output CSR matrix C is set in the hipsparseMatDescr_t descrC. See hipsparseSetMatIndexBase().
Note
If \(\alpha == 0\), then \(C = \beta \cdot D\) will be computed.
Note
If \(\beta == 0\), then \(C = \alpha \cdot A \cdot B\) will be computed.
Note
\(\alpha == \beta == 0\) is invalid.
Note
Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
Note
This function is non blocking and executed asynchronously with respect to the host. It may return before the actual computation has finished.
Note
Please note, that for matrix products with more than 4096 non-zero entries per row, additional temporary storage buffer is allocated by the algorithm.
- Parameters:
handle – [in] handle to the hipsparse library context queue.
m – [in] number of rows of the sparse CSR matrix \(op(A)\) and \(C\).
n – [in] number of columns of the sparse CSR matrix \(op(B)\) and \(C\).
k – [in] number of columns of the sparse CSR matrix \(op(A)\) and number of rows of the sparse CSR matrix \(op(B)\).
alpha – [in] scalar \(\alpha\).
descrA – [in] descriptor of the sparse CSR matrix \(A\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzA – [in] number of non-zero entries of the sparse CSR matrix \(A\).
csrValA – [in] array of nnzA elements of the sparse CSR matrix \(A\).
csrRowPtrA – [in] array of m+1 elements ( \(op(A) == A\), k+1 otherwise) that point to the start of every row of the sparse CSR matrix \(op(A)\).
csrColIndA – [in] array of nnzA elements containing the column indices of the sparse CSR matrix \(A\).
descrB – [in] descriptor of the sparse CSR matrix \(B\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzB – [in] number of non-zero entries of the sparse CSR matrix \(B\).
csrValB – [in] array of nnzB elements of the sparse CSR matrix \(B\).
csrRowPtrB – [in] array of k+1 elements ( \(op(B) == B\), m+1 otherwise) that point to the start of every row of the sparse CSR matrix \(op(B)\).
csrColIndB – [in] array of nnzB elements containing the column indices of the sparse CSR matrix \(B\).
beta – [in] scalar \(\beta\).
descrD – [in] descriptor of the sparse CSR matrix \(D\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
nnzD – [in] number of non-zero entries of the sparse CSR matrix \(D\).
csrValD – [in] array of nnzD elements of the sparse CSR matrix \(D\).
csrRowPtrD – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(D\).
csrColIndD – [in] array of nnzD elements containing the column indices of the sparse CSR matrix \(D\).
descrC – [in] descriptor of the sparse CSR matrix \(C\). Currently, only HIPSPARSE_MATRIX_TYPE_GENERAL is supported.
csrValC – [out] array of nnzC elements of the sparse CSR matrix \(C\).
csrRowPtrC – [in] array of m+1 elements that point to the start of every row of the sparse CSR matrix \(C\).
csrColIndC – [out] array of nnzC elements containing the column indices of the sparse CSR matrix \(C\).
info – [in] structure that holds meta data for the sparse CSR matrix \(C\).
pBuffer – [in] temporary storage buffer allocated by the user, size is returned by hipsparseScsrgemm2_bufferSizeExt(), hipsparseDcsrgemm2_bufferSizeExt(), hipsparseCcsrgemm2_bufferSizeExt() or hipsparseZcsrgemm2_bufferSizeExt().
- Return values:
HIPSPARSE_STATUS_SUCCESS – the operation completed successfully.
HIPSPARSE_STATUS_INVALID_VALUE – handle, m, n, k, nnzA, nnzB, nnzD, alpha, beta, descrA, csrValA, csrRowPtrA, csrColIndA, descrB, csrValB, csrRowPtrB, csrColIndB, descrD, csrValD, csrRowPtrD, csrColIndD, csrValC, csrRowPtrC, csrColIndC, info or pBuffer is invalid.
HIPSPARSE_STATUS_ALLOC_FAILED – additional buffer for long rows could not be allocated.
HIPSPARSE_STATUS_NOT_SUPPORTED – hipsparseMatrixType_t != HIPSPARSE_MATRIX_TYPE_GENERAL.
1int main(int argc, char* argv[])
2{
3 int m = 4;
4 int k = 3;
5 int n = 2;
6 int nnzA = 7;
7 int nnzB = 3;
8 int nnzD = 6;
9
10 float alpha{1.0f};
11 float beta{1.0f};
12
13 // A, B, and C are mxk, kxn, and m×n
14
15 // A
16 // 1 0 0
17 // 3 4 0
18 // 5 6 7
19 // 0 0 9
20 std::vector<int> hcsrRowPtrA = {0, 1, 3, 6, 7};
21 std::vector<int> hcsrColIndA = {0, 0, 1, 0, 1, 2, 2};
22 std::vector<float> hcsrValA = {1.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 9.0f};
23
24 // B
25 // 0 1
26 // 1 0
27 // 0 1
28 std::vector<int> hcsrRowPtrB = {0, 1, 2, 3};
29 std::vector<int> hcsrColIndB = {1, 0, 1};
30 std::vector<float> hcsrValB = {1.0f, 1.0f, 1.0f};
31
32 // D
33 // 0 1
34 // 2 3
35 // 4 5
36 // 0 6
37 std::vector<int> hcsrRowPtrD = {0, 1, 3, 5, 6};
38 std::vector<int> hcsrColIndD = {1, 0, 1, 0, 1, 1};
39 std::vector<float> hcsrValD = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
40
41 // Device memory management: Allocate and copy A, B
42 int* dcsrRowPtrA;
43 int* dcsrColIndA;
44 float* dcsrValA;
45 int* dcsrRowPtrB;
46 int* dcsrColIndB;
47 float* dcsrValB;
48 int* dcsrRowPtrD;
49 int* dcsrColIndD;
50 float* dcsrValD;
51 int* dcsrRowPtrC;
52 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
53 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
54 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
55 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (k + 1) * sizeof(int)));
56 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
57 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
58 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrD, (m + 1) * sizeof(int)));
59 HIP_CHECK(hipMalloc((void**)&dcsrColIndD, nnzD * sizeof(int)));
60 HIP_CHECK(hipMalloc((void**)&dcsrValD, nnzD * sizeof(float)));
61 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
62
63 HIP_CHECK(
64 hipMemcpy(dcsrRowPtrA, hcsrRowPtrA.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
65 HIP_CHECK(
66 hipMemcpy(dcsrColIndA, hcsrColIndA.data(), nnzA * sizeof(int), hipMemcpyHostToDevice));
67 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA.data(), nnzA * sizeof(float), hipMemcpyHostToDevice));
68 HIP_CHECK(
69 hipMemcpy(dcsrRowPtrB, hcsrRowPtrB.data(), (k + 1) * sizeof(int), hipMemcpyHostToDevice));
70 HIP_CHECK(
71 hipMemcpy(dcsrColIndB, hcsrColIndB.data(), nnzB * sizeof(int), hipMemcpyHostToDevice));
72 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB.data(), nnzB * sizeof(float), hipMemcpyHostToDevice));
73 HIP_CHECK(
74 hipMemcpy(dcsrRowPtrD, hcsrRowPtrD.data(), (m + 1) * sizeof(int), hipMemcpyHostToDevice));
75 HIP_CHECK(
76 hipMemcpy(dcsrColIndD, hcsrColIndD.data(), nnzD * sizeof(int), hipMemcpyHostToDevice));
77 HIP_CHECK(hipMemcpy(dcsrValD, hcsrValD.data(), nnzD * sizeof(float), hipMemcpyHostToDevice));
78
79 hipsparseHandle_t handle;
80 HIPSPARSE_CHECK(hipsparseCreate(&handle));
81
82 hipsparseMatDescr_t descrA;
83 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
84
85 hipsparseMatDescr_t descrB;
86 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
87
88 hipsparseMatDescr_t descrC;
89 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
90
91 hipsparseMatDescr_t descrD;
92 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrD));
93
94 csrgemm2Info_t info;
95 HIPSPARSE_CHECK(hipsparseCreateCsrgemm2Info(&info));
96
97 size_t bufferSize;
98 HIPSPARSE_CHECK(hipsparseScsrgemm2_bufferSizeExt(handle,
99 m,
100 n,
101 k,
102 &alpha,
103 descrA,
104 nnzA,
105 dcsrRowPtrA,
106 dcsrColIndA,
107 descrB,
108 nnzB,
109 dcsrRowPtrB,
110 dcsrColIndB,
111 &beta,
112 descrD,
113 nnzD,
114 dcsrRowPtrD,
115 dcsrColIndD,
116 info,
117 &bufferSize));
118
119 void* dbuffer = nullptr;
120 HIP_CHECK(hipMalloc((void**)&dbuffer, bufferSize));
121
122 int nnzC;
123 HIPSPARSE_CHECK(hipsparseXcsrgemm2Nnz(handle,
124 m,
125 n,
126 k,
127 descrA,
128 nnzA,
129 dcsrRowPtrA,
130 dcsrColIndA,
131 descrB,
132 nnzB,
133 dcsrRowPtrB,
134 dcsrColIndB,
135 descrD,
136 nnzD,
137 dcsrRowPtrD,
138 dcsrColIndD,
139 descrC,
140 dcsrRowPtrC,
141 &nnzC,
142 info,
143 dbuffer));
144
145 int* dcsrColIndC = nullptr;
146 float* dcsrValC = nullptr;
147 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
148 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
149
150 HIPSPARSE_CHECK(hipsparseScsrgemm2(handle,
151 m,
152 n,
153 k,
154 &alpha,
155 descrA,
156 nnzA,
157 dcsrValA,
158 dcsrRowPtrA,
159 dcsrColIndA,
160 descrB,
161 nnzB,
162 dcsrValB,
163 dcsrRowPtrB,
164 dcsrColIndB,
165 &beta,
166 descrD,
167 nnzD,
168 dcsrValD,
169 dcsrRowPtrD,
170 dcsrColIndD,
171 descrC,
172 dcsrValC,
173 dcsrRowPtrC,
174 dcsrColIndC,
175 info,
176 dbuffer));
177
178 std::vector<int> hcsrRowPtrC(m + 1);
179 std::vector<int> hcsrColIndC(nnzC);
180 std::vector<float> hcsrValC(nnzC);
181
182 // Copy back to the host
183 HIP_CHECK(
184 hipMemcpy(hcsrRowPtrC.data(), dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
185 HIP_CHECK(
186 hipMemcpy(hcsrColIndC.data(), dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
187 HIP_CHECK(hipMemcpy(hcsrValC.data(), dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
188
189 std::cout << "C" << std::endl;
190 for(int i = 0; i < m; i++)
191 {
192 int start = hcsrRowPtrC[i];
193 int end = hcsrRowPtrC[i + 1];
194
195 std::vector<float> temp(n, 0.0f);
196 for(int j = start; j < end; j++)
197 {
198 temp[hcsrColIndC[j]] = hcsrValC[j];
199 }
200
201 for(int j = 0; j < n; j++)
202 {
203 std::cout << temp[j] << " ";
204 }
205 std::cout << std::endl;
206 }
207 std::cout << std::endl;
208
209 HIP_CHECK(hipFree(dcsrRowPtrA));
210 HIP_CHECK(hipFree(dcsrColIndA));
211 HIP_CHECK(hipFree(dcsrValA));
212 HIP_CHECK(hipFree(dcsrRowPtrB));
213 HIP_CHECK(hipFree(dcsrColIndB));
214 HIP_CHECK(hipFree(dcsrValB));
215 HIP_CHECK(hipFree(dcsrRowPtrC));
216 HIP_CHECK(hipFree(dcsrColIndC));
217 HIP_CHECK(hipFree(dcsrValC));
218 HIP_CHECK(hipFree(dcsrRowPtrD));
219 HIP_CHECK(hipFree(dcsrColIndD));
220 HIP_CHECK(hipFree(dcsrValD));
221
222 HIP_CHECK(hipFree(dbuffer));
223
224 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
225 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
226 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
227 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrD));
228 HIPSPARSE_CHECK(hipsparseDestroyCsrgemm2Info(info));
229
230 HIPSPARSE_CHECK(hipsparseDestroy(handle));
231
232 return 0;
233}
1int main(int argc, char* argv[])
2{
3 int m = 4;
4 int k = 3;
5 int n = 2;
6 int nnzA = 7;
7 int nnzB = 3;
8 int nnzD = 6;
9
10 float alpha = 1.0;
11 float beta = 1.0;
12
13 // A, B, and C are mxk, kxn, and m×n
14
15 // A
16 // 1 0 0
17 // 3 4 0
18 // 5 6 7
19 // 0 0 9
20 int hcsrRowPtrA[] = {0, 1, 3, 6, 7};
21 int hcsrColIndA[] = {0, 0, 1, 0, 1, 2, 2};
22 float hcsrValA[] = {1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0};
23
24 // B
25 // 0 1
26 // 1 0
27 // 0 1
28 int hcsrRowPtrB[] = {0, 1, 2, 3};
29 int hcsrColIndB[] = {1, 0, 1};
30 float hcsrValB[] = {1.0, 1.0, 1.0};
31
32 // D
33 // 0 1
34 // 2 3
35 // 4 5
36 // 0 6
37 int hcsrRowPtrD[] = {0, 1, 3, 5, 6};
38 int hcsrColIndD[] = {1, 0, 1, 0, 1, 1};
39 float hcsrValD[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0};
40
41 // Device memory management: Allocate and copy A, B
42 int* dcsrRowPtrA;
43 int* dcsrColIndA;
44 float* dcsrValA;
45 int* dcsrRowPtrB;
46 int* dcsrColIndB;
47 float* dcsrValB;
48 int* dcsrRowPtrD;
49 int* dcsrColIndD;
50 float* dcsrValD;
51 int* dcsrRowPtrC;
52 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrA, (m + 1) * sizeof(int)));
53 HIP_CHECK(hipMalloc((void**)&dcsrColIndA, nnzA * sizeof(int)));
54 HIP_CHECK(hipMalloc((void**)&dcsrValA, nnzA * sizeof(float)));
55 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrB, (k + 1) * sizeof(int)));
56 HIP_CHECK(hipMalloc((void**)&dcsrColIndB, nnzB * sizeof(int)));
57 HIP_CHECK(hipMalloc((void**)&dcsrValB, nnzB * sizeof(float)));
58 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrD, (m + 1) * sizeof(int)));
59 HIP_CHECK(hipMalloc((void**)&dcsrColIndD, nnzD * sizeof(int)));
60 HIP_CHECK(hipMalloc((void**)&dcsrValD, nnzD * sizeof(float)));
61 HIP_CHECK(hipMalloc((void**)&dcsrRowPtrC, (m + 1) * sizeof(int)));
62
63 HIP_CHECK(hipMemcpy(dcsrRowPtrA, hcsrRowPtrA, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
64 HIP_CHECK(hipMemcpy(dcsrColIndA, hcsrColIndA, nnzA * sizeof(int), hipMemcpyHostToDevice));
65 HIP_CHECK(hipMemcpy(dcsrValA, hcsrValA, nnzA * sizeof(float), hipMemcpyHostToDevice));
66 HIP_CHECK(hipMemcpy(dcsrRowPtrB, hcsrRowPtrB, (k + 1) * sizeof(int), hipMemcpyHostToDevice));
67 HIP_CHECK(hipMemcpy(dcsrColIndB, hcsrColIndB, nnzB * sizeof(int), hipMemcpyHostToDevice));
68 HIP_CHECK(hipMemcpy(dcsrValB, hcsrValB, nnzB * sizeof(float), hipMemcpyHostToDevice));
69 HIP_CHECK(hipMemcpy(dcsrRowPtrD, hcsrRowPtrD, (m + 1) * sizeof(int), hipMemcpyHostToDevice));
70 HIP_CHECK(hipMemcpy(dcsrColIndD, hcsrColIndD, nnzD * sizeof(int), hipMemcpyHostToDevice));
71 HIP_CHECK(hipMemcpy(dcsrValD, hcsrValD, nnzD * sizeof(float), hipMemcpyHostToDevice));
72
73 hipsparseHandle_t handle;
74 HIPSPARSE_CHECK(hipsparseCreate(&handle));
75
76 hipsparseMatDescr_t descrA;
77 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrA));
78
79 hipsparseMatDescr_t descrB;
80 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrB));
81
82 hipsparseMatDescr_t descrC;
83 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrC));
84
85 hipsparseMatDescr_t descrD;
86 HIPSPARSE_CHECK(hipsparseCreateMatDescr(&descrD));
87
88 csrgemm2Info_t info;
89 HIPSPARSE_CHECK(hipsparseCreateCsrgemm2Info(&info));
90
91 size_t bufferSize;
92 HIPSPARSE_CHECK(hipsparseScsrgemm2_bufferSizeExt(handle,
93 m,
94 n,
95 k,
96 &alpha,
97 descrA,
98 nnzA,
99 dcsrRowPtrA,
100 dcsrColIndA,
101 descrB,
102 nnzB,
103 dcsrRowPtrB,
104 dcsrColIndB,
105 &beta,
106 descrD,
107 nnzD,
108 dcsrRowPtrD,
109 dcsrColIndD,
110 info,
111 &bufferSize));
112
113 void* dbuffer = NULL;
114 HIP_CHECK(hipMalloc((void**)&dbuffer, bufferSize));
115
116 int nnzC;
117 HIPSPARSE_CHECK(hipsparseXcsrgemm2Nnz(handle,
118 m,
119 n,
120 k,
121 descrA,
122 nnzA,
123 dcsrRowPtrA,
124 dcsrColIndA,
125 descrB,
126 nnzB,
127 dcsrRowPtrB,
128 dcsrColIndB,
129 descrD,
130 nnzD,
131 dcsrRowPtrD,
132 dcsrColIndD,
133 descrC,
134 dcsrRowPtrC,
135 &nnzC,
136 info,
137 dbuffer));
138
139 int* dcsrColIndC = NULL;
140 float* dcsrValC = NULL;
141 HIP_CHECK(hipMalloc((void**)&dcsrColIndC, sizeof(int) * nnzC));
142 HIP_CHECK(hipMalloc((void**)&dcsrValC, sizeof(float) * nnzC));
143
144 HIPSPARSE_CHECK(hipsparseScsrgemm2(handle,
145 m,
146 n,
147 k,
148 &alpha,
149 descrA,
150 nnzA,
151 dcsrValA,
152 dcsrRowPtrA,
153 dcsrColIndA,
154 descrB,
155 nnzB,
156 dcsrValB,
157 dcsrRowPtrB,
158 dcsrColIndB,
159 &beta,
160 descrD,
161 nnzD,
162 dcsrValD,
163 dcsrRowPtrD,
164 dcsrColIndD,
165 descrC,
166 dcsrValC,
167 dcsrRowPtrC,
168 dcsrColIndC,
169 info,
170 dbuffer));
171
172 int* hcsrRowPtrC = (int*)malloc((m + 1) * sizeof(int));
173 int* hcsrColIndC = (int*)malloc((nnzC) * sizeof(int));
174 float hcsrValC[nnzC];
175
176 // Copy back to the host
177 HIP_CHECK(hipMemcpy(hcsrRowPtrC, dcsrRowPtrC, sizeof(int) * (m + 1), hipMemcpyDeviceToHost));
178 HIP_CHECK(hipMemcpy(hcsrColIndC, dcsrColIndC, sizeof(int) * nnzC, hipMemcpyDeviceToHost));
179 HIP_CHECK(hipMemcpy(hcsrValC, dcsrValC, sizeof(float) * nnzC, hipMemcpyDeviceToHost));
180
181 printf("C\n");
182 for(int i = 0; i < m; i++)
183 {
184 int start = hcsrRowPtrC[i];
185 int end = hcsrRowPtrC[i + 1];
186
187 float* temp = (float*)malloc(n * sizeof(float));
188 for(int j = start; j < end; j++)
189 {
190 temp[hcsrColIndC[j]] = hcsrValC[j];
191 }
192
193 for(int j = 0; j < n; j++)
194 {
195 printf("%f ", temp[j]);
196 }
197 printf("\n");
198 }
199 printf("\n");
200
201 HIP_CHECK(hipFree(dcsrRowPtrA));
202 HIP_CHECK(hipFree(dcsrColIndA));
203 HIP_CHECK(hipFree(dcsrValA));
204 HIP_CHECK(hipFree(dcsrRowPtrB));
205 HIP_CHECK(hipFree(dcsrColIndB));
206 HIP_CHECK(hipFree(dcsrValB));
207 HIP_CHECK(hipFree(dcsrRowPtrC));
208 HIP_CHECK(hipFree(dcsrColIndC));
209 HIP_CHECK(hipFree(dcsrValC));
210 HIP_CHECK(hipFree(dcsrRowPtrD));
211 HIP_CHECK(hipFree(dcsrColIndD));
212 HIP_CHECK(hipFree(dcsrValD));
213
214 HIP_CHECK(hipFree(dbuffer));
215
216 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrA));
217 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrB));
218 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrC));
219 HIPSPARSE_CHECK(hipsparseDestroyMatDescr(descrD));
220 HIPSPARSE_CHECK(hipsparseDestroyCsrgemm2Info(info));
221
222 HIPSPARSE_CHECK(hipsparseDestroy(handle));
223
224 return 0;
225}
! Example: C = alpha * (A * B) + beta * D with hipsparseScsrgemm2, called from
! Fortran through iso_c_binding interfaces. The result is printed row by row as
! (column, value) pairs using the zero-based column indices returned by the library.
program example_hipsparse_csrgemm2
    use iso_c_binding
    implicit none

    ! HIP runtime
    interface
        function hipMalloc(ptr, size) &
                bind(c, name = 'hipMalloc')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMalloc
            type(c_ptr) :: ptr
            integer(c_size_t), value :: size
        end function hipMalloc

        function hipFree(ptr) &
                bind(c, name = 'hipFree')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipFree
            type(c_ptr), value :: ptr
        end function hipFree

        function hipMemcpy(dst, src, size, kind) &
                bind(c, name = 'hipMemcpy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipMemcpy
            type(c_ptr), value :: dst
            type(c_ptr), intent(in), value :: src
            integer(c_size_t), value :: size
            integer(c_int), value :: kind
        end function hipMemcpy
    end interface

    ! Copy directions passed to hipMemcpy; declared as c_int to match the C parameter.
    integer(c_int), parameter :: hipMemcpyHostToDevice = 1
    integer(c_int), parameter :: hipMemcpyDeviceToHost = 2

    ! hipSPARSE
    interface
        function hipsparseCreate(handle) &
                bind(c, name = 'hipsparseCreate')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreate
            type(c_ptr) :: handle
        end function hipsparseCreate

        function hipsparseDestroy(handle) &
                bind(c, name = 'hipsparseDestroy')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroy
            type(c_ptr), value :: handle
        end function hipsparseDestroy

        function hipsparseCreateMatDescr(descr) &
                bind(c, name = 'hipsparseCreateMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreateMatDescr
            type(c_ptr) :: descr
        end function hipsparseCreateMatDescr

        function hipsparseDestroyMatDescr(descr) &
                bind(c, name = 'hipsparseDestroyMatDescr')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroyMatDescr
            type(c_ptr), value :: descr
        end function hipsparseDestroyMatDescr

        function hipsparseCreateCsrgemm2Info(info) &
                bind(c, name = 'hipsparseCreateCsrgemm2Info')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseCreateCsrgemm2Info
            type(c_ptr) :: info
        end function hipsparseCreateCsrgemm2Info

        function hipsparseDestroyCsrgemm2Info(info) &
                bind(c, name = 'hipsparseDestroyCsrgemm2Info')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseDestroyCsrgemm2Info
            type(c_ptr), value :: info
        end function hipsparseDestroyCsrgemm2Info

        function hipsparseScsrgemm2_bufferSizeExt(handle, m, n, k, alpha, descrA, nnzA, csrRowPtrA, &
                csrColIndA, descrB, nnzB, csrRowPtrB, csrColIndB, &
                beta, descrD, nnzD, csrRowPtrD, csrColIndD, &
                info, pBufferSizeInBytes) &
                bind(c, name = 'hipsparseScsrgemm2_bufferSizeExt')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseScsrgemm2_bufferSizeExt
            type(c_ptr), value :: handle
            integer(c_int), value :: m
            integer(c_int), value :: n
            integer(c_int), value :: k
            type(c_ptr), intent(in), value :: alpha
            type(c_ptr), value :: descrA
            integer(c_int), value :: nnzA
            type(c_ptr), intent(in), value :: csrRowPtrA
            type(c_ptr), intent(in), value :: csrColIndA
            type(c_ptr), value :: descrB
            integer(c_int), value :: nnzB
            type(c_ptr), intent(in), value :: csrRowPtrB
            type(c_ptr), intent(in), value :: csrColIndB
            type(c_ptr), intent(in), value :: beta
            type(c_ptr), value :: descrD
            integer(c_int), value :: nnzD
            type(c_ptr), intent(in), value :: csrRowPtrD
            type(c_ptr), intent(in), value :: csrColIndD
            type(c_ptr), value :: info
            type(c_ptr), value :: pBufferSizeInBytes
        end function hipsparseScsrgemm2_bufferSizeExt

        function hipsparseXcsrgemm2Nnz(handle, m, n, k, descrA, nnzA, csrRowPtrA, csrColIndA, &
                descrB, nnzB, csrRowPtrB, csrColIndB, descrD, nnzD, &
                csrRowPtrD, csrColIndD, descrC, csrRowPtrC, nnzTotalDevHostPtr, &
                info, pBuffer) &
                bind(c, name = 'hipsparseXcsrgemm2Nnz')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseXcsrgemm2Nnz
            type(c_ptr), value :: handle
            integer(c_int), value :: m
            integer(c_int), value :: n
            integer(c_int), value :: k
            type(c_ptr), value :: descrA
            integer(c_int), value :: nnzA
            type(c_ptr), intent(in), value :: csrRowPtrA
            type(c_ptr), intent(in), value :: csrColIndA
            type(c_ptr), value :: descrB
            integer(c_int), value :: nnzB
            type(c_ptr), intent(in), value :: csrRowPtrB
            type(c_ptr), intent(in), value :: csrColIndB
            type(c_ptr), value :: descrD
            integer(c_int), value :: nnzD
            type(c_ptr), intent(in), value :: csrRowPtrD
            type(c_ptr), intent(in), value :: csrColIndD
            type(c_ptr), value :: descrC
            type(c_ptr), value :: csrRowPtrC
            type(c_ptr), value :: nnzTotalDevHostPtr
            type(c_ptr), value :: info
            type(c_ptr), value :: pBuffer
        end function hipsparseXcsrgemm2Nnz

        function hipsparseScsrgemm2(handle, m, n, k, alpha, descrA, nnzA, csrValA, csrRowPtrA, &
                csrColIndA, descrB, nnzB, csrValB, csrRowPtrB, csrColIndB, &
                beta, descrD, nnzD, csrValD, csrRowPtrD, csrColIndD, &
                descrC, csrValC, csrRowPtrC, csrColIndC, info, pBuffer) &
                bind(c, name = 'hipsparseScsrgemm2')
            use iso_c_binding
            implicit none
            integer(c_int) :: hipsparseScsrgemm2
            type(c_ptr), value :: handle
            integer(c_int), value :: m
            integer(c_int), value :: n
            integer(c_int), value :: k
            type(c_ptr), intent(in), value :: alpha
            type(c_ptr), value :: descrA
            integer(c_int), value :: nnzA
            type(c_ptr), intent(in), value :: csrValA
            type(c_ptr), intent(in), value :: csrRowPtrA
            type(c_ptr), intent(in), value :: csrColIndA
            type(c_ptr), value :: descrB
            integer(c_int), value :: nnzB
            type(c_ptr), intent(in), value :: csrValB
            type(c_ptr), intent(in), value :: csrRowPtrB
            type(c_ptr), intent(in), value :: csrColIndB
            type(c_ptr), intent(in), value :: beta
            type(c_ptr), value :: descrD
            integer(c_int), value :: nnzD
            type(c_ptr), intent(in), value :: csrValD
            type(c_ptr), intent(in), value :: csrRowPtrD
            type(c_ptr), intent(in), value :: csrColIndD
            type(c_ptr), value :: descrC
            type(c_ptr), value :: csrValC
            type(c_ptr), value :: csrRowPtrC
            type(c_ptr), value :: csrColIndC
            type(c_ptr), value :: info
            type(c_ptr), value :: pBuffer
        end function hipsparseScsrgemm2
    end interface

    ! Byte width used for every int and float element copied to or from the device.
    ! NOTE(review): assumes c_int and c_float are both 4 bytes wide, exactly as the
    ! hard-coded factor 4 in the original example did.
    integer(c_size_t), parameter :: elem_bytes = 4_c_size_t

    ! Library objects
    type(c_ptr) :: handle
    type(c_ptr) :: descrA, descrB, descrC, descrD
    type(c_ptr) :: info
    integer(c_int) :: stat
    integer :: i, j, start_idx, end_idx
    integer(c_size_t), target :: bufferSize

    ! C = alpha * A * B + beta * D
    ! A is m x k, B is k x n, D is m x n, C is m x n
    ! Everything handed to the C API is declared with a C-interoperable kind so the
    ! example does not depend on the compiler's default integer size.
    integer(c_int), parameter :: m = 4
    integer(c_int), parameter :: k = 3
    integer(c_int), parameter :: n = 2
    integer(c_int), parameter :: nnzA = 7
    integer(c_int), parameter :: nnzB = 3
    integer(c_int), parameter :: nnzD = 6
    integer(c_int), target :: nnzC

    ! Matrix A (4x3)
    integer(c_int), dimension(m+1), target :: hcsrRowPtrA = (/0, 1, 3, 6, 7/)
    integer(c_int), dimension(nnzA), target :: hcsrColIndA = (/0, 0, 1, 0, 1, 2, 2/)
    real(c_float), dimension(nnzA), target :: hcsrValA = (/1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0/)

    ! Matrix B (3x2)
    integer(c_int), dimension(k+1), target :: hcsrRowPtrB = (/0, 1, 2, 3/)
    integer(c_int), dimension(nnzB), target :: hcsrColIndB = (/1, 0, 1/)
    real(c_float), dimension(nnzB), target :: hcsrValB = (/1.0, 1.0, 1.0/)

    ! Matrix D (4x2)
    integer(c_int), dimension(m+1), target :: hcsrRowPtrD = (/0, 1, 3, 5, 6/)
    integer(c_int), dimension(nnzD), target :: hcsrColIndD = (/1, 0, 1, 0, 1, 1/)
    real(c_float), dimension(nnzD), target :: hcsrValD = (/1.0, 2.0, 3.0, 4.0, 5.0, 6.0/)

    ! Matrix C (allocated once nnzC is known)
    integer(c_int), dimension(:), allocatable, target :: hcsrRowPtrC
    integer(c_int), dimension(:), allocatable, target :: hcsrColIndC
    real(c_float), dimension(:), allocatable, target :: hcsrValC

    ! Scalar values
    real(c_float), target :: alpha = 1.0
    real(c_float), target :: beta = 1.0

    ! Device pointers
    type(c_ptr) :: dcsrRowPtrA, dcsrColIndA, dcsrValA
    type(c_ptr) :: dcsrRowPtrB, dcsrColIndB, dcsrValB
    type(c_ptr) :: dcsrRowPtrD, dcsrColIndD, dcsrValD
    type(c_ptr) :: dcsrRowPtrC, dcsrColIndC, dcsrValC
    type(c_ptr) :: dbuffer

    ! Create hipSPARSE handle, matrix descriptors and csrgemm2 info
    call check(hipsparseCreate(handle), 'hipsparseCreate')
    call check(hipsparseCreateMatDescr(descrA), 'hipsparseCreateMatDescr')
    call check(hipsparseCreateMatDescr(descrB), 'hipsparseCreateMatDescr')
    call check(hipsparseCreateMatDescr(descrC), 'hipsparseCreateMatDescr')
    call check(hipsparseCreateMatDescr(descrD), 'hipsparseCreateMatDescr')
    call check(hipsparseCreateCsrgemm2Info(info), 'hipsparseCreateCsrgemm2Info')

    ! Allocate device memory for A, B, D and the row offsets of C
    call check(hipMalloc(dcsrRowPtrA, (m + 1) * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrColIndA, nnzA * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrValA, nnzA * elem_bytes), 'hipMalloc')

    call check(hipMalloc(dcsrRowPtrB, (k + 1) * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrColIndB, nnzB * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrValB, nnzB * elem_bytes), 'hipMalloc')

    call check(hipMalloc(dcsrRowPtrD, (m + 1) * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrColIndD, nnzD * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrValD, nnzD * elem_bytes), 'hipMalloc')

    call check(hipMalloc(dcsrRowPtrC, (m + 1) * elem_bytes), 'hipMalloc')

    ! Copy A, B, and D to device
    call check(hipMemcpy(dcsrRowPtrA, c_loc(hcsrRowPtrA), (m + 1) * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrColIndA, c_loc(hcsrColIndA), nnzA * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrValA, c_loc(hcsrValA), nnzA * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')

    call check(hipMemcpy(dcsrRowPtrB, c_loc(hcsrRowPtrB), (k + 1) * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrColIndB, c_loc(hcsrColIndB), nnzB * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrValB, c_loc(hcsrValB), nnzB * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')

    call check(hipMemcpy(dcsrRowPtrD, c_loc(hcsrRowPtrD), (m + 1) * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrColIndD, c_loc(hcsrColIndD), nnzD * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')
    call check(hipMemcpy(dcsrValD, c_loc(hcsrValD), nnzD * elem_bytes, &
                         hipMemcpyHostToDevice), 'hipMemcpy')

    ! Get buffer size
    call check(hipsparseScsrgemm2_bufferSizeExt(handle, m, n, k, &
                                                c_loc(alpha), &
                                                descrA, nnzA, dcsrRowPtrA, dcsrColIndA, &
                                                descrB, nnzB, dcsrRowPtrB, dcsrColIndB, &
                                                c_loc(beta), &
                                                descrD, nnzD, dcsrRowPtrD, dcsrColIndD, &
                                                info, &
                                                c_loc(bufferSize)), &
               'hipsparseScsrgemm2_bufferSizeExt')

    ! Allocate temporary buffer
    call check(hipMalloc(dbuffer, bufferSize), 'hipMalloc')

    ! Determine nnzC and fill the row offsets of C
    call check(hipsparseXcsrgemm2Nnz(handle, m, n, k, &
                                     descrA, nnzA, dcsrRowPtrA, dcsrColIndA, &
                                     descrB, nnzB, dcsrRowPtrB, dcsrColIndB, &
                                     descrD, nnzD, dcsrRowPtrD, dcsrColIndD, &
                                     descrC, dcsrRowPtrC, c_loc(nnzC), &
                                     info, &
                                     dbuffer), &
               'hipsparseXcsrgemm2Nnz')

    ! Allocate device memory for the column indices and values of C
    call check(hipMalloc(dcsrColIndC, nnzC * elem_bytes), 'hipMalloc')
    call check(hipMalloc(dcsrValC, nnzC * elem_bytes), 'hipMalloc')

    ! Compute C = alpha * A * B + beta * D
    call check(hipsparseScsrgemm2(handle, m, n, k, &
                                  c_loc(alpha), &
                                  descrA, nnzA, dcsrValA, dcsrRowPtrA, dcsrColIndA, &
                                  descrB, nnzB, dcsrValB, dcsrRowPtrB, dcsrColIndB, &
                                  c_loc(beta), &
                                  descrD, nnzD, dcsrValD, dcsrRowPtrD, dcsrColIndD, &
                                  descrC, dcsrValC, dcsrRowPtrC, dcsrColIndC, &
                                  info, &
                                  dbuffer), &
               'hipsparseScsrgemm2')

    ! Allocate host memory for C
    allocate(hcsrRowPtrC(m+1))
    allocate(hcsrColIndC(nnzC))
    allocate(hcsrValC(nnzC))

    ! Copy result back to host
    call check(hipMemcpy(c_loc(hcsrRowPtrC), dcsrRowPtrC, (m + 1) * elem_bytes, &
                         hipMemcpyDeviceToHost), 'hipMemcpy')
    call check(hipMemcpy(c_loc(hcsrColIndC), dcsrColIndC, nnzC * elem_bytes, &
                         hipMemcpyDeviceToHost), 'hipMemcpy')
    call check(hipMemcpy(c_loc(hcsrValC), dcsrValC, nnzC * elem_bytes, &
                         hipMemcpyDeviceToHost), 'hipMemcpy')

    ! Print result. The row offsets are zero-based, so entry range [p(i), p(i+1)) of
    ! row i maps to the one-based Fortran indices p(i)+1 .. p(i+1).
    write(*,*) 'Matrix C (result of alpha*A*B + beta*D):'
    do i = 1, m
        start_idx = hcsrRowPtrC(i) + 1
        end_idx = hcsrRowPtrC(i + 1)
        write(*,fmt='(A,I0,A)',advance='no') 'Row ', i-1, ':'
        do j = start_idx, end_idx
            write(*,fmt='(A,I0,A,F0.2,A)',advance='no') ' (', hcsrColIndC(j), ',', hcsrValC(j), ')'
        end do
        write(*,*)
    end do

    ! Clean up (best effort: return codes of the release calls are not inspected)
    deallocate(hcsrRowPtrC)
    deallocate(hcsrColIndC)
    deallocate(hcsrValC)

    stat = hipFree(dcsrRowPtrA)
    stat = hipFree(dcsrColIndA)
    stat = hipFree(dcsrValA)
    stat = hipFree(dcsrRowPtrB)
    stat = hipFree(dcsrColIndB)
    stat = hipFree(dcsrValB)
    stat = hipFree(dcsrRowPtrD)
    stat = hipFree(dcsrColIndD)
    stat = hipFree(dcsrValD)
    stat = hipFree(dcsrRowPtrC)
    stat = hipFree(dcsrColIndC)
    stat = hipFree(dcsrValC)
    stat = hipFree(dbuffer)

    stat = hipsparseDestroyCsrgemm2Info(info)
    stat = hipsparseDestroyMatDescr(descrA)
    stat = hipsparseDestroyMatDescr(descrB)
    stat = hipsparseDestroyMatDescr(descrC)
    stat = hipsparseDestroyMatDescr(descrD)
    stat = hipsparseDestroy(handle)

contains

    ! Stops the program with a message and stop code 1 when a HIP or hipSPARSE
    ! call returned a non-zero status.
    !   status - return code of the call
    !   what   - name of the call, used in the error message
    subroutine check(status, what)
        integer(c_int), intent(in) :: status
        character(len=*), intent(in) :: what

        if (status /= 0) then
            write(*,*) 'Error: ' // trim(what) // ' failed'
            stop 1
        end if
    end subroutine check

end program example_hipsparse_csrgemm2