/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_mrg31k3p.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_mrg31k3p.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_mrg31k3p.h Source File
rocrand_mrg31k3p.h
1 // Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_MRG31K3P_H_
22 #define ROCRAND_MRG31K3P_H_
23 
24 #include "rocrand/rocrand_common.h"
25 #include "rocrand/rocrand_mrg31k3p_precomputed.h"
26 
27 #include <hip/hip_runtime.h>
28 
// Moduli of the two recurrence components.
#define ROCRAND_MRG31K3P_M1 2147483647U // 2^31 - 1
#define ROCRAND_MRG31K3P_M2 2147462579U // 2^31 - 21069

// Bit masks used by the shift-and-add modular multiplications in next().
#define ROCRAND_MRG31K3P_MASK12 511U // 2^9 - 1
#define ROCRAND_MRG31K3P_MASK13 16777215U // 2^24 - 1
#define ROCRAND_MRG31K3P_MASK21 65535U // 2^16 - 1

// Scaling factors applied to raw generator output.
#define ROCRAND_MRG31K3P_NORM_DOUBLE (4.656612875245796923e-10) // 1 / ROCRAND_MRG31K3P_M1
#define ROCRAND_MRG31K3P_UINT32_NORM (2.000000001396983862) // UINT32_MAX / (ROCRAND_MRG31K3P_M1 - 1)

/// Default seed for MRG31K3P PRNG; substituted when a seed of zero is requested.
#define ROCRAND_MRG31K3P_DEFAULT_SEED 12345ULL
48 
49 namespace rocrand_device
50 {
51 
52 class mrg31k3p_engine
53 {
54 public:
55  struct mrg31k3p_state
56  {
57 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
58  // The Box–Muller transform requires two inputs to convert uniformly
59  // distributed real values [0; 1] to normally distributed real values
60  // (with mean = 0, and stddev = 1). Often user wants only one
61  // normally distributed number, to save performance and random
62  // numbers the 2nd value is saved for future requests.
63  double boxmuller_double; // normally distributed double
64  float boxmuller_float; // normally distributed float
65 #endif
66  unsigned int x1[3];
67  unsigned int x2[3];
68  };
69 
70  __forceinline__ __device__ __host__ mrg31k3p_engine()
71  {
72  this->seed(ROCRAND_MRG31K3P_DEFAULT_SEED, 0, 0);
73  }
74 
83  __forceinline__ __device__ __host__ mrg31k3p_engine(const unsigned long long seed,
84  const unsigned long long subsequence,
85  const unsigned long long offset)
86  {
87  this->seed(seed, subsequence, offset);
88  }
89 
98  __forceinline__ __device__ __host__ void seed(unsigned long long seed_value,
99  const unsigned long long subsequence,
100  const unsigned long long offset)
101  {
102  if(seed_value == 0)
103  {
104  seed_value = ROCRAND_MRG31K3P_DEFAULT_SEED;
105  }
106  unsigned int x = static_cast<unsigned int>(seed_value ^ 0x55555555U);
107  unsigned int y = static_cast<unsigned int>((seed_value >> 32) ^ 0xAAAAAAAAU);
108  m_state.x1[0] = mod_mul_m1(x, seed_value);
109  m_state.x1[1] = mod_mul_m1(y, seed_value);
110  m_state.x1[2] = mod_mul_m1(x, seed_value);
111  m_state.x2[0] = mod_mul_m2(y, seed_value);
112  m_state.x2[1] = mod_mul_m2(x, seed_value);
113  m_state.x2[2] = mod_mul_m2(y, seed_value);
114  this->restart(subsequence, offset);
115  }
116 
118  __forceinline__ __device__ __host__ void discard(unsigned long long offset)
119  {
120  this->discard_impl(offset);
121  }
122 
125  __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
126  {
127  this->discard_subsequence_impl(subsequence);
128  }
129 
132  __forceinline__ __device__ __host__ void discard_sequence(unsigned long long sequence)
133  {
134  this->discard_sequence_impl(sequence);
135  }
136 
137  __forceinline__ __device__ __host__ void restart(const unsigned long long subsequence,
138  const unsigned long long offset)
139  {
140 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
141  m_state.boxmuller_float = ROCRAND_NAN_FLOAT;
142  m_state.boxmuller_double = ROCRAND_NAN_DOUBLE;
143 #endif
144  this->discard_subsequence_impl(subsequence);
145  this->discard_impl(offset);
146  }
147 
148  __forceinline__ __device__ __host__ unsigned int operator()()
149  {
150  return this->next();
151  }
152 
153  // Returned value is in range [1, ROCRAND_MRG31K3P_M1].
154  __forceinline__ __device__ __host__ unsigned int next()
155  {
156  // First component
157  unsigned int tmp
158  = (((m_state.x1[1] & ROCRAND_MRG31K3P_MASK12) << 22) + (m_state.x1[1] >> 9))
159  + (((m_state.x1[2] & ROCRAND_MRG31K3P_MASK13) << 7) + (m_state.x1[2] >> 24));
160  tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
161  tmp += m_state.x1[2];
162  tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
163  m_state.x1[2] = m_state.x1[1];
164  m_state.x1[1] = m_state.x1[0];
165  m_state.x1[0] = tmp;
166 
167  // Second component
168  tmp = (((m_state.x2[0] & ROCRAND_MRG31K3P_MASK21) << 15) + 21069 * (m_state.x2[0] >> 16));
169  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
170  tmp += ((m_state.x2[2] & ROCRAND_MRG31K3P_MASK21) << 15);
171  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
172  tmp += 21069 * (m_state.x2[2] >> 16);
173  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
174  tmp += m_state.x2[2];
175  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
176  m_state.x2[2] = m_state.x2[1];
177  m_state.x2[1] = m_state.x2[0];
178  m_state.x2[0] = tmp;
179 
180  // Combination
181  return m_state.x1[0] - m_state.x2[0]
182  + (m_state.x1[0] <= m_state.x2[0] ? ROCRAND_MRG31K3P_M1 : 0);
183  }
184 
185 protected:
186  // Advances the internal state to skip \p offset numbers.
187  __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset)
188  {
189  discard_state(offset);
190  }
191 
192  // Advances the internal state to skip \p subsequence subsequences.
193  __forceinline__ __device__ __host__ void
194  discard_subsequence_impl(unsigned long long subsequence)
195  {
196  int i = 0;
197 
198  while(subsequence > 0)
199  {
200  if(subsequence & 1)
201  {
202 #if defined(__HIP_DEVICE_COMPILE__)
203  mod_mat_vec_m1(d_mrg31k3p_A1P72 + i, m_state.x1);
204  mod_mat_vec_m2(d_mrg31k3p_A2P72 + i, m_state.x2);
205 #else
206  mod_mat_vec_m1(h_mrg31k3p_A1P72 + i, m_state.x1);
207  mod_mat_vec_m2(h_mrg31k3p_A2P72 + i, m_state.x2);
208 #endif
209  }
210  subsequence >>= 1;
211  i += 9;
212  }
213  }
214 
215  // Advances the internal state to skip \p sequences.
216  __forceinline__ __device__ __host__ void discard_sequence_impl(unsigned long long sequence)
217  {
218  int i = 0;
219 
220  while(sequence > 0)
221  {
222  if(sequence & 1)
223  {
224 #if defined(__HIP_DEVICE_COMPILE__)
225  mod_mat_vec_m1(d_mrg31k3p_A1P134 + i, m_state.x1);
226  mod_mat_vec_m2(d_mrg31k3p_A2P134 + i, m_state.x2);
227 #else
228  mod_mat_vec_m1(h_mrg31k3p_A1P134 + i, m_state.x1);
229  mod_mat_vec_m2(h_mrg31k3p_A2P134 + i, m_state.x2);
230 #endif
231  }
232  sequence >>= 1;
233  i += 9;
234  }
235  }
236 
237  // Advances the internal state to skip \p offset numbers.
238  __forceinline__ __device__ __host__ void discard_state(unsigned long long offset)
239  {
240  int i = 0;
241 
242  while(offset > 0)
243  {
244  if(offset & 1)
245  {
246 #if defined(__HIP_DEVICE_COMPILE__)
247  mod_mat_vec_m1(d_mrg31k3p_A1 + i, m_state.x1);
248  mod_mat_vec_m2(d_mrg31k3p_A2 + i, m_state.x2);
249 #else
250  mod_mat_vec_m1(h_mrg31k3p_A1 + i, m_state.x1);
251  mod_mat_vec_m2(h_mrg31k3p_A2 + i, m_state.x2);
252 #endif
253  }
254  offset >>= 1;
255  i += 9;
256  }
257  }
258 
259  // Advances the internal state to the next state.
260  __forceinline__ __device__ __host__ void discard_state()
261  {
262  discard_state(1);
263  }
264 
265 private:
266  __forceinline__ __device__ __host__ static void mod_mat_vec_m1(const unsigned int* A,
267  unsigned int* s)
268  {
269  unsigned long long x[3] = {s[0], s[1], s[2]};
270 
271  s[0] = mod_m1(mod_m1(A[0] * x[0]) + mod_m1(A[1] * x[1]) + mod_m1(A[2] * x[2]));
272 
273  s[1] = mod_m1(mod_m1(A[3] * x[0]) + mod_m1(A[4] * x[1]) + mod_m1(A[5] * x[2]));
274 
275  s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2]));
276  }
277 
278  __forceinline__ __device__ __host__ static void mod_mat_vec_m2(const unsigned int* A,
279  unsigned int* s)
280  {
281  unsigned long long x[3] = {s[0], s[1], s[2]};
282 
283  s[0] = mod_m2(mod_m2(A[0] * x[0]) + mod_m2(A[1] * x[1]) + mod_m2(A[2] * x[2]));
284 
285  s[1] = mod_m2(mod_m2(A[3] * x[0]) + mod_m2(A[4] * x[1]) + mod_m2(A[5] * x[2]));
286 
287  s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2]));
288  }
289 
290  __forceinline__ __device__ __host__ static unsigned long long mod_mul_m1(unsigned int i,
291  unsigned long long j)
292  {
293  return mod_m1(i * j);
294  }
295 
296  __forceinline__ __device__ __host__ static unsigned long long mod_m1(unsigned long long p)
297  {
298  return p % ROCRAND_MRG31K3P_M1;
299  }
300 
301  __forceinline__ __device__ __host__ static unsigned long long mod_mul_m2(unsigned int i,
302  unsigned long long j)
303  {
304  return mod_m2(i * j);
305  }
306 
307  __forceinline__ __device__ __host__ static unsigned long long mod_m2(unsigned long long p)
308  {
309  return p % ROCRAND_MRG31K3P_M2;
310  }
311 
312 protected:
313  // State
314  mrg31k3p_state m_state;
315 
316 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
317  friend struct detail::engine_boxmuller_helper<mrg31k3p_engine>;
318 #endif
319 }; // mrg31k3p_engine class
320 
321 } // end namespace rocrand_device
322 
329 typedef rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p;
331 
343 __forceinline__ __device__ __host__
344 void rocrand_init(const unsigned long long seed,
345  const unsigned long long subsequence,
346  const unsigned long long offset,
347  rocrand_state_mrg31k3p* state)
348 {
349  *state = rocrand_state_mrg31k3p(seed, subsequence, offset);
350 }
351 
364 __forceinline__ __device__ __host__
365 unsigned int rocrand(rocrand_state_mrg31k3p* state)
366 {
367  // next() in [1, ROCRAND_MRG31K3P_M1]
368  return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM);
369 }
370 
379 __forceinline__ __device__ __host__
380 void skipahead(unsigned long long offset, rocrand_state_mrg31k3p* state)
381 {
382  return state->discard(offset);
383 }
384 
394 __forceinline__ __device__ __host__
395 void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p* state)
396 {
397  return state->discard_subsequence(subsequence);
398 }
399 
409 __forceinline__ __device__ __host__
410 void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p* state)
411 {
412  return state->discard_sequence(sequence);
413 }
414  // end of group rocranddevice
416 
417 #endif // ROCRAND_MRG31K3P_H_
#define ROCRAND_MRG31K3P_DEFAULT_SEED
Default seed for MRG31K3P PRNG.
Definition: rocrand_mrg31k3p.h:46
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mrg31k3p.h:365
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by `subsequence` subsequences.
Definition: rocrand_mrg31k3p.h:395
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by `offset` elements.
Definition: rocrand_mrg31k3p.h:380
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by `sequence` sequences.
Definition: rocrand_mrg31k3p.h:410
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg31k3p *state)
Initializes MRG31K3P state.
Definition: rocrand_mrg31k3p.h:344