/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_mrg31k3p.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_mrg31k3p.h Source File

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_mrg31k3p.h Source File
API library
rocrand_mrg31k3p.h
1 // Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_MRG31K3P_H_
22 #define ROCRAND_MRG31K3P_H_
23 
24 #include "rocrand/rocrand_common.h"
25 #include "rocrand/rocrand_mrg31k3p_precomputed.h"
26 
// Moduli of the two component recurrences.
#define ROCRAND_MRG31K3P_M1 2147483647U // 2^31 - 1
#define ROCRAND_MRG31K3P_M2 2147462579U // 2^31 - 21069

// Bit masks used by the shift-and-add reductions in mrg31k3p_engine::next().
#define ROCRAND_MRG31K3P_MASK12 0x1FFU    // 2^9 - 1   (511)
#define ROCRAND_MRG31K3P_MASK13 0xFFFFFFU // 2^24 - 1  (16777215)
#define ROCRAND_MRG31K3P_MASK21 0xFFFFU   // 2^16 - 1  (65535)

// Scaling factors applied to raw generator output.
// 1 / ROCRAND_MRG31K3P_M1
#define ROCRAND_MRG31K3P_NORM_DOUBLE (4.656612875245796923e-10)
// UINT32_MAX / (ROCRAND_MRG31K3P_M1 - 1)
#define ROCRAND_MRG31K3P_UINT32_NORM (2.000000001396983862)

/// Default seed for MRG31K3P PRNG.
#define ROCRAND_MRG31K3P_DEFAULT_SEED 12345ULL
// end of group rocranddevice
46 
47 namespace rocrand_device
48 {
49 
50 class mrg31k3p_engine
51 {
52 public:
53  struct mrg31k3p_state
54  {
55  unsigned int x1[3];
56  unsigned int x2[3];
57 
58 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
59  // The Box–Muller transform requires two inputs to convert uniformly
60  // distributed real values [0; 1] to normally distributed real values
61  // (with mean = 0, and stddev = 1). Often user wants only one
62  // normally distributed number, to save performance and random
63  // numbers the 2nd value is saved for future requests.
64  unsigned int boxmuller_float_state; // is there a float in boxmuller_float
65  unsigned int boxmuller_double_state; // is there a double in boxmuller_double
66  float boxmuller_float; // normally distributed float
67  double boxmuller_double; // normally distributed double
68 #endif
69  };
70 
71  __forceinline__ __device__ __host__ mrg31k3p_engine()
72  {
73  this->seed(ROCRAND_MRG31K3P_DEFAULT_SEED, 0, 0);
74  }
75 
84  __forceinline__ __device__ __host__ mrg31k3p_engine(const unsigned long long seed,
85  const unsigned long long subsequence,
86  const unsigned long long offset)
87  {
88  this->seed(seed, subsequence, offset);
89  }
90 
99  __forceinline__ __device__ __host__ void seed(unsigned long long seed_value,
100  const unsigned long long subsequence,
101  const unsigned long long offset)
102  {
103  if(seed_value == 0)
104  {
105  seed_value = ROCRAND_MRG31K3P_DEFAULT_SEED;
106  }
107  unsigned int x = static_cast<unsigned int>(seed_value ^ 0x55555555U);
108  unsigned int y = static_cast<unsigned int>((seed_value >> 32) ^ 0xAAAAAAAAU);
109  m_state.x1[0] = mod_mul_m1(x, seed_value);
110  m_state.x1[1] = mod_mul_m1(y, seed_value);
111  m_state.x1[2] = mod_mul_m1(x, seed_value);
112  m_state.x2[0] = mod_mul_m2(y, seed_value);
113  m_state.x2[1] = mod_mul_m2(x, seed_value);
114  m_state.x2[2] = mod_mul_m2(y, seed_value);
115  this->restart(subsequence, offset);
116  }
117 
119  __forceinline__ __device__ __host__ void discard(unsigned long long offset)
120  {
121  this->discard_impl(offset);
122  }
123 
126  __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
127  {
128  this->discard_subsequence_impl(subsequence);
129  }
130 
133  __forceinline__ __device__ __host__ void discard_sequence(unsigned long long sequence)
134  {
135  this->discard_sequence_impl(sequence);
136  }
137 
138  __forceinline__ __device__ __host__ void restart(const unsigned long long subsequence,
139  const unsigned long long offset)
140  {
141 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
142  m_state.boxmuller_float_state = 0;
143  m_state.boxmuller_double_state = 0;
144 #endif
145  this->discard_subsequence_impl(subsequence);
146  this->discard_impl(offset);
147  }
148 
149  __forceinline__ __device__ __host__ unsigned int operator()()
150  {
151  return this->next();
152  }
153 
154  // Returned value is in range [1, ROCRAND_MRG31K3P_M1].
155  __forceinline__ __device__ __host__ unsigned int next()
156  {
157  // First component
158  unsigned int tmp
159  = (((m_state.x1[1] & ROCRAND_MRG31K3P_MASK12) << 22) + (m_state.x1[1] >> 9))
160  + (((m_state.x1[2] & ROCRAND_MRG31K3P_MASK13) << 7) + (m_state.x1[2] >> 24));
161  tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
162  tmp += m_state.x1[2];
163  tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
164  m_state.x1[2] = m_state.x1[1];
165  m_state.x1[1] = m_state.x1[0];
166  m_state.x1[0] = tmp;
167 
168  // Second component
169  tmp = (((m_state.x2[0] & ROCRAND_MRG31K3P_MASK21) << 15) + 21069 * (m_state.x2[0] >> 16));
170  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
171  tmp += ((m_state.x2[2] & ROCRAND_MRG31K3P_MASK21) << 15);
172  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
173  tmp += 21069 * (m_state.x2[2] >> 16);
174  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
175  tmp += m_state.x2[2];
176  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
177  m_state.x2[2] = m_state.x2[1];
178  m_state.x2[1] = m_state.x2[0];
179  m_state.x2[0] = tmp;
180 
181  // Combination
182  return m_state.x1[0] - m_state.x2[0]
183  + (m_state.x1[0] <= m_state.x2[0] ? ROCRAND_MRG31K3P_M1 : 0);
184  }
185 
186 protected:
187  // Advances the internal state to skip \p offset numbers.
188  __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset)
189  {
190  discard_state(offset);
191  }
192 
193  // Advances the internal state to skip \p subsequence subsequences.
194  __forceinline__ __device__ __host__ void
195  discard_subsequence_impl(unsigned long long subsequence)
196  {
197  int i = 0;
198 
199  while(subsequence > 0)
200  {
201  if(subsequence & 1)
202  {
203 #if defined(__HIP_DEVICE_COMPILE__)
204  mod_mat_vec_m1(d_mrg31k3p_A1P72 + i, m_state.x1);
205  mod_mat_vec_m2(d_mrg31k3p_A2P72 + i, m_state.x2);
206 #else
207  mod_mat_vec_m1(h_mrg31k3p_A1P72 + i, m_state.x1);
208  mod_mat_vec_m2(h_mrg31k3p_A2P72 + i, m_state.x2);
209 #endif
210  }
211  subsequence >>= 1;
212  i += 9;
213  }
214  }
215 
216  // Advances the internal state to skip \p sequences.
217  __forceinline__ __device__ __host__ void discard_sequence_impl(unsigned long long sequence)
218  {
219  int i = 0;
220 
221  while(sequence > 0)
222  {
223  if(sequence & 1)
224  {
225 #if defined(__HIP_DEVICE_COMPILE__)
226  mod_mat_vec_m1(d_mrg31k3p_A1P134 + i, m_state.x1);
227  mod_mat_vec_m2(d_mrg31k3p_A2P134 + i, m_state.x2);
228 #else
229  mod_mat_vec_m1(h_mrg31k3p_A1P134 + i, m_state.x1);
230  mod_mat_vec_m2(h_mrg31k3p_A2P134 + i, m_state.x2);
231 #endif
232  }
233  sequence >>= 1;
234  i += 9;
235  }
236  }
237 
238  // Advances the internal state to skip \p offset numbers.
239  __forceinline__ __device__ __host__ void discard_state(unsigned long long offset)
240  {
241  int i = 0;
242 
243  while(offset > 0)
244  {
245  if(offset & 1)
246  {
247 #if defined(__HIP_DEVICE_COMPILE__)
248  mod_mat_vec_m1(d_mrg31k3p_A1 + i, m_state.x1);
249  mod_mat_vec_m2(d_mrg31k3p_A2 + i, m_state.x2);
250 #else
251  mod_mat_vec_m1(h_mrg31k3p_A1 + i, m_state.x1);
252  mod_mat_vec_m2(h_mrg31k3p_A2 + i, m_state.x2);
253 #endif
254  }
255  offset >>= 1;
256  i += 9;
257  }
258  }
259 
260  // Advances the internal state to the next state.
261  __forceinline__ __device__ __host__ void discard_state()
262  {
263  discard_state(1);
264  }
265 
266 private:
267  __forceinline__ __device__ __host__ static void mod_mat_vec_m1(const unsigned int* A,
268  unsigned int* s)
269  {
270  unsigned long long x[3] = {s[0], s[1], s[2]};
271 
272  s[0] = mod_m1(mod_m1(A[0] * x[0]) + mod_m1(A[1] * x[1]) + mod_m1(A[2] * x[2]));
273 
274  s[1] = mod_m1(mod_m1(A[3] * x[0]) + mod_m1(A[4] * x[1]) + mod_m1(A[5] * x[2]));
275 
276  s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2]));
277  }
278 
279  __forceinline__ __device__ __host__ static void mod_mat_vec_m2(const unsigned int* A,
280  unsigned int* s)
281  {
282  unsigned long long x[3] = {s[0], s[1], s[2]};
283 
284  s[0] = mod_m2(mod_m2(A[0] * x[0]) + mod_m2(A[1] * x[1]) + mod_m2(A[2] * x[2]));
285 
286  s[1] = mod_m2(mod_m2(A[3] * x[0]) + mod_m2(A[4] * x[1]) + mod_m2(A[5] * x[2]));
287 
288  s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2]));
289  }
290 
291  __forceinline__ __device__ __host__ static unsigned long long mod_mul_m1(unsigned int i,
292  unsigned long long j)
293  {
294  return mod_m1(i * j);
295  }
296 
297  __forceinline__ __device__ __host__ static unsigned long long mod_m1(unsigned long long p)
298  {
299  return p % ROCRAND_MRG31K3P_M1;
300  }
301 
302  __forceinline__ __device__ __host__ static unsigned long long mod_mul_m2(unsigned int i,
303  unsigned long long j)
304  {
305  return mod_m2(i * j);
306  }
307 
308  __forceinline__ __device__ __host__ static unsigned long long mod_m2(unsigned long long p)
309  {
310  return p % ROCRAND_MRG31K3P_M2;
311  }
312 
313 protected:
314  // State
315  mrg31k3p_state m_state;
316 
317 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
318  friend struct detail::engine_boxmuller_helper<mrg31k3p_engine>;
319 #endif
320 }; // mrg31k3p_engine class
321 
322 } // end namespace rocrand_device
323 
330 typedef rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p;
332 
344 __forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed,
345  const unsigned long long subsequence,
346  const unsigned long long offset,
347  rocrand_state_mrg31k3p* state)
348 {
349  *state = rocrand_state_mrg31k3p(seed, subsequence, offset);
350 }
351 
364 __forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p* state)
365 {
366  // next() in [1, ROCRAND_MRG31K3P_M1]
367  return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM);
368 }
369 
378 __forceinline__ __device__ __host__ void skipahead(unsigned long long offset,
379  rocrand_state_mrg31k3p* state)
380 {
381  return state->discard(offset);
382 }
383 
393 __forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence,
394  rocrand_state_mrg31k3p* state)
395 {
396  return state->discard_subsequence(subsequence);
397 }
398 
408 __forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence,
409  rocrand_state_mrg31k3p* state)
410 {
411  return state->discard_sequence(sequence);
412 }
413  // end of group rocranddevice
415 
416 #endif // ROCRAND_MRG31K3P_H_
#define ROCRAND_MRG31K3P_DEFAULT_SEED
Default seed for MRG31K3P PRNG.
Definition: rocrand_mrg31k3p.h:44
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mrg31k3p.h:364
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by subsequence subsequences.
Definition: rocrand_mrg31k3p.h:393
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by offset elements.
Definition: rocrand_mrg31k3p.h:378
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by sequence sequences.
Definition: rocrand_mrg31k3p.h:408
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg31k3p *state)
Initializes MRG31K3P state.
Definition: rocrand_mrg31k3p.h:344