/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_mrg31k3p.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_mrg31k3p.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_mrg31k3p.h Source File
API library
rocrand_mrg31k3p.h
1 // Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_MRG31K3P_H_
22 #define ROCRAND_MRG31K3P_H_
23 
24 #ifndef FQUALIFIERS
25  #define FQUALIFIERS __forceinline__ __device__
26 #endif // FQUALIFIERS_
27 
28 #include "rocrand/rocrand_common.h"
29 #include "rocrand/rocrand_mrg31k3p_precomputed.h"
30 
31 #define ROCRAND_MRG31K3P_M1 2147483647U // 2 ^ 31 - 1
32 #define ROCRAND_MRG31K3P_M2 2147462579U // 2 ^ 31 - 21069
33 #define ROCRAND_MRG31K3P_MASK12 511U // 2 ^ 9 - 1
34 #define ROCRAND_MRG31K3P_MASK13 16777215U // 2 ^ 24 - 1
35 #define ROCRAND_MRG31K3P_MASK21 65535U // 2 ^ 16 - 1
36 #define ROCRAND_MRG31K3P_NORM_DOUBLE (4.656612875245796923e-10) // 1 / ROCRAND_MRG31K3P_M1
37 #define ROCRAND_MRG31K3P_UINT32_NORM \
38  (2.000000001396983862) // UINT32_MAX / (ROCRAND_MRG31K3P_M1 - 1)
39 
48 #define ROCRAND_MRG31K3P_DEFAULT_SEED 12345ULL // end of group rocranddevice
50 
51 namespace rocrand_device
52 {
53 
54 class mrg31k3p_engine
55 {
56 public:
57  struct mrg31k3p_state
58  {
59  unsigned int x1[3];
60  unsigned int x2[3];
61 
62 #ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE
63  // The Box–Muller transform requires two inputs to convert uniformly
64  // distributed real values [0; 1] to normally distributed real values
65  // (with mean = 0, and stddev = 1). Often user wants only one
66  // normally distributed number, to save performance and random
67  // numbers the 2nd value is saved for future requests.
68  unsigned int boxmuller_float_state; // is there a float in boxmuller_float
69  unsigned int boxmuller_double_state; // is there a double in boxmuller_double
70  float boxmuller_float; // normally distributed float
71  double boxmuller_double; // normally distributed double
72 #endif
73  };
74 
75  FQUALIFIERS mrg31k3p_engine()
76  {
77  this->seed(ROCRAND_MRG31K3P_DEFAULT_SEED, 0, 0);
78  }
79 
88  FQUALIFIERS mrg31k3p_engine(const unsigned long long seed,
89  const unsigned long long subsequence,
90  const unsigned long long offset)
91  {
92  this->seed(seed, subsequence, offset);
93  }
94 
103  FQUALIFIERS void seed(unsigned long long seed_value,
104  const unsigned long long subsequence,
105  const unsigned long long offset)
106  {
107  if(seed_value == 0)
108  {
109  seed_value = ROCRAND_MRG31K3P_DEFAULT_SEED;
110  }
111  unsigned int x = static_cast<unsigned int>(seed_value ^ 0x55555555U);
112  unsigned int y = static_cast<unsigned int>((seed_value >> 32) ^ 0xAAAAAAAAU);
113  m_state.x1[0] = mod_mul_m1(x, seed_value);
114  m_state.x1[1] = mod_mul_m1(y, seed_value);
115  m_state.x1[2] = mod_mul_m1(x, seed_value);
116  m_state.x2[0] = mod_mul_m2(y, seed_value);
117  m_state.x2[1] = mod_mul_m2(x, seed_value);
118  m_state.x2[2] = mod_mul_m2(y, seed_value);
119  this->restart(subsequence, offset);
120  }
121 
123  FQUALIFIERS void discard(unsigned long long offset)
124  {
125  this->discard_impl(offset);
126  }
127 
130  FQUALIFIERS void discard_subsequence(unsigned long long subsequence)
131  {
132  this->discard_subsequence_impl(subsequence);
133  }
134 
137  FQUALIFIERS void discard_sequence(unsigned long long sequence)
138  {
139  this->discard_sequence_impl(sequence);
140  }
141 
142  FQUALIFIERS void restart(const unsigned long long subsequence, const unsigned long long offset)
143  {
144 #ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE
145  m_state.boxmuller_float_state = 0;
146  m_state.boxmuller_double_state = 0;
147 #endif
148  this->discard_subsequence_impl(subsequence);
149  this->discard_impl(offset);
150  }
151 
152  FQUALIFIERS unsigned int operator()()
153  {
154  return this->next();
155  }
156 
157  // Returned value is in range [1, ROCRAND_MRG31K3P_M1].
158  FQUALIFIERS unsigned int next()
159  {
160  // First component
161  unsigned int tmp
162  = (((m_state.x1[1] & ROCRAND_MRG31K3P_MASK12) << 22) + (m_state.x1[1] >> 9))
163  + (((m_state.x1[2] & ROCRAND_MRG31K3P_MASK13) << 7) + (m_state.x1[2] >> 24));
164  tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
165  tmp += m_state.x1[2];
166  tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
167  m_state.x1[2] = m_state.x1[1];
168  m_state.x1[1] = m_state.x1[0];
169  m_state.x1[0] = tmp;
170 
171  // Second component
172  tmp = (((m_state.x2[0] & ROCRAND_MRG31K3P_MASK21) << 15) + 21069 * (m_state.x2[0] >> 16));
173  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
174  tmp += ((m_state.x2[2] & ROCRAND_MRG31K3P_MASK21) << 15);
175  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
176  tmp += 21069 * (m_state.x2[2] >> 16);
177  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
178  tmp += m_state.x2[2];
179  tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
180  m_state.x2[2] = m_state.x2[1];
181  m_state.x2[1] = m_state.x2[0];
182  m_state.x2[0] = tmp;
183 
184  // Combination
185  return m_state.x1[0] - m_state.x2[0]
186  + (m_state.x1[0] <= m_state.x2[0] ? ROCRAND_MRG31K3P_M1 : 0);
187  }
188 
189 protected:
190  // Advances the internal state to skip \p offset numbers.
191  FQUALIFIERS void discard_impl(unsigned long long offset)
192  {
193  discard_state(offset);
194  }
195 
196  // Advances the internal state to skip \p subsequence subsequences.
197  FQUALIFIERS void discard_subsequence_impl(unsigned long long subsequence)
198  {
199  int i = 0;
200 
201  while(subsequence > 0)
202  {
203  if(subsequence & 1)
204  {
205 #if defined(__HIP_DEVICE_COMPILE__)
206  mod_mat_vec_m1(d_mrg31k3p_A1P72 + i, m_state.x1);
207  mod_mat_vec_m2(d_mrg31k3p_A2P72 + i, m_state.x2);
208 #else
209  mod_mat_vec_m1(h_mrg31k3p_A1P72 + i, m_state.x1);
210  mod_mat_vec_m2(h_mrg31k3p_A2P72 + i, m_state.x2);
211 #endif
212  }
213  subsequence >>= 1;
214  i += 9;
215  }
216  }
217 
218  // Advances the internal state to skip \p sequences.
219  FQUALIFIERS void discard_sequence_impl(unsigned long long sequence)
220  {
221  int i = 0;
222 
223  while(sequence > 0)
224  {
225  if(sequence & 1)
226  {
227 #if defined(__HIP_DEVICE_COMPILE__)
228  mod_mat_vec_m1(d_mrg31k3p_A1P134 + i, m_state.x1);
229  mod_mat_vec_m2(d_mrg31k3p_A2P134 + i, m_state.x2);
230 #else
231  mod_mat_vec_m1(h_mrg31k3p_A1P134 + i, m_state.x1);
232  mod_mat_vec_m2(h_mrg31k3p_A2P134 + i, m_state.x2);
233 #endif
234  }
235  sequence >>= 1;
236  i += 9;
237  }
238  }
239 
240  // Advances the internal state to skip \p offset numbers.
241  FQUALIFIERS void discard_state(unsigned long long offset)
242  {
243  int i = 0;
244 
245  while(offset > 0)
246  {
247  if(offset & 1)
248  {
249 #if defined(__HIP_DEVICE_COMPILE__)
250  mod_mat_vec_m1(d_mrg31k3p_A1 + i, m_state.x1);
251  mod_mat_vec_m2(d_mrg31k3p_A2 + i, m_state.x2);
252 #else
253  mod_mat_vec_m1(h_mrg31k3p_A1 + i, m_state.x1);
254  mod_mat_vec_m2(h_mrg31k3p_A2 + i, m_state.x2);
255 #endif
256  }
257  offset >>= 1;
258  i += 9;
259  }
260  }
261 
262  // Advances the internal state to the next state.
263  FQUALIFIERS void discard_state()
264  {
265  discard_state(1);
266  }
267 
268 private:
269  FQUALIFIERS static void mod_mat_vec_m1(const unsigned int* A, unsigned int* s)
270  {
271  unsigned long long x[3] = {s[0], s[1], s[2]};
272 
273  s[0] = mod_m1(mod_m1(A[0] * x[0]) + mod_m1(A[1] * x[1]) + mod_m1(A[2] * x[2]));
274 
275  s[1] = mod_m1(mod_m1(A[3] * x[0]) + mod_m1(A[4] * x[1]) + mod_m1(A[5] * x[2]));
276 
277  s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2]));
278  }
279 
280  FQUALIFIERS static void mod_mat_vec_m2(const unsigned int* A, unsigned int* s)
281  {
282  unsigned long long x[3] = {s[0], s[1], s[2]};
283 
284  s[0] = mod_m2(mod_m2(A[0] * x[0]) + mod_m2(A[1] * x[1]) + mod_m2(A[2] * x[2]));
285 
286  s[1] = mod_m2(mod_m2(A[3] * x[0]) + mod_m2(A[4] * x[1]) + mod_m2(A[5] * x[2]));
287 
288  s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2]));
289  }
290 
291  FQUALIFIERS static unsigned long long mod_mul_m1(unsigned int i, unsigned long long j)
292  {
293  return mod_m1(i * j);
294  }
295 
296  FQUALIFIERS static unsigned long long mod_m1(unsigned long long p)
297  {
298  return p % ROCRAND_MRG31K3P_M1;
299  }
300 
301  FQUALIFIERS static unsigned long long mod_mul_m2(unsigned int i, unsigned long long j)
302  {
303  return mod_m2(i * j);
304  }
305 
306  FQUALIFIERS static unsigned long long mod_m2(unsigned long long p)
307  {
308  return p % ROCRAND_MRG31K3P_M2;
309  }
310 
311 protected:
312  // State
313  mrg31k3p_state m_state;
314 
315 #ifndef ROCRAND_DETAIL_MRG31K3P_BM_NOT_IN_STATE
316  friend struct detail::engine_boxmuller_helper<mrg31k3p_engine>;
317 #endif
318 }; // mrg31k3p_engine class
319 
320 } // end namespace rocrand_device
321 
328 typedef rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p;
330 
342 FQUALIFIERS void rocrand_init(const unsigned long long seed,
343  const unsigned long long subsequence,
344  const unsigned long long offset,
345  rocrand_state_mrg31k3p* state)
346 {
347  *state = rocrand_state_mrg31k3p(seed, subsequence, offset);
348 }
349 
362 FQUALIFIERS unsigned int rocrand(rocrand_state_mrg31k3p* state)
363 {
364  // next() in [1, ROCRAND_MRG31K3P_M1]
365  return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM);
366 }
367 
376 FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_mrg31k3p* state)
377 {
378  return state->discard(offset);
379 }
380 
390 FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence,
391  rocrand_state_mrg31k3p* state)
392 {
393  return state->discard_subsequence(subsequence);
394 }
395 
405 FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p* state)
406 {
407  return state->discard_sequence(sequence);
408 }
409  // end of group rocranddevice
411 
412 #endif // ROCRAND_MRG31K3P_H_
FQUALIFIERS unsigned int rocrand(rocrand_state_mrg31k3p *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mrg31k3p.h:362
#define ROCRAND_MRG31K3P_DEFAULT_SEED
Default seed for MRG31K3P PRNG.
Definition: rocrand_mrg31k3p.h:48
FQUALIFIERS void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg31k3p *state)
Initializes MRG31K3P state.
Definition: rocrand_mrg31k3p.h:342
FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by offset elements.
Definition: rocrand_mrg31k3p.h:376
FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by sequence sequences.
Definition: rocrand_mrg31k3p.h:405
FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p *state)
Updates MRG31K3P state to skip ahead by subsequence subsequences.
Definition: rocrand_mrg31k3p.h:390
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31