/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_xorwow.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_xorwow.h Source File

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-6.1.2/library/include/rocrand/rocrand_xorwow.h Source File
API library
rocrand_xorwow.h
1 // Copyright (c) 2017-2022 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_XORWOW_H_
22 #define ROCRAND_XORWOW_H_
23 
24 #ifndef FQUALIFIERS
25 #define FQUALIFIERS __forceinline__ __device__
26 #endif // FQUALIFIERS
27 
28 #include "rocrand/rocrand_common.h"
29 #include "rocrand/rocrand_xorwow_precomputed.h"
30 
39  #define ROCRAND_XORWOW_DEFAULT_SEED 0ULL // Default seed for XORWOW PRNG.
41 
42 namespace rocrand_device {
43 namespace detail {
44 
46 void copy_vec(unsigned int * dst, const unsigned int * src)
47 {
48  for (int i = 0; i < XORWOW_N; i++)
49  {
50  dst[i] = src[i];
51  }
52 }
53 
55 void mul_mat_vec_inplace(const unsigned int * m, unsigned int * v)
56 {
57  unsigned int r[XORWOW_N] = { 0 };
58  for (int ij = 0; ij < XORWOW_N * XORWOW_M; ij++)
59  {
60  const int i = ij / XORWOW_M;
61  const int j = ij % XORWOW_M;
62  const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
63  for (int k = 0; k < XORWOW_N; k++)
64  {
65  r[k] ^= b & m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k];
66  }
67  }
68  copy_vec(v, r);
69 }
70 
71 } // end detail namespace
72 
73 class xorwow_engine
74 {
75 public:
76  struct xorwow_state
77  {
78  // Weyl sequence value
79  unsigned int d;
80 
81  #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE
82  // The Box–Muller transform requires two inputs to convert uniformly
83  // distributed real values [0; 1] to normally distributed real values
84  // (with mean = 0, and stddev = 1). Often user wants only one
85  // normally distributed number, to save performance and random
86  // numbers the 2nd value is saved for future requests.
87  unsigned int boxmuller_float_state; // is there a float in boxmuller_float
88  unsigned int boxmuller_double_state; // is there a double in boxmuller_double
89  float boxmuller_float; // normally distributed float
90  double boxmuller_double; // normally distributed double
91  #endif
92 
93  // Xorshift values (160 bits)
94  unsigned int x[5];
95  };
96 
98  xorwow_engine() : xorwow_engine(ROCRAND_XORWOW_DEFAULT_SEED, 0, 0) { }
99 
106  xorwow_engine(const unsigned long long seed,
107  const unsigned long long subsequence,
108  const unsigned long long offset)
109  {
110  m_state.x[0] = 123456789U;
111  m_state.x[1] = 362436069U;
112  m_state.x[2] = 521288629U;
113  m_state.x[3] = 88675123U;
114  m_state.x[4] = 5783321U;
115 
116  m_state.d = 6615241U;
117 
118  // Constants are arbitrary prime numbers
119  const unsigned int s0 = static_cast<unsigned int>(seed) ^ 0x2c7f967fU;
120  const unsigned int s1 = static_cast<unsigned int>(seed >> 32) ^ 0xa03697cbU;
121  const unsigned int t0 = 1228688033U * s0;
122  const unsigned int t1 = 2073658381U * s1;
123  m_state.x[0] += t0;
124  m_state.x[1] ^= t0;
125  m_state.x[2] += t1;
126  m_state.x[3] ^= t1;
127  m_state.x[4] += t0;
128  m_state.d += t1 + t0;
129 
130  discard_subsequence(subsequence);
131  discard(offset);
132 
133  #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE
134  m_state.boxmuller_float_state = 0;
135  m_state.boxmuller_double_state = 0;
136  #endif
137  }
138 
141  void discard(unsigned long long offset)
142  {
143  #ifdef __HIP_DEVICE_COMPILE__
144  jump(offset, d_xorwow_jump_matrices);
145  #else
146  jump(offset, h_xorwow_jump_matrices);
147  #endif
148 
149  // Apply n steps to Weyl sequence value as well
150  m_state.d += static_cast<unsigned int>(offset) * 362437;
151  }
152 
156  void discard_subsequence(unsigned long long subsequence)
157  {
158  // Discard n * 2^67 samples
159  #ifdef __HIP_DEVICE_COMPILE__
160  jump(subsequence, d_xorwow_sequence_jump_matrices);
161  #else
162  jump(subsequence, h_xorwow_sequence_jump_matrices);
163  #endif
164 
165  // d has the same value because 2^67 is divisible by 2^32 (d is 32-bit)
166  }
167 
169  unsigned int operator()()
170  {
171  return next();
172  }
173 
175  unsigned int next()
176  {
177  const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2);
178  m_state.x[0] = m_state.x[1];
179  m_state.x[1] = m_state.x[2];
180  m_state.x[2] = m_state.x[3];
181  m_state.x[3] = m_state.x[4];
182  m_state.x[4] = (m_state.x[4] ^ (m_state.x[4] << 4)) ^ (t ^ (t << 1));
183 
184  m_state.d += 362437;
185 
186  return m_state.d + m_state.x[4];
187  }
188 
189 protected:
190 
192  void jump(unsigned long long v,
193  const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE])
194  {
195  // x~(n + v) = (A^v mod m)x~n mod m
196  // The matrix (A^v mod m) can be precomputed for selected values of v.
197  //
198  // For XORWOW_JUMP_LOG2 = 2
199  // xorwow_jump_matrices contains precomputed matrices:
200  // A^1, A^4, A^16...
201  //
202  // For XORWOW_JUMP_LOG2 = 2 and XORWOW_SEQUENCE_JUMP_LOG2 = 67
203  // xorwow_sequence_jump_matrices contains precomputed matrices:
204  // A^(1 * 2^67), A^(4 * 2^67), A^(16 * 2^67)...
205  //
206  // Intermediate powers can be calculated as multiplication of the powers above.
207 
208  unsigned int mi = 0;
209  while (v > 0)
210  {
211  const unsigned int is = static_cast<unsigned int>(v) & ((1 << XORWOW_JUMP_LOG2) - 1);
212  for (unsigned int i = 0; i < is; i++)
213  {
214  detail::mul_mat_vec_inplace(jump_matrices[mi], m_state.x);
215  }
216  mi++;
217  v >>= XORWOW_JUMP_LOG2;
218  }
219  }
220 
221 protected:
222  // State
223  xorwow_state m_state;
224 
225  #ifndef ROCRAND_DETAIL_XORWOW_BM_NOT_IN_STATE
226  friend struct detail::engine_boxmuller_helper<xorwow_engine>;
227  #endif
228 
229 }; // xorwow_engine class
230 
231 } // end namespace rocrand_device
232 
239 typedef rocrand_device::xorwow_engine rocrand_state_xorwow;
241 
254 void rocrand_init(const unsigned long long seed,
255  const unsigned long long subsequence,
256  const unsigned long long offset,
257  rocrand_state_xorwow * state)
258 {
259  *state = rocrand_state_xorwow(seed, subsequence, offset);
260 }
261 
275 unsigned int rocrand(rocrand_state_xorwow * state)
276 {
277  return state->next();
278 }
279 
289 void skipahead(unsigned long long offset, rocrand_state_xorwow * state)
290 {
291  return state->discard(offset);
292 }
293 
304 void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow * state)
305 {
306  return state->discard_subsequence(subsequence);
307 }
308 
319  void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow * state)
320  {
321  return state->discard_subsequence(sequence);
322  }
323 
324 #endif // ROCRAND_XORWOW_H_
325  // end of group rocranddevice
FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow *state)
Updates XORWOW state to skip ahead by subsequence subsequences.
Definition: rocrand_xorwow.h:304
FQUALIFIERS unsigned int rocrand(rocrand_state_xorwow *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_xorwow.h:275
FQUALIFIERS void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_xorwow *state)
Initialize XORWOW state.
Definition: rocrand_xorwow.h:254
FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_xorwow *state)
Updates XORWOW state to skip ahead by offset elements.
Definition: rocrand_xorwow.h:289
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31
#define ROCRAND_XORWOW_DEFAULT_SEED
Default seed for XORWOW PRNG.
Definition: rocrand_xorwow.h:39
FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow *state)
Updates XORWOW state to skip ahead by sequence sequences.
Definition: rocrand_xorwow.h:319