/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_philox4x32_10.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_philox4x32_10.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_philox4x32_10.h Source File
API library
rocrand_philox4x32_10.h
1 // Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22 Copyright 2010-2011, D. E. Shaw Research.
23 All rights reserved.
24 
25 Redistribution and use in source and binary forms, with or without
26 modification, are permitted provided that the following conditions are
27 met:
28 
29 * Redistributions of source code must retain the above copyright
30  notice, this list of conditions, and the following disclaimer.
31 
32 * Redistributions in binary form must reproduce the above copyright
33  notice, this list of conditions, and the following disclaimer in the
34  documentation and/or other materials provided with the distribution.
35 
36 * Neither the name of D. E. Shaw Research nor the names of its
37  contributors may be used to endorse or promote products derived from
38  this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 */
52 
53 #ifndef ROCRAND_PHILOX4X32_10_H_
54 #define ROCRAND_PHILOX4X32_10_H_
55 
56 #ifndef FQUALIFIERS
57 #define FQUALIFIERS __forceinline__ __device__
58 #endif // FQUALIFIERS
59 
60 #include "rocrand/rocrand_common.h"
61 
62 // Constants from Random123
63 // See https://www.deshawresearch.com/resources_random123.html
64 #define ROCRAND_PHILOX_M4x32_0 0xD2511F53U
65 #define ROCRAND_PHILOX_M4x32_1 0xCD9E8D57U
66 #define ROCRAND_PHILOX_W32_0 0x9E3779B9U
67 #define ROCRAND_PHILOX_W32_1 0xBB67AE85U
68 
77 #define ROCRAND_PHILOX4x32_DEFAULT_SEED 0xdeadbeefdeadbeefULL // Default seed for PHILOX4x32 PRNG.
79 
80 namespace rocrand_device {
81 namespace detail {
82 
84 unsigned int mulhilo32(unsigned int x, unsigned int y, unsigned int& z)
85 {
86  unsigned long long xy = mad_u64_u32(x, y, 0);
87  z = static_cast<unsigned int>(xy >> 32);
88  return static_cast<unsigned int>(xy);
89 }
90 
91 } // end detail namespace
92 
93 class philox4x32_10_engine
94 {
95 public:
96  struct philox4x32_10_state
97  {
98  uint4 counter;
99  uint4 result;
100  uint2 key;
101  unsigned int substate;
102 
103  #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
104  // The Box–Muller transform requires two inputs to convert uniformly
105  // distributed real values [0; 1] to normally distributed real values
106  // (with mean = 0, and stddev = 1). Often user wants only one
107  // normally distributed number, to save performance and random
108  // numbers the 2nd value is saved for future requests.
109  unsigned int boxmuller_float_state; // is there a float in boxmuller_float
110  unsigned int boxmuller_double_state; // is there a double in boxmuller_double
111  float boxmuller_float; // normally distributed float
112  double boxmuller_double; // normally distributed double
113  #endif
114  };
115 
117  philox4x32_10_engine()
118  {
119  this->seed(ROCRAND_PHILOX4x32_DEFAULT_SEED, 0, 0);
120  }
121 
128  philox4x32_10_engine(const unsigned long long seed,
129  const unsigned long long subsequence,
130  const unsigned long long offset)
131  {
132  this->seed(seed, subsequence, offset);
133  }
134 
141  void seed(unsigned long long seed_value,
142  const unsigned long long subsequence,
143  const unsigned long long offset)
144  {
145  m_state.key.x = static_cast<unsigned int>(seed_value);
146  m_state.key.y = static_cast<unsigned int>(seed_value >> 32);
147  this->restart(subsequence, offset);
148  }
149 
152  void discard(unsigned long long offset)
153  {
154  this->discard_impl(offset);
155  this->m_state.result = this->ten_rounds(m_state.counter, m_state.key);
156  }
157 
163  void discard_subsequence(unsigned long long subsequence)
164  {
165  this->discard_subsequence_impl(subsequence);
166  m_state.result = this->ten_rounds(m_state.counter, m_state.key);
167  }
168 
170  void restart(const unsigned long long subsequence,
171  const unsigned long long offset)
172  {
173  m_state.counter = {0, 0, 0, 0};
174  m_state.result = {0, 0, 0, 0};
175  m_state.substate = 0;
176  #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
177  m_state.boxmuller_float_state = 0;
178  m_state.boxmuller_double_state = 0;
179  #endif
180  this->discard_subsequence_impl(subsequence);
181  this->discard_impl(offset);
182  m_state.result = this->ten_rounds(m_state.counter, m_state.key);
183  }
184 
186  unsigned int operator()()
187  {
188  return this->next();
189  }
190 
192  unsigned int next()
193  {
194  #if defined(__HIP_PLATFORM_AMD__)
195  unsigned int ret = m_state.result.data[m_state.substate];
196  #else
197  unsigned int ret = (&m_state.result.x)[m_state.substate];
198  #endif
199  m_state.substate++;
200  if(m_state.substate == 4)
201  {
202  m_state.substate = 0;
203  this->discard_state();
204  m_state.result = this->ten_rounds(m_state.counter, m_state.key);
205  }
206  return ret;
207  }
208 
210  uint4 next4()
211  {
212  uint4 ret = m_state.result;
213  this->discard_state();
214  m_state.result = this->ten_rounds(m_state.counter, m_state.key);
215  return this->interleave(ret, m_state.result);
216  }
217 
218 protected:
219  // Advances the internal state to skip \p offset numbers.
220  // DOES NOT CALCULATE NEW 4 UINTs (m_state.result)
222  void discard_impl(unsigned long long offset)
223  {
224  // Adjust offset for subset
225  m_state.substate += offset & 3;
226  unsigned long long counter_offset = offset / 4;
227  counter_offset += m_state.substate < 4 ? 0 : 1;
228  m_state.substate += m_state.substate < 4 ? 0 : -4;
229  // Discard states
230  this->discard_state(counter_offset);
231  }
232 
233  // DOES NOT CALCULATE NEW 4 UINTs (m_state.result)
235  void discard_subsequence_impl(unsigned long long subsequence)
236  {
237  unsigned int lo = static_cast<unsigned int>(subsequence);
238  unsigned int hi = static_cast<unsigned int>(subsequence >> 32);
239 
240  unsigned int temp = m_state.counter.z;
241  m_state.counter.z += lo;
242  m_state.counter.w += hi + (m_state.counter.z < temp ? 1 : 0);
243  }
244 
245  // Advances the internal state by offset times.
246  // DOES NOT CALCULATE NEW 4 UINTs (m_state.result)
248  void discard_state(unsigned long long offset)
249  {
250  unsigned int lo = static_cast<unsigned int>(offset);
251  unsigned int hi = static_cast<unsigned int>(offset >> 32);
252 
253  uint4 temp = m_state.counter;
254  m_state.counter.x += lo;
255  m_state.counter.y += hi + (m_state.counter.x < temp.x ? 1 : 0);
256  m_state.counter.z += (m_state.counter.y < temp.y ? 1 : 0);
257  m_state.counter.w += (m_state.counter.z < temp.z ? 1 : 0);
258  }
259 
260  // Advances the internal state to the next state
261  // DOES NOT CALCULATE NEW 4 UINTs (m_state.result)
263  void discard_state()
264  {
265  m_state.counter = this->bump_counter(m_state.counter);
266  }
267 
269  static uint4 bump_counter(uint4 counter)
270  {
271  counter.x++;
272  unsigned int add = counter.x == 0 ? 1 : 0;
273  counter.y += add; add = counter.y == 0 ? add : 0;
274  counter.z += add; add = counter.z == 0 ? add : 0;
275  counter.w += add;
276  return counter;
277  }
278 
280  uint4 interleave(const uint4 prev, const uint4 next) const
281  {
282  switch(m_state.substate)
283  {
284  case 0:
285  return prev;
286  case 1:
287  return uint4{ prev.y, prev.z, prev.w, next.x };
288  case 2:
289  return uint4{ prev.z, prev.w, next.x, next.y };
290  case 3:
291  return uint4{ prev.w, next.x, next.y, next.z };
292  }
293  __builtin_unreachable();
294  }
295 
296  // 10 Philox4x32 rounds
298  uint4 ten_rounds(uint4 counter, uint2 key)
299  {
300  counter = this->single_round(counter, key); key = this->bumpkey(key); // 1
301  counter = this->single_round(counter, key); key = this->bumpkey(key); // 2
302  counter = this->single_round(counter, key); key = this->bumpkey(key); // 3
303  counter = this->single_round(counter, key); key = this->bumpkey(key); // 4
304  counter = this->single_round(counter, key); key = this->bumpkey(key); // 5
305  counter = this->single_round(counter, key); key = this->bumpkey(key); // 6
306  counter = this->single_round(counter, key); key = this->bumpkey(key); // 7
307  counter = this->single_round(counter, key); key = this->bumpkey(key); // 8
308  counter = this->single_round(counter, key); key = this->bumpkey(key); // 9
309  return this->single_round(counter, key); // 10
310  }
311 
312 private:
313  // Single Philox4x32 round
315  static uint4 single_round(uint4 counter, uint2 key)
316  {
317  // Source: Random123
318  unsigned int hi0;
319  unsigned int hi1;
320  unsigned int lo0 = detail::mulhilo32(ROCRAND_PHILOX_M4x32_0, counter.x, hi0);
321  unsigned int lo1 = detail::mulhilo32(ROCRAND_PHILOX_M4x32_1, counter.z, hi1);
322  return uint4 {
323  hi1 ^ counter.y ^ key.x,
324  lo1,
325  hi0 ^ counter.w ^ key.y,
326  lo0
327  };
328  }
329 
331  static uint2 bumpkey(uint2 key)
332  {
333  key.x += ROCRAND_PHILOX_W32_0;
334  key.y += ROCRAND_PHILOX_W32_1;
335  return key;
336  }
337 
338 protected:
339  // State
340  philox4x32_10_state m_state;
341 
342  #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
343  friend struct detail::engine_boxmuller_helper<philox4x32_10_engine>;
344  #endif
345 
346 }; // philox4x32_10_engine class
347 
348 } // end namespace rocrand_device
349 
356 typedef rocrand_device::philox4x32_10_engine rocrand_state_philox4x32_10;
358 
371 void rocrand_init(const unsigned long long seed,
372  const unsigned long long subsequence,
373  const unsigned long long offset,
374  rocrand_state_philox4x32_10 * state)
375 {
376  *state = rocrand_state_philox4x32_10(seed, subsequence, offset);
377 }
378 
392 unsigned int rocrand(rocrand_state_philox4x32_10 * state)
393 {
394  return state->next();
395 }
396 
410 uint4 rocrand4(rocrand_state_philox4x32_10 * state)
411 {
412  return state->next4();
413 }
414 
424 void skipahead(unsigned long long offset, rocrand_state_philox4x32_10 * state)
425 {
426  return state->discard(offset);
427 }
428 
439 void skipahead_subsequence(unsigned long long subsequence, rocrand_state_philox4x32_10 * state)
440 {
441  return state->discard_subsequence(subsequence);
442 }
443 
454  void skipahead_sequence(unsigned long long sequence, rocrand_state_philox4x32_10 * state)
455  {
456  return state->discard_subsequence(sequence);
457  }
458 
459 #endif // ROCRAND_PHILOX4X32_10_H_
460  // end of group rocranddevice
FQUALIFIERS unsigned int rocrand(rocrand_state_philox4x32_10 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_philox4x32_10.h:392
#define ROCRAND_PHILOX4x32_DEFAULT_SEED
Default seed for PHILOX4x32 PRNG.
Definition: rocrand_philox4x32_10.h:77
FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, rocrand_state_philox4x32_10 *state)
Updates Philox state to skip ahead by subsequence subsequences.
Definition: rocrand_philox4x32_10.h:439
FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_philox4x32_10 *state)
Updates Philox state to skip ahead by sequence sequences.
Definition: rocrand_philox4x32_10.h:454
FQUALIFIERS uint4 rocrand4(rocrand_state_philox4x32_10 *state)
Returns four uniformly distributed random unsigned int values from [0; 2^32 - 1] range.
Definition: rocrand_philox4x32_10.h:410
FQUALIFIERS void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_philox4x32_10 *state)
Initializes Philox state.
Definition: rocrand_philox4x32_10.h:371
FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_philox4x32_10 *state)
Updates Philox state to skip ahead by offset elements.
Definition: rocrand_philox4x32_10.h:424
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31