/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_threefry4_impl.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_threefry4_impl.h Source File

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_threefry4_impl.h Source File
API library
rocrand_threefry4_impl.h
1 // Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22 Copyright 2010-2011, D. E. Shaw Research.
23 All rights reserved.
24 
25 Redistribution and use in source and binary forms, with or without
26 modification, are permitted provided that the following conditions are
27 met:
28 
29 * Redistributions of source code must retain the above copyright
30  notice, this list of conditions, and the following disclaimer.
31 
32 * Redistributions in binary form must reproduce the above copyright
33  notice, this list of conditions, and the following disclaimer in the
34  documentation and/or other materials provided with the distribution.
35 
36 * Neither the name of D. E. Shaw Research nor the names of its
37  contributors may be used to endorse or promote products derived from
38  this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 */
52 
53 #ifndef ROCRAND_THREEFRY4_IMPL_H_
54 #define ROCRAND_THREEFRY4_IMPL_H_
55 
56 #ifndef FQUALIFIERS
57  #define FQUALIFIERS __forceinline__ __device__
58 #endif // FQUALIFIERS
59 
60 #include "rocrand/rocrand_threefry_common.h"
61 #include <rocrand/rocrand_common.h>
62 
63 #ifndef THREEFRY4x32_DEFAULT_ROUNDS
64  #define THREEFRY4x32_DEFAULT_ROUNDS 20
65 #endif
66 
67 #ifndef THREEFRY4x64_DEFAULT_ROUNDS
68  #define THREEFRY4x64_DEFAULT_ROUNDS 20
69 #endif
70 
71 /* These are the R_256 constants from the Threefish reference sources
72  with names changed to R_64x4... */
73 static constexpr __device__ int THREEFRY_ROTATION_64_4[8][2] = {
74  {14, 16},
75  {52, 57},
76  {23, 40},
77  { 5, 37},
78  {25, 33},
79  {46, 12},
80  {58, 22},
81  {32, 32}
82 };
83 
84 /* Output from skein_rot_search: (srs-B128-X5000.out)
85 // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28
86 // Start: Mon Aug 24 22:41:36 2009
87 // ...
88 // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. blkSize= 128].format */
89 static constexpr __device__ int THREEFRY_ROTATION_32_4[8][2] = {
90  {10, 26},
91  {11, 21},
92  {13, 27},
93  {23, 5},
94  { 6, 20},
95  {17, 11},
96  {25, 10},
97  {18, 20}
98 };
99 
100 namespace rocrand_device
101 {
102 
103 template<class value>
104 FQUALIFIERS int threefry_rotation_array(int indexX, int indexY);
105 
106 template<>
107 FQUALIFIERS int threefry_rotation_array<unsigned int>(int indexX, int indexY)
108 {
109  return THREEFRY_ROTATION_32_4[indexX][indexY];
110 };
111 
112 template<>
113 FQUALIFIERS int threefry_rotation_array<unsigned long long>(int indexX, int indexY)
114 {
115  return THREEFRY_ROTATION_64_4[indexX][indexY];
116 };
117 
118 template<typename state_value, typename value, unsigned int Nrounds>
119 class threefry_engine4_base
120 {
121 public:
122  struct threefry_state_4
123  {
124  state_value counter;
125  state_value key;
126  state_value result;
127  unsigned int substate;
128  };
129 
131  FQUALIFIERS void discard(unsigned long long offset)
132  {
133  this->discard_impl(offset);
134  this->m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
135  }
136 
142  FQUALIFIERS void discard_subsequence(unsigned long long subsequence)
143  {
144  this->discard_subsequence_impl(subsequence);
145  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
146  }
147 
148  FQUALIFIERS value operator()()
149  {
150  return this->next();
151  }
152 
153  FQUALIFIERS value next()
154  {
155 #if defined(__HIP_PLATFORM_AMD__)
156  value ret = m_state.result.data[m_state.substate];
157 #else
158  value ret = (&m_state.result.x)[m_state.substate];
159 #endif
160  m_state.substate++;
161  if(m_state.substate == 4)
162  {
163  m_state.substate = 0;
164  m_state.counter = this->bump_counter(m_state.counter);
165  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
166  }
167  return ret;
168  }
169 
170  FQUALIFIERS state_value next4()
171  {
172  state_value ret = m_state.result;
173  m_state.counter = this->bump_counter(m_state.counter);
174  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
175 
176  return this->interleave(ret, m_state.result);
177  }
178 
179 protected:
180  FQUALIFIERS static state_value threefry_rounds(state_value counter, state_value key)
181  {
182  state_value X;
183  value ks[4 + 1];
184 
185  static_assert(Nrounds <= 72, "72 or less only supported in threefry rounds");
186 
187  ks[4] = skein_ks_parity<value>();
188 
189  ks[0] = key.x;
190  ks[1] = key.y;
191  ks[2] = key.z;
192  ks[3] = key.w;
193 
194  X.x = counter.x;
195  X.y = counter.y;
196  X.z = counter.z;
197  X.w = counter.w;
198 
199  ks[4] ^= key.x;
200  ks[4] ^= key.y;
201  ks[4] ^= key.z;
202  ks[4] ^= key.w;
203 
204  /* Insert initial key before round 0 */
205  X.x += ks[0];
206  X.y += ks[1];
207  X.z += ks[2];
208  X.w += ks[3];
209 
210  for(unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
211  {
212  int rot_0 = threefry_rotation_array<value>(round_idx & 7u, 0);
213  int rot_1 = threefry_rotation_array<value>(round_idx & 7u, 1);
214  if((round_idx & 2u) == 0)
215  {
216  X.x += X.y;
217  X.y = rotl<value>(X.y, rot_0);
218  X.y ^= X.x;
219  X.z += X.w;
220  X.w = rotl<value>(X.w, rot_1);
221  X.w ^= X.z;
222  }
223  else
224  {
225  X.x += X.w;
226  X.w = rotl<value>(X.w, rot_0);
227  X.w ^= X.x;
228  X.z += X.y;
229  X.y = rotl<value>(X.y, rot_1);
230  X.y ^= X.z;
231  }
232 
233  if((round_idx & 3u) == 3)
234  {
235  unsigned int inject_idx = round_idx / 4;
236  // InjectKey(r = 1 + inject_idx)
237  X.x += ks[(1 + inject_idx) % 5];
238  X.y += ks[(2 + inject_idx) % 5];
239  X.z += ks[(3 + inject_idx) % 5];
240  X.w += ks[(4 + inject_idx) % 5];
241  X.w += 1 + inject_idx;
242  }
243  }
244 
245  return X;
246  }
247 
250  FQUALIFIERS void discard_impl(unsigned long long offset)
251  {
252  // Adjust offset for subset
253  m_state.substate += offset & 3;
254  unsigned long long counter_offset = offset / 4;
255  counter_offset += m_state.substate < 4 ? 0 : 1;
256  m_state.substate += m_state.substate < 4 ? 0 : -4;
257  // Discard states
258  this->discard_state(counter_offset);
259  }
260 
262  FQUALIFIERS void discard_subsequence_impl(unsigned long long subsequence)
263  {
264  value lo, hi;
265  ::rocrand_device::detail::split_ull(lo, hi, subsequence);
266 
267  value old_counter = m_state.counter.z;
268  m_state.counter.z += lo;
269  m_state.counter.w += hi + (m_state.counter.z < old_counter ? 1 : 0);
270  }
271 
274  FQUALIFIERS void discard_state(unsigned long long offset)
275  {
276  value lo, hi;
277  ::rocrand_device::detail::split_ull(lo, hi, offset);
278 
279  state_value old_counter = m_state.counter;
280  m_state.counter.x += lo;
281  m_state.counter.y += hi + (m_state.counter.x < old_counter.x ? 1 : 0);
282  m_state.counter.z += (m_state.counter.y < old_counter.y ? 1 : 0);
283  m_state.counter.w += (m_state.counter.z < old_counter.z ? 1 : 0);
284  }
285 
286  FQUALIFIERS static state_value bump_counter(state_value counter)
287  {
288  counter.x++;
289  value add = counter.x == 0 ? 1 : 0;
290  counter.y += add;
291  add = counter.y == 0 ? add : 0;
292  counter.z += add;
293  add = counter.z == 0 ? add : 0;
294  counter.w += add;
295  return counter;
296  }
297 
298  FQUALIFIERS state_value interleave(const state_value prev, const state_value next) const
299  {
300  switch(m_state.substate)
301  {
302  case 0: return prev;
303  case 1: return state_value{prev.y, prev.z, prev.w, next.x};
304  case 2: return state_value{prev.z, prev.w, next.x, next.y};
305  case 3: return state_value{prev.w, next.x, next.y, next.z};
306  }
307  __builtin_unreachable();
308  }
309 
310 protected:
311  threefry_state_4 m_state;
312 }; // threefry_engine4_base class
313 
314 } // end namespace rocrand_device
315 
316 #endif // ROCRAND_THREEFRY4_IMPL_H_
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31