/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_threefry2_impl.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_threefry2_impl.h Source File

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_threefry2_impl.h Source File
API library
rocrand_threefry2_impl.h
1 // Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22 Copyright 2010-2011, D. E. Shaw Research.
23 All rights reserved.
24 
25 Redistribution and use in source and binary forms, with or without
26 modification, are permitted provided that the following conditions are
27 met:
28 
29 * Redistributions of source code must retain the above copyright
30  notice, this list of conditions, and the following disclaimer.
31 
32 * Redistributions in binary form must reproduce the above copyright
33  notice, this list of conditions, and the following disclaimer in the
34  documentation and/or other materials provided with the distribution.
35 
36 * Neither the name of D. E. Shaw Research nor the names of its
37  contributors may be used to endorse or promote products derived from
38  this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 */
52 
53 #ifndef ROCRAND_THREEFRY2_IMPL_H_
54 #define ROCRAND_THREEFRY2_IMPL_H_
55 
// Fallback for the function-qualifier shorthand: only takes effect when
// FQUALIFIERS has not already been defined before this point.
#if !defined(FQUALIFIERS)
    #define FQUALIFIERS __forceinline__ __device__
#endif // !defined(FQUALIFIERS)
59 
60 #include "rocrand/rocrand_threefry_common.h"
61 #include <rocrand/rocrand_common.h>
62 
// Default round counts for the 2x32 and 2x64 variants. Each can be overridden
// by defining the macro before this header is included.
#if !defined(THREEFRY2x32_DEFAULT_ROUNDS)
    #define THREEFRY2x32_DEFAULT_ROUNDS 20
#endif // !defined(THREEFRY2x32_DEFAULT_ROUNDS)

#if !defined(THREEFRY2x64_DEFAULT_ROUNDS)
    #define THREEFRY2x64_DEFAULT_ROUNDS 20
#endif // !defined(THREEFRY2x64_DEFAULT_ROUNDS)
70 
71 /* Output from skein_rot_search (srs32x2-X5000.out)
72 // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28
73 // Start: Tue Jul 12 11:11:33 2011
74 // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize= 64].format */
75 static constexpr __device__ int THREEFRY_ROTATION_32_2[8] = {13, 15, 26, 6, 17, 29, 16, 24};
76 
77 /*
78 // Output from skein_rot_search: (srs64_B64-X1000)
79 // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57
80 // Start: Tue Mar 1 10:07:48 2011
81 // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format
82 */
83 static constexpr __device__ int THREEFRY_ROTATION_64_2[8] = {16, 42, 12, 31, 16, 32, 24, 21};
84 
85 namespace rocrand_device
86 {
87 
88 template<class value>
89 FQUALIFIERS int threefry_rotation_array(int index);
90 
91 template<>
92 FQUALIFIERS int threefry_rotation_array<unsigned int>(int index)
93 {
94  return THREEFRY_ROTATION_32_2[index];
95 };
96 
97 template<>
98 FQUALIFIERS int threefry_rotation_array<unsigned long long>(int index)
99 {
100  return THREEFRY_ROTATION_64_2[index];
101 };
102 
103 template<typename state_value, typename value, unsigned int Nrounds>
104 class threefry_engine2_base
105 {
106 public:
107  struct threefry_state_2
108  {
109  state_value counter;
110  state_value key;
111  state_value result;
112  unsigned int substate;
113  };
114 
115  FQUALIFIERS void discard(unsigned long long offset)
116  {
117  this->discard_impl(offset);
118  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
119  }
120 
121  FQUALIFIERS void discard()
122  {
123  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
124  }
125 
131  FQUALIFIERS void discard_subsequence(unsigned long long subsequence)
132  {
133  this->discard_subsequence_impl(subsequence);
134  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
135  }
136 
137  FQUALIFIERS value operator()()
138  {
139  return this->next();
140  }
141 
142  FQUALIFIERS value next()
143  {
144 #if defined(__HIP_PLATFORM_AMD__)
145  value ret = m_state.result.data[m_state.substate];
146 #else
147  value ret = (&m_state.result.x)[m_state.substate];
148 #endif
149  m_state.substate++;
150  if(m_state.substate == 2)
151  {
152  m_state.substate = 0;
153  m_state.counter = this->bump_counter(m_state.counter);
154  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
155  }
156  return ret;
157  }
158 
159  FQUALIFIERS state_value next2()
160  {
161  state_value ret = m_state.result;
162  m_state.counter = this->bump_counter(m_state.counter);
163  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
164 
165  return this->interleave(ret, m_state.result);
166  }
167 
168 protected:
169  FQUALIFIERS static state_value threefry_rounds(state_value counter, state_value key)
170  {
171  state_value X;
172  value ks[2 + 1];
173 
174  static_assert(Nrounds <= 32, "32 or less only supported in threefry rounds");
175 
176  ks[2] = skein_ks_parity<value>();
177 
178  ks[0] = key.x;
179  ks[1] = key.y;
180 
181  X.x = counter.x;
182  X.y = counter.y;
183 
184  ks[2] ^= key.x;
185  ks[2] ^= key.y;
186 
187  /* Insert initial key before round 0 */
188  X.x += ks[0];
189  X.y += ks[1];
190 
191  for(unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
192  {
193  X.x += X.y;
194  X.y = rotl<value>(X.y, threefry_rotation_array<value>(round_idx & 7u));
195  X.y ^= X.x;
196 
197  if((round_idx & 3u) == 3)
198  {
199  unsigned int inject_idx = round_idx / 4;
200  // InjectKey(r = 1 + inject_idx)
201  X.x += ks[(1 + inject_idx) % 3];
202  X.y += ks[(2 + inject_idx) % 3];
203  X.y += 1 + inject_idx;
204  }
205  }
206 
207  return X;
208  }
209 
212  FQUALIFIERS void discard_impl(unsigned long long offset)
213  {
214  // Adjust offset for subset
215  m_state.substate += offset & 1;
216  unsigned long long counter_offset = offset / 2;
217  counter_offset += m_state.substate < 2 ? 0 : 1;
218  m_state.substate += m_state.substate < 2 ? 0 : -2;
219  // Discard states
220  this->discard_state(counter_offset);
221  }
222 
224  FQUALIFIERS void discard_subsequence_impl(unsigned long long subsequence)
225  {
226  m_state.counter.y += subsequence;
227  }
228 
231  FQUALIFIERS void discard_state(unsigned long long offset)
232  {
233  value lo, hi;
234  ::rocrand_device::detail::split_ull(lo, hi, offset);
235 
236  value old_counter = m_state.counter.x;
237  m_state.counter.x += lo;
238  m_state.counter.y += hi + (m_state.counter.x < old_counter ? 1 : 0);
239  }
240 
241  FQUALIFIERS static state_value bump_counter(state_value counter)
242  {
243  counter.x++;
244  value add = counter.x == 0 ? 1 : 0;
245  counter.y += add;
246  return counter;
247  }
248 
249  FQUALIFIERS state_value interleave(const state_value prev, const state_value next) const
250  {
251  switch(m_state.substate)
252  {
253  case 0: return prev;
254  case 1: return state_value{prev.y, next.x};
255  }
256  __builtin_unreachable();
257  }
258 
259 protected:
260  threefry_state_2 m_state;
261 }; // threefry_engine2_base class
262 
263 } // end namespace rocrand_device
264 
265 #endif // ROCRAND_THREEFRY2_IMPL_H_
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31