/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-7.0.0/projects/rocrand/library/include/rocrand/rocrand_threefry2_impl.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-7.0.0/projects/rocrand/library/include/rocrand/rocrand_threefry2_impl.h Source File

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/docs-7.0.0/projects/rocrand/library/include/rocrand/rocrand_threefry2_impl.h Source File
rocrand_threefry2_impl.h
1 // Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22 Copyright 2010-2011, D. E. Shaw Research.
23 All rights reserved.
24 
25 Redistribution and use in source and binary forms, with or without
26 modification, are permitted provided that the following conditions are
27 met:
28 
29 * Redistributions of source code must retain the above copyright
30  notice, this list of conditions, and the following disclaimer.
31 
32 * Redistributions in binary form must reproduce the above copyright
33  notice, this list of conditions, and the following disclaimer in the
34  documentation and/or other materials provided with the distribution.
35 
36 * Neither the name of D. E. Shaw Research nor the names of its
37  contributors may be used to endorse or promote products derived from
38  this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 */
52 
53 #ifndef ROCRAND_THREEFRY2_IMPL_H_
54 #define ROCRAND_THREEFRY2_IMPL_H_
55 
56 #include "rocrand/rocrand_common.h"
57 #include "rocrand/rocrand_threefry_common.h"
58 
59 #include <hip/hip_runtime.h>
60 
61 #ifndef THREEFRY2x32_DEFAULT_ROUNDS
62  #define THREEFRY2x32_DEFAULT_ROUNDS 20
63 #endif
64 
65 #ifndef THREEFRY2x64_DEFAULT_ROUNDS
66  #define THREEFRY2x64_DEFAULT_ROUNDS 20
67 #endif
68 
69 namespace rocrand_device
70 {
71 
72 template<class value>
73 __forceinline__ __device__ __host__ int threefry_rotation_array(int index) = delete;
74 
75 template<>
76 __forceinline__ __device__ __host__ int threefry_rotation_array<unsigned int>(int index)
77 {
78  // Output from skein_rot_search (srs32x2-X5000.out)
79  // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28
80  // Start: Tue Jul 12 11:11:33 2011
81  // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize= 64].format
82  static constexpr int THREEFRY_ROTATION_32_2[8] = {13, 15, 26, 6, 17, 29, 16, 24};
83  return THREEFRY_ROTATION_32_2[index];
84 }
85 
86 template<>
87 __forceinline__ __device__ __host__ int threefry_rotation_array<unsigned long long>(int index)
88 {
89  // Output from skein_rot_search: (srs64_B64-X1000)
90  // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57
91  // Start: Tue Mar 1 10:07:48 2011
92  // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format
93  static constexpr int THREEFRY_ROTATION_64_2[8] = {16, 42, 12, 31, 16, 32, 24, 21};
94  return THREEFRY_ROTATION_64_2[index];
95 }
96 
97 template<typename state_value, typename value, unsigned int Nrounds>
98 class threefry_engine2_base
99 {
100 public:
101  struct threefry_state_2
102  {
103  state_value counter;
104  state_value key;
105  state_value result;
106  unsigned int substate;
107  };
108  using state_type = threefry_state_2;
109  using state_vector_type = state_value;
110 
111  __forceinline__ __device__ __host__ void discard(unsigned long long offset)
112  {
113  this->discard_impl(offset);
114  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
115  }
116 
117  __forceinline__ __device__ __host__ void discard()
118  {
119  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
120  }
121 
127  __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
128  {
129  this->discard_subsequence_impl(subsequence);
130  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
131  }
132 
133  __forceinline__ __device__ __host__ value operator()()
134  {
135  return this->next();
136  }
137 
138  __forceinline__ __device__ __host__ value next()
139  {
140 #if defined(__HIP_PLATFORM_AMD__)
141  value ret = ROCRAND_HIPVEC_ACCESS(m_state.result)[m_state.substate];
142 #else
143  value ret = (&m_state.result.x)[m_state.substate];
144 #endif
145  m_state.substate++;
146  if(m_state.substate == 2)
147  {
148  m_state.substate = 0;
149  m_state.counter = this->bump_counter(m_state.counter);
150  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
151  }
152  return ret;
153  }
154 
155  __forceinline__ __device__ __host__ state_value next2()
156  {
157  state_value ret = m_state.result;
158  m_state.counter = this->bump_counter(m_state.counter);
159  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
160 
161  return this->interleave(ret, m_state.result);
162  }
163 
164 protected:
165  __forceinline__ __device__ __host__ static state_value threefry_rounds(state_value counter,
166  state_value key)
167  {
168  state_value X;
169  value ks[2 + 1];
170 
171  static_assert(Nrounds <= 32, "32 or less only supported in threefry rounds");
172 
173  ks[2] = skein_ks_parity<value>();
174 
175  ks[0] = key.x;
176  ks[1] = key.y;
177 
178  X.x = counter.x;
179  X.y = counter.y;
180 
181  ks[2] ^= key.x;
182  ks[2] ^= key.y;
183 
184  /* Insert initial key before round 0 */
185  X.x += ks[0];
186  X.y += ks[1];
187 
188  for(unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
189  {
190  X.x += X.y;
191  X.y = rotl<value>(X.y, threefry_rotation_array<value>(round_idx & 7u));
192  X.y ^= X.x;
193 
194  if((round_idx & 3u) == 3)
195  {
196  unsigned int inject_idx = round_idx / 4;
197  // InjectKey(r = 1 + inject_idx)
198  X.x += ks[(1 + inject_idx) % 3];
199  X.y += ks[(2 + inject_idx) % 3];
200  X.y += 1 + inject_idx;
201  }
202  }
203 
204  return X;
205  }
206 
209  __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset)
210  {
211  // Adjust offset for subset
212  m_state.substate += offset & 1;
213  unsigned long long counter_offset = offset / 2;
214  counter_offset += m_state.substate < 2 ? 0 : 1;
215  m_state.substate += m_state.substate < 2 ? 0 : -2;
216  // Discard states
217  this->discard_state(counter_offset);
218  }
219 
221  __forceinline__ __device__ __host__ void
222  discard_subsequence_impl(unsigned long long subsequence)
223  {
224  m_state.counter.y += subsequence;
225  }
226 
229  __forceinline__ __device__ __host__ void discard_state(unsigned long long offset)
230  {
231  value lo, hi;
232  ::rocrand_device::detail::split_ull(lo, hi, offset);
233 
234  value old_counter = m_state.counter.x;
235  m_state.counter.x += lo;
236  m_state.counter.y += hi + (m_state.counter.x < old_counter ? 1 : 0);
237  }
238 
239  __forceinline__ __device__ __host__ static state_value bump_counter(state_value counter)
240  {
241  counter.x++;
242  value add = counter.x == 0 ? 1 : 0;
243  counter.y += add;
244  return counter;
245  }
246 
247  __forceinline__ __device__ __host__ state_value interleave(const state_value prev,
248  const state_value next) const
249  {
250  switch(m_state.substate)
251  {
252  case 0: return prev;
253  case 1: return state_value{prev.y, next.x};
254  }
255  __builtin_unreachable();
256  }
257 
258 protected:
259  threefry_state_2 m_state;
260 }; // threefry_engine2_base class
261 
262 } // end namespace rocrand_device
263 
264 #endif // ROCRAND_THREEFRY2_IMPL_H_