/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_threefry4_impl.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_threefry4_impl.h Source File

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_threefry4_impl.h Source File
rocrand_threefry4_impl.h
1 // Copyright (c) 2022-2026 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22 Copyright 2010-2011, D. E. Shaw Research.
23 All rights reserved.
24 
25 Redistribution and use in source and binary forms, with or without
26 modification, are permitted provided that the following conditions are
27 met:
28 
29 * Redistributions of source code must retain the above copyright
30  notice, this list of conditions, and the following disclaimer.
31 
32 * Redistributions in binary form must reproduce the above copyright
33  notice, this list of conditions, and the following disclaimer in the
34  documentation and/or other materials provided with the distribution.
35 
36 * Neither the name of D. E. Shaw Research nor the names of its
37  contributors may be used to endorse or promote products derived from
38  this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 */
52 
53 #ifndef ROCRAND_THREEFRY4_IMPL_H_
54 #define ROCRAND_THREEFRY4_IMPL_H_
55 
56 #include "rocrand/rocrand_common.h"
57 #include "rocrand/rocrand_threefry_common.h"
58 
59 #include <hip/hip_runtime.h>
60 
// Fallback round counts: each macro is set to 20 unless it was already defined
// before this point (e.g. on the compiler command line or by an including file).
// NOTE(review): the macros are not referenced in this header; presumably they are
// consumed by the concrete 4x32 / 4x64 engine headers - confirm.
#if !defined(THREEFRY4x32_DEFAULT_ROUNDS)
    #define THREEFRY4x32_DEFAULT_ROUNDS 20
#endif

#if !defined(THREEFRY4x64_DEFAULT_ROUNDS)
    #define THREEFRY4x64_DEFAULT_ROUNDS 20
#endif
68 
69 namespace rocrand_device
70 {
71 
72 template<typename state_value, typename value, unsigned int Nrounds>
73 class threefry_engine4_base
74 {
75 public:
76  struct threefry_state_4
77  {
78  state_value counter;
79  state_value key;
80  state_value result;
81  unsigned int substate;
82  };
83  using state_type = threefry_state_4;
84  using state_vector_type = state_value;
85 
87  __forceinline__ __device__ __host__
88  void discard(unsigned long long offset)
89  {
90  this->discard_impl(offset);
91  this->m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
92  }
93 
99  __forceinline__ __device__ __host__
100  void discard_subsequence(unsigned long long subsequence)
101  {
102  this->discard_subsequence_impl(subsequence);
103  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
104  }
105 
106  __forceinline__ __device__ __host__
107  value operator()()
108  {
109  return this->next();
110  }
111 
112  __forceinline__ __device__ __host__
113  value next()
114  {
115 #if defined(__HIP_PLATFORM_AMD__)
116  value ret = ROCRAND_HIPVEC_ACCESS(m_state.result)[m_state.substate];
117 #else
118  value ret = (&m_state.result.x)[m_state.substate];
119 #endif
120  m_state.substate++;
121  if(m_state.substate == 4)
122  {
123  m_state.substate = 0;
124  m_state.counter = this->bump_counter(m_state.counter);
125  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
126  }
127  return ret;
128  }
129 
130  __forceinline__ __device__ __host__
131  state_value next4()
132  {
133  state_value ret = m_state.result;
134  m_state.counter = this->bump_counter(m_state.counter);
135  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
136 
137  return this->interleave(ret, m_state.result);
138  }
139 
140 protected:
141  __forceinline__ __device__ __host__
142  static state_value threefry_rounds(state_value counter, state_value key)
143  {
144  state_value X;
145  value ks[4 + 1];
146 
147  static_assert(Nrounds <= 72, "72 or less only supported in threefry rounds");
148 
149  ks[4] = skein_ks_parity<value>();
150 
151  ks[0] = key.x;
152  ks[1] = key.y;
153  ks[2] = key.z;
154  ks[3] = key.w;
155 
156  X.x = counter.x;
157  X.y = counter.y;
158  X.z = counter.z;
159  X.w = counter.w;
160 
161  ks[4] ^= key.x;
162  ks[4] ^= key.y;
163  ks[4] ^= key.z;
164  ks[4] ^= key.w;
165 
166  /* Insert initial key before round 0 */
167  X.x += ks[0];
168  X.y += ks[1];
169  X.z += ks[2];
170  X.w += ks[3];
171 
172  return rounds_4<state_value, value, Nrounds>(X, ks);
173  }
174 
177  __forceinline__ __device__ __host__
178  void discard_impl(unsigned long long offset)
179  {
180  // Adjust offset for subset
181  m_state.substate += offset & 3;
182  unsigned long long counter_offset = offset / 4;
183  counter_offset += m_state.substate < 4 ? 0 : 1;
184  m_state.substate += m_state.substate < 4 ? 0 : -4;
185  // Discard states
186  this->discard_state(counter_offset);
187  }
188 
190  __forceinline__ __device__ __host__
191  void discard_subsequence_impl(unsigned long long subsequence)
192  {
193  value lo, hi;
194  ::rocrand_device::detail::split_ull(lo, hi, subsequence);
195 
196  value old_counter = m_state.counter.z;
197  m_state.counter.z += lo;
198  m_state.counter.w += hi + (m_state.counter.z < old_counter ? 1 : 0);
199  }
200 
203  __forceinline__ __device__ __host__
204  void discard_state(unsigned long long offset)
205  {
206  value lo, hi;
207  ::rocrand_device::detail::split_ull(lo, hi, offset);
208 
209  state_value old_counter = m_state.counter;
210  m_state.counter.x += lo;
211  m_state.counter.y += hi + (m_state.counter.x < old_counter.x ? 1 : 0);
212  m_state.counter.z += (m_state.counter.y < old_counter.y ? 1 : 0);
213  m_state.counter.w += (m_state.counter.z < old_counter.z ? 1 : 0);
214  }
215 
216  __forceinline__ __device__ __host__
217  static state_value bump_counter(state_value counter)
218  {
219  counter.x++;
220  value add = counter.x == 0 ? 1 : 0;
221  counter.y += add;
222  add = counter.y == 0 ? add : 0;
223  counter.z += add;
224  add = counter.z == 0 ? add : 0;
225  counter.w += add;
226  return counter;
227  }
228 
229  __forceinline__ __device__ __host__
230  state_value interleave(const state_value prev, const state_value next) const
231  {
232  switch(m_state.substate)
233  {
234  case 0: return prev;
235  case 1: return state_value{prev.y, prev.z, prev.w, next.x};
236  case 2: return state_value{prev.z, prev.w, next.x, next.y};
237  case 3: return state_value{prev.w, next.x, next.y, next.z};
238  }
239  __builtin_unreachable();
240  }
241 
242 protected:
243  threefry_state_4 m_state;
244 }; // threefry_engine4_base class
245 
246 } // end namespace rocrand_device
247 
248 #endif // ROCRAND_THREEFRY4_IMPL_H_