/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_threefry2_impl.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_threefry2_impl.h Source File

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/projects/rocrand/library/include/rocrand/rocrand_threefry2_impl.h Source File
rocrand_threefry2_impl.h
1 // Copyright (c) 2022-2026 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22 Copyright 2010-2011, D. E. Shaw Research.
23 All rights reserved.
24 
25 Redistribution and use in source and binary forms, with or without
26 modification, are permitted provided that the following conditions are
27 met:
28 
29 * Redistributions of source code must retain the above copyright
30  notice, this list of conditions, and the following disclaimer.
31 
32 * Redistributions in binary form must reproduce the above copyright
33  notice, this list of conditions, and the following disclaimer in the
34  documentation and/or other materials provided with the distribution.
35 
36 * Neither the name of D. E. Shaw Research nor the names of its
37  contributors may be used to endorse or promote products derived from
38  this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51 */
52 
53 #ifndef ROCRAND_THREEFRY2_IMPL_H_
54 #define ROCRAND_THREEFRY2_IMPL_H_
55 
56 #include "rocrand/rocrand_common.h"
57 #include "rocrand/rocrand_threefry_common.h"
58 
59 #include <hip/hip_runtime.h>
60 
// Fallback values for the round-count macros: each one is set to 20 unless it
// was already defined before this point (e.g. on the compiler command line or
// by an earlier include).
#if !defined(THREEFRY2x32_DEFAULT_ROUNDS)
    #define THREEFRY2x32_DEFAULT_ROUNDS 20
#endif

#if !defined(THREEFRY2x64_DEFAULT_ROUNDS)
    #define THREEFRY2x64_DEFAULT_ROUNDS 20
#endif
68 
69 namespace rocrand_device
70 {
71 
72 template<typename state_value, typename value, unsigned int Nrounds>
73 class threefry_engine2_base
74 {
75 public:
76  struct threefry_state_2
77  {
78  state_value counter;
79  state_value key;
80  state_value result;
81  unsigned int substate;
82  };
83  using state_type = threefry_state_2;
84  using state_vector_type = state_value;
85 
86  __forceinline__ __device__ __host__
87  void discard(unsigned long long offset)
88  {
89  this->discard_impl(offset);
90  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
91  }
92 
93  __forceinline__ __device__ __host__
94  void discard()
95  {
96  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
97  }
98 
104  __forceinline__ __device__ __host__
105  void discard_subsequence(unsigned long long subsequence)
106  {
107  this->discard_subsequence_impl(subsequence);
108  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
109  }
110 
111  __forceinline__ __device__ __host__
112  value operator()()
113  {
114  return this->next();
115  }
116 
117  __forceinline__ __device__ __host__
118  value next()
119  {
120 #if defined(__HIP_PLATFORM_AMD__)
121  value ret = ROCRAND_HIPVEC_ACCESS(m_state.result)[m_state.substate];
122 #else
123  value ret = (&m_state.result.x)[m_state.substate];
124 #endif
125  m_state.substate++;
126  if(m_state.substate == 2)
127  {
128  m_state.substate = 0;
129  m_state.counter = this->bump_counter(m_state.counter);
130  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
131  }
132  return ret;
133  }
134 
135  __forceinline__ __device__ __host__
136  state_value next2()
137  {
138  state_value ret = m_state.result;
139  m_state.counter = this->bump_counter(m_state.counter);
140  m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
141 
142  return this->interleave(ret, m_state.result);
143  }
144 
145 protected:
146  __forceinline__ __device__ __host__
147  static state_value threefry_rounds(state_value counter, state_value key)
148  {
149  state_value X;
150  value ks[2 + 1];
151 
152  static_assert(Nrounds <= 32, "32 or less only supported in threefry rounds");
153 
154  ks[2] = skein_ks_parity<value>();
155 
156  ks[0] = key.x;
157  ks[1] = key.y;
158 
159  X.x = counter.x;
160  X.y = counter.y;
161 
162  ks[2] ^= key.x;
163  ks[2] ^= key.y;
164 
165  /* Insert initial key before round 0 */
166  X.x += ks[0];
167  X.y += ks[1];
168 
169  return rounds_2<state_value, value, Nrounds>(X, ks);
170  }
171 
174  __forceinline__ __device__ __host__
175  void discard_impl(unsigned long long offset)
176  {
177  // Adjust offset for subset
178  m_state.substate += offset & 1;
179  unsigned long long counter_offset = offset / 2;
180  counter_offset += m_state.substate < 2 ? 0 : 1;
181  m_state.substate += m_state.substate < 2 ? 0 : -2;
182  // Discard states
183  this->discard_state(counter_offset);
184  }
185 
187  __forceinline__ __device__ __host__
188  void discard_subsequence_impl(unsigned long long subsequence)
189  {
190  m_state.counter.y += subsequence;
191  }
192 
195  __forceinline__ __device__ __host__
196  void discard_state(unsigned long long offset)
197  {
198  value lo, hi;
199  ::rocrand_device::detail::split_ull(lo, hi, offset);
200 
201  value old_counter = m_state.counter.x;
202  m_state.counter.x += lo;
203  m_state.counter.y += hi + (m_state.counter.x < old_counter ? 1 : 0);
204  }
205 
206  __forceinline__ __device__ __host__
207  static state_value bump_counter(state_value counter)
208  {
209  counter.x++;
210  value add = counter.x == 0 ? 1 : 0;
211  counter.y += add;
212  return counter;
213  }
214 
215  __forceinline__ __device__ __host__
216  state_value interleave(const state_value prev, const state_value next) const
217  {
218  switch(m_state.substate)
219  {
220  case 0: return prev;
221  case 1: return state_value{prev.y, next.x};
222  }
223  __builtin_unreachable();
224  }
225 
226 protected:
227  threefry_state_2 m_state;
228 }; // threefry_engine2_base class
229 
230 } // end namespace rocrand_device
231 
232 #endif // ROCRAND_THREEFRY2_IMPL_H_