53 #ifndef ROCRAND_THREEFRY2_IMPL_H_
54 #define ROCRAND_THREEFRY2_IMPL_H_
56 #include "rocrand/rocrand_threefry_common.h"
57 #include <rocrand/rocrand_common.h>
59 #ifndef THREEFRY2x32_DEFAULT_ROUNDS
60 #define THREEFRY2x32_DEFAULT_ROUNDS 20
63 #ifndef THREEFRY2x64_DEFAULT_ROUNDS
64 #define THREEFRY2x64_DEFAULT_ROUNDS 20
67 namespace rocrand_device
/// Generic form of the rotation-amount lookup. It is declared deleted, so a call
/// only compiles for the explicitly specialized element types
/// (`unsigned int` and `unsigned long long` in this file).
/// NOTE(review): the template parameter list is not visible in this excerpt.
/// \param index position in the rotation table
71 __forceinline__ __device__ __host__
int threefry_rotation_array(
int index) =
delete;
/// Rotation-amount lookup for `unsigned int` values.
/// \param index position in the 8-entry table. No bounds check is performed here;
///        threefry_rounds() passes `round_idx & 7u`, which stays within 0..7.
/// \return THREEFRY_ROTATION_32_2[index]
74 __forceinline__ __device__ __host__
int threefry_rotation_array<unsigned int>(
int index)
// Eight rotation amounts; the result is used as the shift argument of rotl<value>()
// inside threefry_rounds().
80 static constexpr
int THREEFRY_ROTATION_32_2[8] = {13, 15, 26, 6, 17, 29, 16, 24};
81 return THREEFRY_ROTATION_32_2[index];
/// Rotation-amount lookup for `unsigned long long` values.
/// \param index position in the 8-entry table. No bounds check is performed here;
///        threefry_rounds() passes `round_idx & 7u`, which stays within 0..7.
/// \return THREEFRY_ROTATION_64_2[index]
85 __forceinline__ __device__ __host__
int threefry_rotation_array<unsigned long long>(
int index)
// Eight rotation amounts; the result is used as the shift argument of rotl<value>()
// inside threefry_rounds().
91 static constexpr
int THREEFRY_ROTATION_64_2[8] = {16, 42, 12, 31, 16, 32, 24, 21};
92 return THREEFRY_ROTATION_64_2[index];
/// Base class of the two-word Threefry engine.
/// \tparam state_value vector type with `.x` / `.y` members; used for the counter,
///         the key and the cached result, and returned by next2()
/// \tparam value scalar element type of `state_value`; returned by next()
/// \tparam Nrounds number of rounds executed by threefry_rounds(); a static_assert
///         there restricts it to at most 32
95 template<
typename state_value,
typename value,
unsigned int Nrounds>
96 class threefry_engine2_base
/// State record kept by the engine in `m_state`.
/// The member functions of this class also access `counter`, `key` and `result`
/// on it; their declarations are not visible in this excerpt.
99 struct threefry_state_2
// Index of the element of `result` that next() reads; next() resets it to 0
// when it equals 2.
104 unsigned int substate;
// Aliases for the state record and for the vector type carried in it.
106 using state_type = threefry_state_2;
107 using state_vector_type = state_value;
/// Skips ahead in the sequence and refreshes the cached output.
/// \param offset forwarded unchanged to discard_impl()
109 __forceinline__ __device__ __host__
void discard(
unsigned long long offset)
// Move substate/counter forward.
111 this->discard_impl(offset);
// Recompute the cached result from the updated counter and the key.
112 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
/// Recomputes `m_state.result` from the current counter and key.
// NOTE(review): the visible body changes neither m_state.counter nor
// m_state.substate, so as shown this overload does not move the sequence
// forward — confirm that this is intended.
115 __forceinline__ __device__ __host__
void discard()
117 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
/// Skips whole subsequences and refreshes the cached output.
/// \param subsequence forwarded unchanged to discard_subsequence_impl(), which adds
///        it to the `y` word of the counter
125 __forceinline__ __device__ __host__
void discard_subsequence(
unsigned long long subsequence)
127 this->discard_subsequence_impl(subsequence);
// Recompute the cached result from the updated counter and the key.
128 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
/// Function-call operator producing a single `value`.
// NOTE(review): the body is not visible in this excerpt; presumably it forwards
// to next() — confirm.
131 __forceinline__ __device__ __host__ value operator()()
/// Produces one `value` taken from the cached result; when the cached pair is
/// exhausted the counter is bumped and a new result is computed.
// NOTE(review): the statement that advances m_state.substate and the return
// statement are not visible in this excerpt.
136 __forceinline__ __device__ __host__ value next()
// Select element `substate` of the cached result. With __HIP_PLATFORM_AMD__
// defined the vector is indexed through its `data` member.
138 #if defined(__HIP_PLATFORM_AMD__)
139 value ret = m_state.result.data[m_state.substate];
// Alternative access: index from the address of the `.x` member
// (presumably the #else branch — the directive is not visible here).
141 value ret = (&m_state.result.x)[m_state.substate];
// Both elements consumed: restart at element 0 with a new counter value.
144 if(m_state.substate == 2)
146 m_state.substate = 0;
147 m_state.counter = this->bump_counter(m_state.counter);
148 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
/// Produces two values at once as a `state_value`.
/// The current cached result is saved, the counter is bumped and a new result
/// is computed; interleave() then combines the saved and the new result
/// according to the current substate. The substate is not modified by the
/// statements visible here.
153 __forceinline__ __device__ __host__ state_value next2()
// Keep the result that was cached before the counter is bumped.
155 state_value ret = m_state.result;
156 m_state.counter = this->bump_counter(m_state.counter);
157 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// With substate 1 this yields {ret.y, m_state.result.x}.
159 return this->interleave(ret, m_state.result);
/// Runs `Nrounds` Threefry rounds and returns the resulting `state_value`.
/// Static: it works only on its arguments and does not touch `m_state`.
/// \param counter counter block to transform
// NOTE(review): the second parameter, the declarations of `X` and `ks`, and some
// of the per-round statements are not visible in this excerpt. Callers in this
// file pass (m_state.counter, m_state.key).
163 __forceinline__ __device__ __host__
static state_value threefry_rounds(state_value counter,
// Compile-time limit on the round count.
169 static_assert(Nrounds <= 32,
"32 or less only supported in threefry rounds");
// Third key-schedule word starts from the value returned by skein_ks_parity<value>().
171 ks[2] = skein_ks_parity<value>();
186 for(
unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
// Rotate X.y; `round_idx & 7u` cycles through the 8-entry rotation table.
189 X.y = rotl<value>(X.y, threefry_rotation_array<value>(round_idx & 7u));
// Every fourth round (round_idx = 3, 7, 11, ...) the key schedule is added in.
192 if((round_idx & 3u) == 3)
// 0 for the first injection, 1 for the second, and so on.
194 unsigned int inject_idx = round_idx / 4;
// The `% 3` walks through the three key-schedule words ks[0..2].
196 X.x += ks[(1 + inject_idx) % 3];
197 X.y += ks[(2 + inject_idx) % 3];
// X.y additionally receives the 1-based injection number.
198 X.y += 1 + inject_idx;
/// Moves substate and counter forward by `offset` outputs.
/// The statements visible here do not write `m_state.result`; discard() recomputes
/// it after calling this function.
/// \param offset number of outputs to skip
207 __forceinline__ __device__ __host__
void discard_impl(
unsigned long long offset)
// An odd offset moves the position inside the current result by one element.
210 m_state.substate += offset & 1;
// next() consumes two elements per counter value, so offset / 2 counter steps are skipped.
211 unsigned long long counter_offset = offset / 2;
// If substate reached 2, carry one extra step into the counter ...
212 counter_offset += m_state.substate < 2 ? 0 : 1;
// ... and bring substate back into range. substate is unsigned, so adding -2
// wraps around and is equivalent to subtracting 2.
213 m_state.substate += m_state.substate < 2 ? 0 : -2;
215 this->discard_state(counter_offset);
/// Adds `subsequence` to the `y` word of the counter.
/// The statement visible here does not write `m_state.result`.
/// \param subsequence amount added to m_state.counter.y
219 __forceinline__ __device__ __host__
void
220 discard_subsequence_impl(
unsigned long long subsequence)
222 m_state.counter.y += subsequence;
/// Adds `offset` to the two-word counter, treating `counter.x` as the low word
/// and `counter.y` as the high word.
/// \param offset number of counter steps to add
// NOTE(review): the declarations of `lo` and `hi` are not visible in this excerpt.
227 __forceinline__ __device__ __host__
void discard_state(
unsigned long long offset)
// Split the offset into a low part `lo` and a high part `hi`.
230 ::rocrand_device::detail::split_ull(lo, hi, offset);
// Remember the low word so a wrap-around of the addition can be detected.
232 value old_counter = m_state.counter.x;
233 m_state.counter.x += lo;
// A smaller low word after the addition means it wrapped: carry 1 into the high word.
// Assumes `value` is an unsigned type — TODO confirm.
234 m_state.counter.y += hi + (m_state.counter.x < old_counter ? 1 : 0);
/// Returns the counter value that follows `counter`. Static and by-value: the
/// caller's object is not modified; callers assign the return value back to
/// m_state.counter.
// NOTE(review): the statements around the one shown (the update of counter.x,
// the use of `add`, and the return) are not visible in this excerpt.
237 __forceinline__ __device__ __host__
static state_value bump_counter(state_value counter)
// 1 when counter.x is zero, otherwise 0 — presumably the carry into counter.y
// after counter.x has wrapped; confirm against the full body.
240 value add = counter.x == 0 ? 1 : 0;
/// Combines two consecutive results into one `state_value`, selected by the
/// current substate. Const: does not modify the engine.
/// \param prev result that was cached before the counter was bumped
/// \param next result computed after the counter was bumped
// NOTE(review): only `case 1` is visible in this excerpt; other case labels may exist.
245 __forceinline__ __device__ __host__ state_value interleave(
const state_value prev,
246 const state_value next)
const
248 switch(m_state.substate)
// One element of `prev` was already consumed: pair its second element with the
// first element of `next`.
251 case 1:
return state_value{prev.y, next.x};
// Tells the compiler that control never reaches this point, i.e. that substate
// always matches one of the case labels.
253 __builtin_unreachable();
// Engine state (counter, key, cached result and substate) used by the
// non-static member functions of this class.
257 threefry_state_2 m_state;