53 #ifndef ROCRAND_THREEFRY2_IMPL_H_
54 #define ROCRAND_THREEFRY2_IMPL_H_
57 #define FQUALIFIERS __forceinline__ __device__
60 #include "rocrand/rocrand_threefry_common.h"
61 #include <rocrand/rocrand_common.h>
63 #ifndef THREEFRY2x32_DEFAULT_ROUNDS
64 #define THREEFRY2x32_DEFAULT_ROUNDS 20
67 #ifndef THREEFRY2x64_DEFAULT_ROUNDS
68 #define THREEFRY2x64_DEFAULT_ROUNDS 20
// Table of eight rotation amounts for the `unsigned int` word type. It is the
// table read by threefry_rotation_array<unsigned int>(); threefry_rounds()
// selects an entry with (round_idx & 7u), so the eight entries repeat every
// eight rounds.
// NOTE(review): presumably the Threefry-2x32 rotation constants of the
// Random123 reference implementation — confirm against upstream before
// changing any value.
75 static constexpr __device__
int THREEFRY_ROTATION_32_2[8] = {13, 15, 26, 6, 17, 29, 16, 24};
// Table of eight rotation amounts for the `unsigned long long` word type. It
// is the table read by threefry_rotation_array<unsigned long long>();
// threefry_rounds() selects an entry with (round_idx & 7u), so the eight
// entries repeat every eight rounds.
// NOTE(review): presumably the Threefry-2x64 rotation constants of the
// Random123 reference implementation — confirm against upstream before
// changing any value.
83 static constexpr __device__
int THREEFRY_ROTATION_64_2[8] = {16, 42, 12, 31, 16, 32, 24, 21};
85 namespace rocrand_device
// Variant of threefry_rotation_array for `unsigned int` words: returns the
// rotation amount stored at position `index` of THREEFRY_ROTATION_32_2.
// `index` is not range-checked here. The table has 8 entries, so callers must
// pass a value in 0..7 (threefry_rounds() passes `round_idx & 7u`).
92 FQUALIFIERS int threefry_rotation_array<unsigned int>(
int index)
94 return THREEFRY_ROTATION_32_2[index];
// Variant of threefry_rotation_array for `unsigned long long` words: returns
// the rotation amount stored at position `index` of THREEFRY_ROTATION_64_2.
// `index` is not range-checked here. The table has 8 entries, so callers must
// pass a value in 0..7 (threefry_rounds() passes `round_idx & 7u`).
98 FQUALIFIERS int threefry_rotation_array<unsigned long long>(
int index)
100 return THREEFRY_ROTATION_64_2[index];
// Class template threefry_engine2_base: owns a threefry_state_2 (`m_state`)
// and provides the discard, counter and round helpers that operate on it.
//   state_value - aggregate type of the counter, key and result; the visible
//                 code reads and writes its `.x` and `.y` members.
//   value       - type of one element of a state_value (locals holding
//                 `result` elements and `counter.x` are declared as `value`).
//   Nrounds     - number of iterations of the round loop in threefry_rounds();
//                 a static_assert there rejects values above 32.
103 template<
typename state_value,
typename value,
unsigned int Nrounds>
104 class threefry_engine2_base
// State record held by the engine as `m_state`.
// NOTE(review): the engine code also uses m_state.counter, m_state.key and
// m_state.result; their declarations are not visible in this excerpt.
107 struct threefry_state_2
// Index of the element of `result` that is read next. The visible code resets
// it to 0 when it reaches 2.
112 unsigned int substate;
// Advances the engine by `offset` and refreshes the cached output.
//   offset - amount passed unchanged to discard_impl(), which splits it into a
//            substate step (offset & 1) and a counter step (offset / 2).
115 FQUALIFIERS void discard(
unsigned long long offset)
// First move substate/counter, then recompute `result` so it matches the new
// counter.
117 this->discard_impl(offset);
118 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// NOTE(review): the jump in original line numbers (118 -> 123) suggests the
// next statement belongs to a separate definition whose signature is not
// visible in this excerpt — confirm. It recomputes `result` from the current
// counter and key without moving the counter.
123 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// Skips ahead by `subsequence` and refreshes the cached output.
//   subsequence - amount passed unchanged to discard_subsequence_impl(), which
//                 adds it to m_state.counter.y.
131 FQUALIFIERS void discard_subsequence(
unsigned long long subsequence)
// Move the counter first, then recompute `result` so it matches the new
// counter.
133 this->discard_subsequence_impl(subsequence);
134 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// NOTE(review): the signature of the function containing these lines is not
// visible in this excerpt; it is presumably the generator that hands out one
// `value` at a time — confirm against the full file.
// Read element `substate` of the cached result. When __HIP_PLATFORM_AMD__ is
// defined the element is read through `result.data[...]`.
144 #if defined(__HIP_PLATFORM_AMD__)
145 value ret = m_state.result.data[m_state.substate];
// Alternative read that indexes from the address of `result.x`.
// NOTE(review): presumably the other branch of the conditional above; the
// matching else/endif directives are not visible here.
147 value ret = (&m_state.result.x)[m_state.substate];
// Once substate reaches 2, start a fresh block: reset the index, step the
// counter with bump_counter() and recompute `result`.
// NOTE(review): the statement that increments substate is not visible in this
// excerpt.
150 if(m_state.substate == 2)
152 m_state.substate = 0;
153 m_state.counter = this->bump_counter(m_state.counter);
154 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// NOTE(review): the signature of the function containing these lines is not
// visible in this excerpt; it returns a state_value built by interleave(), so
// it is presumably the generator that hands out a whole state_value — confirm.
// Keep the currently cached block, then step the counter and compute the next
// block.
161 state_value ret = m_state.result;
162 m_state.counter = this->bump_counter(m_state.counter);
163 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
// interleave() picks elements from the previous and the new block according
// to m_state.substate.
165 return this->interleave(ret, m_state.result);
// Applies Nrounds rounds of mixing to `counter` using `key` and yields a
// state_value. Static: it does not touch m_state.
//   counter - input block.
//   key     - key material; combined with a parity word into the `ks` array.
// NOTE(review): several statements of the body (declarations and initial
// values of X and ks, the other steps of each round, the return) are not
// visible in this excerpt.
169 FQUALIFIERS static state_value threefry_rounds(state_value counter, state_value key)
// Compile-time guard: the template argument Nrounds must not exceed 32.
174 static_assert(Nrounds <= 32,
"32 or less only supported in threefry rounds");
// ks[2] starts from the value returned by skein_ks_parity<value>().
176 ks[2] = skein_ks_parity<value>();
// One iteration per round, Nrounds iterations in total.
191 for(
unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
// Pass X.y through rotl with the table entry for this round (presumably a
// left rotation, going by the helper's name); `& 7u` cycles through the eight
// table entries.
194 X.y = rotl<value>(X.y, threefry_rotation_array<value>(round_idx & 7u));
// True on every fourth round (round_idx = 3, 7, 11, ...): inject key material.
197 if((round_idx & 3u) == 3)
// Zero-based number of this injection.
199 unsigned int inject_idx = round_idx / 4;
// Add two of the three ks entries, chosen by stepping through ks modulo 3,
// then add the one-based injection number to X.y.
201 X.x += ks[(1 + inject_idx) % 3];
202 X.y += ks[(2 + inject_idx) % 3];
203 X.y += 1 + inject_idx;
// Moves the engine position forward by `offset` without recomputing `result`
// (callers such as discard() do that afterwards).
//   offset - split into a substate step (lowest bit) and a counter step
//            (offset / 2).
212 FQUALIFIERS void discard_impl(
unsigned long long offset)
// An odd offset advances substate by one.
215 m_state.substate += offset & 1;
216 unsigned long long counter_offset = offset / 2;
// If substate reached 2, carry one extra step into the counter ...
217 counter_offset += m_state.substate < 2 ? 0 : 1;
// ... and bring substate back into range. substate is an unsigned int, so
// adding -2 subtracts 2 through unsigned wrap-around.
218 m_state.substate += m_state.substate < 2 ? 0 : -2;
220 this->discard_state(counter_offset);
// Adds `subsequence` to m_state.counter.y without recomputing `result`
// (discard_subsequence() does that afterwards).
// NOTE(review): assumes counter.y has type `value`; if `value` is narrower
// than unsigned long long, the high bits of `subsequence` would be dropped by
// the conversion — TODO confirm this is intended.
224 FQUALIFIERS void discard_subsequence_impl(
unsigned long long subsequence)
226 m_state.counter.y += subsequence;
// Adds `offset` to the counter, treating counter.x as the low part and
// counter.y as the high part.
//   offset - amount to add; split into `lo` and `hi` by split_ull().
// NOTE(review): the declarations of `lo` and `hi` are not visible in this
// excerpt.
231 FQUALIFIERS void discard_state(
unsigned long long offset)
234 ::rocrand_device::detail::split_ull(lo, hi, offset);
// Remember the old low part so wrap-around can be detected after the add.
236 value old_counter = m_state.counter.x;
237 m_state.counter.x += lo;
// If the low part wrapped (new value smaller than the old one), carry 1 into
// the high part together with `hi`.
238 m_state.counter.y += hi + (m_state.counter.x < old_counter ? 1 : 0);
// Takes a counter by value and yields a state_value. Static: it does not
// touch m_state.
// NOTE(review): only one statement of the body is visible. Presumably
// counter.x is incremented on an omitted line before this one and `add` is
// then added to counter.y as the carry — confirm against the full file.
241 FQUALIFIERS static state_value bump_counter(state_value counter)
// `add` is 1 exactly when counter.x is 0 at this point, otherwise 0.
244 value add = counter.x == 0 ? 1 : 0;
// Builds a state_value from two consecutive blocks, choosing elements
// according to m_state.substate. Declared const: it does not modify the
// engine.
//   prev - the earlier block.
//   next - the later block.
// NOTE(review): only the `case 1` branch is visible in this excerpt; any
// other cases are on omitted lines.
249 FQUALIFIERS state_value interleave(
const state_value prev,
const state_value next)
const
251 switch(m_state.substate)
// substate == 1: the result is the second element of `prev` followed by the
// first element of `next`.
254 case 1:
return state_value{prev.y, next.x};
// Tells the compiler that control never reaches this point.
256 __builtin_unreachable();
// Engine state (counter, key, cached result and substate) that the member
// functions above read and update.
260 threefry_state_2 m_state;
#define FQUALIFIERS
// Shorthand for commonly used function qualifiers.
// Definition: rocrand_uniform.h:31