53 #ifndef ROCRAND_PHILOX4X32_10_H_
54 #define ROCRAND_PHILOX4X32_10_H_
57 #define FQUALIFIERS __forceinline__ __device__
60 #include "rocrand/rocrand_common.h"
64 #define ROCRAND_PHILOX_M4x32_0 0xD2511F53U
65 #define ROCRAND_PHILOX_M4x32_1 0xCD9E8D57U
66 #define ROCRAND_PHILOX_W32_0 0x9E3779B9U
67 #define ROCRAND_PHILOX_W32_1 0xBB67AE85U
77 #define ROCRAND_PHILOX4x32_DEFAULT_SEED 0xdeadbeefdeadbeefULL
80 namespace rocrand_device {
84 unsigned int mulhilo32(
unsigned int x,
unsigned int y,
unsigned int& z)
86 unsigned long long xy = mad_u64_u32(x, y, 0);
87 z =
static_cast<unsigned int>(xy >> 32);
88 return static_cast<unsigned int>(xy);
93 class philox4x32_10_engine
96 struct philox4x32_10_state
101 unsigned int substate;
103 #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
109 unsigned int boxmuller_float_state;
110 unsigned int boxmuller_double_state;
111 float boxmuller_float;
112 double boxmuller_double;
117 philox4x32_10_engine()
128 philox4x32_10_engine(
const unsigned long long seed,
129 const unsigned long long subsequence,
130 const unsigned long long offset)
132 this->seed(seed, subsequence, offset);
141 void seed(
unsigned long long seed_value,
142 const unsigned long long subsequence,
143 const unsigned long long offset)
145 m_state.key.x =
static_cast<unsigned int>(seed_value);
146 m_state.key.y =
static_cast<unsigned int>(seed_value >> 32);
147 this->restart(subsequence, offset);
152 void discard(
unsigned long long offset)
154 this->discard_impl(offset);
155 this->m_state.result = this->ten_rounds(m_state.counter, m_state.key);
163 void discard_subsequence(
unsigned long long subsequence)
165 this->discard_subsequence_impl(subsequence);
166 m_state.result = this->ten_rounds(m_state.counter, m_state.key);
170 void restart(
const unsigned long long subsequence,
171 const unsigned long long offset)
173 m_state.counter = {0, 0, 0, 0};
174 m_state.result = {0, 0, 0, 0};
175 m_state.substate = 0;
176 #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
177 m_state.boxmuller_float_state = 0;
178 m_state.boxmuller_double_state = 0;
180 this->discard_subsequence_impl(subsequence);
181 this->discard_impl(offset);
182 m_state.result = this->ten_rounds(m_state.counter, m_state.key);
186 unsigned int operator()()
194 #if defined(__HIP_PLATFORM_AMD__)
195 unsigned int ret = m_state.result.data[m_state.substate];
197 unsigned int ret = (&m_state.result.x)[m_state.substate];
200 if(m_state.substate == 4)
202 m_state.substate = 0;
203 this->discard_state();
204 m_state.result = this->ten_rounds(m_state.counter, m_state.key);
212 uint4 ret = m_state.result;
213 this->discard_state();
214 m_state.result = this->ten_rounds(m_state.counter, m_state.key);
215 return this->interleave(ret, m_state.result);
222 void discard_impl(
unsigned long long offset)
225 m_state.substate += offset & 3;
226 unsigned long long counter_offset = offset / 4;
227 counter_offset += m_state.substate < 4 ? 0 : 1;
228 m_state.substate += m_state.substate < 4 ? 0 : -4;
230 this->discard_state(counter_offset);
235 void discard_subsequence_impl(
unsigned long long subsequence)
237 unsigned int lo =
static_cast<unsigned int>(subsequence);
238 unsigned int hi =
static_cast<unsigned int>(subsequence >> 32);
240 unsigned int temp = m_state.counter.z;
241 m_state.counter.z += lo;
242 m_state.counter.w += hi + (m_state.counter.z < temp ? 1 : 0);
248 void discard_state(
unsigned long long offset)
250 unsigned int lo =
static_cast<unsigned int>(offset);
251 unsigned int hi =
static_cast<unsigned int>(offset >> 32);
253 uint4 temp = m_state.counter;
254 m_state.counter.x += lo;
255 m_state.counter.y += hi + (m_state.counter.x < temp.x ? 1 : 0);
256 m_state.counter.z += (m_state.counter.y < temp.y ? 1 : 0);
257 m_state.counter.w += (m_state.counter.z < temp.z ? 1 : 0);
265 m_state.counter = this->bump_counter(m_state.counter);
269 static uint4 bump_counter(uint4 counter)
272 unsigned int add = counter.x == 0 ? 1 : 0;
273 counter.y += add; add = counter.y == 0 ? add : 0;
274 counter.z += add; add = counter.z == 0 ? add : 0;
280 uint4 interleave(
const uint4 prev,
const uint4 next)
const
282 switch(m_state.substate)
287 return uint4{ prev.y, prev.z, prev.w, next.x };
289 return uint4{ prev.z, prev.w, next.x, next.y };
291 return uint4{ prev.w, next.x, next.y, next.z };
293 __builtin_unreachable();
298 uint4 ten_rounds(uint4 counter, uint2 key)
300 counter = this->single_round(counter, key); key = this->bumpkey(key);
301 counter = this->single_round(counter, key); key = this->bumpkey(key);
302 counter = this->single_round(counter, key); key = this->bumpkey(key);
303 counter = this->single_round(counter, key); key = this->bumpkey(key);
304 counter = this->single_round(counter, key); key = this->bumpkey(key);
305 counter = this->single_round(counter, key); key = this->bumpkey(key);
306 counter = this->single_round(counter, key); key = this->bumpkey(key);
307 counter = this->single_round(counter, key); key = this->bumpkey(key);
308 counter = this->single_round(counter, key); key = this->bumpkey(key);
309 return this->single_round(counter, key);
315 static uint4 single_round(uint4 counter, uint2 key)
320 unsigned int lo0 = detail::mulhilo32(ROCRAND_PHILOX_M4x32_0, counter.x, hi0);
321 unsigned int lo1 = detail::mulhilo32(ROCRAND_PHILOX_M4x32_1, counter.z, hi1);
323 hi1 ^ counter.y ^ key.x,
325 hi0 ^ counter.w ^ key.y,
331 static uint2 bumpkey(uint2 key)
333 key.x += ROCRAND_PHILOX_W32_0;
334 key.y += ROCRAND_PHILOX_W32_1;
340 philox4x32_10_state m_state;
342 #ifndef ROCRAND_DETAIL_PHILOX_BM_NOT_IN_STATE
343 friend struct detail::engine_boxmuller_helper<philox4x32_10_engine>;
356 typedef rocrand_device::philox4x32_10_engine rocrand_state_philox4x32_10;
372 const unsigned long long subsequence,
373 const unsigned long long offset,
374 rocrand_state_philox4x32_10 * state)
376 *state = rocrand_state_philox4x32_10(seed, subsequence, offset);
392 unsigned int rocrand(rocrand_state_philox4x32_10 * state)
394 return state->next();
410 uint4
rocrand4(rocrand_state_philox4x32_10 * state)
412 return state->next4();
424 void skipahead(
unsigned long long offset, rocrand_state_philox4x32_10 * state)
426 return state->discard(offset);
441 return state->discard_subsequence(subsequence);
456 return state->discard_subsequence(sequence);
FQUALIFIERS unsigned int rocrand(rocrand_state_philox4x32_10 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_philox4x32_10.h:392
#define ROCRAND_PHILOX4x32_DEFAULT_SEED
Default seed for PHILOX4x32 PRNG.
Definition: rocrand_philox4x32_10.h:77
FQUALIFIERS void skipahead_subsequence(unsigned long long subsequence, rocrand_state_philox4x32_10 *state)
Updates Philox state to skip ahead by subsequence subsequences.
Definition: rocrand_philox4x32_10.h:439
FQUALIFIERS void skipahead_sequence(unsigned long long sequence, rocrand_state_philox4x32_10 *state)
Updates Philox state to skip ahead by sequence sequences.
Definition: rocrand_philox4x32_10.h:454
FQUALIFIERS uint4 rocrand4(rocrand_state_philox4x32_10 *state)
Returns four uniformly distributed random unsigned int values from [0; 2^32 - 1] range.
Definition: rocrand_philox4x32_10.h:410
FQUALIFIERS void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_philox4x32_10 *state)
Initializes Philox state.
Definition: rocrand_philox4x32_10.h:371
FQUALIFIERS void skipahead(unsigned long long offset, rocrand_state_philox4x32_10 *state)
Updates Philox state to skip ahead by offset elements.
Definition: rocrand_philox4x32_10.h:424
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31