21 #ifndef ROCRAND_XORWOW_H_
22 #define ROCRAND_XORWOW_H_
24 #include "rocrand/rocrand_common.h"
25 #include "rocrand/rocrand_xorwow_precomputed.h"
35 #define ROCRAND_XORWOW_DEFAULT_SEED 0ULL
38 namespace rocrand_device {
// copy_vec(dst, src): inline helper usable from both host and device code.
// Only the signature and the loop header are visible in this listing; the
// loop counts i over [0, XORWOW_N).
// NOTE(review): the loop body is not shown here; presumably it assigns
// src[i] to dst[i] -- confirm against the full header.
41 __forceinline__ __device__ __host__
void copy_vec(
unsigned int* dst,
const unsigned int* src)
43 for (
int i = 0; i < XORWOW_N; i++)
// mul_mat_vec_inplace(m, v): accumulates into a local vector r the XOR of
// those rows of m that are selected by the set bits of v.
//   m - flat array indexed as m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k]
//   v - XORWOW_N words whose bits select the rows
// NOTE(review): the write-back of r into v (implied by "inplace") is not
// visible in this listing; presumably done via copy_vec -- confirm.
49 __forceinline__ __device__ __host__
void mul_mat_vec_inplace(
const unsigned int* m,
unsigned int* v)
// Accumulator for the result; starts as all zeros.
51 unsigned int r[XORWOW_N] = { 0 };
// One flattened loop over every (word, bit) pair instead of two nested loops.
52 for (
int ij = 0; ij < XORWOW_N * XORWOW_M; ij++)
// i selects the word of v, j selects the bit inside that word.
54 const int i = ij / XORWOW_M;
55 const int j = ij % XORWOW_M;
// b is an all-ones mask when bit j of v[i] is set, zero otherwise, so the
// row can be applied without a branch in the inner loop.
// NOTE(review): 1U << j assumes j < 32, i.e. XORWOW_M <= 32 -- confirm.
56 const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
57 for (
int k = 0; k < XORWOW_N; k++)
// XOR the (i, j) row of m into r; the row is skipped when b == 0.
59 r[k] ^= b & m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k];
// These fields are compiled in only when ROCRAND_DETAIL_BM_NOT_IN_STATE is
// not defined. The enclosing struct header and the matching #endif are not
// visible in this listing.
// NOTE(review): presumably the *_state fields flag whether the corresponding
// cached Box-Muller value (boxmuller_float / boxmuller_double) is valid --
// confirm against detail::engine_boxmuller_helper.
75 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
81 unsigned int boxmuller_float_state;
82 unsigned int boxmuller_double_state;
83 float boxmuller_float;
84 double boxmuller_double;
// Default constructor, callable from host and device.
// NOTE(review): the body is not visible in this listing -- confirm which
// seed/subsequence/offset it uses against the full header.
91 __forceinline__ __device__ __host__ xorwow_engine()
// Seeded constructor: sets the state words to fixed starting constants,
// mixes the 64-bit seed into the state, then skips ahead by `subsequence`.
//   seed        - 64-bit seed; its low and high 32-bit halves are mixed separately
//   subsequence - passed to discard_subsequence()
//   offset      - NOTE(review): not used in any line visible here; listing
//                 line 125 is missing and presumably calls discard(offset) -- confirm.
100 __forceinline__ __device__ __host__ xorwow_engine(
const unsigned long long seed,
101 const unsigned long long subsequence,
102 const unsigned long long offset)
// Fixed starting values for the five x words and for the counter d.
104 m_state.x[0] = 123456789U;
105 m_state.x[1] = 362436069U;
106 m_state.x[2] = 521288629U;
107 m_state.x[3] = 88675123U;
108 m_state.x[4] = 5783321U;
110 m_state.d = 6615241U;
// s0 / s1: low and high 32 bits of the seed, each XORed with a constant.
113 const unsigned int s0 =
static_cast<unsigned int>(seed) ^ 0x2c7f967fU;
114 const unsigned int s1 =
static_cast<unsigned int>(seed >> 32) ^ 0xa03697cbU;
// t0 / t1: unsigned 32-bit products (wrap modulo 2^32).
115 const unsigned int t0 = 1228688033U * s0;
116 const unsigned int t1 = 2073658381U * s1;
// NOTE(review): listing lines 117-121 are not visible here; presumably they
// mix t0 / t1 into m_state.x[0..4] -- confirm.
122 m_state.d += t1 + t0;
// Skip ahead by the requested number of subsequences.
124 discard_subsequence(subsequence);
// Reset the Box-Muller flags (only when those fields exist in the state).
127 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
128 m_state.boxmuller_float_state = 0;
129 m_state.boxmuller_double_state = 0;
// discard(offset): skips the engine ahead using the per-element jump matrices
// and then adjusts the counter d.
134 __forceinline__ __device__ __host__
void discard(
unsigned long long offset)
// Device compilation uses the d_* matrix table, the other path uses the h_*
// table. NOTE(review): the #else / #endif between and after the two calls
// are not visible in this listing.
136 #ifdef __HIP_DEVICE_COMPILE__
137 jump(offset, d_xorwow_jump_matrices);
139 jump(offset, h_xorwow_jump_matrices);
// Add 362437 to d once per skipped element. Only the low 32 bits of offset
// are used and the product wraps modulo 2^32.
// NOTE(review): presumably 362437 matches the per-call increment of d in
// next() (not visible in this listing) -- confirm.
143 m_state.d +=
static_cast<unsigned int>(offset) * 362437;
// discard_subsequence(subsequence): skips the engine ahead using the
// sequence jump matrices. Unlike discard(), no visible line touches d.
148 __forceinline__ __device__ __host__
void discard_subsequence(
unsigned long long subsequence)
// Device compilation uses the d_* table, the other path uses the h_* table.
// NOTE(review): the #else / #endif directives are not visible in this listing.
151 #ifdef __HIP_DEVICE_COMPILE__
152 jump(subsequence, d_xorwow_sequence_jump_matrices);
154 jump(subsequence, h_xorwow_sequence_jump_matrices);
// Function-call operator returning an unsigned int, callable from host and
// device.
// NOTE(review): the body is not visible in this listing; presumably it
// forwards to next() -- confirm.
160 __forceinline__ __device__ __host__
unsigned int operator()()
// next(): advances the five-word state by one step and returns the sum of
// the counter d and the new x[4].
165 __forceinline__ __device__ __host__
unsigned int next()
// t is derived from the oldest word before it is shifted out.
167 const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2);
// Shift the words down by one position.
168 m_state.x[0] = m_state.x[1];
169 m_state.x[1] = m_state.x[2];
170 m_state.x[2] = m_state.x[3];
171 m_state.x[3] = m_state.x[4];
// The right-hand side still reads the previous x[4]; combine it with t to
// form the new newest word.
172 m_state.x[4] = (m_state.x[4] ^ (m_state.x[4] << 4)) ^ (t ^ (t << 1));
// NOTE(review): listing lines 173-175 are not visible here; presumably d is
// advanced there before being used below -- confirm.
176 return m_state.d + m_state.x[4];
// jump(v, jump_matrices): advances m_state.x by repeatedly applying
// precomputed matrices, consuming v in chunks of XORWOW_JUMP_LOG2 bits.
//   v             - jump amount; consumed from the low bits upwards
//   jump_matrices - table of XORWOW_JUMP_MATRICES matrices, each stored as
//                   XORWOW_SIZE unsigned ints
// NOTE(review): listing lines 183-199 are not visible here; the enclosing
// loop and the declaration/update of `mi` are missing. Presumably mi
// advances by one for each chunk of v -- confirm.
180 __forceinline__ __device__ __host__
void
181 jump(
unsigned long long v,
182 const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE])
// The low XORWOW_JUMP_LOG2 bits of v give the repeat count for this chunk.
200 const unsigned int is =
static_cast<unsigned int>(v) & ((1 << XORWOW_JUMP_LOG2) - 1);
// Apply matrix mi to the x words `is` times.
201 for (
unsigned int i = 0; i < is; i++)
203 detail::mul_mat_vec_inplace(jump_matrices[mi], m_state.x);
// Drop the bits just consumed so the next chunk becomes the low bits.
206 v >>= XORWOW_JUMP_LOG2;
// Engine state object; the methods in this listing read and write its
// x[0..4] words and its d counter.
212 xorwow_state m_state;
// When the Box-Muller fields are kept in the state, the helper is declared a
// friend of the engine. NOTE(review): presumably so it can reach m_state;
// the matching #endif is not visible in this listing.
214 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
215 friend struct detail::engine_boxmuller_helper<xorwow_engine>;
// Alias for rocrand_device::xorwow_engine; the free functions in this
// listing take a pointer to this type.
228 typedef rocrand_device::xorwow_engine rocrand_state_xorwow;
// rocrand_init(seed, subsequence, offset, state): builds a fresh XORWOW
// engine from the three arguments and assigns it to *state, replacing
// whatever the state held before.
//   state - must point to a valid rocrand_state_xorwow; it is dereferenced
//           without a null check.
242 __forceinline__ __device__ __host__
void rocrand_init(
const unsigned long long seed,
243 const unsigned long long subsequence,
244 const unsigned long long offset,
245 rocrand_state_xorwow* state)
247 *state = rocrand_state_xorwow(seed, subsequence, offset);
// rocrand(state): advances *state by one step and returns the unsigned int
// produced by state->next(). state is dereferenced without a null check.
262 __forceinline__ __device__ __host__
unsigned int rocrand(rocrand_state_xorwow* state)
264 return state->next();
// skipahead(offset, state): forwards to state->discard(offset).
// state is dereferenced without a null check.
275 __forceinline__ __device__ __host__
void skipahead(
unsigned long long offset,
276 rocrand_state_xorwow* state)
// discard() returns void, so this `return` only forwards the call.
278 return state->discard(offset);
// Tail of skipahead_subsequence(subsequence, state); the first line of the
// signature (listing line 290) is not visible here.
// Forwards to state->discard_subsequence(subsequence).
291 rocrand_state_xorwow* state)
293 return state->discard_subsequence(subsequence);
// Tail of skipahead_sequence(sequence, state); the first line of the
// signature (listing line 305) is not visible here.
// Forwards to the same engine method as skipahead_subsequence:
// state->discard_subsequence(sequence).
306 rocrand_state_xorwow* state)
308 return state->discard_subsequence(sequence);
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_xorwow *state)
Returns a uniformly distributed random unsigned int value from the range [0, 2^32 - 1].
Definition: rocrand_xorwow.h:262
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by 'sequence' sequences.
Definition: rocrand_xorwow.h:305
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by 'offset' elements.
Definition: rocrand_xorwow.h:275
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_xorwow *state)
Initializes the XORWOW state.
Definition: rocrand_xorwow.h:242
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by 'subsequence' subsequences.
Definition: rocrand_xorwow.h:290
#define ROCRAND_XORWOW_DEFAULT_SEED
Default seed for XORWOW PRNG.
Definition: rocrand_xorwow.h:35