21 #ifndef ROCRAND_XORWOW_H_ 
   22 #define ROCRAND_XORWOW_H_ 
   24 #include "rocrand/rocrand_common.h" 
   25 #include "rocrand/rocrand_xorwow_precomputed.h" 
   35  #define ROCRAND_XORWOW_DEFAULT_SEED 0ULL  
   38 namespace rocrand_device {
 
   // Vector helper taking a destination pointer and a read-only source pointer.
   // The loop visits indices 0 .. XORWOW_N - 1. The loop body is not visible in
   // this listing; presumably it assigns src[i] to dst[i] -- TODO confirm.
   41 __forceinline__ __device__ __host__ 
void copy_vec(
unsigned int* dst, 
const unsigned int* src)
 
   43     for (
int i = 0; i < XORWOW_N; i++)
 
   // Multiplies the bit vector v by the bit matrix m, accumulating with XOR.
   //   m: flat array read as m[(i * XORWOW_M + j) * XORWOW_N + k].
   //   v: XORWOW_N words whose individual bits select which rows of m are used.
   // NOTE(review): the statement that stores the result back into v is not
   // visible in this listing; the name suggests r is copied into v -- confirm.
   49 __forceinline__ __device__ __host__ 
void mul_mat_vec_inplace(
const unsigned int* m, 
unsigned int* v)
 
   // Accumulator for the product, starts as all zeros.
   51     unsigned int r[XORWOW_N] = { 0 };
 
   // Single flattened loop over every (word i, bit j) pair of v.
   52     for (
int ij = 0; ij < XORWOW_N * XORWOW_M; ij++)
 
   // i = index of the word in v.
   54         const int i = ij / XORWOW_M;
 
   // j = index of the bit inside word i.
   55         const int j = ij % XORWOW_M;
 
   // All-ones mask when bit j of v[i] is set, zero otherwise (avoids a branch
   // in the inner loop).
   56         const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
 
   57         for (
int k = 0; k < XORWOW_N; k++)
 
   // XOR the matrix row belonging to bit (i, j) into r when that bit is set.
   59             r[k] ^= b & m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k];
 
   // Box-Muller related fields of the engine state. They are compiled in
   // unless ROCRAND_DETAIL_BM_NOT_IN_STATE is defined (the matching #endif is
   // not visible in this listing).
   75     #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
   // Presumably a flag telling whether boxmuller_float holds a cached value
   // -- TODO confirm against the Box-Muller helper.
   81         unsigned int boxmuller_float_state; 
 
   // Presumably a flag telling whether boxmuller_double holds a cached value
   // -- TODO confirm against the Box-Muller helper.
   82         unsigned int boxmuller_double_state; 
 
   // Cached float value (presumably the spare Box-Muller output).
   83         float boxmuller_float; 
 
   // Cached double value (presumably the spare Box-Muller output).
   84         double boxmuller_double; 
 
   // Default constructor, usable from host and device code. Its initializer
   // list / body is not visible in this listing.
   91     __forceinline__ __device__ __host__ xorwow_engine()
 
  // Constructs the engine from a 64-bit seed and then skips ahead by
  // `subsequence` subsequences.
  // NOTE(review): this listing omits original lines 117-121 and 125, so the
  // way t0/t1 are mixed into m_state.x and the use of `offset` are not visible
  // here (presumably discard(offset) follows discard_subsequence -- confirm).
  100     __forceinline__ __device__ __host__ xorwow_engine(
const unsigned long long seed,
 
  101                                                       const unsigned long long subsequence,
 
  102                                                       const unsigned long long offset)
 
  // Fixed starting values for the five state words x[0..4].
  104         m_state.x[0] = 123456789U;
 
  105         m_state.x[1] = 362436069U;
 
  106         m_state.x[2] = 521288629U;
 
  107         m_state.x[3] = 88675123U;
 
  108         m_state.x[4] = 5783321U;
 
  // Fixed starting value for the additive counter d.
  110         m_state.d = 6615241U;
 
  // s0: low 32 bits of the seed XORed with a constant.
  113         const unsigned int s0 = 
static_cast<unsigned int>(seed) ^ 0x2c7f967fU;
 
  // s1: high 32 bits of the seed XORed with a constant.
  114         const unsigned int s1 = 
static_cast<unsigned int>(seed >> 32) ^ 0xa03697cbU;
 
  // t0/t1: scrambled seed halves; the products wrap modulo 2^32 because the
  // operands are unsigned int.
  115         const unsigned int t0 = 1228688033U * s0;
 
  116         const unsigned int t1 = 2073658381U * s1;
 
  // Fold both scrambled halves into the counter.
  122         m_state.d += t1 + t0;
 
  // Move to the requested subsequence.
  124         discard_subsequence(subsequence);
 
  // Reset the Box-Muller flags to 0 (only when those fields exist in the state).
  127     #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  128         m_state.boxmuller_float_state = 0;
 
  129         m_state.boxmuller_double_state = 0;
 
  // Advances the engine state by `offset` using the precomputed jump matrices,
  // then adjusts the counter d to match.
  134     __forceinline__ __device__ __host__ 
void discard(
unsigned long long offset)
 
  // When compiling for the device, the d_-prefixed jump matrices are used;
  // the h_-prefixed call below is presumably the #else branch (the #else and
  // #endif lines are not visible in this listing).
  136         #ifdef __HIP_DEVICE_COMPILE__ 
  137         jump(offset, d_xorwow_jump_matrices);
 
  139         jump(offset, h_xorwow_jump_matrices);
 
  // Add offset * 362437 to d; offset is truncated to 32 bits first and the
  // arithmetic wraps modulo 2^32. 362437 is presumably the per-output
  // increment of d -- TODO confirm against next().
  143         m_state.d += 
static_cast<unsigned int>(offset) * 362437;
 
  // Advances the engine state by `subsequence` subsequences using the
  // precomputed sequence jump matrices. No update of d is visible in this
  // listing.
  148     __forceinline__ __device__ __host__ 
void discard_subsequence(
unsigned long long subsequence)
 
  // When compiling for the device, the d_-prefixed matrices are used; the
  // h_-prefixed call below is presumably the #else branch (the #else and
  // #endif lines are not visible in this listing).
  151         #ifdef __HIP_DEVICE_COMPILE__ 
  152         jump(subsequence, d_xorwow_sequence_jump_matrices);
 
  154         jump(subsequence, h_xorwow_sequence_jump_matrices);
 
  // Call operator returning an unsigned int. Its body is not visible in this
  // listing; presumably it forwards to next() -- TODO confirm.
  160     __forceinline__ __device__ __host__ 
unsigned int operator()()
 
  // Produces the next 32-bit output and advances the state words x[0..4].
  // NOTE(review): original lines 173-175 are missing from this listing, so any
  // update of m_state.d performed before the return is not visible here.
  165     __forceinline__ __device__ __host__ 
unsigned int next()
 
  // t: oldest word mixed with itself shifted right by 2.
  167         const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2);
 
  // Move words x[1..4] down by one slot; x[4] still holds its old value
  // until the last assignment below.
  168         m_state.x[0] = m_state.x[1];
 
  169         m_state.x[1] = m_state.x[2];
 
  170         m_state.x[2] = m_state.x[3];
 
  171         m_state.x[3] = m_state.x[4];
 
  // New x[4] = old x[4] ^ (old x[4] << 4) ^ t ^ (t << 1).
  172         m_state.x[4] = (m_state.x[4] ^ (m_state.x[4] << 4)) ^ (t ^ (t << 1));
 
  // Output is the counter plus the newest state word (wraps modulo 2^32).
  176         return m_state.d + m_state.x[4];
 
  // Advances m_state.x by v steps using a table of precomputed jump matrices.
  // v is consumed XORWOW_JUMP_LOG2 bits at a time.
  // NOTE(review): the enclosing loop and the declaration/update of `mi` are
  // not visible in this listing; presumably mi selects the matrix for the
  // current digit position -- confirm.
  180     __forceinline__ __device__ __host__ 
void 
  181         jump(
unsigned long long v,
 
  182              const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE])
 
  // is: the lowest XORWOW_JUMP_LOG2 bits of v, i.e. the current digit.
  200             const unsigned int is = 
static_cast<unsigned int>(v) & ((1 << XORWOW_JUMP_LOG2) - 1);
 
  // Apply matrix mi to the state `is` times.
  201             for (
unsigned int i = 0; i < is; i++)
 
  203                 detail::mul_mat_vec_inplace(jump_matrices[mi], m_state.x);
 
  // Drop the digit that was just processed.
  206             v >>= XORWOW_JUMP_LOG2;
 
  // The engine's state: words x[0..4], counter d and, when enabled, the
  // Box-Muller fields.
  212     xorwow_state m_state;
 
  // Befriends the Box-Muller helper unless ROCRAND_DETAIL_BM_NOT_IN_STATE is
  // defined; presumably so it can reach the Box-Muller fields in m_state
  // -- TODO confirm. The matching #endif is not visible in this listing.
  214     #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  215     friend struct detail::engine_boxmuller_helper<xorwow_engine>;
 
  // Alias for the XORWOW engine; this is the state type taken by the free
  // functions rocrand_init, rocrand and skipahead in this file.
  228 typedef rocrand_device::xorwow_engine rocrand_state_xorwow;
 
  // Initializes *state by constructing a fresh engine from
  // (seed, subsequence, offset) and assigning it to *state.
  // state must point to writable storage; it is dereferenced without a null
  // check.
  242 __forceinline__ __device__ __host__ 
void rocrand_init(
const unsigned long long seed,
 
  243                                                       const unsigned long long subsequence,
 
  244                                                       const unsigned long long offset,
 
  245                                                       rocrand_state_xorwow*    state)
 
  247     *state = rocrand_state_xorwow(seed, subsequence, offset);
 
  // Returns the next 32-bit value from the generator by calling state->next(),
  // which also advances *state. state is dereferenced without a null check.
  262 __forceinline__ __device__ __host__ 
unsigned int rocrand(rocrand_state_xorwow* state)
 
  264     return state->next();
 
  // Skips *state ahead by `offset` by forwarding to state->discard(offset).
  // state is dereferenced without a null check.
  275 __forceinline__ __device__ __host__ 
void skipahead(
unsigned long long    offset,
 
  276                                                    rocrand_state_xorwow* state)
 
  // discard() returns void, so this `return` only forwards the call.
  278     return state->discard(offset);
 
  // Tail of skipahead_subsequence(subsequence, state); the first line of its
  // signature is missing from this listing. It forwards to
  // state->discard_subsequence(subsequence).
  291                                                                rocrand_state_xorwow* state)
 
  293     return state->discard_subsequence(subsequence);
 
  // Tail of skipahead_sequence(sequence, state); the first line of its
  // signature is missing from this listing. It forwards to the same
  // discard_subsequence() member that skipahead_subsequence uses.
  306                                                             rocrand_state_xorwow* state)
 
  308     return state->discard_subsequence(sequence);
 
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_xorwow *state)
Returns a uniformly distributed random unsigned int value in the range [0, 2^32 - 1].
Definition: rocrand_xorwow.h:262
 
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `sequence` sequences.
Definition: rocrand_xorwow.h:305
 
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `offset` elements.
Definition: rocrand_xorwow.h:275
 
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_xorwow *state)
Initializes the XORWOW state from the given seed, subsequence and offset.
Definition: rocrand_xorwow.h:242
 
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `subsequence` subsequences.
Definition: rocrand_xorwow.h:290
 
#define ROCRAND_XORWOW_DEFAULT_SEED 0ULL
Default seed for XORWOW PRNG.
Definition: rocrand_xorwow.h:35