21 #ifndef ROCRAND_XORWOW_H_ 
   22 #define ROCRAND_XORWOW_H_ 
   24 #include "rocrand/rocrand_common.h" 
   25 #include "rocrand/rocrand_xorwow_precomputed.h" 
   27 #include <hip/hip_runtime.h> 
   37  #define ROCRAND_XORWOW_DEFAULT_SEED 0ULL  
   40 namespace rocrand_device {
 
   43 __forceinline__ __device__ __host__ 
void copy_vec(
unsigned int* dst, 
const unsigned int* src)
 
   45     for (
int i = 0; i < XORWOW_N; i++)
 
   51 __forceinline__ __device__ __host__ 
void mul_mat_vec_inplace(
const unsigned int* m, 
unsigned int* v)
 
   53     unsigned int r[XORWOW_N] = { 0 };
 
   54     for (
int ij = 0; ij < XORWOW_N * XORWOW_M; ij++)
 
   56         const int i = ij / XORWOW_M;
 
   57         const int j = ij % XORWOW_M;
 
   58         const unsigned int b = (v[i] & (1U << j)) ? 0xffffffff : 0x0;
 
   59         for (
int k = 0; k < XORWOW_N; k++)
 
   61             r[k] ^= b & m[i * XORWOW_M * XORWOW_N + j * XORWOW_N + k];
 
   77     #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
   83         unsigned int boxmuller_float_state; 
 
   84         unsigned int boxmuller_double_state; 
 
   85         float boxmuller_float; 
 
   86         double boxmuller_double; 
 
   93     __forceinline__ __device__ __host__ xorwow_engine()
 
  102     __forceinline__ __device__ __host__ xorwow_engine(
const unsigned long long seed,
 
  103                                                       const unsigned long long subsequence,
 
  104                                                       const unsigned long long offset)
 
  106         m_state.x[0] = 123456789U;
 
  107         m_state.x[1] = 362436069U;
 
  108         m_state.x[2] = 521288629U;
 
  109         m_state.x[3] = 88675123U;
 
  110         m_state.x[4] = 5783321U;
 
  112         m_state.d = 6615241U;
 
  115         const unsigned int s0 = 
static_cast<unsigned int>(seed) ^ 0x2c7f967fU;
 
  116         const unsigned int s1 = 
static_cast<unsigned int>(seed >> 32) ^ 0xa03697cbU;
 
  117         const unsigned int t0 = 1228688033U * s0;
 
  118         const unsigned int t1 = 2073658381U * s1;
 
  124         m_state.d += t1 + t0;
 
  126         discard_subsequence(subsequence);
 
  129     #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  130         m_state.boxmuller_float_state = 0;
 
  131         m_state.boxmuller_double_state = 0;
 
  136     __forceinline__ __device__ __host__ 
void discard(
unsigned long long offset)
 
  138         #ifdef __HIP_DEVICE_COMPILE__ 
  139         jump(offset, d_xorwow_jump_matrices);
 
  141         jump(offset, h_xorwow_jump_matrices);
 
  145         m_state.d += 
static_cast<unsigned int>(offset) * 362437;
 
  150     __forceinline__ __device__ __host__ 
void discard_subsequence(
unsigned long long subsequence)
 
  153         #ifdef __HIP_DEVICE_COMPILE__ 
  154         jump(subsequence, d_xorwow_sequence_jump_matrices);
 
  156         jump(subsequence, h_xorwow_sequence_jump_matrices);
 
  162     __forceinline__ __device__ __host__ 
unsigned int operator()()
 
  167     __forceinline__ __device__ __host__ 
unsigned int next()
 
  169         const unsigned int t = m_state.x[0] ^ (m_state.x[0] >> 2);
 
  170         m_state.x[0] = m_state.x[1];
 
  171         m_state.x[1] = m_state.x[2];
 
  172         m_state.x[2] = m_state.x[3];
 
  173         m_state.x[3] = m_state.x[4];
 
  174         m_state.x[4] = (m_state.x[4] ^ (m_state.x[4] << 4)) ^ (t ^ (t << 1));
 
  178         return m_state.d + m_state.x[4];
 
  182     __forceinline__ __device__ __host__ 
void 
  183         jump(
unsigned long long v,
 
  184              const unsigned int jump_matrices[XORWOW_JUMP_MATRICES][XORWOW_SIZE])
 
  202             const unsigned int is = 
static_cast<unsigned int>(v) & ((1 << XORWOW_JUMP_LOG2) - 1);
 
  203             for (
unsigned int i = 0; i < is; i++)
 
  205                 detail::mul_mat_vec_inplace(jump_matrices[mi], m_state.x);
 
  208             v >>= XORWOW_JUMP_LOG2;
 
  214     xorwow_state m_state;
 
  216     #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE 
  217     friend struct detail::engine_boxmuller_helper<xorwow_engine>;
 
  230 typedef rocrand_device::xorwow_engine rocrand_state_xorwow;
 
  244 __forceinline__ __device__ __host__
 
  246                   const unsigned long long subsequence,
 
  247                   const unsigned long long offset,
 
  248                   rocrand_state_xorwow*    state)
 
  250     *state = rocrand_state_xorwow(seed, subsequence, offset);
 
  265 __forceinline__ __device__ __host__
 
  266 unsigned int rocrand(rocrand_state_xorwow* state)
 
  268     return state->next();
 
  279 __forceinline__ __device__ __host__
 
  280 void skipahead(
unsigned long long offset, rocrand_state_xorwow* state)
 
  282     return state->discard(offset);
 
  294 __forceinline__ __device__ __host__
 
  297     return state->discard_subsequence(subsequence);
 
  309 __forceinline__ __device__ __host__
 
  312     return state->discard_subsequence(sequence);
 
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_xorwow *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_xorwow.h:266
 
__forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `sequence` sequences.
Definition: rocrand_xorwow.h:310
 
__forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `offset` elements.
Definition: rocrand_xorwow.h:280
 
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_xorwow *state)
Initialize XORWOW state.
Definition: rocrand_xorwow.h:245
 
__forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_xorwow *state)
Updates the XORWOW state to skip ahead by `subsequence` subsequences.
Definition: rocrand_xorwow.h:295
 
#define ROCRAND_XORWOW_DEFAULT_SEED 0ULL
Default seed for XORWOW PRNG.
Definition: rocrand_xorwow.h:37