55 #ifndef ROCRAND_MTGP32_H_ 
   56 #define ROCRAND_MTGP32_H_ 
   60 #include "rocrand/rocrand.h" 
   61 #include "rocrand/rocrand_common.h" 
   63 #define MTGP_MEXP 11213 
   65 #define MTGP_FLOOR_2P 256 
   66 #define MTGP_CEIL_2P 512 
   67 #define MTGP_TN MTGP_FLOOR_2P 
   68 #define MTGP_LS (MTGP_TN * 3) 
   69 #define MTGP_BN_MAX 512 
   71 #define MTGP_STATE 1024 
   72 #define MTGP_MASK 1023 
  107 namespace rocrand_device {
 
  111     unsigned int pos_tbl[MTGP_BN_MAX];
 
  112     unsigned int param_tbl[MTGP_BN_MAX][MTGP_TS];
 
  113     unsigned int temper_tbl[MTGP_BN_MAX][MTGP_TS];
 
  114     unsigned int single_temper_tbl[MTGP_BN_MAX][MTGP_TS];
 
  115     unsigned int sh1_tbl[MTGP_BN_MAX];
 
  116     unsigned int sh2_tbl[MTGP_BN_MAX];
 
  117     unsigned int mask[1];
 
  126     unsigned int status[MTGP_STATE];
 
  130 void rocrand_mtgp32_init_state(
unsigned int array[],
 
  131                                const mtgp32_fast_params *para, 
unsigned int seed)
 
  134     int size = para->mexp / 32 + 1;
 
  135     unsigned int hidden_seed;
 
  137     hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
 
  141     memset(array, tmp & 0xff, 
sizeof(
unsigned int) * size);
 
  143     array[1] = hidden_seed;
 
  144     for (i = 1; i < size; i++)
 
  145         array[i] ^= (1812433253) * (array[i - 1] ^ (array[i - 1] >> 30)) + i;
 
  151     __forceinline__ __device__ __host__
 
  158     __forceinline__ __device__ __host__ mtgp32_engine(
const mtgp32_state&  m_state,
 
  159                                                       const mtgp32_params* params,
 
  162         this->m_state = m_state;
 
  163         pos_tbl = params->pos_tbl[bid];
 
  164         sh1_tbl = params->sh1_tbl[bid];
 
  165         sh2_tbl = params->sh2_tbl[bid];
 
  166         mask = params->mask[0];
 
  167         for (
int j = 0; j < MTGP_TS; j++) {
 
  168             param_tbl[j] = params->param_tbl[bid][j];
 
  169             temper_tbl[j] = params->temper_tbl[bid][j];
 
  170             single_temper_tbl[j] = params->single_temper_tbl[bid][j];
 
  174     __forceinline__ __device__ __host__ 
void copy(
const mtgp32_engine* m_engine)
 
  176 #if defined(__HIP_DEVICE_COMPILE__) 
  177         const unsigned int thread_id = threadIdx.x;
 
  178         for(
int i = thread_id; i < MTGP_STATE; i += blockDim.x)
 
  179             m_state.status[i] = m_engine->m_state.status[i];
 
  183             m_state.offset = m_engine->m_state.offset;
 
  184             m_state.id = m_engine->m_state.id;
 
  185             pos_tbl = m_engine->pos_tbl;
 
  186             sh1_tbl = m_engine->sh1_tbl;
 
  187             sh2_tbl = m_engine->sh2_tbl;
 
  188             mask = m_engine->mask;
 
  190         if (thread_id < MTGP_TS)
 
  192             param_tbl[thread_id] = m_engine->param_tbl[thread_id];
 
  193             temper_tbl[thread_id] = m_engine->temper_tbl[thread_id];
 
  194             single_temper_tbl[thread_id] = m_engine->single_temper_tbl[thread_id];
 
  198         this->m_state = m_engine->m_state;
 
  199         pos_tbl = m_engine->pos_tbl;
 
  200         sh1_tbl = m_engine->sh1_tbl;
 
  201         sh2_tbl = m_engine->sh2_tbl;
 
  202         mask = m_engine->mask;
 
  203         for (
int j = 0; j < MTGP_TS; j++) {
 
  204             param_tbl[j] = m_engine->param_tbl[j];
 
  205             temper_tbl[j] = m_engine->temper_tbl[j];
 
  206             single_temper_tbl[j] = m_engine->single_temper_tbl[j];
 
  211     __forceinline__ __device__ __host__ 
void set_params(mtgp32_params* params)
 
  213         pos_tbl = params->pos_tbl[m_state.id];
 
  214         sh1_tbl = params->sh1_tbl[m_state.id];
 
  215         sh2_tbl = params->sh2_tbl[m_state.id];
 
  216         mask = params->mask[0];
 
  217         for (
int j = 0; j < MTGP_TS; j++) {
 
  218             param_tbl[j] = params->param_tbl[m_state.id][j];
 
  219             temper_tbl[j] = params->temper_tbl[m_state.id][j];
 
  220             single_temper_tbl[j] = params->single_temper_tbl[m_state.id][j];
 
  224     __forceinline__ __device__ __host__ 
unsigned int operator()()
 
  229     __forceinline__ __device__ __host__ 
unsigned int next()
 
  231 #ifdef __HIP_DEVICE_COMPILE__ 
  232         unsigned int o = next_thread(threadIdx.x);
 
  236             m_state.offset = (m_state.offset + blockDim.x) & MTGP_MASK;
 
  245     __forceinline__ __device__ __host__ 
unsigned int next_single()
 
  247 #if defined(__HIP_DEVICE_COMPILE__) 
  248         unsigned int t   = threadIdx.x;
 
  249         unsigned int d   = blockDim.x;
 
  254         r = para_rec(m_state.status[(t + m_state.offset) & MTGP_MASK],
 
  255                      m_state.status[(t + m_state.offset + 1) & MTGP_MASK],
 
  256                      m_state.status[(t + m_state.offset + pos) & MTGP_MASK]);
 
  257         m_state.status[(t + m_state.offset + MTGP_N) & MTGP_MASK] = r;
 
  259         o = temper_single(r, m_state.status[(t + m_state.offset + pos - 1) & MTGP_MASK]);
 
  262             m_state.offset = (m_state.offset + d) & MTGP_MASK;
 
  271     __forceinline__ __device__ __host__ 
unsigned int 
  272         para_rec(
unsigned int X1, 
unsigned int X2, 
unsigned int Y)
 const 
  274         unsigned int X = (X1 & mask) ^ X2;
 
  278         Y = X ^ (Y >> sh2_tbl);
 
  279         MAT = param_tbl[Y & 0x0f];
 
  283     __forceinline__ __device__ __host__ 
unsigned int temper(
unsigned int V, 
unsigned int T)
 const 
  289         MAT = temper_tbl[T & 0x0f];
 
  293     __forceinline__ __device__ __host__ 
unsigned int temper_single(
unsigned int V,
 
  294                                                                    unsigned int T)
 const 
  301         MAT = single_temper_tbl[T & 0x0f];
 
  309     __forceinline__ __device__ __host__ 
unsigned int next_thread(
unsigned int thread_idx)
 
  312             = para_rec(m_state.status[(thread_idx + m_state.offset) & MTGP_MASK],
 
  313                        m_state.status[(thread_idx + m_state.offset + 1) & MTGP_MASK],
 
  314                        m_state.status[(thread_idx + m_state.offset + pos_tbl) & MTGP_MASK]);
 
  315         m_state.status[(thread_idx + m_state.offset + MTGP_N) & MTGP_MASK] = r;
 
  316         return temper(r, m_state.status[(thread_idx + m_state.offset + pos_tbl - 1) & MTGP_MASK]);
 
  321     mtgp32_state m_state;
 
  323     unsigned int pos_tbl;
 
  324     unsigned int param_tbl[MTGP_TS];
 
  325     unsigned int temper_tbl[MTGP_TS];
 
  326     unsigned int sh1_tbl;
 
  327     unsigned int sh2_tbl;
 
  328     unsigned int single_temper_tbl[MTGP_TS];
 
  341 typedef rocrand_device::mtgp32_engine rocrand_state_mtgp32;
 
  342 typedef rocrand_device::mtgp32_state mtgp32_state;
 
  344 typedef rocrand_device::mtgp32_params mtgp32_params;
 
  363                                                          mtgp32_fast_params    params[],
 
  365                                                          unsigned long long    seed)
 
  368     rocrand_state_mtgp32 * h_state = (rocrand_state_mtgp32 *) malloc(
sizeof(rocrand_state_mtgp32) * n);
 
  369     seed = seed ^ (seed >> 32);
 
  374     for (i = 0; i < n; i++) {
 
  375         rocrand_device::rocrand_mtgp32_init_state(&(h_state[i].m_state.status[0]), ¶ms[i], (
unsigned int)seed + i + 1);
 
  376         h_state[i].m_state.offset = 0;
 
  377         h_state[i].m_state.id = i;
 
  378         h_state[i].pos_tbl = params[i].pos;
 
  379         h_state[i].sh1_tbl = params[i].sh1;
 
  380         h_state[i].sh2_tbl = params[i].sh2;
 
  381         h_state[i].mask = params[0].mask;
 
  382         for (
int j = 0; j < MTGP_TS; j++) {
 
  383             h_state[i].param_tbl[j] = params[i].tbl[j];
 
  384             h_state[i].temper_tbl[j] = params[i].tmp_tbl[j];
 
  385             h_state[i].single_temper_tbl[j] = params[i].flt_tmp_tbl[j];
 
  389     const hipError_t error
 
  390         = hipMemcpy(state, h_state, 
sizeof(rocrand_state_mtgp32) * n, hipMemcpyDefault);
 
  393     if(error != hipSuccess)
 
  418     const int block_num = MTGP_BN_MAX;
 
  419     const int size1 = 
sizeof(uint32_t) * block_num;
 
  420     const int size2 = 
sizeof(uint32_t) * block_num * MTGP_TS;
 
  424     uint32_t *h_param_tbl;
 
  425     uint32_t *h_temper_tbl;
 
  426     uint32_t *h_single_temper_tbl;
 
  428     h_pos_tbl = (uint32_t *)malloc(size1);
 
  429     h_sh1_tbl = (uint32_t *)malloc(size1);
 
  430     h_sh2_tbl = (uint32_t *)malloc(size1);
 
  431     h_param_tbl = (uint32_t *)malloc(size2);
 
  432     h_temper_tbl = (uint32_t *)malloc(size2);
 
  433     h_single_temper_tbl = (uint32_t *)malloc(size2);
 
  434     h_mask = (uint32_t *)malloc(
sizeof(uint32_t));
 
  437     if (h_pos_tbl == NULL || h_sh1_tbl == NULL || h_sh2_tbl == NULL
 
  438         || h_param_tbl == NULL || h_temper_tbl == NULL || h_single_temper_tbl == NULL
 
  440         printf(
"failure in allocating host memory for constant table.\n");
 
  444         h_mask[0] = params[0].mask;
 
  445         for (
int i = 0; i < block_num; i++) {
 
  446             h_pos_tbl[i] = params[i].pos;
 
  447             h_sh1_tbl[i] = params[i].sh1;
 
  448             h_sh2_tbl[i] = params[i].sh2;
 
  449             for (
int j = 0; j < MTGP_TS; j++) {
 
  450                 h_param_tbl[i * MTGP_TS + j] = params[i].tbl[j];
 
  451                 h_temper_tbl[i * MTGP_TS + j] = params[i].tmp_tbl[j];
 
  452                 h_single_temper_tbl[i * MTGP_TS + j] = params[i].flt_tmp_tbl[j];
 
  456         if (hipMemcpy(p->pos_tbl, h_pos_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
 
  458         if (hipMemcpy(p->sh1_tbl, h_sh1_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
 
  460         if (hipMemcpy(p->sh2_tbl, h_sh2_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
 
  462         if (hipMemcpy(p->param_tbl, h_param_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
 
  464         if (hipMemcpy(p->temper_tbl, h_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
 
  466         if (hipMemcpy(p->single_temper_tbl, h_single_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
 
  468         if (hipMemcpy(p->mask, h_mask, 
sizeof(
unsigned int), hipMemcpyHostToDevice) != hipSuccess)
 
  477     free(h_single_temper_tbl);
 
  495 __forceinline__ __device__ 
unsigned int rocrand(rocrand_state_mtgp32* state)
 
  497     return state->next();
 
  532                                                           rocrand_state_mtgp32* dest)
 
  544                                                           mtgp32_params*        params)
 
  546     state->set_params(params);
 
__forceinline__ __device__ void rocrand_mtgp32_block_copy(rocrand_state_mtgp32 *src, rocrand_state_mtgp32 *dest)
Copies MTGP32 state to another state using block of threads.
Definition: rocrand_mtgp32.h:531
 
__host__ rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32 *state, mtgp32_fast_params params[], int n, unsigned long long seed)
Initializes MTGP32 states.
Definition: rocrand_mtgp32.h:362
 
__forceinline__ __device__ unsigned int rocrand(rocrand_state_mtgp32 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mtgp32.h:495
 
__host__ rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_params *p)
Loads parameters for MTGP32.
Definition: rocrand_mtgp32.h:416
 
__forceinline__ __device__ void rocrand_mtgp32_set_params(rocrand_state_mtgp32 *state, mtgp32_params *params)
Changes parameters of a MTGP32 state.
Definition: rocrand_mtgp32.h:543
 
rocrand_status
rocRAND function call status type
Definition: rocrand.h:59
 
@ ROCRAND_STATUS_SUCCESS
No errors.
Definition: rocrand.h:60
 
@ ROCRAND_STATUS_ALLOCATION_FAILED
Memory allocation failed during execution.
Definition: rocrand.h:63
 
Definition: rocrand_mtgp32.h:95
 
uint32_t tmp_tbl[16]
Definition: rocrand_mtgp32.h:101
 
int pos
Definition: rocrand_mtgp32.h:97
 
int mexp
Definition: rocrand_mtgp32.h:96
 
int sh2
Definition: rocrand_mtgp32.h:99
 
int sh1
Definition: rocrand_mtgp32.h:98
 
uint32_t mask
Definition: rocrand_mtgp32.h:103
 
unsigned char poly_sha1[21]
Definition: rocrand_mtgp32.h:104
 
uint32_t tbl[16]
Definition: rocrand_mtgp32.h:100
 
uint32_t flt_tmp_tbl[16]
Definition: rocrand_mtgp32.h:102