/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_mtgp32.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_mtgp32.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_mtgp32.h Source File
API library
rocrand_mtgp32.h
1 // Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 /*
22  * Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
23  * University. All rights reserved.
24  * Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
25  * University and University of Tokyo. All rights reserved.
26  *
27  * Redistribution and use in source and binary forms, with or without
28  * modification, are permitted provided that the following conditions are
29  * met:
30  *
31  * * Redistributions of source code must retain the above copyright
32  * notice, this list of conditions and the following disclaimer.
33  * * Redistributions in binary form must reproduce the above
34  * copyright notice, this list of conditions and the following
35  * disclaimer in the documentation and/or other materials provided
36  * with the distribution.
37  * * Neither the name of the Hiroshima University nor the names of
38  * its contributors may be used to endorse or promote products
39  * derived from this software without specific prior written
40  * permission.
41  *
42  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
43  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
44  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
45  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
46  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
47  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
48  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
49  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
50  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
51  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
52  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
53  */
54 
55 #ifndef ROCRAND_MTGP32_H_
56 #define ROCRAND_MTGP32_H_
57 
58 #include <stdlib.h>
59 
60 #ifndef FQUALIFIERS
61 #define FQUALIFIERS __forceinline__ __device__
62 #endif // FQUALIFIERS_
63 
64 #include "rocrand/rocrand.h"
65 #include "rocrand/rocrand_common.h"
66 
67 #define MTGP_MEXP 11213
68 #define MTGP_N 351
69 #define MTGP_FLOOR_2P 256
70 #define MTGP_CEIL_2P 512
71 #define MTGP_TN MTGP_FLOOR_2P
72 #define MTGP_LS (MTGP_TN * 3)
73 #define MTGP_BN_MAX 512
74 #define MTGP_TS 16
75 #define MTGP_STATE 1024
76 #define MTGP_MASK 1023
77 
78 // Source: https://github.com/MersenneTwister-Lab/MTGP/blob/master/mtgp32-fast.h
100  int mexp;
101  int pos;
102  int sh1;
103  int sh2;
104  uint32_t tbl[16];
105  uint32_t tmp_tbl[16];
106  uint32_t flt_tmp_tbl[16];
107  uint32_t mask;
108  unsigned char poly_sha1[21];
109 };
110 
111 namespace rocrand_device {
112 
113 struct mtgp32_params
114 {
115  unsigned int pos_tbl[MTGP_BN_MAX];
116  unsigned int param_tbl[MTGP_BN_MAX][MTGP_TS];
117  unsigned int temper_tbl[MTGP_BN_MAX][MTGP_TS];
118  unsigned int single_temper_tbl[MTGP_BN_MAX][MTGP_TS];
119  unsigned int sh1_tbl[MTGP_BN_MAX];
120  unsigned int sh2_tbl[MTGP_BN_MAX];
121  unsigned int mask[1];
122 };
123 
124 typedef mtgp32_params_fast_t mtgp32_fast_params;
125 
126 struct mtgp32_state
127 {
128  int offset;
129  int id;
130  unsigned int status[MTGP_STATE];
131 };
132 
133 inline
134 void rocrand_mtgp32_init_state(unsigned int array[],
135  const mtgp32_fast_params *para, unsigned int seed)
136 {
137  int i;
138  int size = para->mexp / 32 + 1;
139  unsigned int hidden_seed;
140  unsigned int tmp;
141  hidden_seed = para->tbl[4] ^ (para->tbl[8] << 16);
142  tmp = hidden_seed;
143  tmp += tmp >> 16;
144  tmp += tmp >> 8;
145  memset(array, tmp & 0xff, sizeof(unsigned int) * size);
146  array[0] = seed;
147  array[1] = hidden_seed;
148  for (i = 1; i < size; i++)
149  array[i] ^= (1812433253) * (array[i - 1] ^ (array[i - 1] >> 30)) + i;
150 }
151 
152 class mtgp32_engine
153 {
154 public:
156  // Initialization is not supported for __shared__ variables
157  mtgp32_engine() // cppcheck-suppress uninitMemberVar
158  {
159 
160  }
161 
163  mtgp32_engine(const mtgp32_state &m_state,
164  const mtgp32_params * params,
165  int bid)
166  {
167  this->m_state = m_state;
168  pos_tbl = params->pos_tbl[bid];
169  sh1_tbl = params->sh1_tbl[bid];
170  sh2_tbl = params->sh2_tbl[bid];
171  mask = params->mask[0];
172  for (int j = 0; j < MTGP_TS; j++) {
173  param_tbl[j] = params->param_tbl[bid][j];
174  temper_tbl[j] = params->temper_tbl[bid][j];
175  single_temper_tbl[j] = params->single_temper_tbl[bid][j];
176  }
177  }
178 
180  void copy(const mtgp32_engine * m_engine)
181  {
182 #if defined(__HIP_DEVICE_COMPILE__) || defined(USE_HIP_CPU)
183  const unsigned int thread_id = threadIdx.x;
184  for(int i = thread_id; i < MTGP_STATE; i += blockDim.x)
185  m_state.status[i] = m_engine->m_state.status[i];
186 
187  if (thread_id == 0)
188  {
189  m_state.offset = m_engine->m_state.offset;
190  m_state.id = m_engine->m_state.id;
191  pos_tbl = m_engine->pos_tbl;
192  sh1_tbl = m_engine->sh1_tbl;
193  sh2_tbl = m_engine->sh2_tbl;
194  mask = m_engine->mask;
195  }
196  if (thread_id < MTGP_TS)
197  {
198  param_tbl[thread_id] = m_engine->param_tbl[thread_id];
199  temper_tbl[thread_id] = m_engine->temper_tbl[thread_id];
200  single_temper_tbl[thread_id] = m_engine->single_temper_tbl[thread_id];
201  }
202  __syncthreads();
203 #else
204  this->m_state = m_engine->m_state;
205  pos_tbl = m_engine->pos_tbl;
206  sh1_tbl = m_engine->sh1_tbl;
207  sh2_tbl = m_engine->sh2_tbl;
208  mask = m_engine->mask;
209  for (int j = 0; j < MTGP_TS; j++) {
210  param_tbl[j] = m_engine->param_tbl[j];
211  temper_tbl[j] = m_engine->temper_tbl[j];
212  single_temper_tbl[j] = m_engine->single_temper_tbl[j];
213  }
214 #endif
215  }
216 
218  void set_params(mtgp32_params * params)
219  {
220  pos_tbl = params->pos_tbl[m_state.id];
221  sh1_tbl = params->sh1_tbl[m_state.id];
222  sh2_tbl = params->sh2_tbl[m_state.id];
223  mask = params->mask[0];
224  for (int j = 0; j < MTGP_TS; j++) {
225  param_tbl[j] = params->param_tbl[m_state.id][j];
226  temper_tbl[j] = params->temper_tbl[m_state.id][j];
227  single_temper_tbl[j] = params->single_temper_tbl[m_state.id][j];
228  }
229  }
230 
232  unsigned int operator()()
233  {
234  return this->next();
235  }
236 
238  unsigned int next()
239  {
240 #if defined(__HIP_DEVICE_COMPILE__) || defined(USE_HIP_CPU)
241  unsigned int t = threadIdx.x;
242  unsigned int d = blockDim.x;
243  int pos = pos_tbl;
244  unsigned int r;
245  unsigned int o;
246 
247  r = para_rec(m_state.status[(t + m_state.offset) & MTGP_MASK],
248  m_state.status[(t + m_state.offset + 1) & MTGP_MASK],
249  m_state.status[(t + m_state.offset + pos) & MTGP_MASK]);
250  m_state.status[(t + m_state.offset + MTGP_N) & MTGP_MASK] = r;
251 
252  o = temper(r, m_state.status[(t + m_state.offset + pos - 1) & MTGP_MASK]);
253  __syncthreads();
254  if (t == 0)
255  m_state.offset = (m_state.offset + d) & MTGP_MASK;
256  __syncthreads();
257  return o;
258 #else
259  return 0;
260 #endif
261  }
262 
264  unsigned int next_single()
265  {
266 #if defined(__HIP_DEVICE_COMPILE__) || defined(USE_HIP_CPU)
267  unsigned int t = threadIdx.x;
268  unsigned int d = blockDim.x;
269  int pos = pos_tbl;
270  unsigned int r;
271  unsigned int o;
272 
273  r = para_rec(m_state.status[(t + m_state.offset) & MTGP_MASK],
274  m_state.status[(t + m_state.offset + 1) & MTGP_MASK],
275  m_state.status[(t + m_state.offset + pos) & MTGP_MASK]);
276  m_state.status[(t + m_state.offset + MTGP_N) & MTGP_MASK] = r;
277 
278  o = temper_single(r, m_state.status[(t + m_state.offset + pos - 1) & MTGP_MASK]);
279  __syncthreads();
280  if (t == 0)
281  m_state.offset = (m_state.offset + d) & MTGP_MASK;
282  __syncthreads();
283  return o;
284 #else
285  return 0;
286 #endif
287  }
288 
289 private:
291  unsigned int para_rec(unsigned int X1, unsigned int X2, unsigned int Y) const
292  {
293  unsigned int X = (X1 & mask) ^ X2;
294  unsigned int MAT;
295 
296  X ^= X << sh1_tbl;
297  Y = X ^ (Y >> sh2_tbl);
298  MAT = param_tbl[Y & 0x0f];
299  return Y ^ MAT;
300  }
301 
303  unsigned int temper(unsigned int V, unsigned int T) const
304  {
305  unsigned int MAT;
306 
307  T ^= T >> 16;
308  T ^= T >> 8;
309  MAT = temper_tbl[T & 0x0f];
310  return V ^ MAT;
311  }
312 
314  unsigned int temper_single(unsigned int V, unsigned int T) const
315  {
316  unsigned int MAT;
317  unsigned int r;
318 
319  T ^= T >> 16;
320  T ^= T >> 8;
321  MAT = single_temper_tbl[T & 0x0f];
322  r = (V >> 9) ^ MAT;
323  return r;
324  }
325 
326 public:
327  // State
328  mtgp32_state m_state;
329  // Parameters
330  unsigned int pos_tbl;
331  unsigned int param_tbl[MTGP_TS];
332  unsigned int temper_tbl[MTGP_TS];
333  unsigned int sh1_tbl;
334  unsigned int sh2_tbl;
335  unsigned int single_temper_tbl[MTGP_TS];
336  unsigned int mask;
337 
338 }; // mtgp32_engine class
339 
340 } // end namespace rocrand_device
341 
348 typedef rocrand_device::mtgp32_engine rocrand_state_mtgp32;
349 typedef rocrand_device::mtgp32_state mtgp32_state;
350 typedef rocrand_device::mtgp32_fast_params mtgp32_fast_params;
351 typedef rocrand_device::mtgp32_params mtgp32_params;
353 
369 __host__ inline
370 rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32 * d_state,
371  mtgp32_fast_params params[],
372  int n,
373  unsigned long long seed)
374 {
375  int i;
376  rocrand_state_mtgp32 * h_state = (rocrand_state_mtgp32 *) malloc(sizeof(rocrand_state_mtgp32) * n);
377  seed = seed ^ (seed >> 32);
378 
379  if (h_state == NULL)
381 
382  for (i = 0; i < n; i++) {
383  rocrand_device::rocrand_mtgp32_init_state(&(h_state[i].m_state.status[0]), &params[i], (unsigned int)seed + i + 1);
384  h_state[i].m_state.offset = 0;
385  h_state[i].m_state.id = i;
386  h_state[i].pos_tbl = params[i].pos;
387  h_state[i].sh1_tbl = params[i].sh1;
388  h_state[i].sh2_tbl = params[i].sh2;
389  h_state[i].mask = params[0].mask;
390  for (int j = 0; j < MTGP_TS; j++) {
391  h_state[i].param_tbl[j] = params[i].tbl[j];
392  h_state[i].temper_tbl[j] = params[i].tmp_tbl[j];
393  h_state[i].single_temper_tbl[j] = params[i].flt_tmp_tbl[j];
394  }
395  }
396 
397  hipMemcpy(d_state, h_state, sizeof(rocrand_state_mtgp32) * n, hipMemcpyHostToDevice);
398  free(h_state);
399 
400  if (hipGetLastError() != hipSuccess)
402 
403  return ROCRAND_STATUS_SUCCESS;
404 }
405 
422 __host__ inline
423 rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_params * p)
424 {
425  const int block_num = MTGP_BN_MAX;
426  const int size1 = sizeof(uint32_t) * block_num;
427  const int size2 = sizeof(uint32_t) * block_num * MTGP_TS;
428  uint32_t *h_pos_tbl;
429  uint32_t *h_sh1_tbl;
430  uint32_t *h_sh2_tbl;
431  uint32_t *h_param_tbl;
432  uint32_t *h_temper_tbl;
433  uint32_t *h_single_temper_tbl;
434  uint32_t *h_mask;
435  h_pos_tbl = (uint32_t *)malloc(size1);
436  h_sh1_tbl = (uint32_t *)malloc(size1);
437  h_sh2_tbl = (uint32_t *)malloc(size1);
438  h_param_tbl = (uint32_t *)malloc(size2);
439  h_temper_tbl = (uint32_t *)malloc(size2);
440  h_single_temper_tbl = (uint32_t *)malloc(size2);
441  h_mask = (uint32_t *)malloc(sizeof(uint32_t));
443 
444  if (h_pos_tbl == NULL || h_sh1_tbl == NULL || h_sh2_tbl == NULL
445  || h_param_tbl == NULL || h_temper_tbl == NULL || h_single_temper_tbl == NULL
446  || h_mask == NULL) {
447  printf("failure in allocating host memory for constant table.\n");
449  }
450  else {
451  h_mask[0] = params[0].mask;
452  for (int i = 0; i < block_num; i++) {
453  h_pos_tbl[i] = params[i].pos;
454  h_sh1_tbl[i] = params[i].sh1;
455  h_sh2_tbl[i] = params[i].sh2;
456  for (int j = 0; j < MTGP_TS; j++) {
457  h_param_tbl[i * MTGP_TS + j] = params[i].tbl[j];
458  h_temper_tbl[i * MTGP_TS + j] = params[i].tmp_tbl[j];
459  h_single_temper_tbl[i * MTGP_TS + j] = params[i].flt_tmp_tbl[j];
460  }
461  }
462 
463  if (hipMemcpy(p->pos_tbl, h_pos_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
465  if (hipMemcpy(p->sh1_tbl, h_sh1_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
467  if (hipMemcpy(p->sh2_tbl, h_sh2_tbl, size1, hipMemcpyHostToDevice) != hipSuccess)
469  if (hipMemcpy(p->param_tbl, h_param_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
471  if (hipMemcpy(p->temper_tbl, h_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
473  if (hipMemcpy(p->single_temper_tbl, h_single_temper_tbl, size2, hipMemcpyHostToDevice) != hipSuccess)
475  if (hipMemcpy(p->mask, h_mask, sizeof(unsigned int), hipMemcpyHostToDevice) != hipSuccess)
477  }
478 
479  free(h_pos_tbl);
480  free(h_sh1_tbl);
481  free(h_sh2_tbl);
482  free(h_param_tbl);
483  free(h_temper_tbl);
484  free(h_single_temper_tbl);
485  free(h_mask);
486 
487  return status;
488 }
489 
503 unsigned int rocrand(rocrand_state_mtgp32 * state)
504 {
505  return state->next();
506 }
507 
540 void rocrand_mtgp32_block_copy(rocrand_state_mtgp32 * src, rocrand_state_mtgp32 * dest)
541 {
542  dest->copy(src);
543 }
544 
552 void rocrand_mtgp32_set_params(rocrand_state_mtgp32 * state, mtgp32_params * params)
553 {
554  state->set_params(params);
555 }
556  // end of group rocranddevice
558 
559 #endif // ROCRAND_MTGP32_H_
FQUALIFIERS void rocrand_mtgp32_block_copy(rocrand_state_mtgp32 *src, rocrand_state_mtgp32 *dest)
Copies MTGP32 state to another state using block of threads.
Definition: rocrand_mtgp32.h:540
__host__ rocrand_status rocrand_make_state_mtgp32(rocrand_state_mtgp32 *d_state, mtgp32_fast_params params[], int n, unsigned long long seed)
Initializes MTGP32 states.
Definition: rocrand_mtgp32.h:370
__host__ rocrand_status rocrand_make_constant(const mtgp32_fast_params params[], mtgp32_params *p)
Loads parameters for MTGP32.
Definition: rocrand_mtgp32.h:423
FQUALIFIERS unsigned int rocrand(rocrand_state_mtgp32 *state)
Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
Definition: rocrand_mtgp32.h:503
FQUALIFIERS void rocrand_mtgp32_set_params(rocrand_state_mtgp32 *state, mtgp32_params *params)
Changes parameters of a MTGP32 state.
Definition: rocrand_mtgp32.h:552
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31
rocrand_status
rocRAND function call status type
Definition: rocrand.h:60
@ ROCRAND_STATUS_SUCCESS
No errors.
Definition: rocrand.h:61
@ ROCRAND_STATUS_ALLOCATION_FAILED
Memory allocation failed during execution.
Definition: rocrand.h:64
Definition: rocrand_mtgp32.h:99
uint32_t tmp_tbl[16]
Definition: rocrand_mtgp32.h:105
int pos
Definition: rocrand_mtgp32.h:101
int mexp
Definition: rocrand_mtgp32.h:100
int sh2
Definition: rocrand_mtgp32.h:103
int sh1
Definition: rocrand_mtgp32.h:102
uint32_t mask
Definition: rocrand_mtgp32.h:107
unsigned char poly_sha1[21]
Definition: rocrand_mtgp32.h:108
uint32_t tbl[16]
Definition: rocrand_mtgp32.h:104
uint32_t flt_tmp_tbl[16]
Definition: rocrand_mtgp32.h:106