/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_sobol64.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_sobol64.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_sobol64.h Source File
rocrand_sobol64.h
1 // Copyright (c) 2021-2024 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_SOBOL64_H_
22 #define ROCRAND_SOBOL64_H_
23 
24 #include "rocrand/rocrand_common.h"
25 
26 namespace rocrand_device {
27 
28 template<bool UseSharedVectors>
29 struct sobol64_state
30 {
31  unsigned long long int d;
32  unsigned long long int i;
33  unsigned long long int vectors[64];
34 
35  __forceinline__ __device__ __host__ sobol64_state() : d(), i(), vectors() {}
36 
37  __forceinline__ __device__ __host__ sobol64_state(const unsigned long long int d,
38  const unsigned long long int i,
39  const unsigned long long int* vectors)
40  : d(d), i(i)
41  {
42  for(int k = 0; k < 64; k++)
43  {
44  this->vectors[k] = vectors[k];
45  }
46  }
47 };
48 
49 template<>
50 struct sobol64_state<true>
51 {
52  unsigned long long int d;
53  unsigned long long int i;
54  const unsigned long long int * vectors;
55 
56  __forceinline__ __device__ __host__ sobol64_state() : d(), i(), vectors() {}
57 
58  __forceinline__ __device__ __host__ sobol64_state(const unsigned long long int d,
59  const unsigned long long int i,
60  const unsigned long long int* vectors)
61  : d(d), i(i), vectors(vectors)
62  {}
63 };
64 
65 template<bool UseSharedVectors>
66 class sobol64_engine
67 {
68 public:
69 
70  typedef struct sobol64_state<UseSharedVectors> sobol64_state;
71 
72  __forceinline__ __device__ __host__ sobol64_engine() {}
73 
74  __forceinline__ __device__ __host__ sobol64_engine(const unsigned long long int* vectors,
75  const unsigned long long int offset)
76  : m_state(0, 0, vectors)
77  {
78  discard_state(offset);
79  }
80 
82  __forceinline__ __device__ __host__ void discard(unsigned long long int offset)
83  {
84  discard_state(offset);
85  }
86 
87  __forceinline__ __device__ __host__ void discard()
88  {
89  discard_state();
90  }
91 
93  __forceinline__ __device__ __host__ void discard_stride(unsigned long long int stride)
94  {
95  discard_state_power2(stride);
96  }
97 
98  __forceinline__ __device__ __host__ unsigned long long int operator()()
99  {
100  return this->next();
101  }
102 
103  __forceinline__ __device__ __host__ unsigned long long int next()
104  {
105  unsigned long long int p = m_state.d;
106  discard_state();
107  return p;
108  }
109 
110  __forceinline__ __device__ __host__ unsigned long long int current() const
111  {
112  return m_state.d;
113  }
114 
115  __forceinline__ __device__ __host__ static constexpr bool uses_shared_vectors()
116  {
117  return UseSharedVectors;
118  }
119 
120 protected:
121  // Advances the internal state by offset times.
122  __forceinline__ __device__ __host__ void discard_state(unsigned long long int offset)
123  {
124  m_state.i += offset;
125  const unsigned long long int g = m_state.i ^ (m_state.i >> 1ull);
126  m_state.d = 0;
127  for(int i = 0; i < 64; i++)
128  {
129  m_state.d ^= (g & (1ull << i) ? m_state.vectors[i] : 0ull);
130  }
131  }
132 
133  // Advances the internal state to the next state
134  __forceinline__ __device__ __host__ void discard_state()
135  {
136  m_state.d ^= m_state.vectors[rightmost_zero_bit(m_state.i)];
137  m_state.i++;
138  }
139 
140  __forceinline__ __device__ __host__ void discard_state_power2(unsigned long long int stride)
141  {
142  // Leap frog
143  //
144  // T Bradley, J Toit, M Giles, R Tong, P Woodhams
145  // Parallelisation Techniques for Random Number Generators
146  // GPU Computing Gems, 2011
147  //
148  // For power of 2 jumps only 2 bits in Gray code change values
149  // All bits lower than log2(stride) flip 2, 4... times, i.e.
150  // do not change their values.
151 
152  // log2(stride) bit
153  m_state.d ^= m_state.vectors[rightmost_zero_bit(~stride) - 1];
154  // the rightmost zero bit of i, not including the lower log2(stride) bits
155  m_state.d ^= m_state.vectors[rightmost_zero_bit(m_state.i | (stride - 1))];
156  m_state.i += stride;
157  }
158 
159  // Returns the index of the rightmost zero bit in the binary expansion of
160  // x (Gray code of the current element's index)
161  // NOTE changing unsigned long long int to unit64_t will cause compile failure on device
162  __forceinline__ __device__ __host__ unsigned int rightmost_zero_bit(unsigned long long int x)
163  {
164  #if defined(__HIP_DEVICE_COMPILE__)
165  unsigned int z = __ffsll(~x);
166  return z ? z - 1 : 0;
167  #else
168  if(x == 0)
169  return 0;
170  unsigned long long int y = x;
171  unsigned long long int z = 1;
172  while(y & 1)
173  {
174  y >>= 1;
175  z++;
176  }
177  return z - 1;
178  #endif
179  }
180 
181 protected:
182  // State
183  sobol64_state m_state;
184 
185 }; // sobol64_engine class
186 
187 } // end namespace rocrand_device
188 
195 typedef rocrand_device::sobol64_engine<false> rocrand_state_sobol64;
197 
208 __forceinline__ __device__ __host__ void rocrand_init(const unsigned long long int* vectors,
209  const unsigned int offset,
210  rocrand_state_sobol64* state)
211 {
212  *state = rocrand_state_sobol64(vectors, offset);
213 }
214 
227 __forceinline__ __device__ __host__ unsigned long long int rocrand(rocrand_state_sobol64* state)
228 {
229  return state->next();
230 }
231 
240 __forceinline__ __device__ __host__ void skipahead(unsigned long long int offset,
241  rocrand_state_sobol64* state)
242 {
243  return state->discard(offset);
244 }
245  // end of group rocranddevice
247 
248 #endif // ROCRAND_SOBOL64_H_
__forceinline__ __device__ __host__ unsigned long long int rocrand(rocrand_state_sobol64 *state)
Returns uniformly distributed random unsigned long long int value from [0; 2^64 - 1] range.
Definition: rocrand_sobol64.h:227
__forceinline__ __device__ __host__ void skipahead(unsigned long long int offset, rocrand_state_sobol64 *state)
Updates sobol64 state to skip ahead by offset elements.
Definition: rocrand_sobol64.h:240
__forceinline__ __device__ __host__ void rocrand_init(const unsigned long long int *vectors, const unsigned int offset, rocrand_state_sobol64 *state)
Initialize sobol64 state.
Definition: rocrand_sobol64.h:208