/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/library/include/rocrand/rocrand_poisson.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/library/include/rocrand/rocrand_poisson.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/library/include/rocrand/rocrand_poisson.h Source File
rocrand_poisson.h
1 // Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_POISSON_H_
22 #define ROCRAND_POISSON_H_
23 
29 #include <math.h>
30 
31 #include "rocrand/rocrand_lfsr113.h"
32 #include "rocrand/rocrand_mrg31k3p.h"
33 #include "rocrand/rocrand_mrg32k3a.h"
34 #include "rocrand/rocrand_mtgp32.h"
35 #include "rocrand/rocrand_philox4x32_10.h"
36 #include "rocrand/rocrand_scrambled_sobol32.h"
37 #include "rocrand/rocrand_scrambled_sobol64.h"
38 #include "rocrand/rocrand_sobol32.h"
39 #include "rocrand/rocrand_sobol64.h"
40 #include "rocrand/rocrand_threefry2x32_20.h"
41 #include "rocrand/rocrand_threefry2x64_20.h"
42 #include "rocrand/rocrand_threefry4x32_20.h"
43 #include "rocrand/rocrand_threefry4x64_20.h"
44 #include "rocrand/rocrand_xorwow.h"
45 
46 #include "rocrand/rocrand_normal.h"
47 #include "rocrand/rocrand_uniform.h"
48 
49 namespace rocrand_device {
50 namespace detail {
51 
52 constexpr double lambda_threshold_small = 64.0;
53 constexpr double lambda_threshold_huge = 4000.0;
54 
55 template<class State, typename Result_Type = unsigned int>
56 __forceinline__ __device__ __host__ Result_Type poisson_distribution_small(State& state,
57  double lambda)
58 {
59  // Knuth's method
60 
61  const double limit = exp(-lambda);
62  Result_Type k = 0;
63  double product = 1.0;
64 
65  do
66  {
67  k++;
68  product *= rocrand_uniform_double(state);
69  }
70  while (product > limit);
71 
72  return k - 1;
73 }
74 
75 __forceinline__ __device__ __host__ double lgamma_approx(const double x)
76 {
77  // Lanczos approximation (g = 7, n = 9)
78 
79  const double z = x - 1.0;
80 
81  const int g = 7;
82  const int n = 9;
83  const double coefs[n] = {
84  0.99999999999980993227684700473478,
85  676.520368121885098567009190444019,
86  -1259.13921672240287047156078755283,
87  771.3234287776530788486528258894,
88  -176.61502916214059906584551354,
89  12.507343278686904814458936853,
90  -0.13857109526572011689554707,
91  9.984369578019570859563e-6,
92  1.50563273514931155834e-7
93  };
94  double sum = 0.0;
95  #pragma unroll
96  for (int i = n - 1; i > 0; i--)
97  {
98  sum += coefs[i] / (z + i);
99  }
100  sum += coefs[0];
101 
102  const double log_sqrt_2_pi = 0.9189385332046727418;
103  const double e = 2.718281828459045090796;
104  return (log_sqrt_2_pi + log(sum) - g) + (z + 0.5) * log((z + g + 0.5) / e);
105 }
106 
107 template<class State, typename Result_Type = unsigned int>
108 __forceinline__ __device__ __host__ Result_Type poisson_distribution_large(State& state,
109  double lambda)
110 {
111  // Rejection method PA, A. C. Atkinson
112 
113  const double c = 0.767 - 3.36 / lambda;
114  const double beta = ROCRAND_PI_DOUBLE / sqrt(3.0 * lambda);
115  const double alpha = beta * lambda;
116  const double k = log(c) - lambda - log(beta);
117  const double log_lambda = log(lambda);
118 
119  while (true)
120  {
121  const double u = rocrand_uniform_double(state);
122  const double x = (alpha - log((1.0 - u) / u)) / beta;
123  const double n = floor(x + 0.5);
124  if (n < 0)
125  {
126  continue;
127  }
128  const double v = rocrand_uniform_double(state);
129  const double y = alpha - beta * x;
130  const double t = 1.0 + exp(y);
131  const double lhs = y + log(v / (t * t));
132  const double rhs = k + n * log_lambda - lgamma_approx(n + 1.0);
133  if (lhs <= rhs)
134  {
135  return static_cast<Result_Type>(n);
136  }
137  }
138 }
139 
140 template<class State, typename Result_Type = unsigned int>
141 __forceinline__ __device__ __host__ Result_Type poisson_distribution_huge(State& state,
142  double lambda)
143 {
144  // Approximate Poisson distribution with normal distribution
145 
146  const double n = rocrand_normal_double(state);
147  return static_cast<Result_Type>(round(sqrt(lambda) * n + lambda));
148 }
149 
150 template<class State, typename Result_Type = unsigned int>
151 __forceinline__ __device__ __host__ Result_Type poisson_distribution(State& state, double lambda)
152 {
153  if (lambda < lambda_threshold_small)
154  {
155  return poisson_distribution_small<State, Result_Type>(state, lambda);
156  }
157  else if (lambda <= lambda_threshold_huge)
158  {
159  return poisson_distribution_large<State, Result_Type>(state, lambda);
160  }
161  else
162  {
163  return poisson_distribution_huge<State, Result_Type>(state, lambda);
164  }
165 }
166 
167 template<class State, typename Result_Type = unsigned int>
168 __forceinline__ __device__ __host__ Result_Type poisson_distribution_itr(State& state,
169  double lambda)
170 {
171  // Algorithm ITR
172  // George S. Fishman
173  // Discrete-Event Simulation: Modeling, Programming, and Analysis
174  // p. 333
175  double L;
176  double x = 1.0;
177  double y = 1.0;
178  Result_Type k = 0;
179  int pow = 0;
180  // Algorithm ITR uses u from (0, 1) and uniform_double returns (0, 1]
181  // Change u to ensure that 1 is never generated,
182  // otherwise the inner loop never ends.
183  double u = rocrand_uniform_double(state) - ROCRAND_2POW32_INV_DOUBLE / 2.0;
184  double upow = pow + 500.0;
185  double ex = exp(-500.0);
186  do{
187  if (lambda > upow)
188  L = ex;
189  else
190  L = exp((double)(pow - lambda));
191 
192  x *= L;
193  y *= L;
194  pow += 500;
195  while (u > y)
196  {
197  k++;
198  x *= ((double)lambda / (double) k);
199  y += x;
200  }
201  } while((double)pow < lambda);
202  return k;
203 }
204 
205 template<class State, typename Result_Type = unsigned int>
206 __forceinline__ __device__ __host__ Result_Type poisson_distribution_inv(State& state,
207  double lambda)
208 {
209  if (lambda < 1000.0)
210  {
211  return poisson_distribution_itr<State, Result_Type>(state, lambda);
212  }
213  else
214  {
215  return poisson_distribution_huge<State, Result_Type>(state, lambda);
216  }
217 }
218 
219 } // end namespace detail
220 } // end namespace rocrand_device
221 
233 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
234 __forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_philox4x32_10* state,
235  double lambda)
236 {
237  return rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
238  state,
239  lambda);
240 }
241 
253 __forceinline__ __device__ __host__ uint4 rocrand_poisson4(rocrand_state_philox4x32_10* state,
254  double lambda)
255 {
256  return uint4{
257  rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
258  state,
259  lambda),
260  rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
261  state,
262  lambda),
263  rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
264  state,
265  lambda),
266  rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
267  state,
268  lambda)};
269 }
270 #endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
271 
283 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
284 __forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mrg31k3p* state,
285  double lambda)
286 {
287  return rocrand_device::detail::poisson_distribution<rocrand_state_mrg31k3p*, unsigned int>(
288  state,
289  lambda);
290 }
291 #endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
292 
304 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
305 __forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mrg32k3a* state,
306  double lambda)
307 {
308  return rocrand_device::detail::poisson_distribution<rocrand_state_mrg32k3a*, unsigned int>(
309  state,
310  lambda);
311 }
312 #endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
313 
325 #ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
326 __forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_xorwow* state,
327  double lambda)
328 {
329  return rocrand_device::detail::poisson_distribution<rocrand_state_xorwow*, unsigned int>(
330  state,
331  lambda);
332 }
333 #endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
334 
346 __forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mtgp32* state,
347  double lambda)
348 {
349  return rocrand_device::detail::poisson_distribution_inv<rocrand_state_mtgp32*, unsigned int>(
350  state,
351  lambda);
352 }
353 
365 __forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_sobol32* state,
366  double lambda)
367 {
368  return rocrand_device::detail::poisson_distribution_inv<rocrand_state_sobol32*, unsigned int>(
369  state,
370  lambda);
371 }
372 
384 __forceinline__ __device__ __host__ unsigned int
385  rocrand_poisson(rocrand_state_scrambled_sobol32* state, double lambda)
386 {
387  return rocrand_device::detail::poisson_distribution_inv<rocrand_state_scrambled_sobol32*,
388  unsigned int>(state, lambda);
389 }
390 
402 __forceinline__ __device__ __host__ unsigned long long int
403  rocrand_poisson(rocrand_state_sobol64* state, double lambda)
404 {
405  return rocrand_device::detail::poisson_distribution_inv<rocrand_state_sobol64*,
406  unsigned long long int>(state, lambda);
407 }
408 
420 __forceinline__ __device__ __host__ unsigned long long int
421  rocrand_poisson(rocrand_state_scrambled_sobol64* state, double lambda)
422 {
423  return rocrand_device::detail::poisson_distribution_inv<rocrand_state_scrambled_sobol64*,
424  unsigned long long int>(state, lambda);
425 }
426 
438 __forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_lfsr113* state,
439  double lambda)
440 {
441  return rocrand_device::detail::poisson_distribution_inv<rocrand_state_lfsr113*, unsigned int>(
442  state,
443  lambda);
444 }
445 
457 __forceinline__ __device__ __host__ unsigned int
458  rocrand_poisson(rocrand_state_threefry2x32_20* state, double lambda)
459 {
460  return rocrand_device::detail::poisson_distribution_inv(state, lambda);
461 }
462 
474 __forceinline__ __device__ __host__ unsigned int
475  rocrand_poisson(rocrand_state_threefry2x64_20* state, double lambda)
476 {
477  return rocrand_device::detail::poisson_distribution_inv(state, lambda);
478 }
479 
491 __forceinline__ __device__ __host__ unsigned int
492  rocrand_poisson(rocrand_state_threefry4x32_20* state, double lambda)
493 {
494  return rocrand_device::detail::poisson_distribution_inv(state, lambda);
495 }
496 
508 __forceinline__ __device__ __host__ unsigned int
509  rocrand_poisson(rocrand_state_threefry4x64_20* state, double lambda)
510 {
511  return rocrand_device::detail::poisson_distribution_inv(state, lambda);
512 }
513  // end of group rocranddevice
515 
516 #endif // ROCRAND_POISSON_H_
__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_philox4x32_10 *state, double lambda)
Returns a Poisson-distributed unsigned int using Philox generator.
Definition: rocrand_poisson.h:234
__forceinline__ __device__ __host__ uint4 rocrand_poisson4(rocrand_state_philox4x32_10 *state, double lambda)
Returns four Poisson-distributed unsigned int values using Philox generator.
Definition: rocrand_poisson.h:253
__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_philox4x32_10 *state)
Returns a uniformly distributed random double value from (0; 1] range.
Definition: rocrand_uniform.h:294
__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_philox4x32_10 *state)
Returns a normally distributed double value.
Definition: rocrand_normal.h:438