/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_common.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_common.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/latest/library/include/rocrand/rocrand_common.h Source File
API library
rocrand_common.h
1 // Copyright (c) 2017-2023 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_COMMON_H_
22 #define ROCRAND_COMMON_H_
23 
// Reciprocals of powers of two (2^-16, 2^-32, 2^-53, 2^-64).
// Names without a suffix are float literals, *_DOUBLE names are double literals.
#define ROCRAND_2POW16_INV (1.5258789e-05f)
#define ROCRAND_2POW32_INV (2.3283064e-10f)
#define ROCRAND_2POW64_INV (5.4210109e-20f)
#define ROCRAND_2POW32_INV_DOUBLE (2.3283064365386963e-10)
#define ROCRAND_2POW53_INV_DOUBLE (1.1102230246251565e-16)
#define ROCRAND_2POW64_INV_DOUBLE (5.4210108624275221700372640043497e-20)

// Pi, 2*pi and sqrt(2).
// NOTE(review): 3.1415926f appears to round to the float one step below the
// float closest to pi (3.14159274f); the literal is kept as-is so that values
// derived from it do not change.
#define ROCRAND_PI (3.1415926f)
#define ROCRAND_PI_DOUBLE (3.1415926535897932)
#define ROCRAND_2PI (6.2831855f)
#define ROCRAND_SQRT2 (1.4142135f)
#define ROCRAND_SQRT2_DOUBLE (1.4142135623730951)

// 2*pi pre-multiplied by 2^-16 and by 2^-32 (float).
#define ROCRAND_2POW16_INV_2PI (ROCRAND_2POW16_INV * ROCRAND_2PI)
#define ROCRAND_2POW32_INV_2PI (ROCRAND_2POW32_INV * ROCRAND_2PI)
37 
38 #include <math.h>
39 
// Qualifiers placed in front of kernel definitions.
#define ROCRAND_KERNEL __global__ static

// Qualifiers placed in front of device functions. A definition supplied
// before this point takes precedence over the default below.
#ifndef FQUALIFIERS
    #define FQUALIFIERS __forceinline__ __device__
#endif // FQUALIFIERS

// ROCRAND_HALF_MATH_SUPPORTED is defined only while compiling device code,
// and then only on the AMD platform or on the NVCC platform when
// __CUDA_ARCH__ is at least 530.
#if __HIP_DEVICE_COMPILE__
    #if defined(__HIP_PLATFORM_AMD__) \
        || (defined(__HIP_PLATFORM_NVCC__) && (__CUDA_ARCH__ >= 530))
        #define ROCRAND_HALF_MATH_SUPPORTED
    #endif
#endif
51 
52 namespace rocrand_device {
53 namespace detail {
54 
// Decide whether ROCRAND_ENABLE_INLINE_ASM is in effect.
//  - On the NVCC platform and on the gfx targets listed below the macro is
//    defined (unless it already is).
//  - On any other target, a pre-existing definition is removed while
//    compiling device code.
#if defined(__HIP_PLATFORM_NVCC__) \
    || defined(__gfx801__) || defined(__gfx802__) || defined(__gfx803__) \
    || defined(__gfx810__) \
    || defined(__gfx900__) || defined(__gfx902__) || defined(__gfx904__) \
    || defined(__gfx906__) || defined(__gfx908__) || defined(__gfx909__) \
    || defined(__gfx1030__)
    #ifndef ROCRAND_ENABLE_INLINE_ASM
        #define ROCRAND_ENABLE_INLINE_ASM
    #endif
#elif defined(__HIP_DEVICE_COMPILE__) && defined(ROCRAND_ENABLE_INLINE_ASM)
    #undef ROCRAND_ENABLE_INLINE_ASM
#endif
75 
// Returns x * y + z: the 32-bit operands x and y are widened so that their
// product is formed in 64 bits, then the 64-bit addend z is added.
//
// Device builds with ROCRAND_ENABLE_INLINE_ASM issue one multiply-add
// instruction (v_mad_u64_u32 on the AMD platform, mad.wide.u32 on the NVCC
// platform); all other builds evaluate the expression in plain C++.
//
// FQUALIFIERS is required here like on every other function of this header:
// without it the function would be a non-inline, host-only definition.
FQUALIFIERS
unsigned long long mad_u64_u32(const unsigned int x, const unsigned int y, const unsigned long long z)
{
#if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_DEVICE_COMPILE__) \
    && defined(ROCRAND_ENABLE_INLINE_ASM)

    // The carry output occupies one bit per lane, so its register type
    // follows the wavefront size.
    #if __AMDGCN_WAVEFRONT_SIZE == 64u
    using sgpr_t = unsigned long long;
    #elif __AMDGCN_WAVEFRONT_SIZE == 32u
    using sgpr_t = unsigned int;
    #endif

    unsigned long long r;
    sgpr_t c; // carry bits, SGPR, unused
    // x has "r" constraint. This allows to use both VGPR and SGPR
    // (to save VGPR) as input.
    // y and z have "v" constraints, because only one SGPR or literal
    // can be read by the instruction.
    asm volatile("v_mad_u64_u32 %0, %1, %2, %3, %4"
        : "=v"(r), "=s"(c) : "r"(x), "v"(y), "v"(z)
    );
    return r;

#elif defined(__HIP_PLATFORM_NVCC__) && defined(__HIP_DEVICE_COMPILE__) \
    && defined(ROCRAND_ENABLE_INLINE_ASM)

    unsigned long long r;
    asm("mad.wide.u32 %0, %1, %2, %3;"
        : "=l"(r) : "r"(x), "r"(y), "l"(z)
    );
    return r;

#else // host code, or device code without inline asm

    return static_cast<unsigned long long>(x) * static_cast<unsigned long long>(y) + z;

#endif
}
113 
// Gives access to the fields of an engine's internal state that cache a
// float or a double generated using the Box–Muller transform.
//
// Engine must expose a member m_state with the fields
// boxmuller_float_state, boxmuller_float, boxmuller_double_state and
// boxmuller_double. A *_state field equal to zero means "nothing cached".
template<typename Engine>
struct engine_boxmuller_helper
{
    // Returns true when a float is currently cached in the engine.
    static FQUALIFIERS
    bool has_float(const Engine * engine)
    {
        return engine->m_state.boxmuller_float_state != 0;
    }

    // Marks the float slot as empty and returns the value stored in it.
    static FQUALIFIERS
    float get_float(Engine * engine)
    {
        engine->m_state.boxmuller_float_state = 0;
        return engine->m_state.boxmuller_float;
    }

    // Stores f in the float slot and marks the slot as occupied.
    static FQUALIFIERS
    void save_float(Engine * engine, float f)
    {
        engine->m_state.boxmuller_float_state = 1;
        engine->m_state.boxmuller_float = f;
    }

    // Returns true when a double is currently cached in the engine.
    static FQUALIFIERS
    bool has_double(const Engine * engine)
    {
        return engine->m_state.boxmuller_double_state != 0;
    }

    // Marks the double slot as empty and returns the value stored in it.
    // The return type is double so that the cached value is handed back
    // without being narrowed to float.
    static FQUALIFIERS
    double get_double(Engine * engine)
    {
        engine->m_state.boxmuller_double_state = 0;
        return engine->m_state.boxmuller_double;
    }

    // Stores d in the double slot and marks the slot as occupied.
    static FQUALIFIERS
    void save_double(Engine * engine, double d)
    {
        engine->m_state.boxmuller_double_state = 1;
        engine->m_state.boxmuller_double = d;
    }
};
159 
// Splits a 64-bit value into a (lo, hi) pair of T.
// Only the explicit specializations below are defined.
template<typename T>
FQUALIFIERS void split_ull(T& lo, T& hi, unsigned long long int val);

// 32-bit outputs: lo receives bits 0..31 of val, hi receives bits 32..63.
template<>
FQUALIFIERS void split_ull<unsigned int>(unsigned int&          lo,
                                         unsigned int&          hi,
                                         unsigned long long int val)
{
    const unsigned long long int low_word_mask = 0xFFFFFFFFULL;

    hi = static_cast<unsigned int>((val >> 32) & low_word_mask);
    lo = static_cast<unsigned int>(val & low_word_mask);
}

// 64-bit outputs: lo receives val unchanged and hi is set to zero.
template<>
FQUALIFIERS void split_ull<unsigned long long int>(unsigned long long int& lo,
                                                   unsigned long long int& hi,
                                                   unsigned long long int  val)
{
    hi = 0;
    lo = val;
}
177 
178 } // end namespace detail
179 } // end namespace rocrand_device
180 
181 #endif // ROCRAND_COMMON_H_
#define FQUALIFIERS
Shorthand for commonly used function qualifiers.
Definition: rocrand_uniform.h:31