/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/library/include/rocrand/rocrand_common.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/library/include/rocrand/rocrand_common.h Source File#

API library: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-rocrand/checkouts/develop/library/include/rocrand/rocrand_common.h Source File
rocrand_common.h
1 // Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 
21 #ifndef ROCRAND_COMMON_H_
22 #define ROCRAND_COMMON_H_
23 
// Single-precision reciprocals of powers of two: 2^-16, 2^-32 and 2^-64.
#define ROCRAND_2POW16_INV (1.5258789e-05f)
#define ROCRAND_2POW32_INV (2.3283064e-10f)
#define ROCRAND_2POW64_INV (5.4210109e-20f)

// Double-precision reciprocals of powers of two: 2^-32, 2^-53 and 2^-64.
#define ROCRAND_2POW32_INV_DOUBLE (2.3283064365386963e-10)
#define ROCRAND_2POW53_INV_DOUBLE (1.1102230246251565e-16)
#define ROCRAND_2POW64_INV_DOUBLE (5.4210108624275221700372640043497e-20)

// Single-precision 2*pi scaled by 2^-16 and by 2^-32.
#define ROCRAND_2POW16_INV_2PI (9.58738e-05f)
#define ROCRAND_2POW32_INV_2PI (1.46291807e-09f)

// pi (single and double precision) and 2*pi (single precision).
#define ROCRAND_PI (3.141592653f)
#define ROCRAND_PI_DOUBLE (3.1415926535897932)
#define ROCRAND_2PI (6.2831855f)

// Square root of two (single and double precision).
#define ROCRAND_SQRT2 (1.4142135f)
#define ROCRAND_SQRT2_DOUBLE (1.4142135623730951)
37 
38 #include <hip/hip_runtime.h>
39 
40 #include <math.h>
41 
// Qualifier macro for kernel definitions: expands to `__global__ static`.
#define ROCRAND_KERNEL __global__ static

// ROCRAND_HALF_MATH_SUPPORTED is defined only during a device compilation
// pass, and then only on the AMD platform or on the NVCC platform when
// __CUDA_ARCH__ is at least 530.
#if defined(__HIP_DEVICE_COMPILE__) && __HIP_DEVICE_COMPILE__
    #if defined(__HIP_PLATFORM_AMD__)
        #define ROCRAND_HALF_MATH_SUPPORTED
    #elif defined(__HIP_PLATFORM_NVCC__) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 530)
        #define ROCRAND_HALF_MATH_SUPPORTED
    #endif
#endif
49 
50 // Copyright 2001 John Maddock.
51 // Copyright 2017 Peter Dimov.
52 //
53 // Distributed under the Boost Software License, Version 1.0.
54 //
55 // See http://www.boost.org/LICENSE_1_0.txt
56 //
57 // BOOST_STRINGIZE(X)
// Helper: applies the # operator directly, so its argument is NOT
// macro-expanded before being turned into a string literal.
#define ROCRAND_DO_STRINGIZE(X) #X
// Public entry point: the extra level of indirection makes the preprocessor
// expand the argument first, then stringize the result.
#define ROCRAND_STRINGIZE(X) ROCRAND_DO_STRINGIZE(X)
60 
61 // Copyright 2017 Peter Dimov.
62 //
63 // Distributed under the Boost Software License, Version 1.0.
64 //
65 // See http://www.boost.org/LICENSE_1_0.txt
66 //
67 // BOOST_PRAGMA_MESSAGE("message")
68 //
69 // Expands to the equivalent of #pragma message("message")
// Compiler dispatch, in priority order:
//   1. Intel, or MSVC without __GNUC__: __pragma(message(...)) prefixed with
//      "file(line): note: ".
//   2. Compilers defining __GNUC__: _Pragma with the stringized message pragma.
//   3. Anything else: expands to nothing.
// The !defined(__GNUC__) guard on the MSVC test keeps __GNUC__ ahead of
// _MSC_VER when both are defined.
#if defined(__INTEL_COMPILER) || (!defined(__GNUC__) && defined(_MSC_VER))
    #define ROCRAND_PRAGMA_MESSAGE(x) \
        __pragma(message(__FILE__ "(" ROCRAND_STRINGIZE(__LINE__) "): note: " x))
#elif defined(__GNUC__)
    #define ROCRAND_PRAGMA_MESSAGE(x) _Pragma(ROCRAND_STRINGIZE(message(x)))
#else
    #define ROCRAND_PRAGMA_MESSAGE(x)
#endif
81 
// ROCRAND_DEPRECATED(msg) marks a declaration as deprecated with the given
// message. Selection order:
//   - C++14 or newer: the standard [[deprecated]] attribute;
//   - MSVC (but not clang in MSVC mode): __declspec(deprecated);
//   - GCC-compatible compilers: __attribute__((deprecated));
//   - otherwise: expands to nothing.
#if defined(__cplusplus) && (__cplusplus >= 201402L)
    #define ROCRAND_DEPRECATED(msg) [[deprecated(msg)]]
#elif !defined(__clang__) && defined(_MSC_VER)
    #define ROCRAND_DEPRECATED(msg) __declspec(deprecated(msg))
#elif defined(__GNUC__) || defined(__clang__)
    #define ROCRAND_DEPRECATED(msg) __attribute__((deprecated(msg)))
#else
    #define ROCRAND_DEPRECATED(msg)
#endif
91 
92 namespace rocrand_device {
93 namespace detail {
94 
// Inline assembly is switched on for the NVCC platform and for the AMD GPU
// targets listed below. For any other target, a ROCRAND_ENABLE_INLINE_ASM
// definition that is already present is removed during device compilation.
#if defined(__HIP_PLATFORM_NVCC__) \
    || defined(__gfx801__) || defined(__gfx802__) || defined(__gfx803__) \
    || defined(__gfx810__) \
    || defined(__gfx900__) || defined(__gfx902__) || defined(__gfx904__) \
    || defined(__gfx906__) || defined(__gfx908__) || defined(__gfx909__) \
    || defined(__gfx1030__)
    #ifndef ROCRAND_ENABLE_INLINE_ASM
        #define ROCRAND_ENABLE_INLINE_ASM
    #endif
#elif defined(__HIP_DEVICE_COMPILE__) && defined(ROCRAND_ENABLE_INLINE_ASM)
    #undef ROCRAND_ENABLE_INLINE_ASM
#endif
115 
116 __forceinline__ __device__ __host__ unsigned long long
117  mad_u64_u32(const unsigned int x, const unsigned int y, const unsigned long long z)
118 {
119 #if defined(__HIP_PLATFORM_AMD__) && defined(__HIP_DEVICE_COMPILE__) \
120  && defined(ROCRAND_ENABLE_INLINE_ASM)
121 
122  #if __AMDGCN_WAVEFRONT_SIZE == 64u
123  using sgpr_t = unsigned long long;
124  #elif __AMDGCN_WAVEFRONT_SIZE == 32u
125  using sgpr_t = unsigned int;
126  #endif
127 
128  unsigned long long r;
129  sgpr_t c; // carry bits, SGPR, unused
130  // x has "r" constraint. This allows to use both VGPR and SGPR
131  // (to save VGPR) as input.
132  // y and z have "v" constraints, because only one SGPR or literal
133  // can be read by the instruction.
134  asm volatile("v_mad_u64_u32 %0, %1, %2, %3, %4"
135  : "=v"(r), "=s"(c) : "r"(x), "v"(y), "v"(z)
136  );
137  return r;
138  #elif defined(__HIP_PLATFORM_NVCC__) && defined(__HIP_DEVICE_COMPILE__) \
139  && defined(ROCRAND_ENABLE_INLINE_ASM)
140 
141  unsigned long long r;
142  asm("mad.wide.u32 %0, %1, %2, %3;"
143  : "=l"(r) : "r"(x), "r"(y), "l"(z)
144  );
145  return r;
146 
147  #else // host code
148 
149  return static_cast<unsigned long long>(x) * static_cast<unsigned long long>(y) + z;
150 
151  #endif
152 }
153 
154 // This helps access fields of engine's internal state which
155 // saves floats and doubles generated using the Box–Muller transform
156 template<typename Engine>
157 struct engine_boxmuller_helper
158 {
159  static __forceinline__ __device__ __host__ bool has_float(const Engine* engine)
160  {
161  return engine->m_state.boxmuller_float_state != 0;
162  }
163 
164  static __forceinline__ __device__ __host__ float get_float(Engine* engine)
165  {
166  engine->m_state.boxmuller_float_state = 0;
167  return engine->m_state.boxmuller_float;
168  }
169 
170  static __forceinline__ __device__ __host__ void save_float(Engine* engine, float f)
171  {
172  engine->m_state.boxmuller_float_state = 1;
173  engine->m_state.boxmuller_float = f;
174  }
175 
176  static __forceinline__ __device__ __host__ bool has_double(const Engine* engine)
177  {
178  return engine->m_state.boxmuller_double_state != 0;
179  }
180 
181  static __forceinline__ __device__ __host__ float get_double(Engine* engine)
182  {
183  engine->m_state.boxmuller_double_state = 0;
184  return engine->m_state.boxmuller_double;
185  }
186 
187  static __forceinline__ __device__ __host__ void save_double(Engine* engine, double d)
188  {
189  engine->m_state.boxmuller_double_state = 1;
190  engine->m_state.boxmuller_double = d;
191  }
192 };
193 
194 template<typename T>
195 __forceinline__ __device__ __host__ void split_ull(T& lo, T& hi, unsigned long long int val);
196 
197 template<>
198 __forceinline__ __device__ __host__ void
199  split_ull(unsigned int& lo, unsigned int& hi, unsigned long long int val)
200 {
201  lo = val & 0xFFFFFFFF;
202  hi = (val >> 32) & 0xFFFFFFFF;
203 }
204 
205 template<>
206 __forceinline__ __device__ __host__ void
207  split_ull(unsigned long long int& lo, unsigned long long int& hi, unsigned long long int val)
208 {
209  lo = val;
210  hi = 0;
211 }
212 
213 } // end namespace detail
214 } // end namespace rocrand_device
215 
216 #endif // ROCRAND_COMMON_H_