/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp Source File#

hipCUB: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-hipcub/checkouts/docs-5.5.1/hipcub/include/hipcub/backend/rocprim/thread/thread_load.hpp Source File
thread_load.hpp
1 /******************************************************************************
2  * Copyright (c) 2010-2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  * Modifications Copyright (c) 2021, Advanced Micro Devices, Inc. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
30 #ifndef HIPCUB_ROCPRIM_THREAD_THREAD_LOAD_HPP_
31 #define HIPCUB_ROCPRIM_THREAD_THREAD_LOAD_HPP_
32 BEGIN_HIPCUB_NAMESPACE
33 
/**
 * \brief Selector passed as the MODIFIER template argument of ThreadLoad and
 *        AsmThreadLoad.
 *
 * The underlying type is fixed to int32_t. The enumerator values are written
 * out explicitly; they are the same 0..6 sequence implicit numbering yields.
 *
 * The per-enumerator notes describe the AsmThreadLoad specializations that
 * this header generates when HIPCUB_THREAD_LOAD_USE_CACHE_MODIFIERS == 1.
 * Without that setting every modifier goes through the generic memcpy-based
 * AsmThreadLoad.
 * NOTE(review): the names presumably mirror CUB's cache load modifiers --
 * confirm against the CUB documentation before relying on their semantics.
 */
enum CacheLoadModifier : int32_t
{
    LOAD_DEFAULT  = 0, ///< No asm specialization in this header; generic load is used.
    LOAD_CA       = 1, ///< Asm load is emitted with the "glc" modifier.
    LOAD_CG       = 2, ///< Asm load is emitted with the "glc slc" modifiers.
    LOAD_CS       = 3, ///< Asm load is emitted without a modifier (marked TODO in this header).
    LOAD_CV       = 4, ///< Asm load is emitted with "glc" and followed by "s_waitcnt vmcnt(0)".
    LOAD_LDG      = 5, ///< Asm load is emitted without a modifier (marked TODO in this header).
    LOAD_VOLATILE = 6, ///< Same asm text as LOAD_CV: "glc" plus "s_waitcnt vmcnt(0)".
};
44 
45 template<CacheLoadModifier MODIFIER = LOAD_DEFAULT, typename T>
46 HIPCUB_DEVICE __forceinline__ T AsmThreadLoad(void * ptr)
47 {
48  T retval = 0;
49  __builtin_memcpy(&retval, ptr, sizeof(T));
50  return retval;
51 }
52 
53 #if HIPCUB_THREAD_LOAD_USE_CACHE_MODIFIERS == 1
54 
// Important for syncing. Check section 9.2.2 or 7.3 in the following document
// http://developer.amd.com/wordpress/media/2013/12/AMD_GCN3_Instruction_Set_Architecture_rev1.1.pdf
//
// HIPCUB_ASM_THREAD_LOAD defines the explicit specialization
// AsmThreadLoad<cache_modifier, type>(void*), implemented with inline assembly.
//
//   cache_modifier      CacheLoadModifier enumerator the specialization is for.
//   llvm_cache_modifier String literal appended after the load's operands.
//   type                Return type of the specialization.
//   interim_type        Type of the variable bound to the asm output operand;
//                       must be at least as large as `type`.
//   asm_operator        Load instruction mnemonic (stringized).
//   output_modifier     Constraint letter of the output operand (stringized,
//                       prefixed with "=").
//   wait_cmd            String literal placed between "s_waitcnt " and "(0)".
//                       When it is empty the emitted text is "s_waitcnt (0)".
//
// The loaded bits are copied from the interim variable into `type` instead of
// being returned through an implicit conversion. A conversion would be a
// numeric integer-to-floating conversion for the float/uint32_t and
// double/uint64_t pairs and would not reproduce the stored value. For the
// integer pairs copying the low sizeof(type) bytes yields the same value as
// the narrowing conversion; this relies on the target being little-endian.
#define HIPCUB_ASM_THREAD_LOAD(cache_modifier,                                                   \
                               llvm_cache_modifier,                                              \
                               type,                                                             \
                               interim_type,                                                     \
                               asm_operator,                                                     \
                               output_modifier,                                                  \
                               wait_cmd)                                                         \
    template<>                                                                                   \
    HIPCUB_DEVICE __forceinline__ type AsmThreadLoad<cache_modifier, type>(void * ptr)           \
    {                                                                                            \
        static_assert(sizeof(type) <= sizeof(interim_type),                                      \
                      "interim_type must be able to hold every byte of type");                   \
        interim_type raw;                                                                        \
        asm volatile(                                                                            \
            #asm_operator " %0, %1 " llvm_cache_modifier "\n"                                    \
            "\ts_waitcnt " wait_cmd "(0)" : "=" #output_modifier(raw) : "v"(ptr)                 \
        );                                                                                       \
        type retval;                                                                             \
        __builtin_memcpy(&retval, &raw, sizeof(type));                                           \
        return retval;                                                                           \
    }
74 
// HIPCUB_ASM_THREAD_LOAD_GROUP emits, for a single cache modifier, the
// AsmThreadLoad specializations of every element type handled in assembly.
// All three arguments are forwarded unchanged to HIPCUB_ASM_THREAD_LOAD.
// The entries are grouped by element width:
//   8-bit   int8_t / uint8_t    held in a 16-bit interim variable
//   16-bit  int16_t / uint16_t
//   32-bit  uint32_t / float    (float uses a uint32_t interim variable)
//   64-bit  uint64_t / double   (double uses a uint64_t interim variable)
// TODO Add specialization for custom larger data types
#define HIPCUB_ASM_THREAD_LOAD_GROUP(cache_modifier, llvm_cache_modifier, wait_cmd)                                    \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, int8_t, int16_t, flat_load_sbyte, v, wait_cmd);        \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, uint8_t, uint16_t, flat_load_ubyte, v, wait_cmd);      \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, int16_t, int16_t, flat_load_sshort, v, wait_cmd);      \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, uint16_t, uint16_t, flat_load_ushort, v, wait_cmd);    \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, uint32_t, uint32_t, flat_load_dword, v, wait_cmd);     \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, float, uint32_t, flat_load_dword, v, wait_cmd);        \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, uint64_t, uint64_t, flat_load_dwordx2, v, wait_cmd);   \
    HIPCUB_ASM_THREAD_LOAD(cache_modifier, llvm_cache_modifier, double, uint64_t, flat_load_dwordx2, v, wait_cmd);
85 
86 HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CA, "glc", "");
87 HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CG, "glc slc", "");
88 HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CV, "glc", "vmcnt");
89 HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_VOLATILE, "glc", "vmcnt");
90 
91 // TODO find correct modifiers to match these
92 HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_LDG, "", "");
93 HIPCUB_ASM_THREAD_LOAD_GROUP(LOAD_CS, "", "");
94 
95 #endif
96 
97 template<CacheLoadModifier MODIFIER = LOAD_DEFAULT, typename InputIteratorT>
98 HIPCUB_DEVICE __forceinline__
99 typename std::iterator_traits<InputIteratorT>::value_type ThreadLoad(InputIteratorT itr)
100 {
101  using T = typename std::iterator_traits<InputIteratorT>::value_type;
102  T retval = ThreadLoad<MODIFIER>(&(*itr));
103  return retval;
104 }
105 
106 template<CacheLoadModifier MODIFIER = LOAD_DEFAULT, typename T>
107 HIPCUB_DEVICE __forceinline__ T
108 ThreadLoad(T * ptr)
109 {
110  return AsmThreadLoad<MODIFIER, T>(ptr);
111 }
112 
113 END_HIPCUB_NAMESPACE
114 #endif