// load_tile_with_offset -- name taken from this listing's cross-reference entry
// (load_tile.hpp:26); the declaration line itself is not visible in this fragment.
// NOTE(review): the embedded line numbers skip (25 -> 29 -> 31), so the function
// name, the `offset` / `i_access` declarations and the braces are missing here.
//
// Visible behaviour: returns whatever
//   tile_window.load_with_offset(offset, number<i_access>{}, bool_constant<oob_conditional_check>{})
// returns. The trailing bool_constant parameter is unnamed and defaulted; it only
// carries `oob_conditional_check` (default: true) as a type.
// The defaulted last template parameter restricts this overload to class-type TileWindow_.
21 template <
typename TileWindow_,
23 bool oob_conditional_check =
true,
25 typename = std::enable_if_t<std::is_class_v<TileWindow_>>>
29 bool_constant<oob_conditional_check> = {})
31 return tile_window.load_with_offset(
32 offset, number<i_access>{}, bool_constant<oob_conditional_check>{});
// load_tile -- name taken from this listing's cross-reference entry (load_tile.hpp:36);
// the declaration line is not visible in this fragment.
//
// Template parameters: the window type, an access index `i_access` (default -1) and
// `oob_conditional_check` (default true).
// NOTE(review): the meaning of i_access == -1 is defined by TileWindow_::load, which is
// not shown here -- confirm before relying on it.
//
// Visible behaviour: returns tile_window.load(number<i_access>{},
// bool_constant<oob_conditional_check>{}); the unnamed defaulted bool_constant
// parameter exists only to carry the flag as a type.
35 template <
typename TileWindow_,
index_t i_access = -1,
bool oob_conditional_check =
true>
38 bool_constant<oob_conditional_check> = {})
40 return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
// load_tile_with_elementwise -- "Load tile with elementwise function." per this listing's
// cross-reference entry (load_tile.hpp:55); the declaration line is not visible here.
// NOTE(review): embedded line numbers skip (52 -> 54 -> 56 -> 58 -> 62), so the
// `i_access` template parameter, the function name and the braces are missing.
//
// Visible behaviour: selects element 0 of `tile_window` via operator[] with number<0>{},
// and calls its load(...) passing the *whole* `tile_window` object, the `elementwise`
// callable (taken by value), number<i_access>{} and the oob_conditional_check flag.
// NOTE(review): the indexing suggests TileWindow_ is a tuple-like collection of windows
// here rather than a single window -- confirm against the callers.
51 template <
typename TileWindow_,
52 typename ElementWise_,
54 bool oob_conditional_check =
true>
56 ElementWise_ elementwise,
58 bool_constant<oob_conditional_check> = {})
62 return tile_window[number<0>{}].load(
63 tile_window, elementwise, number<i_access>{}, bool_constant<oob_conditional_check>{});
// Destination-tile overload that forwards to load_with_offset.
// NOTE(review): the function name is not visible in this fragment and has no entry in
// the listing's cross-reference; presumably an overload of load_tile_with_offset -- confirm.
// The `TileWindow_` / `i_access` template parameters and the `dst_tile` / `offset`
// function parameters are used below but their declarations fall on missing lines.
//
// Visible behaviour: returns tile_window.load_with_offset(offset, dst_tile,
// number<i_access>{}, bool_constant<oob_conditional_check>{}), i.e. the caller supplies
// `dst_tile` as an extra argument to the window's load.
// The defaulted last template parameter enables this overload only when both
// remove_cv_t<DistributedTensor_> and TileWindow_ are class types.
67 template <
typename DistributedTensor_,
70 bool oob_conditional_check =
true,
72 typename = std::enable_if_t<std::is_class_v<std::remove_cv_t<DistributedTensor_>> &&
73 std::is_class_v<TileWindow_>>>
75 const TileWindow_& tile_window,
78 bool_constant<oob_conditional_check> = {})
80 return tile_window.load_with_offset(
81 offset, dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
// Destination-tile overload that forwards to load.
// NOTE(review): the function name is not visible in this fragment; presumably an
// overload of load_tile taking a caller-provided destination -- confirm.
// `TileWindow_`, `i_access` and `dst_tile` are used below but declared on missing lines.
//
// Visible behaviour: returns tile_window.load(dst_tile, number<i_access>{},
// bool_constant<oob_conditional_check>{}). `tile_window` is taken by const reference;
// oob_conditional_check defaults to true.
84 template <
typename DistributedTensor_,
87 bool oob_conditional_check =
true>
89 const TileWindow_& tile_window,
91 bool_constant<oob_conditional_check> = {})
93 return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
// load_tile_raw -- "Loads a tile of data using inline assembly." per this listing's
// cross-reference entry (load_tile.hpp:113). That entry gives the window type as
// tile_window_with_static_distribution<BottomTensorView_, WindowLengths_,
// TileDistribution_, NumCoord>; only its tail (`NumCoord>& tile_window`) is visible here.
// NOTE(review): the `NumCoord` / `i_access` template parameters and the `tile`
// parameter are declared on lines missing from this fragment.
//
// Two compile-time flags are carried as unnamed, defaulted bool_constant parameters:
// oob_conditional_check (default true) and pre_nop (default false).
// NOTE(review): the effect of pre_nop is decided inside load_raw, which is not shown.
//
// Visible behaviour: calls tile_window.load_raw(tile, number<i_access>{}, <both flags>).
// No `return` is visible on the call, so the result of load_raw appears to be discarded.
105 template <
typename T,
106 typename BottomTensorView_,
107 typename WindowLengths_,
108 typename TileDistribution_,
111 bool oob_conditional_check =
true,
112 bool pre_nop =
false>
117 NumCoord>& tile_window,
119 bool_constant<oob_conditional_check> = {},
120 bool_constant<pre_nop> = {})
122 tile_window.load_raw(
123 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
// Second load_raw forwarding overload; compared with the previous one it has an extra
// `LinearBottomDims_` template parameter, which is also the last template argument of
// the window type (`LinearBottomDims_>& tile_window`).
// NOTE(review): the function name and the full window type are not visible in this
// fragment; presumably load_tile_raw for the linear tile window
// (tile_window_linear.hpp is listed in the cross-reference) -- confirm.
//
// Visible behaviour: calls tile_window.load_raw(tile, number<i_access>{},
// bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{}) with
// oob_conditional_check defaulting to true and pre_nop to false.
126 template <
typename T,
127 typename BottomTensorView_,
128 typename WindowLengths_,
129 typename TileDistribution_,
130 typename LinearBottomDims_,
132 bool oob_conditional_check =
true,
133 bool pre_nop =
false>
138 LinearBottomDims_>& tile_window,
140 bool_constant<oob_conditional_check> = {},
141 bool_constant<pre_nop> = {})
143 tile_window.load_raw(
144 tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
// async_load_tile_with_offset -- name and full signature per this listing's
// cross-reference entry (load_tile.hpp:155): (LdsTileWindow_&& lds_tile,
// const TileWindow_& tile_window, index_t offset, number<i_access>, occ, smy).
// The `lds_tile`, `offset` and `i_access` declarations fall on lines missing here.
//
// Unlike the synchronous wrappers in this listing, the two flag parameters are *named*
// (`occ`, `smy`) and are forwarded as objects instead of being re-constructed.
// Defaults: oob_conditional_check = true, static_move_ys = false.
// The defaulted last template parameter enables this overload only when both
// remove_cvref_t<LdsTileWindow_> and TileWindow_ are class types.
//
// Visible behaviour: calls
//   tile_window.async_load_with_offset(offset, lds_tile, number<i_access>{}, occ, smy)
// and returns nothing (the cross-reference entry declares the return type as void).
148 template <
typename LdsTileWindow_,
149 typename TileWindow_,
151 bool oob_conditional_check =
true,
152 bool static_move_ys =
false,
153 typename = std::enable_if_t<std::is_class_v<remove_cvref_t<LdsTileWindow_>> &&
154 std::is_class_v<TileWindow_>>>
156 const TileWindow_& tile_window,
159 bool_constant<oob_conditional_check> occ = {},
160 bool_constant<static_move_ys> smy = {})
162 tile_window.async_load_with_offset(offset, lds_tile, number<i_access>{}, occ, smy);
// async_load_tile -- name and signature per this listing's cross-reference entry
// (load_tile.hpp:170): void (LdsTileWindow_&& lds_tile, const TileWindow_& tile_window,
// number<i_access>, occ, smy).
// Defaults: oob_conditional_check = true, static_move_ys = false; both flags are named
// parameters (`occ`, `smy`).
// NOTE(review): the function body is on lines missing from this fragment, so what it
// forwards to cannot be stated from here.
165 template <
typename LdsTileWindow_,
166 typename TileWindow_,
168 bool oob_conditional_check =
true,
169 bool static_move_ys =
false>
171 const TileWindow_& tile_window,
173 bool_constant<oob_conditional_check> occ = {},
174 bool_constant<static_move_ys> smy = {})
// async_load_tile_raw -- name and signature per this listing's cross-reference entry
// (load_tile.hpp:184): void (LdsTileWindow_&& lds_tile, const TileWindow_& tile_window,
// number<i_access>, bool_constant<oob_conditional_check>, bool_constant<pre_nop>).
// Defaults: oob_conditional_check = true, pre_nop = false; both flag parameters are unnamed.
//
// Visible behaviour: calls tile_window.async_load_raw(lds_tile, ..., <both flags>).
// NOTE(review): the argument between `lds_tile` and the flags is on a missing line
// (embedded numbers skip 190 -> 192); presumably number<i_access>{} -- confirm.
179 template <
typename LdsTileWindow_,
180 typename TileWindow_,
182 bool oob_conditional_check =
true,
183 bool pre_nop =
false>
185 const TileWindow_& tile_window,
187 bool_constant<oob_conditional_check> = {},
188 bool_constant<pre_nop> = {})
190 tile_window.async_load_raw(lds_tile,
192 bool_constant<oob_conditional_check>{},
193 bool_constant<pre_nop>{});
// Body of async_load_fence(index_t cnt = 0) -- signature per this listing's
// cross-reference entry (load_tile.hpp:196); the declaration line is not visible here.
//
// Emits a single `s_waitcnt vmcnt(<cnt>)` instruction through inline assembly:
//   - no output operands;
//   - `cnt` is the only input, bound with the "n" constraint (immediate integer operand);
//   - "memory" is listed as a clobber, and the statement is `volatile`.
// NOTE(review): because "n" requires an immediate, `cnt` presumably has to be a
// compile-time-known value once this function is inlined -- confirm that every caller
// passes a constant.
198 asm volatile(
"s_waitcnt vmcnt(%0)" : :
"n"(cnt) :
"memory");
// Two bare template headers; the declarations they introduce are on lines missing from
// this listing, so neither the function names nor the bodies are visible.
// NOTE(review): presumably overloads for the null tile window (null_tile_window.hpp is
// listed in the cross-reference) -- confirm against the original load_tile.hpp.
201 template <
typename WindowLengths>
207 template <
typename T,
typename WindowLengths>
#define CK_TILE_DEVICE
Definition: config.hpp:45
Definition: cluster_descriptor.hpp:13
CK_TILE_DEVICE void async_load_tile_raw(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: load_tile.hpp:184
CK_TILE_DEVICE void async_load_tile_with_offset(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, index_t offset, number< i_access >={}, bool_constant< oob_conditional_check > occ={}, bool_constant< static_move_ys > smy={})
Definition: load_tile.hpp:155
CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access >={}, bool_constant< oob_conditional_check >={})
Load tile with elementwise function.
Definition: load_tile.hpp:55
CK_TILE_DEVICE auto load_tile_with_offset(const TileWindow_ &tile_window, offset_t offset, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:26
int32_t index_t
Definition: integer.hpp:9
CK_TILE_DEVICE void async_load_tile(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check > occ={}, bool_constant< static_move_ys > smy={})
Definition: load_tile.hpp:170
CK_TILE_DEVICE auto load_tile_raw(T &tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Loads a tile of data using inline assembly.
Definition: load_tile.hpp:113
CK_TILE_DEVICE void async_load_fence(index_t cnt=0)
Definition: load_tile.hpp:196
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:36
Definition: integral_constant.hpp:13
Definition: null_tensor.hpp:9
Definition: null_tile_window.hpp:19
Definition: coordinate_transform.hpp:1392
Definition: tile_window_linear.hpp:55
This class provides tile (windowed) view and access to the device memory.
Definition: tile_window.hpp:47