/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/tensor/load_tile.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/tensor/load_tile.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/core/tensor/load_tile.hpp Source File
load_tile.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 
18 
19 namespace ck_tile {
20 // Per-lane read-offset tweaks allow swizzling patterns not representable by tile_distribution.
21 template <typename TileWindow_,
22  index_t i_access = -1,
23  bool oob_conditional_check = true,
24  typename offset_t,
25  typename = std::enable_if_t<std::is_class_v<TileWindow_>>>
26 CK_TILE_DEVICE auto load_tile_with_offset(const TileWindow_& tile_window,
27  offset_t offset,
28  number<i_access> = {},
29  bool_constant<oob_conditional_check> = {})
30 {
31  return tile_window.load_with_offset(
32  offset, number<i_access>{}, bool_constant<oob_conditional_check>{});
33 }
34 
35 template <typename TileWindow_, index_t i_access = -1, bool oob_conditional_check = true>
36 CK_TILE_DEVICE auto load_tile(const TileWindow_& tile_window,
37  number<i_access> = {},
38  bool_constant<oob_conditional_check> = {})
39 {
40  return tile_window.load(number<i_access>{}, bool_constant<oob_conditional_check>{});
41 }
42 
51 template <typename TileWindow_,
52  typename ElementWise_,
53  index_t i_access = -1,
54  bool oob_conditional_check = true>
55 CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_& tile_window,
56  ElementWise_ elementwise,
57  number<i_access> = {},
58  bool_constant<oob_conditional_check> = {})
59 {
60  // TODO: Tile windows should works with unknow number of params
61  // Load element_wise API works only when the input typle is a tuple-tyupe
62  return tile_window[number<0>{}].load(
63  tile_window, elementwise, number<i_access>{}, bool_constant<oob_conditional_check>{});
64 }
65 
66 // Per-lane read-offset tweaks allow swizzling patterns not representable by tile_distribution.
67 template <typename DistributedTensor_,
68  typename TileWindow_,
69  index_t i_access = -1,
70  bool oob_conditional_check = true,
71  typename offset_t,
72  typename = std::enable_if_t<std::is_class_v<std::remove_cv_t<DistributedTensor_>> &&
73  std::is_class_v<TileWindow_>>>
74 CK_TILE_DEVICE auto load_tile_with_offset(DistributedTensor_& dst_tile,
75  const TileWindow_& tile_window,
76  offset_t offset,
77  number<i_access> = {},
78  bool_constant<oob_conditional_check> = {})
79 {
80  return tile_window.load_with_offset(
81  offset, dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
82 }
83 
84 template <typename DistributedTensor_,
85  typename TileWindow_,
86  index_t i_access = -1,
87  bool oob_conditional_check = true>
88 CK_TILE_DEVICE auto load_tile(DistributedTensor_& dst_tile,
89  const TileWindow_& tile_window,
90  number<i_access> = {},
91  bool_constant<oob_conditional_check> = {})
92 {
93  return tile_window.load(dst_tile, number<i_access>{}, bool_constant<oob_conditional_check>{});
94 }
95 
105 template <typename T,
106  typename BottomTensorView_,
107  typename WindowLengths_,
108  typename TileDistribution_,
109  index_t NumCoord,
110  index_t i_access = -1,
111  bool oob_conditional_check = true,
112  bool pre_nop = false>
114  const tile_window_with_static_distribution<BottomTensorView_,
115  WindowLengths_,
116  TileDistribution_,
117  NumCoord>& tile_window,
118  number<i_access> = {},
119  bool_constant<oob_conditional_check> = {},
120  bool_constant<pre_nop> = {})
121 {
122  tile_window.load_raw(
123  tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
124 }
125 
126 template <typename T,
127  typename BottomTensorView_,
128  typename WindowLengths_,
129  typename TileDistribution_,
130  typename LinearBottomDims_,
131  index_t i_access = -1,
132  bool oob_conditional_check = true,
133  bool pre_nop = false>
135  const tile_window_linear<BottomTensorView_,
136  WindowLengths_,
137  TileDistribution_,
138  LinearBottomDims_>& tile_window,
139  number<i_access> = {},
140  bool_constant<oob_conditional_check> = {},
141  bool_constant<pre_nop> = {})
142 {
143  tile_window.load_raw(
144  tile, number<i_access>{}, bool_constant<oob_conditional_check>{}, bool_constant<pre_nop>{});
145 }
146 
147 // Per-lane read-offset tweaks allow swizzling patterns not representable by tile_distribution.
148 template <typename LdsTileWindow_,
149  typename TileWindow_,
150  index_t i_access = -1,
151  bool oob_conditional_check = true,
152  bool static_move_ys = false,
153  typename = std::enable_if_t<std::is_class_v<remove_cvref_t<LdsTileWindow_>> &&
154  std::is_class_v<TileWindow_>>>
155 CK_TILE_DEVICE void async_load_tile_with_offset(LdsTileWindow_&& lds_tile,
156  const TileWindow_& tile_window,
157  index_t offset,
158  number<i_access> = {},
159  bool_constant<oob_conditional_check> occ = {},
160  bool_constant<static_move_ys> smy = {})
161 {
162  tile_window.async_load_with_offset(offset, lds_tile, number<i_access>{}, occ, smy);
163 }
164 
165 template <typename LdsTileWindow_,
166  typename TileWindow_,
167  index_t i_access = -1,
168  bool oob_conditional_check = true,
169  bool static_move_ys = false>
170 CK_TILE_DEVICE void async_load_tile(LdsTileWindow_&& lds_tile,
171  const TileWindow_& tile_window,
172  number<i_access> = {},
173  bool_constant<oob_conditional_check> occ = {},
174  bool_constant<static_move_ys> smy = {})
175 {
176  async_load_tile_with_offset(lds_tile, tile_window, 0, number<i_access>{}, occ, smy);
177 }
178 
179 template <typename LdsTileWindow_,
180  typename TileWindow_,
181  index_t i_access = -1,
182  bool oob_conditional_check = true,
183  bool pre_nop = false>
184 CK_TILE_DEVICE void async_load_tile_raw(LdsTileWindow_&& lds_tile,
185  const TileWindow_& tile_window,
186  number<i_access> = {},
187  bool_constant<oob_conditional_check> = {},
188  bool_constant<pre_nop> = {})
189 {
190  tile_window.async_load_raw(lds_tile,
191  number<i_access>{},
192  bool_constant<oob_conditional_check>{},
193  bool_constant<pre_nop>{});
194 }
195 
197 {
198  asm volatile("s_waitcnt vmcnt(%0)" : : "n"(cnt) : "memory");
199 }
200 
201 template <typename WindowLengths>
203 {
204  return null_tensor{};
205 }
206 
207 template <typename T, typename WindowLengths>
209 {
210 }
211 
212 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:45
Definition: cluster_descriptor.hpp:13
CK_TILE_DEVICE void async_load_tile_raw(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: load_tile.hpp:184
CK_TILE_DEVICE void async_load_tile_with_offset(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, index_t offset, number< i_access >={}, bool_constant< oob_conditional_check > occ={}, bool_constant< static_move_ys > smy={})
Definition: load_tile.hpp:155
CK_TILE_DEVICE auto load_tile_with_elementwise(const TileWindow_ &tile_window, ElementWise_ elementwise, number< i_access >={}, bool_constant< oob_conditional_check >={})
Load tile with elementwise function.
Definition: load_tile.hpp:55
CK_TILE_DEVICE auto load_tile_with_offset(const TileWindow_ &tile_window, offset_t offset, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:26
int32_t index_t
Definition: integer.hpp:9
CK_TILE_DEVICE void async_load_tile(LdsTileWindow_ &&lds_tile, const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check > occ={}, bool_constant< static_move_ys > smy={})
Definition: load_tile.hpp:170
CK_TILE_DEVICE auto load_tile_raw(T &tile, const tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Loads a tile of data using inline assembly.
Definition: load_tile.hpp:113
CK_TILE_DEVICE void async_load_fence(index_t cnt=0)
Definition: load_tile.hpp:196
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:36
Definition: integral_constant.hpp:13
Definition: null_tensor.hpp:9
Definition: null_tile_window.hpp:19
Definition: coordinate_transform.hpp:1392
Definition: tile_window_linear.hpp:55
This class provides tile (windowed) view and access to the device memory.
Definition: tile_window.hpp:47