include/ck_tile/ops/elementwise/kernel/elementwise_kernel.hpp Source File

include/ck_tile/ops/elementwise/kernel/elementwise_kernel.hpp Source File

Composable Kernel: include/ck_tile/ops/elementwise/kernel/elementwise_kernel.hpp Source File
elementwise_kernel.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
7 #include "ck_tile/ops/common.hpp"
10 namespace ck_tile {
11 
12 template <typename Problem_, typename Policy_>
14 {
17 
22 
// Device-side call operator of the elementwise kernel.
//
// For the calling block it loads one tile from every input tensor, invokes
// ElementWiseOperation once per tile element, and stores the resulting tile
// through p_y.
//
// Parameters:
//   lens            lengths handed to make_merge_transform and to every
//                   make_naive_tensor_view call (inputs and output).
//   input_strides   strides used for every input view; the same strides are
//                   applied to all entries of input_tensors.
//   output_strides  strides used for the output view.
//   input_tensors   tuple whose i-th entry (input_tensors.get(i)) is passed
//                   to make_naive_tensor_view as the i-th input.
//   p_y             pointer the output view is built on.
//
// NOTE(review): this listing omits original lines 45, 47-50, 54, 83, 85-88
// and 91, so the remaining arguments of the pad_tensor_view(...) and
// make_tile_window(...) calls below are not visible here. Consult the real
// header before changing those calls.
// NOTE(review): the parameter pack is named XDataType, the same name as the
// class-level XDataType alias; inside this function the name refers to the
// pack (see sizeof...(XDataType) below).
23  template <typename... XDataType, typename Dims>
24  CK_TILE_DEVICE void operator()(Dims lens,
25  Dims input_strides,
26  Dims output_strides,
27  const tuple<XDataType...>& input_tensors,
28  YDataType* p_y) const
29  {
30  using S = typename Problem::BlockShape;
31 
32  // Setup block-level coordinates and transforms
// iM is this block's window origin: block id scaled by S::kBlockM.
// merge_transform is built once from lens and reused for inputs and output
// (presumably it flattens all dimensions into one - confirm against
// make_merge_transform).
33  const index_t iM = get_block_id() * S::kBlockM;
34  const auto merge_transform = make_merge_transform(lens);
35 
36  // Load all input tiles into registers.
37  // The lambda structure here is intended to minimize the lifetime
38  // of intermediate objects (views, windows) used for loading.
// generate_tuple calls the lambda once per input (sizeof...(XDataType) times)
// and collects the loaded tiles into x_tiles.
39  const auto x_tiles = ck_tile::generate_tuple(
40  [&](auto i) {
// View over the i-th input, declared in the global address space.
41  const auto tensor_view = make_naive_tensor_view<address_space_enum::global>(
42  input_tensors.get(i), lens, input_strides, number<S::kVectorM>{}, number<1>{});
43 
// The view is transformed with merge_transform and padded; the other
// arguments of this call are on lines missing from the listing.
44  const auto transformed_tensor = pad_tensor_view(
46  ck_tile::make_tuple(merge_transform),
51 
// Window positioned at {iM}, using the distribution supplied by Policy.
52  const auto x_window =
53  make_tile_window(transformed_tensor,
55  {iM},
56  Policy::template MakeXBlockTileDistribution<Problem>());
57 
58  return load_tile(x_window);
59  },
60  number<sizeof...(XDataType)>{});
61 
62  // Setup output tile in registers.
// The output tile reuses the tile distribution of the first input tile.
63  const auto& x_tile0 = x_tiles.get(number<0>{});
64  auto y_tile = make_static_distributed_tensor<YDataType>(x_tile0.get_tile_distribution());
65 
66  // Perform element-wise computation.
// Sweeps span 0 of the first input tile. For each index the operation
// receives y_tile(tile_idx) as its first argument, followed by the matching
// element of every input tile converted to ComputeDataType.
67  const auto spans = x_tile0.get_distributed_spans();
68  sweep_tile_span(spans[number<0>{}], [&](auto idx) {
69  const auto tile_idx = make_tuple(idx);
70  apply(
71  [&](auto&&... tiles) {
72  ElementWiseOperation{}(y_tile(tile_idx),
73  type_convert<ComputeDataType>(tiles[tile_idx])...);
74  },
75  x_tiles);
76  });
77 
78  // Setup output window and store the result tile.
// Output view over p_y, declared in the global address space, built from
// lens and output_strides.
79  const auto y_m_n = make_naive_tensor_view<address_space_enum::global>(
80  p_y, lens, output_strides, number<S::kVectorM>{});
81 
// Same merge_transform + padding as for the inputs; the other arguments of
// this call are on lines missing from the listing.
82  const auto transformed_y_m_n = pad_tensor_view(
84  ck_tile::make_tuple(merge_transform),
89 
// The output window starts at the same origin {iM} and uses y_tile's own
// distribution.
90  auto y_window = make_tile_window(transformed_y_m_n,
92  {iM},
93  y_tile.get_tile_distribution());
94 
// y_tile is passed through cast_tile<YDataType> before being stored.
95  store_tile(y_window, cast_tile<YDataType>(y_tile));
96  }
97 
// Host-side argument check.
//
// Multiplies together every entry of input_sizes and reports whether the
// product is a multiple of Problem_::BlockShape::kVectorM.
//
// Parameters:
//   input_sizes  tuple of integral sizes whose product is the total number
//                of input elements.
// Returns:
//   true when the total element count is divisible by kVectorM, false
//   otherwise. On failure an explanatory message is emitted through
//   CK_TILE_ERROR, but only when the CK_TILE_LOGGING environment switch is
//   enabled.
//
// The signature on line 99 is restored from this page's symbol index
// (elementwise_kernel.hpp:99); the scraped listing had dropped it.
98  template <typename... Ints>
99  static CK_TILE_HOST bool IsSupportedArgument(const ck_tile::tuple<Ints...>& input_sizes)
100  {
// Accumulate in 64 bits: with a 32-bit int the product of the sizes can
// overflow for large tensors (undefined behaviour for signed types), which
// would make the divisibility test below unreliable.
101  long long total_elements = 1;
102  const auto kVectorM = Problem_::BlockShape::kVectorM;
103 
// Fold every size in the tuple into the running product.
104  apply([&](auto&&... args) { ((total_elements *= args), ...); }, input_sizes);
105 
106  if((total_elements % kVectorM) != 0)
107  {
// Logging is opt-in; the rejection itself does not depend on it.
108  if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING)))
109  {
110  CK_TILE_ERROR("Conditions not met: total number of input elements (",
111  total_elements,
112  ") should be multiple of the vectorization size (",
113  kVectorM,
114  ")");
115  }
116  return false;
117  }
118 
119  return true;
120  }
121 };
122 
123 } // namespace ck_tile
#define CK_TILE_DEVICE
Definition: config.hpp:40
#define CK_TILE_HOST
Definition: config.hpp:39
Definition: cluster_descriptor.hpp:13
constexpr decltype(auto) apply(F &&f, Tuple &&t)
Definition: tuple.hpp:509
bool EnvIsEnabled(EnvVar)
Definition: env.hpp:156
void CK_TILE_ERROR(Args &&... args) noexcept
Definition: env.hpp:12
constexpr CK_TILE_HOST_DEVICE auto transform_tensor_view(const OldTensorView &old_tensor_view, const NewTransforms &new_transforms, NewLowerDimensionOldVisibleIdss, NewUpperDimensionNewVisibleIdss)
Definition: tensor_view.hpp:510
constexpr CK_TILE_HOST_DEVICE auto make_merge_transform(const LowLengths &low_lengths)
Definition: coordinate_transform.hpp:1672
int32_t index_t
Definition: integer.hpp:9
constexpr CK_TILE_HOST_DEVICE auto pad_tensor_view(const TensorView &tensor_view, const TileLengths &tile_lengths, DoPads)
Definition: tensor_view.hpp:529
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
CK_TILE_DEVICE void sweep_tile_span(TileDistributedSpan_, const F &f)
Definition: sweep_tile.hpp:20
constexpr CK_TILE_DEVICE auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition: null_tile_window.hpp:72
constexpr CK_TILE_HOST_DEVICE auto generate_tuple(F &&f, number< N >)
Definition: tuple.hpp:412
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:343
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: store_tile.hpp:23
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:22
typename __make_integer_seq< impl::__integer_sequence, index_t, N >::seq_type make_index_sequence
Definition: sequence.hpp:222
Definition: elementwise_kernel.hpp:14
CK_TILE_DEVICE void operator()(Dims lens, Dims input_strides, Dims output_strides, const tuple< XDataType... > &input_tensors, YDataType *p_y) const
Definition: elementwise_kernel.hpp:24
ck_tile::remove_cvref_t< Problem_ > Problem
Definition: elementwise_kernel.hpp:15
ck_tile::remove_cvref_t< typename Problem::XDataType > XDataType
Definition: elementwise_kernel.hpp:18
ck_tile::remove_cvref_t< Policy_ > Policy
Definition: elementwise_kernel.hpp:16
ck_tile::remove_cvref_t< typename Problem::ElementWiseOperation > ElementWiseOperation
Definition: elementwise_kernel.hpp:21
static CK_TILE_HOST bool IsSupportedArgument(const ck_tile::tuple< Ints... > &input_sizes)
Definition: elementwise_kernel.hpp:99
ck_tile::remove_cvref_t< typename Problem::YDataType > YDataType
Definition: elementwise_kernel.hpp:20
ck_tile::remove_cvref_t< typename Problem::ComputeDataType > ComputeDataType
Definition: elementwise_kernel.hpp:19
Definition: integral_constant.hpp:13
Definition: sequence.hpp:52
Definition: tensor_view.hpp:41
Definition: tuple.hpp:192
#define CK_TILE_ENV(name)
Definition: env.hpp:145