/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/pooling/pipeline/pool_shape.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/pooling/pipeline/pool_shape.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/pooling/pipeline/pool_shape.hpp Source File
pool_shape.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #pragma once
5 
6 #include "ck_tile/core.hpp"
7 
8 namespace ck_tile {
9 
10 template <typename BlockWarps, // num warps along seq<M, N>
11  typename BlockTile, // block size, seq<M, N>
12  typename WarpTile, // warp size, seq<M, N>
13  typename ThreadTile> // contiguous pixels(vector size) along seq<M, N>
14 struct PoolShape // compile-time shape constants derived from the four seq<M, N> parameters
15 {
16  static constexpr index_t Block_M = BlockTile::at(number<0>{}); // block tile extent along M
17  static constexpr index_t Block_N = BlockTile::at(number<1>{}); // block tile extent along N
18 
19  static constexpr index_t Warp_M = WarpTile::at(number<0>{}); // warp tile extent along M
20  static constexpr index_t Warp_N = WarpTile::at(number<1>{}); // warp tile extent along N
21 
22  static constexpr index_t ThreadTile_M = ThreadTile::at(number<0>{}); // per-thread extent along M
23  static constexpr index_t ThreadTile_N = ThreadTile::at(number<1>{}); // per-thread extent along N
24 
25  static constexpr index_t WarpPerBlock_M = BlockWarps::at(number<0>{}); // warps per block along M
26  static constexpr index_t WarpPerBlock_N = BlockWarps::at(number<1>{}); // warps per block along N
27 
28  static_assert(Warp_M % ThreadTile_M == 0, "Warp_M must be divisible by ThreadTile_M");
29  static_assert(Warp_N % ThreadTile_N == 0, "Warp_N must be divisible by ThreadTile_N");
30  static_assert((Warp_M * Warp_N / ThreadTile_M / ThreadTile_N) % ck_tile::get_warp_size() == 0,
31  "Warp_M * Warp_N / ThreadTile_M / ThreadTile_N must be a multiple of warp size");
32 
33  // Scale factor to account for warp size
34  // WarpSizeScaleFactor = warp tile/ thread tile / warp size
35  static constexpr index_t WarpSizeScaleFactor =
36  Warp_M * Warp_N / ThreadTile_M / ThreadTile_N / ck_tile::get_warp_size(); // exact: the assert above guarantees divisibility
37 
38  static constexpr index_t WarpSizeScaleFactor_M =
39  (Warp_M / ThreadTile_M > Warp_N / ThreadTile_N) ? WarpSizeScaleFactor : 1; // NOTE(review): line restored after extraction loss; confirm against upstream
40  static constexpr index_t WarpSizeScaleFactor_N =
41  (Warp_M / ThreadTile_M > Warp_N / ThreadTile_N) ? 1 : WarpSizeScaleFactor; // NOTE(review): line restored after extraction loss; confirm against upstream
42 
43  static constexpr index_t ThreadPerWarp_M = Warp_M / ThreadTile_M / WarpSizeScaleFactor_M; // NOTE(review): restored; confirm against upstream
44  static constexpr index_t ThreadPerWarp_N = Warp_N / ThreadTile_N / WarpSizeScaleFactor_N; // NOTE(review): restored; confirm against upstream
45 
46  static_assert((Block_M * WarpSizeScaleFactor_M) % (WarpPerBlock_M * Warp_M) == 0,
47  "Block_M * WarpSizeScaleFactor_M must be divisible by WarpPerBlock_M * Warp_M");
48  static_assert((Block_N * WarpSizeScaleFactor_N) % (WarpPerBlock_N * Warp_N) == 0,
49  "Block_N * WarpSizeScaleFactor_N must be divisible by WarpPerBlock_N * Warp_N");
50 
51  static constexpr index_t Repeat_M = Block_M * WarpSizeScaleFactor_M / (WarpPerBlock_M * Warp_M); // quotient checked by the assert above; restored line
52  static constexpr index_t Repeat_N = Block_N * WarpSizeScaleFactor_N / (WarpPerBlock_N * Warp_N); // quotient checked by the assert above; restored line
53 
54  static constexpr index_t BlockSize =
55  ck_tile::get_warp_size() * reduce_on_sequence(BlockWarps{}, multiplies{}, number<1>{}); // warp size times the product of BlockWarps entries
56 };
57 } // namespace ck_tile
Definition: cluster_descriptor.hpp:13
int32_t index_t
Definition: integer.hpp:9
constexpr CK_TILE_HOST_DEVICE index_t reduce_on_sequence(Seq, Reduce f, number< Init >)
Definition: sequence.hpp:982
constexpr __device__ index_t get_warp_size()
Definition: get_id.hpp:10
ck_tile::PoolShape
Definition: pool_shape.hpp:15
static constexpr index_t WarpSizeScaleFactor
Definition: pool_shape.hpp:35
static constexpr index_t WarpPerBlock_N
Definition: pool_shape.hpp:26
static constexpr index_t Warp_M
Definition: pool_shape.hpp:19
static constexpr index_t Warp_N
Definition: pool_shape.hpp:20
static constexpr index_t WarpPerBlock_M
Definition: pool_shape.hpp:25
static constexpr index_t Block_N
Definition: pool_shape.hpp:17
static constexpr index_t ThreadTile_M
Definition: pool_shape.hpp:22
static constexpr index_t Repeat_N
Definition: pool_shape.hpp:52
static constexpr index_t ThreadPerWarp_M
Definition: pool_shape.hpp:43
static constexpr index_t ThreadPerWarp_N
Definition: pool_shape.hpp:44
static constexpr index_t Repeat_M
Definition: pool_shape.hpp:51
static constexpr index_t WarpSizeScaleFactor_N
Definition: pool_shape.hpp:40
static constexpr index_t Block_M
Definition: pool_shape.hpp:16
static constexpr index_t WarpSizeScaleFactor_M
Definition: pool_shape.hpp:38
static constexpr index_t BlockSize
Definition: pool_shape.hpp:54
static constexpr index_t ThreadTile_N
Definition: pool_shape.hpp:23
Definition: integral_constant.hpp:13
Definition: math.hpp:98