/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/epilogue/default_2d_epilogue.hpp Source File
default_2d_epilogue.hpp
Go to the documentation of this file.
Definition: cluster_descriptor.hpp:13
constexpr CK_TILE_HOST_DEVICE auto generate_tie(F &&f, number< N >)
Definition: tuple.hpp:435
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
typename impl::warp_gemm_dispatcher::Dispatcher< AType, BType, AccType, MPerWave, NPerWave, KPerWave, TransposeC, SwizzleA, UseStructuredSparsity, AttrNumAccess >::Type WarpGemmDispatcher
Definition: warp_gemm_dispatcher.hpp:177
CK_TILE_DEVICE auto tile_elementwise_inout_unpack(const InElementFunc &in_element_func, const Tuple &t, std::index_sequence< I... >)
Template function that "unpacks" a tuple and applies an element-wise operation.
Definition: tile_elementwise.hpp:71
CK_TILE_DEVICE void buffer_store_fence(index_t cnt=0)
Definition: amd_buffer_addressing.hpp:1063
CK_TILE_DEVICE void store_tile_raw(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: store_tile.hpp:72
CK_TILE_HOST_DEVICE auto get_partition_index(Distribution)
Definition: tile_distribution.hpp:21
constexpr CK_TILE_DEVICE auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition: null_tile_window.hpp:75
typename detail::detector< nonesuch, void, Op, Args... >::value_t is_detected
Definition: type_traits.hpp:67
constexpr CK_TILE_HOST_DEVICE auto generate_tuple(F &&f, number< N >)
Definition: tuple.hpp:429
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:360
constexpr CK_TILE_HOST_DEVICE auto concat_tuple_of_reference(const tuple< X &... > &tx, const tuple< Y &... > &ty)
Definition: tuple.hpp:443
CK_TILE_DEVICE void update_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: update_tile.hpp:22
CK_TILE_DEVICE void store_tile(tile_window_with_static_lengths< BottomTensorView_, WindowLengths_ > &tile_window_tmp, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor)
Definition: store_tile.hpp:24
CK_TILE_DEVICE auto load_tile(const TileWindow_ &tile_window, number< i_access >={}, bool_constant< oob_conditional_check >={})
Definition: load_tile.hpp:36
CK_TILE_DEVICE void update_tile_raw(tile_window_with_static_distribution< BottomTensorView_, WindowLengths_, TileDistribution_, NumCoord > &tile_window, const static_distributed_tensor< DataType_, TileDistribution_ > &dstr_tensor, number< i_access >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: update_tile.hpp:68
typename conditional< predicate, X, Y >::type conditional_t
Definition: functional.hpp:115
Definition: default_2d_epilogue.hpp:77
remove_cvref_t< typename Problem::ODataType > ODataType
Definition: default_2d_epilogue.hpp:80
CK_TILE_DEVICE auto operator()(ODramWindowTmp &o_dram_window_tmp, const OAccTile &o_acc_tile, const DsDramWindows &ds_dram_windows, void *=nullptr) const
Definition: default_2d_epilogue.hpp:91
remove_cvref_t< Problem_ > Problem
Definition: default_2d_epilogue.hpp:78
remove_cvref_t< typename Problem::AccDataType > AccDataType
Definition: default_2d_epilogue.hpp:79
static constexpr bool UseRawStore
Definition: default_2d_epilogue.hpp:83
static constexpr memory_operation_enum MemoryOperation
Definition: default_2d_epilogue.hpp:84
static constexpr CK_TILE_HOST_DEVICE index_t GetSmemSize()
Definition: default_2d_epilogue.hpp:86
Definition: default_2d_epilogue.hpp:21
static constexpr bool UseRawStore
Definition: default_2d_epilogue.hpp:26
remove_cvref_t< ODataType_ > ODataType
Definition: default_2d_epilogue.hpp:23
remove_cvref_t< AccDataType_ > AccDataType
Definition: default_2d_epilogue.hpp:22
static constexpr bool kPadM
Definition: default_2d_epilogue.hpp:24
static constexpr memory_operation_enum MemoryOperation
Definition: default_2d_epilogue.hpp:27
static constexpr bool kPadN
Definition: default_2d_epilogue.hpp:25
static constexpr index_t NumDTensor
Definition: default_2d_epilogue.hpp:28
Definition: default_2d_epilogue.hpp:192
remove_cvref_t< typename Problem::BsDataType > BsDataType
Definition: default_2d_epilogue.hpp:195
remove_cvref_t< typename Problem::AccDataType > AccDataType
Definition: default_2d_epilogue.hpp:196
static constexpr index_t kMPerXdl
Definition: default_2d_epilogue.hpp:219
static constexpr bool ADataTypeIsTuple
Definition: default_2d_epilogue.hpp:198
remove_cvref_t< std::tuple_element_t< number< 0 >{}, BsDataTypeTuple > > BDataType
Definition: default_2d_epilogue.hpp:210
static constexpr index_t kNPerXdl
Definition: default_2d_epilogue.hpp:220
std::conditional_t< BDataTypeIsTuple, remove_cvref_t< BsDataType >, remove_cvref_t< tuple< BsDataType > >> BsDataTypeTuple
Definition: default_2d_epilogue.hpp:207
static constexpr index_t kKPerXdl
Definition: default_2d_epilogue.hpp:221
remove_cvref_t< typename Problem::CDElementwise > CDElementwise
Definition: default_2d_epilogue.hpp:217
static constexpr CK_TILE_HOST_DEVICE auto GetVectorSizeC()
Definition: default_2d_epilogue.hpp:234
static constexpr index_t isCTransposed
Definition: default_2d_epilogue.hpp:222
remove_cvref_t< typename Problem::CLayout > CLayout
Definition: default_2d_epilogue.hpp:218
remove_cvref_t< typename Problem::ODataType > ODataType
Definition: default_2d_epilogue.hpp:197
remove_cvref_t< typename Problem::DsDataType > DsDataType
Definition: default_2d_epilogue.hpp:215
remove_cvref_t< typename Problem::AsDataType > AsDataType
Definition: default_2d_epilogue.hpp:194
std::conditional_t< std::is_same_v< BDataType, pk_int4_t >, ADataType, BDataType > BTypeToUse
Definition: default_2d_epilogue.hpp:213
static constexpr CK_TILE_HOST_DEVICE auto GetVectorSizeD([[maybe_unused]] number< I > index)
Definition: default_2d_epilogue.hpp:287
WarpGemmDispatcher< ADataType, BTypeToUse, AccDataType, kMPerXdl, kNPerXdl, kKPerXdl, isCTransposed > WG
Definition: default_2d_epilogue.hpp:230
std::conditional_t< ADataTypeIsTuple, remove_cvref_t< AsDataType >, remove_cvref_t< tuple< AsDataType > >> AsDataTypeTuple
Definition: default_2d_epilogue.hpp:203
static constexpr bool BDataTypeIsTuple
Definition: default_2d_epilogue.hpp:199
remove_cvref_t< std::tuple_element_t< number< 0 >{}, AsDataTypeTuple > > ADataType
Definition: default_2d_epilogue.hpp:209
typename WG::CWarpDstr CWarpDstr
Definition: default_2d_epilogue.hpp:232
remove_cvref_t< typename Problem::DsLayout > DsLayout
Definition: default_2d_epilogue.hpp:216
remove_cvref_t< Problem_ > Problem
Definition: default_2d_epilogue.hpp:193
Definition: default_2d_epilogue.hpp:55
remove_cvref_t< DsLayout_ > DsLayout
Definition: default_2d_epilogue.hpp:61
static constexpr index_t kKPerXdl
Definition: default_2d_epilogue.hpp:66
remove_cvref_t< DsDataType_ > DsDataType
Definition: default_2d_epilogue.hpp:59
remove_cvref_t< AsDataType_ > AsDataType
Definition: default_2d_epilogue.hpp:56
static constexpr index_t kMPerXdl
Definition: default_2d_epilogue.hpp:64
static constexpr index_t kNPerBlock
Definition: default_2d_epilogue.hpp:63
static constexpr index_t kNPerXdl
Definition: default_2d_epilogue.hpp:65
static constexpr index_t kMPerBlock
Definition: default_2d_epilogue.hpp:62
remove_cvref_t< CLayout_ > CLayout
Definition: default_2d_epilogue.hpp:58
remove_cvref_t< BsDataType_ > BsDataType
Definition: default_2d_epilogue.hpp:57
remove_cvref_t< CDElementwise_ > CDElementwise
Definition: default_2d_epilogue.hpp:60
static constexpr index_t isCTransposed
Definition: default_2d_epilogue.hpp:67
static constexpr index_t NumDTensor
Definition: default_2d_epilogue.hpp:69
Definition: integral_constant.hpp:13