33 template <
typename BottomTensorView_,
 
   34           typename WindowLengths_,
 
   35           typename StaticTileDistribution_,
 
   39           tile_window_with_static_distribution<BottomTensorView_,
 
   41                                                StaticTileDistribution_,
 
   45           StaticTileDistribution_>
 
   50                                              StaticTileDistribution_,
 
   54         StaticTileDistribution_>;
 
   58     static_assert(NumCoord == 1);
 
   60     static_assert(Base::Traits::NumAccess % NumCoord == 0,
 
   61                   "wrong! # of access is not divisible by NumCoord");
 
   84             window_origin + window_adaptor_thread_coord_tmp.get_bottom_index();
 
   87             bottom_tensor_view.get_tensor_descriptor(), bottom_tensor_thread_origin_idx_tmp);
 
   91         using Traits = 
typename Base::Traits;
 
   92         using SFC_Ys = 
typename Traits::SFC_Ys;
 
   95             auto window_adaptor_thread_coord = window_adaptor_thread_coord_tmp;
 
   96             auto bottom_tensor_thread_coord  = bottom_tensor_thread_coord_tmp;
 
   98             constexpr 
auto idx_diff_ys =
 
  106                 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  109                 make_tuple(window_adaptor_thread_coord, bottom_tensor_thread_coord);
 
  113     template <
index_t i_access_unsupport_ = -1, 
bool oob_conditional_check = 
true>
 
  118         auto dst_tensor = make_static_distributed_tensor<typename Base::DataType>(tile_dstr);
 
  119         load(dst_tensor, number<i_access_unsupport_>{}, bool_constant<oob_conditional_check>{});
 
  123     template <
typename DistributedTensor,
 
  124               index_t i_access_unsupport_ = -1,
 
  125               bool oob_conditional_check  = 
true>
 
  130         using Traits   = 
typename Base::Traits;
 
  131         using vector_t = 
typename Traits::vector_t;
 
  132         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  137         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  142             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  143                 constexpr 
auto iAccess = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  146                 constexpr 
auto idx_ys_start = SFC_Ys::get_index(iAccess);
 
  149                 const vector_t vec_value =
 
  151                         bottom_tensor_thread_coord, 0, bool_constant<oob_conditional_check>{});
 
  153                 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
 
  156                             return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
 
  159                         number<Base::NDimY>{});
 
  162                         tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
 
  165                     dst_tensor.get_thread_buffer().template at<d>() =
 
  167                             .template get_as<typename Base::DataType>()[j / Traits::PackedSize];
 
  172                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  175                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  179                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  185     template <
typename DstTile,
 
  186               index_t i_access_unsupport_ = -1,
 
  187               bool oob_conditional_check  = 
true,
 
  188               bool pre_nop                = 
false>
 
  192                                  bool_constant<pre_nop>               = {}) 
const 
  194         using Traits   = 
typename Base::Traits;
 
  195         using vector_t = 
typename Traits::vector_t;
 
  196         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  197         static constexpr 
index_t YElementSize =
 
  198             typename Base::TileDstr{}.get_ys_to_d_descriptor().get_element_space_size();
 
  199         static_assert(YElementSize % (Traits::PackedSize * Traits::ScalarPerVector) == 0);
 
  200         using vectorized_tbuf =
 
  201             array<vector_t, YElementSize / (Traits::PackedSize * Traits::ScalarPerVector)>;
 
  205         auto& dst_vec_tbuf = 
reinterpret_cast<vectorized_tbuf&
>(dst_tensor.get_thread_buffer());
 
  208         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  213             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  214                 constexpr 
auto iAccess  = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  215                 constexpr 
auto pre_nop_ = [&]() {
 
  216                     if constexpr(pre_nop && iCoord == 0 && iCoordAccess == 0)
 
  219                         return bool_constant<false>{};
 
  223                 constexpr 
auto idx_ys_start = SFC_Ys::get_index(iAccess);
 
  225                     tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys_start) /
 
  227                 static_assert(d % Traits::ScalarPerVector == 0);
 
  230                     dst_vec_tbuf.template at<d / Traits::ScalarPerVector>(),
 
  231                     bottom_tensor_thread_coord,
 
  233                     bool_constant<oob_conditional_check>{},
 
  235 #if CK_TILE_WORKAROUND_ROCM_6_1_SCRATCH_MEMORY_ISSUE || \ 
  236     CK_TILE_WORKAROUND_ROCM_6_2_SCRATCH_MEMORY_ISSUE 
  243                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  246                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  250                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  257     template <
typename LdsTileWindow_,
 
  258               index_t i_access_unsupport_ = -1,
 
  259               bool oob_conditional_check  = 
true,
 
  260               bool pre_nop                = 
false>
 
  264                                        bool_constant<pre_nop>               = {}) 
const 
  266         using LdsTileWindow = remove_cvref_t<LdsTileWindow_>;
 
  268         using LdsDataType = 
typename LdsTileWindow::DataType;
 
  272         static_assert(LdsTileWindow::get_num_of_dimension() == 3); 
 
  275             lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
 
  276                 make_tuple(number<0>{}, number<0>{}, number<0>{})) *
 
  280             lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
 
  281                 make_tuple(number<0>{}, number<1>{}, number<0>{})) *
 
  282                 sizeof(LdsDataType) -
 
  286             lds_tile.get_bottom_tensor_view().get_tensor_descriptor().calculate_offset(
 
  287                 make_tuple(number<1>{}, number<0>{}, number<0>{})) *
 
  288                 sizeof(LdsDataType) -
 
  291         const index_t m0_init_value = size_per_buf + size_per_wave * get_warp_id();
 
  294         using Traits = 
typename Base::Traits;
 
  296         using vector_t = 
typename Traits::vector_t;
 
  297         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  299         LdsDataType* smem = lds_tile.get_bottom_tensor_view().get_buffer_view().p_data_;
 
  302         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  307             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  308                 constexpr 
auto iAccess  = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  309                 constexpr 
auto pre_nop_ = [&]() {
 
  310                     if constexpr(pre_nop && iCoord == 0 && iCoordAccess == 0)
 
  313                         return bool_constant<false>{};
 
  318                     smem, bottom_tensor_thread_coord, 0, pre_nop_);
 
  323                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  326                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  330                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  338     template <
typename LdsTileWindow_,
 
  339               index_t i_access_unsupport_ = -1,
 
  340               bool oob_conditional_check  = 
true>
 
  345         using LdsTileWindow = remove_cvref_t<LdsTileWindow_>;
 
  346         using LdsDataType   = 
typename LdsTileWindow::DataType;
 
  347         using Traits        = 
typename Base::Traits;
 
  349         using vector_t = 
typename Traits::vector_t;
 
  350         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  353         const auto window_origin       = lds_tile.get_window_origin();
 
  354         const auto& bottom_tensor_view = lds_tile.get_bottom_tensor_view();
 
  355         const auto& tensor_descriptor  = bottom_tensor_view.get_tensor_descriptor();
 
  356         auto smem_base_ptr             = bottom_tensor_view.get_buffer_view().p_data_;
 
  358         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  362             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  363                 constexpr 
auto iAccess = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  366                 auto lds_bottom_tensor_thread_idx =
 
  367                     window_origin + window_adaptor_thread_coord.get_bottom_index();
 
  370                 const auto lds_coord =
 
  374                 CK_TILE_LDS_ADDR LdsDataType* smem = smem_base_ptr + lds_coord.get_offset();
 
  379                     bottom_tensor_thread_coord,
 
  381                     bool_constant<oob_conditional_check>{});
 
  386                     constexpr 
auto idx_diff_ys    = SFC_Ys::get_forward_step(iAccess);
 
  388                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  392                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  398     template <
typename Policy, 
index_t i_access_unsupport_ = -1, 
bool oob_conditional_check = 
true>
 
  402         auto dst_tensor = make_static_distributed_tensor<typename Base::DataType>(tile_dstr);
 
  403         this->
template load_transpose<Policy>(
 
  408     template <
typename Policy,
 
  409               typename DistributedTensor,
 
  410               index_t i_access_unsupport_ = -1,
 
  411               bool oob_conditional_check  = 
true>
 
  416         using Traits   = 
typename Base::Traits;
 
  417         using vector_t = 
typename Traits::vector_t;
 
  418         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  422         constexpr 
auto group_func = Policy::group_func;
 
  425         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  430             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  431                 constexpr 
auto iAccess = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  434                 constexpr 
auto idx_ys_start = SFC_Ys::get_index(iAccess);
 
  437                 const vector_t vec_value =
 
  439                         .template get_transpose_vectorized_elements<vector_t>(
 
  440                             bottom_tensor_thread_coord, 0);
 
  442                 static_for<0, Traits::ScalarPerVector, 1>{}([&](
auto j) {
 
  445                             return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
 
  448                         number<Base::NDimY>{});
 
  450                     constexpr 
auto grouped_idx_ys = group_func(orig_idx_ys);
 
  452                     constexpr 
index_t linear_distributed_index =
 
  453                         tile_dstr.get_ys_to_d_descriptor().calculate_offset(grouped_idx_ys);
 
  455                     dst_tensor.get_thread_buffer().template at<linear_distributed_index>() =
 
  456                         vec_value.template get_as<typename Base::DataType>()[j];
 
  461                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  464                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  468                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  474     template <
index_t i_access_unsupport_ = -1, 
bool oob_conditional_check = 
true>
 
  480         using Traits = 
typename Base::Traits;
 
  482         using vector_t = 
typename Traits::vector_t;
 
  483         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  488         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  492             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  493                 constexpr 
auto iAccess = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  496                 constexpr 
auto idx_ys_start = SFC_Ys::get_index(iAccess);
 
  502                 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
 
  505                             return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
 
  508                         number<Base::NDimY>{});
 
  511                         tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
 
  514                     vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
 
  515                         dstr_tensor.get_thread_buffer().template at<d>();
 
  522                     bottom_tensor_thread_coord,
 
  525                     bool_constant<oob_conditional_check>{});
 
  530                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  533                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  537                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  543     template <
index_t i_access_unsupport_ = -1>
 
  549         using Traits = 
typename Base::Traits;
 
  551         using vector_t = 
typename Traits::vector_t;
 
  552         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  555         static constexpr 
bool oob_conditional_check = 
true;
 
  558         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  563             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  564                 constexpr 
auto iAccess = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  567                 constexpr 
auto idx_ys_start = SFC_Ys::get_index(iAccess);
 
  571                 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
 
  574                             return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
 
  577                         number<Base::NDimY>{});
 
  579                         tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
 
  581                     vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
 
  587                     .template set_vectorized_elements_raw<vector_t, oob_conditional_check>(
 
  588                         bottom_tensor_thread_coord, 0, vec_value);
 
  593                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  596                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  600                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  606     template <
index_t i_access_unsupport_ = -1, 
bool oob_conditional_check = 
true>
 
  613         using Traits = 
typename Base::Traits;
 
  615         using vector_t = 
typename Traits::vector_t;
 
  616         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  621         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  626             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  627                 constexpr 
auto iAccess = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  630                 constexpr 
auto idx_ys_start = SFC_Ys::get_index(iAccess);
 
  635                 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
 
  638                             return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
 
  641                         number<Base::NDimY>{});
 
  644                         tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
 
  647                     vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
 
  653                     bottom_tensor_thread_coord,
 
  656                     bool_constant<oob_conditional_check>{});
 
  661                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  664                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  668                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  674     template <
index_t i_access_unsupport_ = -1, 
bool oob_conditional_check = 
true, 
bool pre_nop>
 
  680                bool_constant<pre_nop>               = {}) 
const 
  682         using Traits = 
typename Base::Traits;
 
  684         using vector_t = 
typename Traits::vector_t;
 
  685         using SFC_Ys   = 
typename Traits::SFC_Ys;
 
  690         static_for<0, NumCoord, 1>{}([&](
auto iCoord) {
 
  695             static_for<0, NumAccessPerCoord, 1>{}([&](
auto iCoordAccess) {
 
  696                 constexpr 
auto iAccess = number<iCoord * NumAccessPerCoord + iCoordAccess>{};
 
  699                 constexpr 
auto idx_ys_start = SFC_Ys::get_index(iAccess);
 
  704                 static_for<0, Traits::ScalarPerVector, Traits::PackedSize>{}([&](
auto j) {
 
  707                             return jj == Traits::VectorDimY ? (idx_ys_start[jj] + j)
 
  710                         number<Base::NDimY>{});
 
  713                         tile_dstr.get_ys_to_d_descriptor().calculate_offset(idx_ys) /
 
  716                     vec_value.template get_as<typename Base::DataType>()(j / Traits::PackedSize) =
 
  722                     bottom_tensor_thread_coord,
 
  725                     bool_constant<oob_conditional_check>{},
 
  726                     bool_constant<pre_nop>{});
 
  731                     constexpr 
auto idx_diff_ys = SFC_Ys::get_forward_step(iAccess);
 
  734                         generate_tuple([&](
auto) { 
return number<0>{}; }, number<Base::NDimP>{}),
 
  738                         window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  764             this->
window_origin_ + window_adaptor_thread_coord_tmp.get_bottom_index();
 
  771         using Traits = 
typename Base::Traits;
 
  772         using SFC_Ys = 
typename Traits::SFC_Ys;
 
  775             auto window_adaptor_thread_coord = window_adaptor_thread_coord_tmp;
 
  776             auto bottom_tensor_thread_coord  = bottom_tensor_thread_coord_tmp;
 
  778             constexpr 
auto idx_diff_ys =
 
  786                 window_adaptor_thread_coord, bottom_tensor_thread_coord, idx_diff_ps_ys);
 
  789                 make_tuple(window_adaptor_thread_coord, bottom_tensor_thread_coord);
 
  801 template <
typename TensorView_,
 
  802           typename WindowLengths_,
 
  803           typename StaticTileDistribution_,
 
  807                  const WindowLengths_& window_lengths,
 
  808                  const multi_index<TensorView_::get_num_of_dimension()>& origin,
 
  812     return tile_window_with_static_distribution<remove_cvref_t<TensorView_>,
 
  813                                                 remove_cvref_t<WindowLengths_>,
 
  814                                                 remove_cvref_t<StaticTileDistribution_>,
 
  816         tensor_view, window_lengths, origin, tile_distribution};
 
  820 template <
typename TensorView_,
 
  821           typename WindowLengths_,
 
  822           typename StaticTileDistribution_,
 
  826                      const WindowLengths_& window_lengths,
 
  827                      const multi_index<TensorView_::get_num_of_dimension()>& origin,
 
  831     auto w = tile_window_with_static_distribution<remove_cvref_t<TensorView_>,
 
  832                                                   remove_cvref_t<WindowLengths_>,
 
  833                                                   remove_cvref_t<StaticTileDistribution_>,
 
  835         tensor_view, window_lengths, origin, tile_distribution};
 
  840 template <
typename TensorView_,
 
  841           typename WindowLengths_,
 
  842           typename StaticTileDistribution_,
 
  847                                          StaticTileDistribution_,
 
  851                                                         StaticTileDistribution_,
 
  852                                                         NumCoord>::BottomTensorIndex& step)
 
  865 template <
typename BottomTensorView_, 
typename WindowLengths_>
 
  867     : 
public tile_window_base<tile_window_with_static_lengths<BottomTensorView_, WindowLengths_>,
 
  889 template <
typename TensorView_, 
typename WindowLengths_>
 
  892                  const WindowLengths_& window_lengths,
 
  893                  const multi_index<TensorView_::get_num_of_dimension()>& origin)
 
  896                   "wrong! lengths should be static");
 
  904 template <
typename TensorView, 
typename WindowLengths>
 
  907                  const multi_index<TensorView::get_num_of_dimension()>& origin)
 
  913 template <
typename TensorView, 
typename WindowLengths, 
typename StaticTileDistribution>
 
  916                  const multi_index<TensorView::get_num_of_dimension()>& origin,
 
  925 template <
typename TensorView, 
typename WindowLengths, 
typename StaticTileDistribution>
 
  936 template <
typename TensorView, 
typename WindowLengths, 
typename StaticTileDistribution>
 
  949 template <
typename TensorView_, 
typename WindowLengths_>
 
  965 template <
typename T>
 
  978 template <
typename BottomTensorView_,
 
  979           typename WindowLengths_,
 
  980           typename StaticTileDistribution_,
 
  985                                          StaticTileDistribution_,
 
  997 template <
typename T>
 
 1008 template <
typename T>
 
 1019 template <
typename BottomTensorView_, 
typename WindowLengths_>
 
 1032 template <
typename T>
 
#define CK_TILE_DEVICE
Definition: config.hpp:40
 
#define CK_TILE_LDS_ADDR
Definition: config.hpp:57
 
CK_TILE_HOST_DEVICE auto get_partition_index(Distribution)
Definition: tile_distribution.hpp:22
 
Definition: cluster_descriptor.hpp:13
 
constexpr bool is_tile_window_with_static_distribution_v
Helper variable template to check if a type is a tile window with static distribution.
Definition: tile_window.hpp:998
 
constexpr CK_TILE_HOST_DEVICE void move_tensor_coordinate(const TensorDesc &tensor_desc, TensorCoord &coord, const Index &coord_step)
Definition: tensor_coordinate.hpp:72
 
CK_TILE_DEVICE auto make_tile_window_raw(const TensorView_ &tensor_view, const WindowLengths_ &window_lengths, const multi_index< TensorView_::get_num_of_dimension()> &origin, const StaticTileDistribution_ &tile_distribution, number< NumCoord >={})
Definition: tile_window.hpp:825
 
constexpr CK_TILE_HOST_DEVICE auto make_tensor_adaptor_coordinate(const Adaptor &adaptor, const TopIndex &idx_top)
Definition: tensor_adaptor_coordinate.hpp:55
 
constant< b > bool_constant
Definition: integral_constant.hpp:39
 
int32_t index_t
Definition: integer.hpp:9
 
constexpr CK_TILE_HOST_DEVICE auto make_tensor_coordinate(const TensorDesc &tensor_desc, const TopIndex &idx_top)
Definition: tensor_coordinate.hpp:60
 
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:21
 
constexpr bool is_tile_window_with_static_lengths_v
Helper variable template to check if a type is a tile window with static lengths.
Definition: tile_window.hpp:1033
 
constexpr CK_TILE_DEVICE auto make_tile_window(null_tensor_view, const WindowLengths &window_lengths, const multi_index< WindowLengths::size()> &, Ts &&...)
Definition: null_tile_window.hpp:72
 
CK_TILE_DEVICE void move_tile_window(null_tile_window< WindowLengths > &, const typename null_tile_window< WindowLengths >::BottomTensorIndex &)
Definition: null_tile_window.hpp:92
 
constexpr CK_TILE_HOST_DEVICE auto generate_tuple(F &&f, number< N >)
Definition: tuple.hpp:412
 
CK_TILE_DEVICE void m0_set_with_memory(index_t v)
Definition: utility.hpp:19
 
constexpr CK_TILE_HOST_DEVICE auto make_tuple(Xs &&... xs)
Definition: tuple.hpp:343
 
CK_TILE_DEVICE void m0_inc_with_memory(index_t v)
Definition: utility.hpp:25
 
constexpr CK_TILE_HOST_DEVICE auto container_concat(const X &x, const Ys &... ys)
Definition: container_helper.hpp:363
 
bool_constant< false > false_type
Definition: integral_constant.hpp:63
 
bool_constant< true > true_type
Definition: integral_constant.hpp:62
 
Definition: integral_constant.hpp:13
 
Definition: type_traits.hpp:76
 
Type trait to determine if a type is a tile window with static distribution.
Definition: tile_window.hpp:967
 
Type trait to determine if a type is a tile window with static lengths.
Definition: tile_window.hpp:1010
 
Definition: static_distributed_tensor.hpp:21
 
constexpr CK_TILE_HOST_DEVICE const auto & get_thread_buffer() const
Definition: static_distributed_tensor.hpp:58
 
Definition: functional.hpp:43
 
Definition: tensor_view.hpp:41
 
Definition: tile_distribution.hpp:72
 
constexpr CK_TILE_HOST_DEVICE const auto & get_ps_ys_to_xs_adaptor() const
Definition: tile_distribution.hpp:126
 
This class provides a description of a tile-windowed view on the device memory.
Definition: tile_window_base.hpp:31
 
BottomTensorView bottom_tensor_view_
Definition: tile_window_base.hpp:85
 
remove_cvref_t< typename BottomTensorView::DataType > DataType
Definition: tile_window_base.hpp:36
 
constexpr CK_TILE_DEVICE auto get_window_origin() const
Definition: tile_window_base.hpp:45
 
BottomTensorIndex window_origin_
Definition: tile_window_base.hpp:79
 
constexpr CK_TILE_DEVICE auto get_bottom_tensor_view() const
Definition: tile_window_base.hpp:47
 
CK_TILE_DEVICE void move(const BottomTensorIndex &step)
Definition: tile_window_base.hpp:67
 
constexpr CK_TILE_DEVICE auto get_window_lengths() const
Definition: tile_window_base.hpp:46
 
remove_reference_t< BottomTensorView_ > BottomTensorView
Definition: tile_window_base.hpp:33
 
remove_cvref_t< WindowLengths_ > WindowLengths
Definition: tile_window_base.hpp:34
 
WindowLengths window_lengths_
Definition: tile_window_base.hpp:81
 
This class provides a tile (windowed) view of, and access to, the device memory.
Definition: tile_window.hpp:46
 
CK_TILE_DEVICE void store_raw(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}) const
Definition: tile_window.hpp:545
 
CK_TILE_DEVICE void move_extended(const typename Base::BottomTensorIndex &step)
Definition: tile_window.hpp:745
 
CK_TILE_DEVICE auto load_transpose() const
Definition: tile_window.hpp:399
 
CK_TILE_DEVICE void set_window_origin_extended(const typename Base::BottomTensorIndex &)
Definition: tile_window.hpp:754
 
array< tuple< typename Base::WindowAdaptorCoord, typename Base::BottomTensorCoord >, NumCoord > pre_computed_coords_
Definition: tile_window.hpp:797
 
CK_TILE_DEVICE void update_raw(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={}) const
Definition: tile_window.hpp:676
 
constexpr CK_TILE_DEVICE tile_window_with_static_distribution()=default
 
CK_TILE_DEVICE auto async_load(LdsTileWindow_ &&lds_tile, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition: tile_window.hpp:341
 
CK_TILE_DEVICE auto load_transpose(DistributedTensor &dst_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition: tile_window.hpp:412
 
CK_TILE_DEVICE auto load(DistributedTensor &dst_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition: tile_window.hpp:126
 
CK_TILE_DEVICE auto load(number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition: tile_window.hpp:114
 
static constexpr auto I0
Definition: tile_window.hpp:56
 
CK_TILE_DEVICE void load_raw(DstTile &dst_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={}) const
Definition: tile_window.hpp:189
 
static constexpr auto I1
Definition: tile_window.hpp:57
 
CK_TILE_DEVICE auto async_load_raw(LdsTileWindow_ &&lds_tile, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={}) const
Definition: tile_window.hpp:261
 
CK_TILE_DEVICE void update(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition: tile_window.hpp:608
 
constexpr CK_TILE_DEVICE tile_window_with_static_distribution(const typename Base::BottomTensorView &bottom_tensor_view, const typename Base::WindowLengths &window_lengths, const typename Base::BottomTensorIndex &window_origin, const typename Base::TileDstr &tile_distribution)
Definition: tile_window.hpp:66
 
static constexpr index_t NumAccessPerCoord
Definition: tile_window.hpp:62
 
CK_TILE_DEVICE void store(const static_distributed_tensor< typename Base::DataType, typename Base::TileDstr > &dstr_tensor, number< i_access_unsupport_ >={}, bool_constant< oob_conditional_check >={}) const
Definition: tile_window.hpp:475
 
This class provides a description of a tile-windowed view on the device memory.
Definition: tile_window.hpp:870
 
constexpr CK_TILE_DEVICE tile_window_with_static_lengths()=default
 
constexpr CK_TILE_DEVICE tile_window_with_static_lengths(const typename Base::BottomTensorView &bottom_tensor_view, const typename Base::WindowLengths &window_lengths, const typename Base::BottomTensorIndex &window_origin)
Definition: tile_window.hpp:878
 
Definition: tile_window_base.hpp:94
 
remove_cvref_t< StaticTileDistribution_ > TileDstr
Definition: tile_window_base.hpp:95
 
CK_TILE_DEVICE void move_window_adaptor_and_bottom_tensor_thread_coordinate(WindowAdaptorCoord &window_adaptor_thread_coord, BottomTensorCoord &bottom_tensor_thread_coord, const ATopIndex &idx_diff_adaptor_top) const
Definition: tile_window_base.hpp:129
 
TileDstr tile_dstr_
Definition: tile_window_base.hpp:253