/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp File Reference
gridwise_2d_reduction_threadwise.hpp File Reference
#include "ck/utility/data_type.hpp"
#include "ck/utility/reduction_common.hpp"
#include "ck/utility/reduction_operator.hpp"
#include "ck/utility/reduction_functions_accumulate.hpp"
#include "ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
Go to the source code of this file.
Namespaces
ck
Functions
template<typename GridwiseReduction, bool OutputIndex, bool TransformIndexKtoGlobal, bool HaveIndexInput, typename InDataType, typename OutDataType, typename AccDataType, typename IndexDataType, typename InGridDesc_M_K, typename OutGridDesc_M, typename InElementwiseOperation, typename AccElementwiseOperation>
__global__ void ck::kernel_reduce_threadwise(const InGridDesc_M_K in_grid_desc_m_k, const OutGridDesc_M out_grid_desc_m, const InElementwiseOperation in_elementwise_op, const AccElementwiseOperation acc_elementwise_op, AccDataType alpha, const InDataType *const __restrict__ p_in_value_global, const IndexDataType *const __restrict__ p_in_index_global, AccDataType beta, OutDataType *const __restrict__ p_out_value_global, IndexDataType *const __restrict__ p_out_index_global)