/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp Source File
blockwise_softmax.hpp
Go to the documentation of this file.
Definition: ck.hpp:264
__host__ constexpr __device__ auto make_naive_tensor_descriptor_packed(const Tuple< Lengths... > &lengths)
Definition: tensor_descriptor_helper.hpp:101
decltype(make_naive_tensor_descriptor_packed(make_tuple(ThreadSliceDesc_M_K{}.GetLength(I0)))) ThreadSliceDesc_M
Definition: blockwise_softmax.hpp:39
decltype(ThreadClusterDesc_M_K{}.GetLengths()) ThreadClusterLengths_M_K
Definition: blockwise_softmax.hpp:69
__host__ __device__ void Run(CThreadBuffer &in_thread_buf, WorkspaceBuffer &reduce_work_buf)
Definition: blockwise_softmax.hpp:88
typename conditional< IgnoreNaN, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Add, false, detail::AccumulateWithNanIgnore< reduce::Add, AccDataType > >, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Add, false > >::type ThreadwiseSumReduce
Definition: blockwise_softmax.hpp:67
typename conditional< IgnoreNaN, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Max, false, detail::AccumulateWithNanIgnore< reduce::Max, AccDataType > >, ThreadwiseReduction< AccDataType, ThreadSliceDesc_M_K, ThreadSliceDesc_M, reduce::Max, false > >::type ThreadwiseMaxReduce
Definition: blockwise_softmax.hpp:53
Definition: reduction_functions_blockwise.hpp:101
static __device__ void Reduce(BufferType &work_buffer, AccDataType &in_out_value)
Definition: reduction_functions_blockwise.hpp:116
Definition: reduction_functions_threadwise.hpp:23
Definition: functional.hpp:100
Definition: reduction_functions_accumulate.hpp:17
Definition: integral_constant.hpp:10
Definition: reduction_operator.hpp:37
Definition: reduction_operator.hpp:163
Definition: functional2.hpp:31