/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/ck.hpp File Reference

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/ck.hpp File Reference

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/ck.hpp File Reference
ck.hpp File Reference
#include "ck/config.h"
#include "ck/utility/env.hpp"
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"

Go to the source code of this file.

Classes

struct  ck::InMemoryDataOperationEnumSequence< Is >
 

Namespaces

 ck
 

Macros

#define CK_TIME_KERNEL   1
 
#define CK_CONSTANT_ADDRESS_SPACE   __attribute__((address_space(4)))
 
#define CK_USE_LAUNCH_BOUNDS   1
 
#define CK_MAX_THREAD_PER_BLOCK   256
 
#define CK_MIN_BLOCK_PER_CU   2
 
#define CK_WAVELET_MAX_THREAD_PER_BLOCK   512
 
#define CK_WAVELET_MIN_BLOCK_PER_CU   2
 
#define CK_USE_WAVES_PER_EU   0
 
#define CK_BUFFER_RESOURCE_3RD_DWORD   -1
 
#define CK_USE_AMD_MFMA
 
#define CK_USE_AMD_BUFFER_LOAD   1
 
#define CK_USE_AMD_BUFFER_STORE   1
 
#define CK_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER   1
 
#define CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT   1
 
#define CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64   0
 
#define CK_USE_AMD_INLINE_ASM   1
 
#define CK_USE_AMD_V_MAC_INLINE_ASM   1
 
#define CK_USE_AMD_V_DOT_INLINE_ASM   0
 
#define CK_USE_AMD_V_DOT_DPP8_INLINE_ASM   1
 
#define CK_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM   0
 
#define CK_USE_RNE_BF16_CONVERSION   1
 
#define CK_USE_SR_F8_CONVERSION   0
 
#define CK_USE_SR_F6_CONVERSION   0
 
#define CK_USE_SR_F4_CONVERSION   0
 
#define CK_USE_PK4_LAYOUT_SHUFFLE   1
 
#define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM   1
 
#define CK_EXPERIMENTAL_USE_DYNAMICALLY_INDEXED_MULTI_INDEX   0
 
#define CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR   0
 
#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK   0
 
#define CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK   1
 
#define CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK   1
 
#define CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK   1
 
#define CK_EXPERIMENTAL_USE_IN_REGISTER_SUB_DWORD_TRANSPOSE   1
 
#define CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION   1
 
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS   0
 
#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST   1
 
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING   1
 
#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS   1
 
#define CK_EXPERIMENTAL_DEFAULT_TO_INTER_WAVE_SCHEDULING   0
 
#define CK_EXPERIMENTAL_INTER_WAVE_INSTANCES   1
 
#define CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES   1
 
#define CK_EXPERIMENTAL_PIPELINE_V2_IGLP_OPT   0
 
#define CK_HACK_MERGE_CALCULATE_IDX_DIFF_LOW_CONST_USE_AMD_GCN_READ_FIRST_LANE   0
 
#define CK_WORKAROUND_SWDEV_275126   1
 
#define CK_WORKAROUND_SWDEV_XXXXXX_INT8_BUFFER_LOAD_STORE_ISSUE   1
 
#define CK_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE   1
 
#define CK_WORKAROUND_SWDEV_325164   0
 
#define CK_WORKAROUND_SWDEV_383542   1
 
#define CK_WORKAROUND_SWDEV_388832   1
 
#define CK_GFX90A_DENORM_WORKAROUND   0
 
#define CK_GFX90A_DENORM_WORKAROUND   0
 
#define CK_BUILD_DEPRECATED   1
 

Typedefs

using ck::index_t = int32_t
 
using ck::long_index_t = int64_t
 

Enumerations

enum class  ck::InMemoryDataOperationEnum {
  ck::Set ,
  ck::AtomicAdd ,
  ck::AtomicMax ,
  ck::Add
}
 

Macro Definition Documentation

◆ CK_BUFFER_RESOURCE_3RD_DWORD

#define CK_BUFFER_RESOURCE_3RD_DWORD   -1

◆ CK_BUILD_DEPRECATED

#define CK_BUILD_DEPRECATED   1

◆ CK_CONSTANT_ADDRESS_SPACE

#define CK_CONSTANT_ADDRESS_SPACE   __attribute__((address_space(4)))

◆ CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM

#define CK_EXPERIMENTAL_BLOCK_SYNC_LDS_WITHOUT_SYNC_VMEM   1

◆ CK_EXPERIMENTAL_DEFAULT_TO_INTER_WAVE_SCHEDULING

#define CK_EXPERIMENTAL_DEFAULT_TO_INTER_WAVE_SCHEDULING   0

◆ CK_EXPERIMENTAL_INTER_WAVE_INSTANCES

#define CK_EXPERIMENTAL_INTER_WAVE_INSTANCES   1

◆ CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING

#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING   1

◆ CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS

#define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS   1

◆ CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION

#define CK_EXPERIMENTAL_MERGE_USE_MAGIC_DIVISION   1

◆ CK_EXPERIMENTAL_PIPELINE_V2_IGLP_OPT

#define CK_EXPERIMENTAL_PIPELINE_V2_IGLP_OPT   0

◆ CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES

#define CK_EXPERIMENTAL_PIPELINE_V2_INSTANCES   1

◆ CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR

#define CK_EXPERIMENTAL_STATIC_TENSOR_DESCRIPTOR   0

◆ CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK

#define CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_ADD_OOB_CHECK_OFFSET_TRICK   1

◆ CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK

#define CK_EXPERIMENTAL_USE_BUFFER_ATOMIC_MAX_OOB_CHECK_OFFSET_TRICK   1

◆ CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK

#define CK_EXPERIMENTAL_USE_BUFFER_LOAD_OOB_CHECK_OFFSET_TRICK   0

◆ CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK

#define CK_EXPERIMENTAL_USE_BUFFER_STORE_OOB_CHECK_OFFSET_TRICK   1

◆ CK_EXPERIMENTAL_USE_DYNAMICALLY_INDEXED_MULTI_INDEX

#define CK_EXPERIMENTAL_USE_DYNAMICALLY_INDEXED_MULTI_INDEX   0

◆ CK_EXPERIMENTAL_USE_IN_REGISTER_SUB_DWORD_TRANSPOSE

#define CK_EXPERIMENTAL_USE_IN_REGISTER_SUB_DWORD_TRANSPOSE   1

◆ CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST

#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST   1

◆ CK_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS

#define CK_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS   0

◆ CK_GFX90A_DENORM_WORKAROUND [1/2]

#define CK_GFX90A_DENORM_WORKAROUND   0

◆ CK_GFX90A_DENORM_WORKAROUND [2/2]

#define CK_GFX90A_DENORM_WORKAROUND   0

◆ CK_HACK_MERGE_CALCULATE_IDX_DIFF_LOW_CONST_USE_AMD_GCN_READ_FIRST_LANE

#define CK_HACK_MERGE_CALCULATE_IDX_DIFF_LOW_CONST_USE_AMD_GCN_READ_FIRST_LANE   0

◆ CK_MAX_THREAD_PER_BLOCK

#define CK_MAX_THREAD_PER_BLOCK   256

◆ CK_MIN_BLOCK_PER_CU

#define CK_MIN_BLOCK_PER_CU   2

◆ CK_TIME_KERNEL

#define CK_TIME_KERNEL   1

◆ CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT

#define CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT   1

◆ CK_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER

#define CK_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER   1

◆ CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64

#define CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64   0

◆ CK_USE_AMD_BUFFER_LOAD

#define CK_USE_AMD_BUFFER_LOAD   1

◆ CK_USE_AMD_BUFFER_STORE

#define CK_USE_AMD_BUFFER_STORE   1

◆ CK_USE_AMD_INLINE_ASM

#define CK_USE_AMD_INLINE_ASM   1

◆ CK_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM

#define CK_USE_AMD_LDS_DIRECT_LOAD_INLINE_ASM   0

◆ CK_USE_AMD_MFMA

#define CK_USE_AMD_MFMA

◆ CK_USE_AMD_V_DOT_DPP8_INLINE_ASM

#define CK_USE_AMD_V_DOT_DPP8_INLINE_ASM   1

◆ CK_USE_AMD_V_DOT_INLINE_ASM

#define CK_USE_AMD_V_DOT_INLINE_ASM   0

◆ CK_USE_AMD_V_MAC_INLINE_ASM

#define CK_USE_AMD_V_MAC_INLINE_ASM   1

◆ CK_USE_LAUNCH_BOUNDS

#define CK_USE_LAUNCH_BOUNDS   1

◆ CK_USE_PK4_LAYOUT_SHUFFLE

#define CK_USE_PK4_LAYOUT_SHUFFLE   1

◆ CK_USE_RNE_BF16_CONVERSION

#define CK_USE_RNE_BF16_CONVERSION   1

◆ CK_USE_SR_F4_CONVERSION

#define CK_USE_SR_F4_CONVERSION   0

◆ CK_USE_SR_F6_CONVERSION

#define CK_USE_SR_F6_CONVERSION   0

◆ CK_USE_SR_F8_CONVERSION

#define CK_USE_SR_F8_CONVERSION   0

◆ CK_USE_WAVES_PER_EU

#define CK_USE_WAVES_PER_EU   0

◆ CK_WAVELET_MAX_THREAD_PER_BLOCK

#define CK_WAVELET_MAX_THREAD_PER_BLOCK   512

◆ CK_WAVELET_MIN_BLOCK_PER_CU

#define CK_WAVELET_MIN_BLOCK_PER_CU   2

◆ CK_WORKAROUND_SWDEV_275126

#define CK_WORKAROUND_SWDEV_275126   1

◆ CK_WORKAROUND_SWDEV_325164

#define CK_WORKAROUND_SWDEV_325164   0

◆ CK_WORKAROUND_SWDEV_383542

#define CK_WORKAROUND_SWDEV_383542   1

◆ CK_WORKAROUND_SWDEV_388832

#define CK_WORKAROUND_SWDEV_388832   1

◆ CK_WORKAROUND_SWDEV_XXXXXX_INT8_BUFFER_LOAD_STORE_ISSUE

#define CK_WORKAROUND_SWDEV_XXXXXX_INT8_BUFFER_LOAD_STORE_ISSUE   1

◆ CK_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE

#define CK_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE   1