/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/fmha/block/block_attention_kvcache_layout_enum.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/fmha/block/block_attention_kvcache_layout_enum.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/fmha/block/block_attention_kvcache_layout_enum.hpp Source File
block_attention_kvcache_layout_enum.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 
6 namespace ck_tile {
7 
8 // KV cache memory layout selector.
9 //
10 // Layout summary (kVectorSize = 16 / sizeof(KDataType)):
11 // - VECTORIZED_LAYOUT (swizzled):
12 // K: [NumBlocks, NumHeads, HeadDim/kVectorSize, PageSize, kVectorSize]
13 // V: [NumBlocks, NumHeads, PageSize/kVectorSize, HeadDim, kVectorSize]
14 // - LINEAR_LAYOUT:
15 // K: [NumBlocks, PageSize, NumHeads, HeadDim]
16 // V: [NumBlocks, PageSize, NumHeads, HeadDim]
17 enum class BlockAttentionKVCacheMemoryLayoutEnum
18 {
19  VECTORIZED_LAYOUT = 0,
20  LINEAR_LAYOUT = 1,
21 };
22 
23 // KV cache lookup table layout selector.
24 // - VLLM_BLOCK_TABLE_2D: block_table[batch, max_blocks_per_seq]
25 // - SGLANG_PAGE_TABLE_1D: kv_page_indices[kv_indptr[b] ... kv_indptr[b+1])
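26 enum class BlockAttentionKVCacheLookupTableEnum
27 {
28  VLLM_BLOCK_TABLE_2D = 0,
29  SGLANG_PAGE_TABLE_1D = 1,
30 };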
31 
32 } // namespace ck_tile
ck_tile
Definition: cluster_descriptor.hpp:13
BlockAttentionKVCacheMemoryLayoutEnum
Definition: block_attention_kvcache_layout_enum.hpp:18
BlockAttentionKVCacheLookupTableEnum
Definition: block_attention_kvcache_layout_enum.hpp:27