/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/utility/debug.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/utility/debug.hpp Source File#

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/docs-6.4.3/include/ck/utility/debug.hpp Source File
debug.hpp
Go to the documentation of this file.
1 // SPDX-License-Identifier: MIT
2 // Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
3 
4 #ifndef UTILITY_DEBUG_HPP
5 #define UTILITY_DEBUG_HPP
6 #include "type.hpp"
7 
8 namespace ck {
9 namespace debug {
10 
11 namespace detail {
12 template <typename T, typename Enable = void>
13 struct PrintAsType;
14 
15 template <typename T>
16 struct PrintAsType<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
17 {
18  using type = float;
19  __host__ __device__ static void Print(const T& p) { printf("%.3f ", static_cast<type>(p)); }
20 };
21 
22 template <>
23 struct PrintAsType<ck::half_t, void>
24 {
25  using type = float;
26  __host__ __device__ static void Print(const ck::half_t& p)
27  {
28  printf("%.3f ", static_cast<type>(p));
29  }
30 };
31 
32 template <typename T>
33 struct PrintAsType<T, typename std::enable_if<std::is_integral<T>::value>::type>
34 {
35  using type = int;
36  __host__ __device__ static void Print(const T& p) { printf("%d ", static_cast<type>(p)); }
37 };
38 } // namespace detail
39 
40 // Print at runtime the data in shared memory in 128 bytes per row format given shared mem pointer
41 // and the number of elements. Can optionally specify strides between elements and how many bytes'
42 // worth of data per row.
43 //
44 // Usage example:
45 //
46 // debug::print_shared(a_block_buf.p_data_, index_t(a_block_desc_k0_m_k1.GetElementSpaceSize()));
47 //
48 template <typename T, index_t element_stride = 1, index_t row_bytes = 128>
49 __device__ void print_shared(T const* p_shared, index_t num_elements)
50 {
51  constexpr index_t row_elements = row_bytes / sizeof(T);
52  static_assert((element_stride >= 1 && element_stride <= row_elements),
53  "element_stride should between [1, row_elements]");
54 
55  index_t wgid = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;
56  index_t tid =
57  (threadIdx.z * (blockDim.x * blockDim.y)) + (threadIdx.y * blockDim.x) + threadIdx.x;
58 
59  __syncthreads();
60 
61  if(tid == 0)
62  {
63  printf("\nWorkgroup id %d, bytes per row %d, element stride %d\n\n",
64  wgid,
65  row_bytes,
66  element_stride);
67  for(index_t i = 0; i < num_elements; i += row_elements)
68  {
69  printf("elem %5d: ", i);
70  for(index_t j = 0; j < row_elements; j += element_stride)
71  {
72  detail::PrintAsType<T>::Print(p_shared[i + j]);
73  }
74 
75  printf("\n");
76  }
77  printf("\n");
78  }
79 
80  __syncthreads();
81 }
82 
83 template <index_t... Ids>
84 __device__ static bool is_thread_local_1d_id_idx()
85 {
86  const auto tid = get_thread_local_1d_id();
87  return ((tid == Ids) || ...);
88 }
89 
90 } // namespace debug
91 } // namespace ck
92 
93 #endif // UTILITY_DEBUG_HPP
__device__ void print_shared(T const *p_shared, index_t num_elements)
Definition: debug.hpp:49
Definition: ck.hpp:264
_Float16 half_t
Definition: data_type.hpp:25
std::enable_if< B, T > enable_if
Definition: enable_if.hpp:10
int32_t index_t
Definition: ck.hpp:289
__device__ index_t get_thread_local_1d_id()
Definition: get_id.hpp:16
__host__ static __device__ void Print(const T &p)
Definition: debug.hpp:36
__host__ static __device__ void Print(const ck::half_t &p)
Definition: debug.hpp:26
Definition: debug.hpp:13