30 typename BufferSizeType,
31 bool InvalidElementUseNumericalZeroValue,
42 template <
typename T,
typename BufferSizeType,
bool Inval
idElementUseNumericalZeroValue>
46 InvalidElementUseNumericalZeroValue,
56 : p_data_{}, buffer_size_{}, invalid_element_value_{}
61 : p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
66 BufferSizeType buffer_size,
67 T invalid_element_value)
68 : p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
89 bool oob_conditional_check =
true,
91 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
96 bool is_valid_element,
104 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
105 "wrong! X should contain multiple T");
109 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
112 __builtin_memcpy(&tmp, &(p_data_[i + linear_offset]),
sizeof(X));
116 return *c_style_pointer_cast<const X*>(&p_data_[i + linear_offset]);
121 if constexpr(InvalidElementUseNumericalZeroValue)
123 return X{numeric<remove_cvref_t<T>>::zero()};
127 return X{invalid_element_value_};
136 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
137 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
143 this->
template set<X>(i, linear_offset, is_valid_element, x);
148 auto tmp = this->
template get<X>(i, linear_offset, is_valid_element);
149 this->
template set<X>(i, linear_offset, is_valid_element, x + tmp);
154 template <
typename X,
156 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
166 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
167 "wrong! X should contain multiple T");
171 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
174 __builtin_memcpy(&(p_data_[i + linear_offset]), &tmp,
sizeof(X));
176 *c_style_pointer_cast<X*>(&p_data_[i + linear_offset]) = x;
189 printf(
"buffer_view{");
192 printf(
"AddressSpace: generic, ");
195 printf(
"p_data_: %p, ",
static_cast<void*
>(
const_cast<remove_cvref_t<T>*
>(p_data_)));
198 printf(
"buffer_size_: ");
203 printf(
"invalid_element_value_: ");
204 print(invalid_element_value_);
217 template <
typename T,
218 typename BufferSizeType,
219 bool InvalidElementUseNumericalZeroValue,
224 InvalidElementUseNumericalZeroValue,
229 T* p_data_ =
nullptr;
235 : p_data_{}, buffer_size_{}, cached_buf_res_{0}, invalid_element_value_{}
240 : p_data_{p_data}, buffer_size_{buffer_size}, cached_buf_res_{0}, invalid_element_value_{0}
245 BufferSizeType buffer_size,
246 T invalid_element_value)
248 buffer_size_{buffer_size},
250 invalid_element_value_{invalid_element_value}
275 template <
typename X,
276 bool oob_conditional_check =
true,
278 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
283 bool is_valid_element,
291 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
292 "wrong! X should contain multiple T");
294 #if CK_TILE_USE_AMD_BUFFER_LOAD
295 bool constexpr use_amd_buffer_addressing =
true;
297 bool constexpr use_amd_buffer_addressing =
false;
300 if constexpr(use_amd_buffer_addressing)
302 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
304 if constexpr(InvalidElementUseNumericalZeroValue)
306 return amd_buffer_load_invalid_element_return_zero<remove_cvref_t<T>,
309 oob_conditional_check>(
310 p_data_, i + linear_offset, is_valid_element, buffer_size_);
318 oob_conditional_check>(p_data_,
322 invalid_element_value_);
329 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
332 __builtin_memcpy(&tmp, &(p_data_[i + linear_offset]),
sizeof(X));
336 return *c_style_pointer_cast<const X*>(&p_data_[i + linear_offset]);
341 if constexpr(InvalidElementUseNumericalZeroValue)
343 return X{numeric<remove_cvref_t<T>>::zero()};
347 return X{invalid_element_value_};
354 template <
typename X,
355 bool oob_conditional_check =
true,
356 bool pre_nop =
false,
358 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
359 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
364 bool is_valid_element,
371 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
372 "wrong! X should contain multiple T");
374 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
376 amd_buffer_load_raw<remove_cvref_t<T>, t_per_x, Coherence, oob_conditional_check, pre_nop>(
381 template <
typename X,
382 bool oob_conditional_check =
true,
384 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
385 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
390 bool is_valid_element,
397 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
398 "wrong! X should contain multiple T");
400 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
402 amd_async_buffer_load_with_oob<remove_cvref_t<T>, t_per_x, Coherence>(
412 template <
typename X,
413 bool pre_nop =
false,
415 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
416 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
428 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
429 "wrong! X should contain multiple T");
431 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
433 amd_async_buffer_load_with_oob_raw<remove_cvref_t<T>, t_per_x, Coherence>(
440 bool oob_conditional_check =
true,
442 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
443 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
447 bool is_valid_element,
453 this->
template set<X, oob_conditional_check>(i, linear_offset, is_valid_element, x);
457 this->
template atomic_add<X, oob_conditional_check>(
458 i, linear_offset, is_valid_element, x);
462 this->
template atomic_max<X, oob_conditional_check>(
463 i, linear_offset, is_valid_element, x);
469 this->
template get<X, oob_conditional_check>(i, linear_offset, is_valid_element);
470 this->
template set<X, oob_conditional_check>(
471 i, linear_offset, is_valid_element, x + tmp);
480 bool oob_conditional_check =
true,
481 bool pre_nop =
false,
483 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
484 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
488 bool is_valid_element,
495 this->
template set_raw<X, oob_conditional_check>(i, linear_offset, is_valid_element, x);
499 this->
template atomic_add_raw<X, oob_conditional_check, pre_nop>(
500 i, linear_offset, is_valid_element, x);
509 template <
typename X,
510 bool oob_conditional_check =
true,
512 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
513 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
522 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
523 "wrong! X should contain multiple T");
525 #if CK_TILE_USE_AMD_BUFFER_STORE
526 bool constexpr use_amd_buffer_addressing =
true;
528 bool constexpr use_amd_buffer_addressing =
false;
531 if constexpr(use_amd_buffer_addressing)
533 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
535 amd_buffer_store<remove_cvref_t<T>, t_per_x, Coherence>(
536 x, p_data_, i + linear_offset, is_valid_element, buffer_size_);
542 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
545 __builtin_memcpy(&(p_data_[i + linear_offset]), &tmp,
sizeof(X));
547 *c_style_pointer_cast<X*>(&p_data_[i + linear_offset]) = x;
554 template <
typename X,
555 bool oob_conditional_check =
true,
557 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
567 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
568 "wrong! X should contain multiple T");
570 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
571 amd_buffer_store_raw<remove_cvref_t<T>, t_per_x, Coherence, oob_conditional_check>(
572 x, p_data_, i, linear_offset, is_valid_element, buffer_size_);
575 template <
typename X,
576 bool oob_conditional_check =
true,
578 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
591 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
592 "wrong! X should contain multiple T");
596 #if CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER && CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT
597 bool constexpr use_amd_buffer_addressing =
598 std::is_same_v<remove_cvref_t<scalar_t>, int32_t> ||
599 std::is_same_v<remove_cvref_t<scalar_t>,
float> ||
600 (std::is_same_v<remove_cvref_t<scalar_t>,
half_t> && scalar_per_x_vector % 2 == 0);
601 #elif CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER && (!CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT)
602 bool constexpr use_amd_buffer_addressing =
603 std::is_same_v<remove_cvref_t<scalar_t>, int32_t>;
604 #elif(!CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_INTEGER) && CK_TILE_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT
605 bool constexpr use_amd_buffer_addressing =
606 std::is_same_v<remove_cvref_t<scalar_t>,
float> ||
607 (std::is_same_v<remove_cvref_t<scalar_t>,
half_t> && scalar_per_x_vector % 2 == 0);
609 bool constexpr use_amd_buffer_addressing =
false;
612 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
614 if constexpr(use_amd_buffer_addressing)
616 amd_buffer_atomic_add<remove_cvref_t<T>, t_per_x>(
617 x, p_data_, i + linear_offset, is_valid_element, buffer_size_);
623 atomic_add_g<remove_cvref_t<T>, t_per_x>(&p_data_[i + linear_offset], x);
628 template <
typename X,
629 bool oob_conditional_check =
true,
632 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
645 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
646 "wrong! X should contain multiple T");
650 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
652 amd_buffer_atomic_add_raw<remove_cvref_t<T>,
655 oob_conditional_check,
657 x, p_data_, i, linear_offset, is_valid_element, buffer_size_);
660 template <
typename X,
661 bool oob_conditional_check =
true,
663 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
674 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
675 "wrong! X should contain multiple T");
679 #if CK_TILE_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64
681 bool constexpr use_amd_buffer_addressing = std::is_same_v<remove_cvref_t<scalar_t>,
double>;
683 bool constexpr use_amd_buffer_addressing =
false;
686 constexpr
index_t t_per_x = scalar_per_x_vector / scalar_per_t_vector;
688 if constexpr(use_amd_buffer_addressing)
690 amd_buffer_atomic_max<remove_cvref_t<T>, t_per_x>(
691 x, p_data_, i + linear_offset, is_valid_element, buffer_size_);
693 else if(is_valid_element)
695 atomic_max_g<remove_cvref_t<T>, t_per_x>(&p_data_[i + linear_offset], x);
707 printf(
"buffer_view{");
710 printf(
"AddressSpace: Global, ");
713 printf(
"p_data_: %p, ",
static_cast<void*
>(
const_cast<remove_cvref_t<T>*
>(p_data_)));
716 printf(
"buffer_size_: ");
721 printf(
"invalid_element_value_: ");
722 print(invalid_element_value_);
735 template <
typename T,
typename BufferSizeType,
bool Inval
idElementUseNumericalZeroValue>
739 InvalidElementUseNumericalZeroValue,
744 T* p_data_ =
nullptr;
749 : p_data_{}, buffer_size_{}, invalid_element_value_{}
754 : p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
759 BufferSizeType buffer_size,
760 T invalid_element_value)
761 : p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
781 template <
typename X,
782 bool oob_conditional_check =
true,
784 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
789 bool is_valid_element,
797 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
798 "wrong! X should contain multiple T");
802 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
805 __builtin_memcpy(&tmp, &(p_data_[i + linear_offset]),
sizeof(X));
810 scalar_per_t_vector * scalar_per_x_vector>;
812 auto rtn = *c_style_pointer_cast<const buf_t*>(&p_data_[i + linear_offset]);
813 return bit_cast<X>(rtn);
818 if constexpr(InvalidElementUseNumericalZeroValue)
820 return X{numeric<remove_cvref_t<T>>::zero()};
824 return X{invalid_element_value_};
830 template <
typename X,
831 bool oob_conditional_check =
true,
832 bool pre_nop =
false,
834 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
835 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
843 smem_load<
sizeof(X)>{}(dst, v_offset *
sizeof(T), i_offset *
sizeof(T));
850 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
851 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
857 this->
template set<X>(i, linear_offset, is_valid_element, x);
862 auto tmp = this->
template get<X>(i, linear_offset, is_valid_element);
863 this->
template set<X>(i, linear_offset, is_valid_element, x + tmp);
868 template <
typename X,
870 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
880 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
881 "wrong! X should contain multiple T");
883 #if CK_TILE_WORKAROUND_SWDEV_XXXXXX_INT8_DS_WRITE_ISSUE
884 bool constexpr workaround_int8_ds_write_issue =
true;
886 bool constexpr workaround_int8_ds_write_issue =
false;
892 workaround_int8_ds_write_issue)
916 "wrong! not implemented for this combination, please add "
924 *c_style_pointer_cast<int8_t*>(&p_data_[i]) =
925 *c_style_pointer_cast<const int8_t*>(&x);
932 *c_style_pointer_cast<int16_t*>(&p_data_[i]) =
933 *c_style_pointer_cast<const int16_t*>(&x);
940 *c_style_pointer_cast<int32_t*>(&p_data_[i]) =
941 *c_style_pointer_cast<const int32_t*>(&x);
948 *c_style_pointer_cast<int32x2_t*>(&p_data_[i]) =
949 *c_style_pointer_cast<const int32x2_t*>(&x);
956 *c_style_pointer_cast<int32x4_t*>(&p_data_[i]) =
957 *c_style_pointer_cast<const int32x4_t*>(&x);
964 *c_style_pointer_cast<int32_t*>(&p_data_[i]) =
965 *c_style_pointer_cast<const int32_t*>(&x);
972 *c_style_pointer_cast<int32x2_t*>(&p_data_[i]) =
973 *c_style_pointer_cast<const int32x2_t*>(&x);
980 *c_style_pointer_cast<int32x4_t*>(&p_data_[i]) =
981 *c_style_pointer_cast<const int32x4_t*>(&x);
989 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
992 __builtin_memcpy(&(p_data_[i]), &tmp,
sizeof(X));
995 scalar_per_t_vector * scalar_per_x_vector>;
997 *c_style_pointer_cast<buf_t*>(&p_data_[i]) =
reinterpret_cast<const buf_t&
>(x);
1011 printf(
"buffer_view{");
1014 printf(
"AddressSpace: Lds, ");
1017 printf(
"p_data_: %p, ",
static_cast<void*
>(
const_cast<remove_cvref_t<T>*
>(p_data_)));
1020 printf(
"buffer_size_: ");
1021 print(buffer_size_);
1025 printf(
"invalid_element_value_: ");
1026 print(invalid_element_value_);
1039 template <
typename T,
typename BufferSizeType,
bool Inval
idElementUseNumericalZeroValue>
1043 InvalidElementUseNumericalZeroValue,
1048 T* p_data_ =
nullptr;
1053 : p_data_{}, buffer_size_{}, invalid_element_value_{}
1058 : p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{0}
1063 BufferSizeType buffer_size,
1064 T invalid_element_value)
1065 : p_data_{p_data}, buffer_size_{buffer_size}, invalid_element_value_{invalid_element_value}
1085 template <
typename X,
1086 bool oob_conditional_check =
true,
1088 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
1090 bool>::type =
false>
1093 bool is_valid_element,
1101 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
1102 "wrong! X should contain multiple T");
1104 if(is_valid_element)
1106 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
1109 __builtin_memcpy(&tmp, &(p_data_[i]),
sizeof(X));
1113 return *c_style_pointer_cast<const X*>(&p_data_[i]);
1118 if constexpr(InvalidElementUseNumericalZeroValue)
1120 return X{numeric<remove_cvref_t<T>>::zero()};
1124 return X{invalid_element_value_};
1133 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
1134 typename vector_traits<remove_cvref_t<T>>::scalar_type>::value,
1135 bool>::type =
false>
1140 this->
template set<X>(i, linear_offset, is_valid_element, x);
1145 auto tmp = this->
template get<X>(i, linear_offset, is_valid_element);
1146 this->
template set<X>(i, linear_offset, is_valid_element, x + tmp);
1151 template <
typename X,
1153 std::is_same<typename vector_traits<remove_cvref_t<X>>::scalar_type,
1155 bool>::type =
false>
1163 static_assert(scalar_per_x_vector % scalar_per_t_vector == 0,
1164 "wrong! X should contain multiple T");
1166 if(is_valid_element)
1168 #if CK_TILE_EXPERIMENTAL_USE_MEMCPY_FOR_VECTOR_ACCESS
1171 __builtin_memcpy(&(p_data_[i + linear_offset]), &tmp,
sizeof(X));
1173 *c_style_pointer_cast<X*>(&p_data_[i + linear_offset]) = x;
1186 printf(
"buffer_view{");
1189 printf(
"AddressSpace: Vgpr, ");
1192 printf(
"p_data_: %p, ",
static_cast<void*
>(
const_cast<remove_cvref_t<T>*
>(p_data_)));
1195 printf(
"buffer_size_: ");
1196 print(buffer_size_);
1200 printf(
"invalid_element_value_: ");
1201 print(invalid_element_value_);
1210 typename BufferSizeType>
1219 typename BufferSizeType,
1221 typename std::enable_if<std::is_same<remove_cvref_t<T>, remove_cvref_t<X>>::value,
1222 bool>::type =
false>
1227 p, buffer_size, invalid_element_value};
#define CK_TILE_DEVICE
Definition: config.hpp:40
#define CK_TILE_LDS_ADDR
Definition: config.hpp:56
#define CK_TILE_HOST_DEVICE
Definition: config.hpp:41
Definition: cluster_descriptor.hpp:13
memory_operation_enum
Definition: arch.hpp:44
int8_t __attribute((ext_vector_type(4))) int8x4_t
Definition: vector_type.hpp:150
CK_TILE_DEVICE thread_buffer< T, N > amd_buffer_load_invalid_element_return_customized_value(const T *p_src_wave, index_t src_thread_element_offset, bool src_thread_element_valid, index_t src_element_space_size, T customized_value)
Definition: amd_buffer_addressing.hpp:2195
int8_t int8_t
Definition: int8.hpp:20
amd_buffer_coherence_enum
Definition: amd_buffer_addressing.hpp:1179
constexpr CK_TILE_HOST_DEVICE auto make_buffer_view(T *p, BufferSizeType buffer_size)
Definition: buffer_view.hpp:1211
int8_t __attribute((ext_vector_type(16))) int8x16_t
Definition: vector_type.hpp:152
int32_t index_t
Definition: integer.hpp:9
remove_cv_t< std::remove_reference_t< T > > remove_cvref_t
Definition: type_traits.hpp:20
int8_t __attribute((ext_vector_type(2))) int8x2_t
Definition: vector_type.hpp:149
typename impl::ext_vector< T, N >::type ext_vector_t
Definition: vector_type.hpp:54
int32_t int32x4_t
Definition: vector_type.hpp:114
address_space_enum
Definition: arch.hpp:34
_Float16 half_t
Definition: half.hpp:111
CK_TILE_DEVICE int32x4_t make_wave_buffer_resource(const void *ptr, uint32_t size=0xffffffff)
Definition: amd_buffer_addressing.hpp:26
int8_t __attribute((ext_vector_type(8))) int8x8_t
Definition: vector_type.hpp:151
std::enable_if< B, T > enable_if
Definition: enable_if.hpp:10
CK_TILE_HOST_DEVICE void print() const
Definition: buffer_view.hpp:187
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size)
Definition: buffer_view.hpp:60
CK_TILE_DEVICE void set(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:159
constexpr CK_TILE_HOST_DEVICE buffer_view()
Definition: buffer_view.hpp:55
CK_TILE_HOST_DEVICE void init_raw()
Definition: buffer_view.hpp:72
constexpr CK_TILE_DEVICE T & operator()(index_t i)
Definition: buffer_view.hpp:85
static constexpr CK_TILE_DEVICE bool is_dynamic_buffer()
Definition: buffer_view.hpp:185
CK_TILE_DEVICE void update(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:139
static constexpr CK_TILE_DEVICE address_space_enum get_address_space()
Definition: buffer_view.hpp:74
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size, T invalid_element_value)
Definition: buffer_view.hpp:65
constexpr CK_TILE_DEVICE auto get(index_t i, index_t linear_offset, bool is_valid_element, bool_constant< oob_conditional_check >={}) const
Definition: buffer_view.hpp:94
constexpr CK_TILE_DEVICE const T & operator[](index_t i) const
Definition: buffer_view.hpp:81
T type
Definition: buffer_view.hpp:49
BufferSizeType buffer_size_
Definition: buffer_view.hpp:52
static constexpr CK_TILE_DEVICE bool is_static_buffer()
Definition: buffer_view.hpp:182
int32x4_t cached_buf_res_
Definition: buffer_view.hpp:231
static constexpr CK_TILE_DEVICE bool is_dynamic_buffer()
Definition: buffer_view.hpp:703
BufferSizeType buffer_size_
Definition: buffer_view.hpp:230
constexpr CK_TILE_DEVICE auto async_get_raw(remove_cvref_t< T > *smem, index_t i, index_t linear_offset, bool, bool_constant< pre_nop >={}) const
Definition: buffer_view.hpp:418
constexpr CK_TILE_DEVICE const T & operator[](index_t i) const
Definition: buffer_view.hpp:268
CK_TILE_HOST_DEVICE void init_raw()
Definition: buffer_view.hpp:256
constexpr CK_TILE_DEVICE T & operator()(index_t i)
Definition: buffer_view.hpp:272
constexpr CK_TILE_DEVICE auto get(index_t i, index_t linear_offset, bool is_valid_element, bool_constant< oob_conditional_check >={}) const
Definition: buffer_view.hpp:281
CK_TILE_HOST_DEVICE void print() const
Definition: buffer_view.hpp:705
CK_TILE_DEVICE void update(index_t i, index_t linear_offset, bool is_valid_element, const X &x, bool_constant< oob_conditional_check >={})
Definition: buffer_view.hpp:445
static constexpr CK_TILE_DEVICE address_space_enum get_address_space()
Definition: buffer_view.hpp:261
CK_TILE_DEVICE void update_raw(index_t i, index_t linear_offset, bool is_valid_element, const X &x, bool_constant< oob_conditional_check >={}, bool_constant< pre_nop >={})
Definition: buffer_view.hpp:486
constexpr CK_TILE_DEVICE auto async_get(CK_TILE_LDS_ADDR remove_cvref_t< T > *smem, index_t i, index_t linear_offset, bool is_valid_element, bool_constant< oob_conditional_check >={}) const
Definition: buffer_view.hpp:387
T type
Definition: buffer_view.hpp:227
constexpr CK_TILE_DEVICE auto get_raw(remove_cvref_t< X > &dst, index_t v_offset, index_t i_offset, bool is_valid_element, bool_constant< pre_nop >={}) const
Definition: buffer_view.hpp:361
CK_TILE_DEVICE void atomic_add(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:582
static constexpr CK_TILE_DEVICE bool is_static_buffer()
Definition: buffer_view.hpp:700
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size, T invalid_element_value)
Definition: buffer_view.hpp:244
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size)
Definition: buffer_view.hpp:239
CK_TILE_DEVICE void set_raw(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:560
constexpr CK_TILE_HOST_DEVICE buffer_view()
Definition: buffer_view.hpp:234
CK_TILE_DEVICE void set(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:515
CK_TILE_DEVICE void atomic_max(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:667
CK_TILE_DEVICE void atomic_add_raw(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:636
CK_TILE_DEVICE void set(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:873
constexpr CK_TILE_HOST_DEVICE buffer_view()
Definition: buffer_view.hpp:748
static constexpr CK_TILE_DEVICE bool is_static_buffer()
Definition: buffer_view.hpp:1004
BufferSizeType buffer_size_
Definition: buffer_view.hpp:745
static constexpr CK_TILE_DEVICE bool is_dynamic_buffer()
Definition: buffer_view.hpp:1007
CK_TILE_HOST_DEVICE void init_raw()
Definition: buffer_view.hpp:765
constexpr CK_TILE_DEVICE auto get(index_t i, index_t linear_offset, bool is_valid_element, bool_constant< oob_conditional_check >={}) const
Definition: buffer_view.hpp:787
CK_TILE_DEVICE void update(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:853
CK_TILE_HOST_DEVICE void print() const
Definition: buffer_view.hpp:1009
constexpr CK_TILE_DEVICE const T & operator[](index_t i) const
Definition: buffer_view.hpp:774
constexpr CK_TILE_DEVICE T & operator()(index_t i)
Definition: buffer_view.hpp:778
T type
Definition: buffer_view.hpp:742
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size, T invalid_element_value)
Definition: buffer_view.hpp:758
constexpr CK_TILE_DEVICE auto get_raw(remove_cvref_t< X > &dst, index_t v_offset, index_t i_offset, bool, bool_constant< pre_nop >={}) const
Definition: buffer_view.hpp:837
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size)
Definition: buffer_view.hpp:753
static constexpr CK_TILE_DEVICE address_space_enum get_address_space()
Definition: buffer_view.hpp:767
static constexpr CK_TILE_DEVICE bool is_dynamic_buffer()
Definition: buffer_view.hpp:1182
CK_TILE_HOST_DEVICE void init_raw()
Definition: buffer_view.hpp:1069
CK_TILE_HOST_DEVICE void print() const
Definition: buffer_view.hpp:1184
static constexpr CK_TILE_DEVICE bool is_static_buffer()
Definition: buffer_view.hpp:1179
T type
Definition: buffer_view.hpp:1046
constexpr CK_TILE_DEVICE T & operator()(index_t i)
Definition: buffer_view.hpp:1082
CK_TILE_DEVICE void set(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:1156
constexpr CK_TILE_HOST_DEVICE buffer_view()
Definition: buffer_view.hpp:1052
BufferSizeType buffer_size_
Definition: buffer_view.hpp:1049
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size, T invalid_element_value)
Definition: buffer_view.hpp:1062
constexpr CK_TILE_DEVICE const T & operator[](index_t i) const
Definition: buffer_view.hpp:1078
constexpr CK_TILE_HOST_DEVICE buffer_view(T *p_data, BufferSizeType buffer_size)
Definition: buffer_view.hpp:1057
CK_TILE_DEVICE void update(index_t i, index_t linear_offset, bool is_valid_element, const X &x)
Definition: buffer_view.hpp:1136
constexpr CK_TILE_DEVICE auto get(index_t i, index_t, bool is_valid_element, bool_constant< oob_conditional_check >={}) const
Definition: buffer_view.hpp:1091
static constexpr CK_TILE_DEVICE address_space_enum get_address_space()
Definition: buffer_view.hpp:1071
Definition: buffer_view.hpp:33
Definition: integral_constant.hpp:13
Definition: amd_buffer_addressing.hpp:699
Definition: vector_type.hpp:60