9 #define UINT_MAX 4294967295 
   19 template <
typename DTYPE>
 
   22     return DTYPE::dataInfo.hasInf;
 
   47     return get_exponent_value<T>(x) == 0;
 
   53     double mantissa = is_subnormal<T>(x) ? 0.0f : 1.0f;
 
   55     for(uint i = 0; i < NumericUtils<T>::mant; i++)
 
   79     if(is_subnormal<T>(data))
 
   83     float d_mant = get_mantissa_value<T>(data);
 
   85     float data_value  = d_sign * d_exp * d_mant;
 
   89     return data_value * scale_value;
 
  101 template <
typename T>
 
  135         float diff      = max_value - prev_val;
 
  137         float actual_max = max_value + (diff / 2);
 
  139         if(std::abs(value) < actual_max)
 
  142                    (exp << NumericUtils<T>::mant) | mantissa;
 
  146             if(!get_data_has_inf<T>())
 
  155                        (exp << NumericUtils<T>::mant);
 
  161     x = bit_cast<uint32_t>(value);
 
  163     uint32_t head, mantissa;
 
  179     const int mini_denormal_act_exponent = 1 - mini_bias;
 
  181     int act_exponent, out_exponent, exponent_diff;
 
  183     bool is_subnorm = 
false;
 
  187         act_exponent  = exponent - bias + 1;
 
  188         exponent_diff = mini_denormal_act_exponent - act_exponent;
 
  193         act_exponent = exponent - bias;
 
  194         if(act_exponent <= mini_denormal_act_exponent)
 
  196             exponent_diff = mini_denormal_act_exponent - act_exponent;
 
  203         mantissa += (1UL << mfmt);
 
  207     shift_amount      = (shift_amount >= 64) ? 63 : shift_amount;
 
  208     bool midpoint     = (mantissa & ((1UL << shift_amount) - 1)) == (1UL << (shift_amount - 1));
 
  212     if(is_subnorm && std::abs(value) < std::abs(min_subnorm))
 
  215         if(std::abs(value) <= std::abs(min_subnorm - value))
 
  221     if(exponent_diff > 0)
 
  222         mantissa >>= exponent_diff;
 
  223     else if(exponent_diff == -1)
 
  224         mantissa <<= -exponent_diff;
 
  225     bool implicit_one = mantissa & (1 << mfmt);
 
  226     out_exponent      = (act_exponent + exponent_diff) + mini_bias - (implicit_one ? 0 : 1);
 
  230     mantissa += (midpoint ? (odd ? mantissa : mantissa - 1) : mantissa) & drop_mask;
 
  232     if(out_exponent == 0)
 
  234         if((1UL << mfmt) & mantissa)
 
  241         if((1UL << (mfmt + 1)) & mantissa)
 
  250     if(out_exponent == 0 && mantissa == 0)
 
  260 template <
typename T>
 
  289         float diff      = max_value - prev_val;
 
  291         float actual_max = max_value + (diff / 2);
 
  293         if(std::abs(value) < actual_max)
 
  295             double d_max_value  = 
static_cast<double>(max_value);
 
  296             double d_actual_max = 
static_cast<double>(actual_max);
 
  297             double d_value      = 
static_cast<double>(value);
 
  298             double d_is         = std::abs(d_max_value - d_actual_max);
 
  299             double d_seed       = 
static_cast<double>(seed);
 
  300             double d_prob = 1.0f - (std::abs(d_value - d_max_value) / d_is); 
 
  302             double thresh = UINT_MAX * d_prob;
 
  304             if(!get_data_has_inf<T>() || d_seed <= thresh)
 
  312                        | (exp << NumericUtils<T>::mant);
 
  317             if(!get_data_has_inf<T>())
 
  323                        | (exp << NumericUtils<T>::mant);
 
  328     uint32_t f32 = bit_cast<uint32_t>(value);
 
  339     auto mant    = f32_mant;
 
  340     bool subnorm = 
false;
 
  373     mant += seed >> sr_shift;
 
  381     auto biased_exp = 
static_cast<uint32_t
>(
exp);
 
  385     auto val = sign | biased_exp << NumericUtils<T>::mant | mant;
 
__host__ T exp(T x)
Definition: math_v2.hpp:391
 
__host__ T pow(T x, T gamma)
Definition: math_v2.hpp:427
 
Definition: check_err.hpp:24
 
__host__ __device__ T sat_convert_to_type(float value)
 
__host__ __device__ bool is_subnormal(T x)
Definition: mxfp_utils.hpp:45
 
__host__ __device__ bool get_data_has_inf()
Definition: mxfp_utils.hpp:67
 
__host__ __device__ T sat_convert_to_type_sr(float value, uint32_t seed)
 
__host__ __device__ float convert_to_float(T data, int scale_exp)
Definition: mxfp_utils.hpp:73
 
__host__ __device__ T convert_to_type_sr(float value, uint32_t seed)
Definition: mxfp_utils.hpp:261
 
__host__ __device__ bool is_zero(e8m0_bexp_t const scale, T const data)
 
__host__ __device__ T convert_to_type(float value)
Definition: mxfp_utils.hpp:102
 
__host__ __device__ bool is_inf(e8m0_bexp_t const scale, T const data)
 
__host__ constexpr __device__ int32_t get_exponent_value(T x)
Definition: mxfp_utils.hpp:35
 
__host__ __device__ double get_mantissa_value(T x)
Definition: mxfp_utils.hpp:51
 
__host__ __device__ bool is_nan(e8m0_bexp_t const scale, T const data)
 
bool getDataHasInf()
Definition: mxfp_utils.hpp:20
 
__host__ __device__ float to_float(e8m0_bexp_t const scale, T const data)
 
int32_t int32_t
Definition: integer.hpp:10
 
Definition: numeric_limits.hpp:309
 
__host__ static constexpr __device__ T Max()
Definition: numeric_limits.hpp:311
 
Definition: numeric_utils.hpp:10
 
Unsigned representation of a conventional biased Float32 exponent.
Definition: e8m0.hpp:25
 
Definition: mxfp_utils.hpp:14
 
float value_float
Definition: mxfp_utils.hpp:15
 
uint32_t value_bitwise
Definition: mxfp_utils.hpp:16