37 #include <hip/hip_fp16.h>
39 #if __has_include(<half/half.hpp>)
40 #include <half/half.hpp>
44 using halfhpp = half_float::half;
45 typedef halfhpp Rpp16f;
54 #include <x86intrin.h>
55 #include <smmintrin.h>
56 #include <immintrin.h>
61 #define RPP_MIN_8U ( 0 )
63 #define RPP_MAX_8U ( 255 )
65 #define RPPT_MAX_DIMS ( 5 )
67 #define RPPT_MAX_AUDIO_CHANNELS ( 16 )
69 #define CHECK_RETURN_STATUS(x) do { \
72 fprintf(stderr, "Runtime error: %s returned %d at %s:%d", #x, retval, __FILE__, __LINE__); \
78 #include <hip/hip_runtime.h>
79 #define RPP_HOST_DEVICE __host__ __device__
81 #define RPP_HOST_DEVICE
84 const float ONE_OVER_6 = 1.0f / 6; // precomputed reciprocal of 6 (single precision)
85 const float ONE_OVER_3 = 1.0f / 3; // precomputed reciprocal of 3 (single precision)
86 const float ONE_OVER_255 = 1.0f / 255; // precomputed reciprocal of 255, the RPP_MAX_8U value; presumably used to normalize 8-bit data - confirm at use sites
87 const uint MMS_MAX_SCRATCH_MEMORY = 115293120; // scratch-memory upper bound; units (bytes vs. elements) and owning operation are not visible here - TODO confirm
88 const uint SPECTROGRAM_MAX_SCRATCH_MEMORY = 372877312; // scratch-memory upper bound, presumably for the spectrogram path; units not visible here - TODO confirm
89 #define DROPOUT_FIXED_SEED 42 // fixed seed value; presumably makes dropout-style randomness reproducible - confirm at use sites
194 rppStatusSuccess = 0,
195 rppStatusBadParm = -1,
196 rppStatusUnknownError = -2,
197 rppStatusNotInitialized = -3,
198 rppStatusInvalidValue = -4,
199 rppStatusAllocFailed = -5,
200 rppStatusInternalError = -6,
201 rppStatusNotImplemented = -7,
202 rppStatusUnsupportedOp = -8,
205 #ifdef LEGACY_SUPPORT
211 RPP_SCALAR_OP_AND = 1,
216 RPP_SCALAR_OP_NOTEQUAL,
218 RPP_SCALAR_OP_LESSEQ,
219 RPP_SCALAR_OP_GREATER,
220 RPP_SCALAR_OP_GREATEREQ,
222 RPP_SCALAR_OP_SUBTRACT,
223 RPP_SCALAR_OP_MULTIPLY,
224 RPP_SCALAR_OP_DIVIDE,
225 RPP_SCALAR_OP_MODULUS,
237 } RppConvertBitDepthMode;
292 #ifdef LEGACY_SUPPORT
300 } RppiColorConvertMode;
317 RPPI_HORIZONTAL_AXIS,
390 unsigned int roiWidth;
391 unsigned int roiHeight;
454 NEAREST_NEIGHBOR = 0,
555 int roiWidth, roiHeight;
565 int roiWidth, roiHeight, roiDepth;
688 float boxMullerExtra;
723 switch(interpolationType)
725 case RpptInterpolationType::BICUBIC:
730 case RpptInterpolationType::LANCZOS:
732 if(in_size > out_size)
734 this->radius = 3.0f * scaleRatio;
735 this->scale = (1 / scaleRatio);
741 case RpptInterpolationType::GAUSSIAN:
743 if(in_size > out_size)
745 this->radius = scaleRatio;
746 this->scale = (1 / scaleRatio);
750 case RpptInterpolationType::TRIANGULAR:
752 if(in_size > out_size)
754 this->radius = scaleRatio;
755 this->scale = (1 / scaleRatio);
766 this->size = std::ceil(2 * this->radius);
784 Rpp32f locRaw = x * scale + center;
785 Rpp32s locFloor = std::floor(locRaw);
786 Rpp32f weight = locRaw - locFloor;
787 locFloor = std::max(std::min(locFloor, lookupSize - 2), 0);
788 Rpp32f current = lookup[locFloor];
789 Rpp32f next = lookup[locFloor + 1];
790 return current + weight * (next - current);
793 inline __m128 operator()(__m128 x)
795 __m128 pLocRaw = _mm_add_ps(_mm_mul_ps(x, pScale), pCenter);
796 __m128i pxLocFloor = _mm_cvttps_epi32(pLocRaw);
797 __m128 pLocFloor = _mm_cvtepi32_ps(pxLocFloor);
798 __m128 pWeight = _mm_sub_ps(pLocRaw, pLocFloor);
800 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(idx), pxLocFloor);
801 __m128 pCurrent = _mm_setr_ps(lookup[idx[0]], lookup[idx[1]], lookup[idx[2]], lookup[idx[3]]);
802 __m128 pNext = _mm_setr_ps(lookup[idx[0] + 1], lookup[idx[1] + 1], lookup[idx[2] + 1], lookup[idx[3] + 1]);
803 return _mm_add_ps(pCurrent, _mm_mul_ps(pWeight, _mm_sub_ps(pNext, pCurrent)));
806 Rpp32f scale = 1, center = 1;
807 Rpp32s lobes = 0, coeffs = 0;
809 Rpp32f *lookupPinned =
nullptr;
810 std::vector<Rpp32f> lookup;
811 __m128 pCenter, pScale;
820 inline RPP_HOST_DEVICE
virtual Rpp32f hz_to_mel(
Rpp32f hz) = 0; // pure virtual: maps a frequency value (hz) to its mel-scale value; the formula is supplied by the implementing class
821 inline RPP_HOST_DEVICE
virtual Rpp32f mel_to_hz(
Rpp32f mel) = 0; // pure virtual: maps a mel-scale value back to a frequency value; the formula is supplied by the implementing class
830 inline RPP_HOST_DEVICE Rpp32f hz_to_mel(Rpp32f hz)
{
    // Mapping implemented here: mel = 1127 * ln(1 + hz / 700)
    const Rpp32f scaledHz = hz / 700.0f;
    return 1127.0f * std::log(1.0f + scaledHz);
}
831 inline RPP_HOST_DEVICE Rpp32f mel_to_hz(Rpp32f mel)
{
    // Mapping implemented here: hz = 700 * (exp(mel / 1127) - 1)
    const Rpp32f growth = std::exp(mel / 1127.0f);
    return 700.0f * (growth - 1.0f);
}
842 const Rpp32f fsp = 66.666667f;
843 const Rpp32f minLogHz = 1000.0;
844 const Rpp32f minLogMel = (minLogHz - freqLow) / fsp;
845 const Rpp32f stepLog = 0.068751777;
847 const Rpp32f invMinLogHz = 0.001f;
848 const Rpp32f invStepLog = 1.0f / stepLog;
849 const Rpp32f invFsp = 1.0f / fsp;
855 mel = minLogMel + std::log(hz * invMinLogHz) * invStepLog;
857 mel = (hz - freqLow) * invFsp;
865 if (mel >= minLogMel)
866 hz = minLogHz * std::exp(stepLog * (mel - minLogMel));
868 hz = freqLow + mel * fsp;
973 Rpp32f *scratchBufferHost;
1060 clmemSize maxSrcSize;
1061 clmemSize maxDstSize;
1063 clmemRpp32f floatArr[10];
1064 clmemRpp64f doubleArr[10];
1065 clmemRpp32u uintArr[10];
1066 clmemRpp32s intArr[10];
1067 clmemRpp8u ucharArr[10];
1068 clmemRpp8s charArr[10];
1069 cl_mem srcBatchIndex;
1070 cl_mem dstBatchIndex;
1084 #elif defined(HIP_COMPILE)
unsigned int Rpp32u
32 bit unsigned int
Definition: rppdefs.h:102
int Rpp32s
32 bit signed int
Definition: rppdefs.h:104
signed char Rpp8s
8 bit signed char
Definition: rppdefs.h:96
RppiChnFormat
RPPI Image channel format type enum.
Definition: rppdefs.h:369
RpptMelScaleFormula
RPPT Mel Scale Formula.
Definition: rppdefs.h:497
unsigned short Rpp16u
16 bit unsigned short
Definition: rppdefs.h:98
unsigned char Rpp8u
8 bit unsigned char
Definition: rppdefs.h:94
void * RppPtr_t
void pointer
Definition: rppdefs.h:114
RpptLayout
RPPT Tensor layout type enum.
Definition: rppdefs.h:412
RpptRoiType
RPPT Tensor 2D ROI type enum.
Definition: rppdefs.h:426
RpptDataType
RPPT Tensor datatype enum.
Definition: rppdefs.h:400
RpptImageBorderType
RPPT Image Border Type.
Definition: rppdefs.h:487
struct RpptResamplingWindow RpptResamplingWindow
RPPT Tensor RpptResamplingWindow type struct.
double Rpp64f
64 bit double
Definition: rppdefs.h:112
RpptImageBorderEdge
RPPT Image Border Edge type enum.
Definition: rppdefs.h:476
RpptAudioBorderType
RPPT Audio Border Type.
Definition: rppdefs.h:466
size_t RppSize_t
size_t
Definition: rppdefs.h:116
struct GenericFilter GenericFilter
RPPT Tensor GenericFilter type struct.
RppStatus
RPP RppStatus type enums.
Definition: rppdefs.h:122
RpptRoi3DType
RPPT Tensor 3D ROI type enum.
Definition: rppdefs.h:435
unsigned long long Rpp64u
64 bit unsigned long long
Definition: rppdefs.h:106
#define RPPT_MAX_DIMS
RPP maximum dimensions in tensor.
Definition: rppdefs.h:65
RpptInterpolationType
RPPT Tensor interpolation type enum.
Definition: rppdefs.h:453
RpptSubpixelLayout
RPPT Tensor subpixel layout type enum.
Definition: rppdefs.h:444
short Rpp16s
16 bit signed short
Definition: rppdefs.h:100
float Rpp32f
32 bit float
Definition: rppdefs.h:110
RppBackend
RPP RppBackend type enums.
Definition: rppdefs.h:183
long long Rpp64s
64 bit long long
Definition: rppdefs.h:108
rppStatus_t
RPP rppStatus_t type enums.
Definition: rppdefs.h:193
@ RPP_ERROR_NOT_IMPLEMENTED
Function variant requested is not implemented / unsupported.
Definition: rppdefs.h:136
@ RPP_ERROR_INVALID_DST_DATATYPE
Invalid dst tensor datatype. (Needs to adhere to function specification.)
Definition: rppdefs.h:148
@ RPP_ERROR_INVALID_OUTPUT_TILE_LENGTH
Invalid output tile length. (Needs to adhere to function specification.)
Definition: rppdefs.h:164
@ RPP_ERROR_INVALID_DIM_LENGTHS
Lengths in some dimensions are invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:174
@ RPP_ERROR_OUT_OF_BOUND_SRC_ROI
Out of bound source ROI.
Definition: rppdefs.h:158
@ RPP_ERROR_OUT_OF_BOUND_SHARED_MEMORY_SIZE
Shared memory size needed is beyond the bounds. (Needs to adhere to function specification.)
Definition: rppdefs.h:166
@ RPP_ERROR_ZERO_DIVISION
Arguments provided will result in zero division error.
Definition: rppdefs.h:132
@ RPP_ERROR_INVALID_ARGUMENTS
One or more arguments invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:128
@ RPP_ERROR_LAYOUT_MISMATCH
src and dst layout mismatch
Definition: rppdefs.h:160
@ RPP_ERROR_INVALID_CHANNELS
Number of channels is invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:162
@ RPP_ERROR_INVALID_SRC_CHANNELS
Invalid src tensor number of channels. (Needs to adhere to function specification.)
Definition: rppdefs.h:138
@ RPP_ERROR_INVALID_SRC_OR_DST_DATATYPE
Invalid src/dst tensor datatype. (Needs to adhere to function specification.)
Definition: rppdefs.h:150
@ RPP_ERROR_INVALID_PARAMETER_DATATYPE
Invalid datatype.
Definition: rppdefs.h:154
@ RPP_ERROR_NOT_ENOUGH_MEMORY
Not enough memory to write outputs, as per dim-lengths and strides set in descriptor.
Definition: rppdefs.h:156
@ RPP_ERROR
Unspecified error.
Definition: rppdefs.h:126
@ RPP_ERROR_INVALID_SRC_DIMS
Number of src dims is invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:170
@ RPP_ERROR_OUT_OF_BOUND_SCRATCH_MEMORY_SIZE
Scratch memory size needed is beyond the bounds. (Needs to adhere to function specification.)
Definition: rppdefs.h:168
@ RPP_ERROR_INVALID_DST_LAYOUT
Invalid dst tensor layout. (Needs to adhere to function specification.)
Definition: rppdefs.h:144
@ RPP_ERROR_INVALID_AXIS
The specified axis is invalid or out of range. (Needs to adhere to function specification.)
Definition: rppdefs.h:176
@ RPP_ERROR_INVALID_DST_CHANNELS
Invalid dst tensor number of channels. (Needs to adhere to function specification.)
Definition: rppdefs.h:140
@ RPP_ERROR_INVALID_SRC_DATATYPE
Invalid src tensor datatype. (Needs to adhere to function specification.)
Definition: rppdefs.h:146
@ RPP_ERROR_INVALID_SRC_LAYOUT
Invalid src tensor layout. (Needs to adhere to function specification.)
Definition: rppdefs.h:142
@ RPP_ERROR_HIGH_SRC_DIMENSION
Src tensor / src ROI dimension too high. (Needs to adhere to function specification.)
Definition: rppdefs.h:134
@ RPP_SUCCESS
No error.
Definition: rppdefs.h:124
@ RPP_ERROR_INSUFFICIENT_DST_BUFFER_LENGTH
Insufficient dst buffer length provided. (Needs to adhere to function specification.)
Definition: rppdefs.h:152
@ RPP_ERROR_INVALID_DST_DIMS
Number of dst dims is invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:172
@ RPP_ERROR_LOW_OFFSET
Low tensor offsetInBytes provided for src/dst tensor.
Definition: rppdefs.h:130
Base class for Mel scale conversions.
Definition: rppdefs.h:818
RPPT Tensor GenericFilter type struct.
Definition: rppdefs.h:717
Derived class for HTK Mel scale conversions.
Definition: rppdefs.h:829
RPP initialize handle.
Definition: rppdefs.h:1220
RPP 24 float vector.
Definition: rppdefs.h:286
RPP 6 float vector.
Definition: rppdefs.h:262
RPP 9 float vector.
Definition: rppdefs.h:270
RPP 24 signed int vector.
Definition: rppdefs.h:278
RPP layout params.
Definition: rppdefs.h:253
RPPI Image 2D ROI (XYWH format) type struct.
Definition: rppdefs.h:387
RPPI Image size(Width/Height dimensions) type struct.
Definition: rppdefs.h:378
RPPT Tensor 2D bilinear neighborhood 32-bit signed int 8-length-vectors type struct.
Definition: rppdefs.h:695
RPPT Tensor 2D bilinear neighborhood 32-bit float 8-length-vectors type struct.
Definition: rppdefs.h:706
RPPT Tensor Channel Offsets struct.
Definition: rppdefs.h:534
RPPT Tensor descriptor type struct.
Definition: rppdefs.h:604
RPPT Tensor 32-bit float RGB type struct.
Definition: rppdefs.h:640
RPPT Tensor 2D 32-bit float vector type struct.
Definition: rppdefs.h:659
RPPT Tensor Generic descriptor type struct.
Definition: rppdefs.h:617
RPPT Tensor 2D image patch dimensions type struct.
Definition: rppdefs.h:668
RPPT Image 2D cartesian point type struct.
Definition: rppdefs.h:506
RPPT Image 3D point type struct.
Definition: rppdefs.h:515
RPPT Tensor 8-bit uchar RGB type struct.
Definition: rppdefs.h:630
RPPT Tensor RpptResamplingWindow type struct.
Definition: rppdefs.h:774
RPPT Tensor 3D ROI LTFRBB struct.
Definition: rppdefs.h:544
RPPT Tensor 2D ROI LTRB struct.
Definition: rppdefs.h:525
RPPT Tensor 2D ROI XYWH struct.
Definition: rppdefs.h:553
RPPT Tensor 3D ROI XYZWHD struct.
Definition: rppdefs.h:563
RPPT Tensor strides type struct.
Definition: rppdefs.h:593
RPPT Tensor 2D 32-bit uint vector type struct.
Definition: rppdefs.h:650
RPPT Tensor random number generator state (xorwow box muller state) type struct.
Definition: rppdefs.h:685
RPPT Tensor random number generator state (xorwow state) type struct.
Definition: rppdefs.h:677
Derived class for Slaney Mel scale conversions.
Definition: rppdefs.h:840
RPP HIP 2D ROI memory.
Definition: rppdefs.h:1157
RPP HIP 32-bit float memory.
Definition: rppdefs.h:1092
RPP HIP 32-bit signed int memory.
Definition: rppdefs.h:1116
RPP HIP 32-bit unsigned int memory.
Definition: rppdefs.h:1108
RPP HIP 64-bit double memory.
Definition: rppdefs.h:1100
RPP HIP 8-bit signed char memory.
Definition: rppdefs.h:1132
RPP HIP 8-bit unsigned char memory.
Definition: rppdefs.h:1124
RPP HIP RGB memory.
Definition: rppdefs.h:1140
RPP HIP 2D dimensions memory.
Definition: rppdefs.h:1148
RPP HOST memory type struct.
Definition: rppdefs.h:956
RPP OCL memory management type struct.
Definition: rppdefs.h:1168
RPP HIP-HOST memory management.
Definition: rppdefs.h:1199
RPP HOST 2D ROI memory.
Definition: rppdefs.h:946
RPP HOST 32-bit float memory.
Definition: rppdefs.h:881
RPP HOST 32-bit signed int memory.
Definition: rppdefs.h:905
RPP HOST 32-bit unsigned int memory.
Definition: rppdefs.h:897
RPP HOST 64-bit double memory.
Definition: rppdefs.h:889
RPP HOST 8-bit signed char memory.
Definition: rppdefs.h:921
RPP HOST 8-bit unsigned char memory.
Definition: rppdefs.h:913
RPP HOST RGB memory.
Definition: rppdefs.h:929
RPP HOST 2D dimensions memory.
Definition: rppdefs.h:937
RPPT Tensor 3D ROI union.
Definition: rppdefs.h:583
RPPT Tensor 2D ROI union.
Definition: rppdefs.h:573