43 #include <x86intrin.h>
44 #include <smmintrin.h>
45 #include <immintrin.h>
50 #define RPP_MIN_8U ( 0 )
52 #define RPP_MAX_8U ( 255 )
54 #define RPPT_MAX_DIMS ( 5 )
56 #define CHECK_RETURN_STATUS(x) do { \
59 fprintf(stderr, "Runtime error: %s returned %d at %s:%d", #x, retval, __FILE__, __LINE__); \
64 const float ONE_OVER_6 = 1.0f / 6;
65 const float ONE_OVER_3 = 1.0f / 3;
66 const float ONE_OVER_255 = 1.0f / 255;
67 const uint MMS_MAX_SCRATCH_MEMORY = 76800000;
156 rppStatusSuccess = 0,
157 rppStatusBadParm = -1,
158 rppStatusUnknownError = -2,
159 rppStatusNotInitialized = -3,
160 rppStatusInvalidValue = -4,
161 rppStatusAllocFailed = -5,
162 rppStatusInternalError = -6,
163 rppStatusNotImplemented = -7,
164 rppStatusUnsupportedOp = -8,
172 RPP_SCALAR_OP_AND = 1,
177 RPP_SCALAR_OP_NOTEQUAL,
179 RPP_SCALAR_OP_LESSEQ,
180 RPP_SCALAR_OP_GREATER,
181 RPP_SCALAR_OP_GREATEREQ,
183 RPP_SCALAR_OP_SUBTRACT,
184 RPP_SCALAR_OP_MULTIPLY,
185 RPP_SCALAR_OP_DIVIDE,
186 RPP_SCALAR_OP_MODULUS,
277 RPPI_HORIZONTAL_AXIS,
359 unsigned int roiWidth;
360 unsigned int roiHeight;
422 NEAREST_NEIGHBOR = 0,
483 int roiWidth, roiHeight;
493 int roiWidth, roiHeight, roiDepth;
616 float boxMullerExtra;
651 switch(interpolationType)
653 case RpptInterpolationType::BICUBIC:
658 case RpptInterpolationType::LANCZOS:
660 if(in_size > out_size)
662 this->radius = 3.0f * scaleRatio;
663 this->scale = (1 / scaleRatio);
669 case RpptInterpolationType::GAUSSIAN:
671 if(in_size > out_size)
673 this->radius = scaleRatio;
674 this->scale = (1 / scaleRatio);
678 case RpptInterpolationType::TRIANGULAR:
680 if(in_size > out_size)
682 this->radius = scaleRatio;
683 this->scale = (1 / scaleRatio);
694 this->size = std::ceil(2 * this->radius);
712 Rpp32f locRaw = x * scale + center;
713 Rpp32s locFloor = std::floor(locRaw);
714 Rpp32f weight = locRaw - locFloor;
715 locFloor = std::max(std::min(locFloor, lookupSize - 2), 0);
716 Rpp32f current = lookup[locFloor];
717 Rpp32f next = lookup[locFloor + 1];
718 return current + weight * (next - current);
721 inline __m128 operator()(__m128 x)
723 __m128 pLocRaw = _mm_add_ps(_mm_mul_ps(x, pScale), pCenter);
724 __m128i pxLocFloor = _mm_cvttps_epi32(pLocRaw);
725 __m128 pLocFloor = _mm_cvtepi32_ps(pxLocFloor);
726 __m128 pWeight = _mm_sub_ps(pLocRaw, pLocFloor);
728 _mm_storeu_si128(
reinterpret_cast<__m128i*
>(idx), pxLocFloor);
729 __m128 pCurrent = _mm_setr_ps(lookup[idx[0]], lookup[idx[1]], lookup[idx[2]], lookup[idx[3]]);
730 __m128 pNext = _mm_setr_ps(lookup[idx[0] + 1], lookup[idx[1] + 1], lookup[idx[2] + 1], lookup[idx[3] + 1]);
731 return _mm_add_ps(pCurrent, _mm_mul_ps(pWeight, _mm_sub_ps(pNext, pCurrent)));
734 Rpp32f scale = 1, center = 1;
735 Rpp32s lobes = 0, coeffs = 0;
737 std::vector<Rpp32f> lookup;
738 __m128 pCenter, pScale;
839 Rpp32f *scratchBufferHost;
926 clmemSize maxSrcSize;
927 clmemSize maxDstSize;
929 clmemRpp32f floatArr[10];
930 clmemRpp64f doubleArr[10];
931 clmemRpp32u uintArr[10];
932 clmemRpp32s intArr[10];
933 clmemRpp8u ucharArr[10];
934 clmemRpp8s charArr[10];
935 cl_mem srcBatchIndex;
936 cl_mem dstBatchIndex;
950 #elif defined(HIP_COMPILE)
RppiPad
RPPI Image pad type enum.
Definition: rppdefs.h:299
RppiFormat
RPPI Image format type enum.
Definition: rppdefs.h:308
unsigned int Rpp32u
32 bit unsigned int
Definition: rppdefs.h:80
RppConvertBitDepthMode
RPP BitDepth Conversion type enum.
Definition: rppdefs.h:195
int Rpp32s
32 bit signed int
Definition: rppdefs.h:82
signed char Rpp8s
8 bit signed char
Definition: rppdefs.h:74
RppiChnFormat
RPPI Image channel format type enum.
Definition: rppdefs.h:267
RpptMelScaleFormula
RPPT Mel Scale Formula.
Definition: rppdefs.h:444
unsigned short Rpp16u
16 bit unsigned short
Definition: rppdefs.h:76
unsigned char Rpp8u
8 bit unsigned char
Definition: rppdefs.h:72
void * RppPtr_t
void pointer
Definition: rppdefs.h:92
RpptLayout
RPPT Tensor layout type enum.
Definition: rppdefs.h:380
RppiBlur
RPPI Image blur type enum.
Definition: rppdefs.h:286
RppiAxis
RPPI Image axis type enum.
Definition: rppdefs.h:276
RpptRoiType
RPPT Tensor 2D ROI type enum.
Definition: rppdefs.h:394
RpptDataType
RPPT Tensor datatype enum.
Definition: rppdefs.h:369
struct RpptResamplingWindow RpptResamplingWindow
RPPT Tensor RpptResamplingWindow type struct.
double Rpp64f
64 bit double
Definition: rppdefs.h:90
RpptAudioBorderType
RPPT Audio Border Type.
Definition: rppdefs.h:434
size_t RppSize_t
size_t
Definition: rppdefs.h:94
struct GenericFilter GenericFilter
RPPT Tensor GenericFilter type struct.
RppStatus
RPP RppStatus type enums.
Definition: rppdefs.h:100
RpptRoi3DType
RPPT Tensor 3D ROI type enum.
Definition: rppdefs.h:403
RppOp
RPP Operations type enum.
Definition: rppdefs.h:171
unsigned long long Rpp64u
64 bit unsigned long long
Definition: rppdefs.h:84
#define RPPT_MAX_DIMS
RPP maximum dimensions in tensor.
Definition: rppdefs.h:54
RpptInterpolationType
RPPT Tensor interpolation type enum.
Definition: rppdefs.h:421
RpptSubpixelLayout
RPPT Tensor subpixel layout type enum.
Definition: rppdefs.h:412
short Rpp16s
16 bit signed short
Definition: rppdefs.h:78
RppiColorConvertMode
RPPI Image color convert mode type enum.
Definition: rppdefs.h:248
float Rpp32f
32 bit float
Definition: rppdefs.h:88
RppiFuzzyLevel
RPPI Image fuzzy level type enum.
Definition: rppdefs.h:257
long long Rpp64s
64 bit long long
Definition: rppdefs.h:86
rppStatus_t
RPP rppStatus_t type enums.
Definition: rppdefs.h:155
@ RPP_ERROR_NOT_IMPLEMENTED
Function variant requested is not implemented / unsupported.
Definition: rppdefs.h:114
@ RPP_ERROR_INVALID_DST_DATATYPE
Invalid dst tensor datatype. (Needs to adhere to function specification.)
Definition: rppdefs.h:126
@ RPP_ERROR_INVALID_OUTPUT_TILE_LENGTH
Invalid output tile length (Needs to adhere to function specification.)
Definition: rppdefs.h:142
@ RPP_ERROR_OUT_OF_BOUND_SRC_ROI
Out of bound source ROI.
Definition: rppdefs.h:136
@ RPP_ERROR_OUT_OF_BOUND_SHARED_MEMORY_SIZE
Shared memory size needed is beyond the bounds (Needs to adhere to function specification.)
Definition: rppdefs.h:144
@ RPP_ERROR_ZERO_DIVISION
Arguments provided will result in zero division error.
Definition: rppdefs.h:110
@ RPP_ERROR_INVALID_ARGUMENTS
One or more arguments invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:106
@ RPP_ERROR_LAYOUT_MISMATCH
Src and dst layout mismatch.
Definition: rppdefs.h:138
@ RPP_ERROR_INVALID_CHANNELS
Number of channels is invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:140
@ RPP_ERROR_INVALID_SRC_CHANNELS
Invalid src tensor number of channels. (Needs to adhere to function specification.)
Definition: rppdefs.h:116
@ RPP_ERROR_INVALID_SRC_OR_DST_DATATYPE
Invalid src/dst tensor datatype. (Needs to adhere to function specification.)
Definition: rppdefs.h:128
@ RPP_ERROR_INVALID_PARAMETER_DATATYPE
Invalid datatype.
Definition: rppdefs.h:132
@ RPP_ERROR_NOT_ENOUGH_MEMORY
Not enough memory to write outputs, as per dim-lengths and strides set in descriptor.
Definition: rppdefs.h:134
@ RPP_ERROR
Unspecified error.
Definition: rppdefs.h:104
@ RPP_ERROR_INVALID_SRC_DIMS
Number of src dims is invalid. (Needs to adhere to function specification.)
Definition: rppdefs.h:148
@ RPP_ERROR_OUT_OF_BOUND_SCRATCH_MEMORY_SIZE
Scratch memory size needed is beyond the bounds (Needs to adhere to function specification.)
Definition: rppdefs.h:146
@ RPP_ERROR_INVALID_DST_LAYOUT
Invalid dst tensor layout. (Needs to adhere to function specification.)
Definition: rppdefs.h:122
@ RPP_ERROR_INVALID_DST_CHANNELS
Invalid dst tensor number of channels. (Needs to adhere to function specification.)
Definition: rppdefs.h:118
@ RPP_ERROR_INVALID_SRC_DATATYPE
Invalid src tensor datatype. (Needs to adhere to function specification.)
Definition: rppdefs.h:124
@ RPP_ERROR_INVALID_SRC_LAYOUT
Invalid src tensor layout. (Needs to adhere to function specification.)
Definition: rppdefs.h:120
@ RPP_ERROR_HIGH_SRC_DIMENSION
Src tensor / src ROI dimension too high. (Needs to adhere to function specification.)
Definition: rppdefs.h:112
@ RPP_SUCCESS
No error.
Definition: rppdefs.h:102
@ RPP_ERROR_INSUFFICIENT_DST_BUFFER_LENGTH
Insufficient dst buffer length provided. (Needs to adhere to function specification.)
Definition: rppdefs.h:130
@ RPP_ERROR_LOW_OFFSET
Low tensor offsetInBytes provided for src/dst tensor.
Definition: rppdefs.h:108
RPPT Tensor GenericFilter type struct.
Definition: rppdefs.h:645
RPP initialize handle.
Definition: rppdefs.h:1086
RPP 24 float vector.
Definition: rppdefs.h:238
RPP 6 float vector.
Definition: rppdefs.h:222
RPP 24 signed int vector.
Definition: rppdefs.h:230
RPP layout params.
Definition: rppdefs.h:213
RPP polar point.
Definition: rppdefs.h:204
RPPI Image 3D point type struct.
Definition: rppdefs.h:335
RPPI Image 2D cartesian point type struct.
Definition: rppdefs.h:326
RPPI Image 2D ROI (XYWH format) type struct.
Definition: rppdefs.h:356
RPPI Image 2D Rectangle (XYWH format) type struct.
Definition: rppdefs.h:345
RPPI Image size(Width/Height dimensions) type struct.
Definition: rppdefs.h:317
RPPT Tensor 2D bilinear neighborhood 32-bit signed int 8-length-vectors type struct.
Definition: rppdefs.h:623
RPPT Tensor 2D bilinear neighborhood 32-bit float 8-length-vectors type struct.
Definition: rppdefs.h:634
RPPT Tensor Channel Offsets struct.
Definition: rppdefs.h:462
RPPT Tensor descriptor type struct.
Definition: rppdefs.h:532
RPPT Tensor 32-bit float RGB type struct.
Definition: rppdefs.h:568
RPPT Tensor 2D 32-bit float vector type struct.
Definition: rppdefs.h:587
RPPT Tensor Generic descriptor type struct.
Definition: rppdefs.h:545
RPPT Tensor 2D image patch dimensions type struct.
Definition: rppdefs.h:596
RPPT Tensor 8-bit uchar RGB type struct.
Definition: rppdefs.h:558
RPPT Tensor RpptResamplingWindow type struct.
Definition: rppdefs.h:702
RPPT Tensor 3D ROI LTFRBB struct.
Definition: rppdefs.h:472
RPPT Tensor 2D ROI LTRB struct.
Definition: rppdefs.h:453
RPPT Tensor 2D ROI XYWH struct.
Definition: rppdefs.h:481
RPPT Tensor 3D ROI XYZWHD struct.
Definition: rppdefs.h:491
RPPT Tensor strides type struct.
Definition: rppdefs.h:521
RPPT Tensor 2D 32-bit uint vector type struct.
Definition: rppdefs.h:578
RPPT Tensor random number generator state (xorwow box muller state) type struct.
Definition: rppdefs.h:613
RPPT Tensor random number generator state (xorwow state) type struct.
Definition: rppdefs.h:605
RPP HIP 2D ROI memory.
Definition: rppdefs.h:1023
RPP HIP 32-bit float memory.
Definition: rppdefs.h:958
RPP HIP 32-bit signed int memory.
Definition: rppdefs.h:982
RPP HIP 32-bit unsigned int memory.
Definition: rppdefs.h:974
RPP HIP 64-bit double memory.
Definition: rppdefs.h:966
RPP HIP 8-bit signed char memory.
Definition: rppdefs.h:998
RPP HIP 8-bit unsigned char memory.
Definition: rppdefs.h:990
RPP HIP RGB memory.
Definition: rppdefs.h:1006
RPP HIP 2D dimensions memory.
Definition: rppdefs.h:1014
RPP HOST memory type struct.
Definition: rppdefs.h:822
RPP OCL memory management type struct.
Definition: rppdefs.h:1034
RPP HIP-HOST memory management.
Definition: rppdefs.h:1065
RPP HOST 2D ROI memory.
Definition: rppdefs.h:812
RPP HOST 32-bit float memory.
Definition: rppdefs.h:747
RPP HOST 32-bit signed int memory.
Definition: rppdefs.h:771
RPP HOST 32-bit unsigned int memory.
Definition: rppdefs.h:763
RPP HOST 64-bit double memory.
Definition: rppdefs.h:755
RPP HOST 8-bit signed char memory.
Definition: rppdefs.h:787
RPP HOST 8-bit unsigned char memory.
Definition: rppdefs.h:779
RPP HOST RGB memory.
Definition: rppdefs.h:795
RPP HOST 2D dimensions memory.
Definition: rppdefs.h:803
RPPT Tensor 3D ROI union.
Definition: rppdefs.h:511
RPPT Tensor 2D ROI union.
Definition: rppdefs.h:501