/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-miopen/checkouts/develop/projects/miopen/include/miopen/miopen.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-miopen/checkouts/develop/projects/miopen/include/miopen/miopen.h Source File

MIOpen: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-miopen/checkouts/develop/projects/miopen/include/miopen/miopen.h Source File
miopen.h
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #ifndef MIOPEN_GUARD_MIOPEN_H_
5 #define MIOPEN_GUARD_MIOPEN_H_
6 
7 #ifdef __clang__
8 #pragma clang diagnostic push
9 #pragma clang diagnostic ignored "-Wextern-c-compat"
10 #endif
11 
12 #include <stddef.h>
13 #include <stdbool.h>
14 #include <miopen/config.h>
15 #include <miopen/export.h>
16 
17 #if MIOPEN_BACKEND_OPENCL
18 #define CL_TARGET_OPENCL_VERSION 120
19 #if defined(__APPLE__) || defined(__MACOSX)
20 #include <OpenCL/cl.h>
21 #else
22 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
23 #include <CL/cl.h>
24 #endif
25 
26 #elif MIOPEN_BACKEND_HIP
27 #include <hip/hip_runtime_api.h>
28 #endif
29 
30 /*
31  * @defgroup convolutions
32  * @defgroup pooling
33  * @defgroup handle
34  * @defgroup layernorm
35  * @defgroup LRN
36  * @defgroup batchnorm
37  * @defgroup activation
38  * @defgroup tensor
39  * @defgroup softmax
40  * @defgroup RNN
41  * @defgroup fusion
42  * @defgroup LossFunction
43  * @defgroup TensorReduce
44  * @defgroup find2
45  * @defgroup ReduceExtreme
46  * @defgroup groupnorm
47  * @defgroup cat
48  * @defgroup SGD
49  * @defgroup getitem
50  * @defgroup ReduceCalculation
51  * @defgroup RotaryPositionalEmbeddings
52  * @defgroup ReLU
53  *
54  */
55 
/*!
 * Declares an opaque object type. The expansion is an empty `struct name`
 * followed by a typedef `name_t` (built with `name##_t`) that is a POINTER to
 * that struct, so every `*_t` handle in this header is a pointer type.
 *
 * The expansion already ends in ';'. Call sites in this header are written as
 * `MIOPEN_DECLARE_OBJECT(x);`, which therefore leaves one extra empty
 * declaration after the typedef.
 *
 * NOTE(review): a struct with no members is a compiler extension in C and has
 * a different size in C and C++; presumably this is why -Wextern-c-compat is
 * silenced at the top of the file -- confirm.
 */
57 #define MIOPEN_DECLARE_OBJECT(name) \
58  struct name \
59  { \
60  }; \
61  typedef struct name* name##_t;
62 
63 #ifdef __cplusplus
64 extern "C" {
65 #endif
66 
/*!
 * Backend-specific queue type used by the handle functions:
 * cl_command_queue when MIOPEN_BACKEND_OPENCL is true, hipStream_t when
 * MIOPEN_BACKEND_HIP is true. If neither macro is true the typedef is not
 * declared at all.
 */
67 #if MIOPEN_BACKEND_OPENCL
68 typedef cl_command_queue miopenAcceleratorQueue_t;
69 #elif MIOPEN_BACKEND_HIP
70 typedef hipStream_t miopenAcceleratorQueue_t;
71 #endif
72 
/*! Declares `struct miopenHandle` and the pointer typedef `miopenHandle_t` via MIOPEN_DECLARE_OBJECT. */
76 MIOPEN_DECLARE_OBJECT(miopenHandle);
77 
86 typedef enum
87 {
100 
101 typedef enum
102 {
103  // TODO:(LYM) temporarily use Pedantic as default until TF32 is fully supported
106  1,
108 
109 #ifdef MIOPEN_BETA_API
110 typedef enum
111 {
115 #endif
116 
/*!
 * Maps a miopenStatus_t value to a `const char*`.
 * Presumably the string is a human-readable description owned by the library
 * (the const return type means callers must not write through it) -- confirm
 * lifetime against the implementation.
 */
124 MIOPEN_EXPORT const char* miopenGetErrorString(miopenStatus_t error);
125 
/*!
 * Pointer-to-function type: takes an opaque `context` pointer and a byte
 * count, returns `void*`. Presumably the returned pointer is the allocated
 * memory -- confirm failure convention (e.g. NULL) against the implementation.
 */
134 typedef void* (*miopenAllocatorFunction)(void* context, size_t sizeBytes);
135 
/*!
 * Pointer-to-function type: takes an opaque `context` pointer and a `memory`
 * pointer, returns nothing. Presumably `memory` is a pointer previously
 * produced by the matching miopenAllocatorFunction -- confirm.
 */
144 typedef void (*miopenDeallocatorFunction)(void* context, void* memory);
145 
/*!
 * Reports a three-part version through the size_t out-pointers `major`,
 * `minor` and `patch`. Returns a miopenStatus_t.
 */
159 MIOPEN_EXPORT miopenStatus_t miopenGetVersion(size_t* major, size_t* minor, size_t* patch);
160 
/*!
 * Takes the address of a miopenHandle_t; presumably creates a handle and
 * stores it in `*handle` -- confirm. Returns a miopenStatus_t.
 */
169 MIOPEN_EXPORT miopenStatus_t miopenCreate(miopenHandle_t* handle);
170 
/*!
 * Same out-parameter as miopenCreate, plus a caller-supplied backend queue
 * (`miopenAcceleratorQueue_t`, i.e. cl_command_queue or hipStream_t depending
 * on the backend macro).
 */
182 MIOPEN_EXPORT miopenStatus_t miopenCreateWithStream(miopenHandle_t* handle,
183  miopenAcceleratorQueue_t stream);
184 
/*!
 * Takes a handle by value; presumably releases it -- confirm whether the
 * handle may be used afterwards.
 */
191 MIOPEN_EXPORT miopenStatus_t miopenDestroy(miopenHandle_t handle);
192 
/*! Passes a backend queue (`streamID`) to an existing handle. */
200 MIOPEN_EXPORT miopenStatus_t miopenSetStream(miopenHandle_t handle,
201  miopenAcceleratorQueue_t streamID);
202 
/*! Reads back a backend queue for `handle` through the out-pointer `streamID`. */
210 MIOPEN_EXPORT miopenStatus_t miopenGetStream(miopenHandle_t handle,
211  miopenAcceleratorQueue_t* streamID);
212 
/*!
 * Supplies an allocator/deallocator callback pair and an opaque context
 * pointer for `handle`.
 * Presumably `allocatorContext` is what the library passes as the `context`
 * argument of both callbacks -- confirm.
 */
229 MIOPEN_EXPORT miopenStatus_t miopenSetAllocator(miopenHandle_t handle,
230  miopenAllocatorFunction allocator,
231  miopenDeallocatorFunction deallocator,
232  void* allocatorContext);
233 
/*!
 * Writes a float through the out-pointer `time`.
 * NOTE(review): units and which kernel(s) are measured are not visible here --
 * confirm before documenting for users.
 */
245 MIOPEN_EXPORT miopenStatus_t miopenGetKernelTime(miopenHandle_t handle, float* time);
246 
/*! Takes a boolean switch for `handle`; presumably toggles kernel timing collection -- confirm. */
254 MIOPEN_EXPORT miopenStatus_t miopenEnableProfiling(miopenHandle_t handle, bool enable);
256 // CLOSEOUT HANDLE DOXYGEN GROUP
257 
/*!
 * Opaque descriptor objects. Each line below expands, via
 * MIOPEN_DECLARE_OBJECT, to an empty `struct <Name>` plus a pointer typedef
 * `<Name>_t`; the `*_t` names are the types used in the function signatures
 * of this header.
 */
265 MIOPEN_DECLARE_OBJECT(miopenFusionOpDescriptor);
266 
274 MIOPEN_DECLARE_OBJECT(miopenTensorDescriptor);
275 
282 MIOPEN_DECLARE_OBJECT(miopenSeqTensorDescriptor);
283 
291 MIOPEN_DECLARE_OBJECT(miopenConvolutionDescriptor);
292 
300 MIOPEN_DECLARE_OBJECT(miopenPoolingDescriptor);
301 
309 MIOPEN_DECLARE_OBJECT(miopenLRNDescriptor);
310 
317 MIOPEN_DECLARE_OBJECT(miopenActivationDescriptor);
318 
322 MIOPEN_DECLARE_OBJECT(miopenRNNDescriptor);
323 
327 MIOPEN_DECLARE_OBJECT(miopenCTCLossDescriptor);
328 
332 MIOPEN_DECLARE_OBJECT(miopenDropoutDescriptor);
333 
337 MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor);
338 
342 MIOPEN_DECLARE_OBJECT(miopenMhaDescriptor);
343 
347 MIOPEN_DECLARE_OBJECT(miopenSoftmaxDescriptor);
348 
353 typedef enum
354 {
359  // miopenInt8x4 = 4, /*!< Pack of 4x Int8 in NCHW_VECT_C format (Support discontinued) */
360  miopenBFloat16 = 5,
367 
373 typedef enum
374 {
385 
390 typedef enum
391 {
397 
402 typedef enum
403 {
409 
414 typedef enum
415 {
421 
426 typedef enum
427 {
432 
437 typedef enum
438 {
443 
450 typedef enum
451 {
455 
460 typedef enum
461 {
465 #ifdef MIOPEN_BETA_API
470 typedef enum
471 {
474  1,
476  2,
480  4,
484 #endif
489 typedef enum
490 {
494 
499 typedef enum
500 {
509  7,
511  8,
513  9,
517 
522 typedef enum
523 {
528 
533 typedef enum
534 {
537  1,
539 
547 #define MIOPEN_API_VERSION_REDUCE_TENSOR 1
548 
553 typedef enum
554 {
557  1,
559  2,
561  3,
563  4,
565  5,
567  6,
570  // MIOPEN_REDUCE_TENSOR_MUL_NO_ZEROS =
571  // 8, /*!< the operation is same as MUL, but does not have the zero values considered */
573 
578 typedef enum
579 {
583 
588 typedef enum
589 {
593 
598 typedef enum
599 {
605 
610 typedef enum
611 {
613  0,
617  1,
619 #ifdef MIOPEN_BETA_API
621  2,
625 #else
626 // miopenReserved1 = 2,
627 #endif
628  // TODO:(LYM) temporarily use Pedantic as default until TF32 is fully supported
630  3,
632 
639 typedef enum
640 {
642  1,
644  2,
648  3,
651  // miopenConvolutionFindModeReserved_4 = 4, /*!< Reserved - do not use */
653  5,
662 
/*!
 * Takes the address of a miopenTensorDescriptor_t; presumably creates a
 * descriptor and stores it in `*tensorDesc` -- confirm. A matching
 * miopenDestroyTensorDescriptor is declared later in this header.
 */
674 MIOPEN_EXPORT miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t* tensorDesc);
675 
689  miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int n, int c, int h, int w);
690 
/*!
 * Configures `tensorDesc` from a data type, a layout enum and a read-only int
 * array `lens`.
 * Presumably `num_lens` is the element count of `lens` -- confirm.
 */
701 MIOPEN_EXPORT miopenStatus_t
702 miopenSetNdTensorDescriptorWithLayout(miopenTensorDescriptor_t tensorDesc,
703  miopenDataType_t dataType,
704  miopenTensorLayout_t tensorLayout,
705  const int* lens,
706  int num_lens);
/*!
 * Configures `tensorDesc` from a data type, four extents (n, c, h, w) and
 * four explicit strides, all passed by value as int.
 * NOTE(review): stride units (elements vs bytes) are not visible here -- confirm.
 */
726 MIOPEN_EXPORT miopenStatus_t miopenSet4dTensorDescriptorEx(miopenTensorDescriptor_t tensorDesc,
727  miopenDataType_t dataType,
728  int n,
729  int c,
730  int h,
731  int w,
732  int nStride,
733  int cStride,
734  int hStride,
735  int wStride);
736 
/*!
 * Read-back counterpart of miopenSet4dTensorDescriptorEx: the data type, the
 * four extents and the four strides are all returned through out-pointers.
 * NOTE(review): whether NULL is accepted for unwanted outputs is not visible
 * here -- confirm.
 */
753 MIOPEN_EXPORT miopenStatus_t miopenGet4dTensorDescriptor(miopenTensorDescriptor_t tensorDesc,
754  miopenDataType_t* dataType,
755  int* n,
756  int* c,
757  int* h,
758  int* w,
759  int* nStride,
760  int* cStride,
761  int* hStride,
762  int* wStride);
763 
/*!
 * Configures `tensorDesc` from a data type and two read-only int arrays.
 * Presumably `dimsA` and `stridesA` each hold `nbDims` elements -- confirm.
 */
774 MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t tensorDesc,
775  miopenDataType_t dataType,
776  int nbDims,
777  const int* dimsA,
778  const int* stridesA);
779 
780 #ifdef MIOPEN_BETA_API
/*!
 * Same parameter list as miopenSetTensorDescriptor except that `dimsA` and
 * `stridesA` are `const size_t*` instead of `const int*`; `nbDims` is still
 * an int. Only declared when MIOPEN_BETA_API is defined.
 */
783 MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t tensorDesc,
784  miopenDataType_t dataType,
785  int nbDims,
786  const size_t* dimsA,
787  const size_t* stridesA);
788 #endif
789 
790 #ifdef MIOPEN_BETA_API
/*!
 * Attaches a second miopenDataType_t (`cast_type`) to `tensorDesc`.
 * Only declared when MIOPEN_BETA_API is defined.
 * NOTE(review): how the cast type interacts with the descriptor's own data
 * type is not visible here -- confirm.
 */
800 MIOPEN_EXPORT miopenStatus_t miopenSetTensorCastType(miopenTensorDescriptor_t tensorDesc,
801  miopenDataType_t cast_type);
802 #endif
803 
/*!
 * Writes an int through `size`.
 * Presumably this is the number of dimensions, i.e. the array length needed
 * for miopenGetTensorDescriptor -- confirm.
 */
812 MIOPEN_EXPORT miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t tensorDesc,
813  int* size);
814 
/*!
 * Returns the data type through `dataType` and fills the caller-provided int
 * arrays `dimsA` and `stridesA`.
 * NOTE(review): no capacity argument is passed, so the caller must size the
 * arrays beforehand -- confirm the required length.
 */
823 MIOPEN_EXPORT miopenStatus_t miopenGetTensorDescriptor(miopenTensorDescriptor_t tensorDesc,
824  miopenDataType_t* dataType,
825  int* dimsA,
826  int* stridesA);
827 
/*! Takes a tensor descriptor by value; presumably releases it -- confirm. */
833 MIOPEN_EXPORT miopenStatus_t miopenDestroyTensorDescriptor(miopenTensorDescriptor_t tensorDesc);
834 
/*!
 * Takes the address of a miopenSeqTensorDescriptor_t; presumably creates a
 * sequence-tensor descriptor and stores it in `*tensorDesc` -- confirm.
 */
841 MIOPEN_EXPORT miopenStatus_t
842 miopenCreateSeqTensorDescriptor(miopenSeqTensorDescriptor_t* tensorDesc);
843 
/*! Takes a sequence-tensor descriptor by value; presumably releases it -- confirm. */
849 MIOPEN_EXPORT miopenStatus_t
850 miopenDestroySeqTensorDescriptor(miopenSeqTensorDescriptor_t tensorDesc);
851 
/*!
 * Element-wise tensor operation selected by `tensorOp`.
 * A and B are read-only inputs with their own descriptors; C is the only
 * writable buffer. `alpha1`, `alpha2` and `beta` are read-only untyped
 * pointers; presumably they point to scalars combined as
 * C = op(alpha1*A, alpha2*B) + beta*C -- confirm formula and scalar type.
 *
 * Note: `const miopenTensorDescriptor_t` makes the handle value itself const
 * (the typedef is a pointer), not the descriptor it refers to.
 */
871 MIOPEN_EXPORT miopenStatus_t miopenOpTensor(miopenHandle_t handle,
872  miopenTensorOp_t tensorOp,
873  const void* alpha1,
874  const miopenTensorDescriptor_t aDesc,
875  const void* A,
876  const void* alpha2,
877  const miopenTensorDescriptor_t bDesc,
878  const void* B,
879  const void* beta,
880  const miopenTensorDescriptor_t cDesc,
881  void* C);
882 
/*!
 * Writes to the buffer `y` described by `yDesc`, using the read-only value
 * behind `alpha`. Presumably fills every element with that value -- confirm.
 */
893 MIOPEN_EXPORT miopenStatus_t miopenSetTensor(miopenHandle_t handle,
894  const miopenTensorDescriptor_t yDesc,
895  void* y,
896  const void* alpha);
897 
/*!
 * Same signature as miopenSetTensor. Presumably multiplies every element of
 * `y` by the value behind `alpha` in place -- confirm.
 */
908 MIOPEN_EXPORT miopenStatus_t miopenScaleTensor(miopenHandle_t handle,
909  const miopenTensorDescriptor_t yDesc,
910  void* y,
911  const void* alpha);
912 
/*! Writes a size_t through `numBytes` for the given descriptor; no handle is required. */
919 MIOPEN_EXPORT miopenStatus_t miopenGetTensorNumBytes(miopenTensorDescriptor_t tensorDesc,
920  size_t* numBytes);
921 
/*!
 * Reads `x` (described by `xDesc`) and writes `y` (described by `yDesc`).
 * `alpha` and `beta` are read-only untyped pointers; presumably blend scalars
 * (y = alpha*x + beta*y) -- confirm.
 */
939 MIOPEN_EXPORT miopenStatus_t miopenTransformTensor(miopenHandle_t handle,
940  const void* alpha,
941  const miopenTensorDescriptor_t xDesc,
942  const void* x,
943  const void* beta,
944  const miopenTensorDescriptor_t yDesc,
945  void* y);
946 
948 // CLOSEOUT TENSOR DOXYGEN GROUP
949 
/*!
 * Takes the address of a miopenConvolutionDescriptor_t; presumably creates a
 * descriptor and stores it in `*convDesc` -- confirm. A matching
 * miopenDestroyConvolutionDescriptor is declared later in this header.
 */
960 MIOPEN_EXPORT miopenStatus_t
961 miopenCreateConvolutionDescriptor(miopenConvolutionDescriptor_t* convDesc);
962 
978 MIOPEN_EXPORT miopenStatus_t miopenInitConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc,
980  int pad_h,
981  int pad_w,
982  int stride_h,
983  int stride_w,
984  int dilation_h,
985  int dilation_w);
986 
/*!
 * Initializes `convDesc` from a spatial dimension count, three read-only int
 * arrays (padding, stride, dilation) and a convolution mode.
 * Presumably each array holds `spatialDim` elements -- confirm.
 */
997 MIOPEN_EXPORT miopenStatus_t
998 miopenInitConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc,
999  int spatialDim,
1000  const int* padA,
1001  const int* strideA,
1002  const int* dilationA,
1003  miopenConvolutionMode_t c_mode);
1004 
/*! Writes an int through `spatialDim` for the given convolution descriptor. */
1011 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionSpatialDim(miopenConvolutionDescriptor_t convDesc,
1012  int* spatialDim);
1013 
/*!
 * Read-back for the 2-D case: mode, padding, stride and dilation are each
 * returned through out-pointers (separate _h and _w values).
 */
1029 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc,
1030  miopenConvolutionMode_t* c_mode,
1031  int* pad_h,
1032  int* pad_w,
1033  int* stride_h,
1034  int* stride_w,
1035  int* dilation_h,
1036  int* dilation_w);
1037 
/*!
 * N-d read-back: fills caller-provided int arrays `padA`, `strideA` and
 * `dilationA`, and returns the dimension count and mode through out-pointers.
 * Presumably `requestedSpatialDim` bounds how many entries are written to
 * each array -- confirm.
 */
1049 MIOPEN_EXPORT miopenStatus_t
1050 miopenGetConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc,
1051  int requestedSpatialDim,
1052  int* spatialDim,
1053  int* padA,
1054  int* strideA,
1055  int* dilationA,
1056  miopenConvolutionMode_t* c_mode);
1057 
/*! Writes an int through `groupCount` for the given convolution descriptor. */
1064 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc,
1065  int* groupCount);
1066 
/*!
 * Passes an int `groupCount` to the convolution descriptor.
 * NOTE(review): valid range and ordering relative to descriptor
 * initialization are not visible here -- confirm.
 */
1080 MIOPEN_EXPORT miopenStatus_t miopenSetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc,
1081  int groupCount);
1082 
/*!
 * Passes two ints (`adj_h`, `adj_w`) to the convolution descriptor.
 * Judging by the name these are output-padding adjustments for transpose
 * convolution -- confirm.
 */
1095 MIOPEN_EXPORT miopenStatus_t
1096 miopenSetTransposeConvOutputPadding(miopenConvolutionDescriptor_t convDesc, int adj_h, int adj_w);
1097 
1111  miopenConvolutionDescriptor_t convDesc, int spatialDim, const int* adjA);
1112 
/*!
 * Takes a convolution descriptor plus input and filter tensor descriptors and
 * returns four ints through the out-pointers n, c, h, w. No handle and no
 * data buffers are involved.
 */
1130 MIOPEN_EXPORT miopenStatus_t
1131 miopenGetConvolutionForwardOutputDim(miopenConvolutionDescriptor_t convDesc,
1132  const miopenTensorDescriptor_t inputTensorDesc,
1133  const miopenTensorDescriptor_t filterDesc,
1134  int* n,
1135  int* c,
1136  int* h,
1137  int* w);
1138 
/*!
 * N-d variant: returns a count through `nDim` and fills the caller-provided
 * int array `outputTensorDimA`.
 * NOTE(review): no capacity argument is passed for `outputTensorDimA`, so the
 * caller must size it beforehand -- confirm the required length.
 */
1152 MIOPEN_EXPORT miopenStatus_t
1153 miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t convDesc,
1154  const miopenTensorDescriptor_t inputTensorDesc,
1155  const miopenTensorDescriptor_t filterDesc,
1156  int* nDim,
1157  int* outputTensorDimA);
1158 
/*! Takes a convolution descriptor by value; presumably releases it -- confirm. */
1164 MIOPEN_EXPORT miopenStatus_t
1165 miopenDestroyConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc);
1166 
/*! Sets the int value of the attribute selected by `attr` on `convDesc`. */
1173 MIOPEN_EXPORT miopenStatus_t miopenSetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc,
1174  const miopenConvolutionAttrib_t attr,
1175  int value);
1176 
/*! Reads the attribute selected by `attr` from `convDesc` into `*value`. */
1183 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc,
1184  const miopenConvolutionAttrib_t attr,
1185  int* value);
1186 
/*! Passes a miopenConvolutionFindMode_t value to the convolution descriptor. */
1200 MIOPEN_EXPORT miopenStatus_t miopenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc,
1201  miopenConvolutionFindMode_t findMode);
1202 
1210  const miopenConvolutionDescriptor_t convDesc, miopenConvolutionFindMode_t* findMode);
1211 
1216 typedef enum
1217 {
1224 
1228 typedef enum
1229 {
1235 
1239 typedef enum
1240 {
1246  4,
1249 
1253 typedef enum
1254 {
1261 
1268 typedef struct
1269 {
1270  union
1271  {
1277  };
1278 
1279  float time;
1280  size_t memory;
1283 
1292 typedef struct
1293 {
1294  float time;
1299  uint64_t solution_id;
1303 
1319 MIOPEN_EXPORT miopenStatus_t
1321  const miopenTensorDescriptor_t wDesc,
1322  const miopenTensorDescriptor_t xDesc,
1323  const miopenConvolutionDescriptor_t convDesc,
1324  const miopenTensorDescriptor_t yDesc,
1325  size_t* solutionCount);
1326 
1350 MIOPEN_EXPORT miopenStatus_t
1352  const miopenTensorDescriptor_t wDesc,
1353  const miopenTensorDescriptor_t xDesc,
1354  const miopenConvolutionDescriptor_t convDesc,
1355  const miopenTensorDescriptor_t yDesc,
1356  const size_t maxSolutionCount,
1357  size_t* solutionCount,
1358  miopenConvSolution_t* solutions);
1359 
1377 MIOPEN_EXPORT miopenStatus_t
1379  const miopenTensorDescriptor_t wDesc,
1380  const miopenTensorDescriptor_t xDesc,
1381  const miopenConvolutionDescriptor_t convDesc,
1382  const miopenTensorDescriptor_t yDesc,
1383  const uint64_t solution_id,
1384  size_t* workSpaceSize);
1385 
1403 MIOPEN_EXPORT miopenStatus_t
1405  const miopenTensorDescriptor_t wDesc,
1406  const miopenTensorDescriptor_t xDesc,
1407  const miopenConvolutionDescriptor_t convDesc,
1408  const miopenTensorDescriptor_t yDesc,
1409  const uint64_t solution_id);
1410 
1428 MIOPEN_EXPORT miopenStatus_t
1430  const miopenTensorDescriptor_t wDesc,
1431  const void* w,
1432  const miopenTensorDescriptor_t xDesc,
1433  const void* x,
1434  const miopenConvolutionDescriptor_t convDesc,
1435  const miopenTensorDescriptor_t yDesc,
1436  void* y,
1437  void* workSpace,
1438  size_t workSpaceSize,
1439  const uint64_t solution_id);
1440 
1458 MIOPEN_EXPORT miopenStatus_t
1460  const miopenTensorDescriptor_t dyDesc,
1461  const miopenTensorDescriptor_t wDesc,
1462  const miopenConvolutionDescriptor_t convDesc,
1463  const miopenTensorDescriptor_t dxDesc,
1464  size_t* solutionCount);
1465 
1490 MIOPEN_EXPORT miopenStatus_t
1492  const miopenTensorDescriptor_t dyDesc,
1493  const miopenTensorDescriptor_t wDesc,
1494  const miopenConvolutionDescriptor_t convDesc,
1495  const miopenTensorDescriptor_t dxDesc,
1496  const size_t maxSolutionCount,
1497  size_t* solutionCount,
1498  miopenConvSolution_t* solutions);
1499 
1517 MIOPEN_EXPORT miopenStatus_t
1519  const miopenTensorDescriptor_t dyDesc,
1520  const miopenTensorDescriptor_t wDesc,
1521  const miopenConvolutionDescriptor_t convDesc,
1522  const miopenTensorDescriptor_t dxDesc,
1523  const uint64_t solution_id,
1524  size_t* workSpaceSize);
1525 
1544 MIOPEN_EXPORT miopenStatus_t
1546  const miopenTensorDescriptor_t dyDesc,
1547  const miopenTensorDescriptor_t wDesc,
1548  const miopenConvolutionDescriptor_t convDesc,
1549  const miopenTensorDescriptor_t dxDesc,
1550  const uint64_t solution_id);
1551 
1569 MIOPEN_EXPORT miopenStatus_t
1571  const miopenTensorDescriptor_t dyDesc,
1572  const void* dy,
1573  const miopenTensorDescriptor_t wDesc,
1574  const void* w,
1575  const miopenConvolutionDescriptor_t convDesc,
1576  const miopenTensorDescriptor_t dxDesc,
1577  void* dx,
1578  void* workSpace,
1579  size_t workSpaceSize,
1580  const uint64_t solution_id);
1581 
1599 MIOPEN_EXPORT miopenStatus_t
1601  const miopenTensorDescriptor_t dyDesc,
1602  const miopenTensorDescriptor_t xDesc,
1603  const miopenConvolutionDescriptor_t convDesc,
1604  const miopenTensorDescriptor_t dwDesc,
1605  size_t* solutionCount);
1606 
1630 MIOPEN_EXPORT miopenStatus_t
1632  const miopenTensorDescriptor_t dyDesc,
1633  const miopenTensorDescriptor_t xDesc,
1634  const miopenConvolutionDescriptor_t convDesc,
1635  const miopenTensorDescriptor_t dwDesc,
1636  const size_t maxSolutionCount,
1637  size_t* solutionCount,
1638  miopenConvSolution_t* solutions);
1639 
1658  miopenHandle_t handle,
1659  const miopenTensorDescriptor_t dyDesc,
1660  const miopenTensorDescriptor_t xDesc,
1661  const miopenConvolutionDescriptor_t convDesc,
1662  const miopenTensorDescriptor_t dwDesc,
1663  const uint64_t solution_id,
1664  size_t* workSpaceSize);
1665 
1683 MIOPEN_EXPORT miopenStatus_t
1685  const miopenTensorDescriptor_t dyDesc,
1686  const miopenTensorDescriptor_t xDesc,
1687  const miopenConvolutionDescriptor_t convDesc,
1688  const miopenTensorDescriptor_t dwDesc,
1689  const uint64_t solution_id);
1690 
1709 MIOPEN_EXPORT miopenStatus_t
1711  const miopenTensorDescriptor_t dyDesc,
1712  const void* dy,
1713  const miopenTensorDescriptor_t xDesc,
1714  const void* x,
1715  const miopenConvolutionDescriptor_t convDesc,
1716  const miopenTensorDescriptor_t dwDesc,
1717  void* dw,
1718  void* workSpace,
1719  size_t workSpaceSize,
1720  const uint64_t solution_id);
1721 
1748 MIOPEN_EXPORT miopenStatus_t
1750  const miopenTensorDescriptor_t wDesc,
1751  const miopenTensorDescriptor_t xDesc,
1752  const miopenConvolutionDescriptor_t convDesc,
1753  const miopenTensorDescriptor_t yDesc,
1754  size_t* workSpaceSize);
1755 
1799 MIOPEN_EXPORT miopenStatus_t
1801  const miopenTensorDescriptor_t xDesc,
1802  const void* x,
1803  const miopenTensorDescriptor_t wDesc,
1804  const void* w,
1805  const miopenConvolutionDescriptor_t convDesc,
1806  const miopenTensorDescriptor_t yDesc,
1807  void* y,
1808  const int requestAlgoCount,
1809  int* returnedAlgoCount,
1810  miopenConvAlgoPerf_t* perfResults,
1811  void* workSpace,
1812  size_t workSpaceSize,
1813  bool exhaustiveSearch);
1814 
1849 MIOPEN_EXPORT miopenStatus_t miopenConvolutionForward(miopenHandle_t handle,
1850  const void* alpha,
1851  const miopenTensorDescriptor_t xDesc,
1852  const void* x,
1853  const miopenTensorDescriptor_t wDesc,
1854  const void* w,
1855  const miopenConvolutionDescriptor_t convDesc,
1857  const void* beta,
1858  const miopenTensorDescriptor_t yDesc,
1859  void* y,
1860  void* workSpace,
1861  size_t workSpaceSize);
1862 
/*!
 * Reads the bias buffer `b` (described by `bDesc`) and writes `y` (described
 * by `yDesc`). `alpha` and `beta` are read-only untyped pointers; presumably
 * blend scalars -- confirm. No convolution descriptor or workspace is taken.
 */
1878 MIOPEN_EXPORT miopenStatus_t miopenConvolutionForwardBias(miopenHandle_t handle,
1879  const void* alpha,
1880  const miopenTensorDescriptor_t bDesc,
1881  const void* b,
1882  const void* beta,
1883  const miopenTensorDescriptor_t yDesc,
1884  void* y);
1885 
1913 MIOPEN_EXPORT miopenStatus_t
1915  const miopenTensorDescriptor_t dyDesc,
1916  const miopenTensorDescriptor_t wDesc,
1917  const miopenConvolutionDescriptor_t convDesc,
1918  const miopenTensorDescriptor_t dxDesc,
1919  size_t* workSpaceSize);
1920 
1964 MIOPEN_EXPORT miopenStatus_t
1966  const miopenTensorDescriptor_t dyDesc,
1967  const void* dy,
1968  const miopenTensorDescriptor_t wDesc,
1969  const void* w,
1970  const miopenConvolutionDescriptor_t convDesc,
1971  const miopenTensorDescriptor_t dxDesc,
1972  void* dx,
1973  const int requestAlgoCount,
1974  int* returnedAlgoCount,
1975  miopenConvAlgoPerf_t* perfResults,
1976  void* workSpace,
1977  size_t workSpaceSize,
1978  bool exhaustiveSearch);
1979 
2013 MIOPEN_EXPORT miopenStatus_t
2014 miopenConvolutionBackwardData(miopenHandle_t handle,
2015  const void* alpha,
2016  const miopenTensorDescriptor_t dyDesc,
2017  const void* dy,
2018  const miopenTensorDescriptor_t wDesc,
2019  const void* w,
2020  const miopenConvolutionDescriptor_t convDesc,
2022  const void* beta,
2023  const miopenTensorDescriptor_t dxDesc,
2024  void* dx,
2025  void* workSpace,
2026  size_t workSpaceSize);
2027 
2055 MIOPEN_EXPORT miopenStatus_t
2057  const miopenTensorDescriptor_t dyDesc,
2058  const miopenTensorDescriptor_t xDesc,
2059  const miopenConvolutionDescriptor_t convDesc,
2060  const miopenTensorDescriptor_t dwDesc,
2061  size_t* workSpaceSize);
2062 
2106 MIOPEN_EXPORT miopenStatus_t
2108  const miopenTensorDescriptor_t dyDesc,
2109  const void* dy,
2110  const miopenTensorDescriptor_t xDesc,
2111  const void* x,
2112  const miopenConvolutionDescriptor_t convDesc,
2113  const miopenTensorDescriptor_t dwDesc,
2114  void* dw,
2115  const int requestAlgoCount,
2116  int* returnedAlgoCount,
2117  miopenConvAlgoPerf_t* perfResults,
2118  void* workSpace,
2119  size_t workSpaceSize,
2120  bool exhaustiveSearch);
2121 
2155 MIOPEN_EXPORT miopenStatus_t
2156 miopenConvolutionBackwardWeights(miopenHandle_t handle,
2157  const void* alpha,
2158  const miopenTensorDescriptor_t dyDesc,
2159  const void* dy,
2160  const miopenTensorDescriptor_t xDesc,
2161  const void* x,
2162  const miopenConvolutionDescriptor_t convDesc,
2164  const void* beta,
2165  const miopenTensorDescriptor_t dwDesc,
2166  void* dw,
2167  void* workSpace,
2168  size_t workSpaceSize);
2169 
/*!
 * Reads `dy` (described by `dyDesc`) and writes `db` (described by `dbDesc`).
 * `alpha` and `beta` are read-only untyped pointers; presumably blend scalars
 * -- confirm. No convolution descriptor or workspace is taken.
 */
2189 MIOPEN_EXPORT miopenStatus_t miopenConvolutionBackwardBias(miopenHandle_t handle,
2190  const void* alpha,
2191  const miopenTensorDescriptor_t dyDesc,
2192  const void* dy,
2193  const void* beta,
2194  const miopenTensorDescriptor_t dbDesc,
2195  void* db);
2196 
2198 // CLOSEOUT CONVOLUTIONS DOXYGEN GROUP
2199 
2200 // Pooling APIs
/*!
 * Takes the address of a miopenPoolingDescriptor_t; presumably creates a
 * descriptor and stores it in `*poolDesc` -- confirm.
 */
2211 MIOPEN_EXPORT miopenStatus_t miopenCreatePoolingDescriptor(miopenPoolingDescriptor_t* poolDesc);
2212 
/*! Passes a miopenIndexType_t value to the pooling descriptor. */
2221 MIOPEN_EXPORT miopenStatus_t miopenSetPoolingIndexType(miopenPoolingDescriptor_t poolDesc,
2222  miopenIndexType_t index_type);
2223 
/*! Reads a miopenIndexType_t from the pooling descriptor into `*index_type`. */
2231 MIOPEN_EXPORT miopenStatus_t miopenGetPoolingIndexType(miopenPoolingDescriptor_t poolDesc,
2232  miopenIndexType_t* index_type);
2233 
2242  miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t workspace_index);
2243 
2251  miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t* workspace_index);
2252 
/*!
 * Configures a 2-D pooling descriptor: mode, window height/width, padding and
 * stride (separate _h and _w values), all passed by value.
 */
2267 MIOPEN_EXPORT miopenStatus_t miopenSet2dPoolingDescriptor(miopenPoolingDescriptor_t poolDesc,
2268  miopenPoolingMode_t mode,
2269  int windowHeight,
2270  int windowWidth,
2271  int pad_h,
2272  int pad_w,
2273  int stride_h,
2274  int stride_w);
2275 
/*!
 * Read-back counterpart of miopenSet2dPoolingDescriptor: every field is
 * returned through an out-pointer.
 */
2290 MIOPEN_EXPORT miopenStatus_t miopenGet2dPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc,
2291  miopenPoolingMode_t* mode,
2292  int* windowHeight,
2293  int* windowWidth,
2294  int* pad_h,
2295  int* pad_w,
2296  int* stride_h,
2297  int* stride_w);
2298 
/*!
 * Takes a pooling descriptor and a tensor descriptor and returns four ints
 * through n, c, h, w. Presumably `tensorDesc` describes the pooling input and
 * the outputs are the resulting output extents -- confirm.
 */
2313 MIOPEN_EXPORT miopenStatus_t
2314 miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc,
2315  const miopenTensorDescriptor_t tensorDesc,
2316  int* n,
2317  int* c,
2318  int* h,
2319  int* w);
2320 
/*!
 * N-d configuration: mode plus three read-only int arrays (window, padding,
 * stride). Presumably each array holds `nbDims` elements -- confirm.
 */
2336 MIOPEN_EXPORT miopenStatus_t miopenSetNdPoolingDescriptor(miopenPoolingDescriptor_t poolDesc,
2337  const miopenPoolingMode_t mode,
2338  int nbDims,
2339  const int* windowDimA,
2340  const int* padA,
2341  const int* stridesA);
2342 
/*!
 * N-d read-back: fills caller-provided int arrays and returns mode and
 * dimension count through out-pointers. Presumably `nbDimsRequested` bounds
 * how many entries are written to each array -- confirm.
 */
2359 MIOPEN_EXPORT miopenStatus_t miopenGetNdPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc,
2360  int nbDimsRequested,
2361  miopenPoolingMode_t* mode,
2362  int* nbDims,
2363  int* windowDimA,
2364  int* padA,
2365  int* stridesA);
2366 
/*!
 * N-d variant of miopenGetPoolingForwardOutputDim: fills the caller-provided
 * int array `tensorDimArr`. Presumably `dims` is that array's length --
 * confirm.
 */
2379 MIOPEN_EXPORT miopenStatus_t
2380 miopenGetPoolingNdForwardOutputDim(const miopenPoolingDescriptor_t poolDesc,
2381  const miopenTensorDescriptor_t tensorDesc,
2382  int dims,
2383  int* tensorDimArr);
2384 
/*!
 * Writes a size_t through `workSpaceSize` based only on the tensor
 * descriptor `yDesc`; no pooling descriptor is consulted by this overload.
 */
2397 MIOPEN_EXPORT miopenStatus_t miopenPoolingGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc,
2398  size_t* workSpaceSize);
2399 
/*!
 * Like miopenPoolingGetWorkSpaceSize but additionally takes the pooling
 * descriptor.
 */
2412 MIOPEN_EXPORT miopenStatus_t
2413 miopenPoolingGetWorkSpaceSizeV2(const miopenPoolingDescriptor_t poolDesc,
2414  const miopenTensorDescriptor_t yDesc,
2415  size_t* workSpaceSize);
2416 
/*!
 * Pooling forward pass: reads `x`, writes `y`. `alpha`/`beta` are read-only
 * untyped pointers; presumably blend scalars -- confirm.
 * `workSpace` is writable and comes with an explicit byte size; presumably it
 * is only needed when `do_backward` is true (state kept for the backward
 * pass) -- confirm.
 */
2437 MIOPEN_EXPORT miopenStatus_t miopenPoolingForward(miopenHandle_t handle,
2438  const miopenPoolingDescriptor_t poolDesc,
2439  const void* alpha,
2440  const miopenTensorDescriptor_t xDesc,
2441  const void* x,
2442  const void* beta,
2443  const miopenTensorDescriptor_t yDesc,
2444  void* y,
2445  bool do_backward,
2446  void* workSpace,
2447  size_t workSpaceSize);
2448 
/*!
 * Pooling backward pass: reads `y`, `dy` and `x`, writes `dx`.
 * Unlike miopenPoolingForward, `workSpace` is passed without a size argument;
 * it is declared non-const here.
 */
2469 MIOPEN_EXPORT miopenStatus_t miopenPoolingBackward(miopenHandle_t handle,
2470  const miopenPoolingDescriptor_t poolDesc,
2471  const void* alpha,
2472  const miopenTensorDescriptor_t yDesc,
2473  const void* y,
2474  const miopenTensorDescriptor_t dyDesc,
2475  const void* dy,
2476  const miopenTensorDescriptor_t xDesc,
2477  const void* x,
2478  const void* beta,
2479  const miopenTensorDescriptor_t dxDesc,
2480  void* dx,
2481  void* workSpace);
2482 
/*! Takes a pooling descriptor by value; presumably releases it -- confirm. */
2488 MIOPEN_EXPORT miopenStatus_t miopenDestroyPoolingDescriptor(miopenPoolingDescriptor_t poolDesc);
2489 
2491 // CLOSEOUT POOLING DOXYGEN GROUP
2492 
2493 // LRN APIs
/*!
 * Takes the address of a miopenLRNDescriptor_t; presumably creates a
 * descriptor and stores it in `*lrnDesc` -- confirm.
 */
2503 MIOPEN_EXPORT miopenStatus_t miopenCreateLRNDescriptor(miopenLRNDescriptor_t* lrnDesc);
2504 
/*!
 * Configures an LRN descriptor: mode, an unsigned count `lrnN` and three
 * doubles (`lrnAlpha`, `lrnBeta`, `lrnK`).
 * Note: `const miopenLRNDescriptor_t` only makes the handle value const (the
 * typedef is a pointer); it does not prevent the descriptor being modified.
 */
2518 MIOPEN_EXPORT miopenStatus_t miopenSetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc,
2519  miopenLRNMode_t mode,
2520  unsigned int lrnN,
2521  double lrnAlpha,
2522  double lrnBeta,
2523  double lrnK);
2524 
/*! Read-back counterpart of miopenSetLRNDescriptor: every field is returned through an out-pointer. */
2537 MIOPEN_EXPORT miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc,
2538  miopenLRNMode_t* mode,
2539  unsigned int* lrnN,
2540  double* lrnAlpha,
2541  double* lrnBeta,
2542  double* lrnK);
2543 
/*! Writes a size_t through `workSpaceSize` based only on the tensor descriptor `yDesc`. */
2553 MIOPEN_EXPORT miopenStatus_t miopenLRNGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc,
2554  size_t* workSpaceSize);
2555 
/*!
 * LRN forward pass: reads `x`, writes `y`. `alpha`/`beta` are read-only
 * untyped pointers; presumably blend scalars -- confirm.
 * `workSpace` is writable and has no accompanying size argument; presumably
 * it is only needed when `do_backward` is true -- confirm.
 */
2574 MIOPEN_EXPORT miopenStatus_t miopenLRNForward(miopenHandle_t handle,
2575  const miopenLRNDescriptor_t lrnDesc,
2576  const void* alpha,
2577  const miopenTensorDescriptor_t xDesc,
2578  const void* x,
2579  const void* beta,
2580  const miopenTensorDescriptor_t yDesc,
2581  void* y,
2582  bool do_backward,
2583  void* workSpace);
2584 
/*!
 * LRN backward pass: reads `y`, `dy` and `x`, writes `dx`.
 * Here `workSpace` is `const void*`, i.e. read-only for this call, in
 * contrast to the writable workspace of miopenLRNForward.
 */
2602 MIOPEN_EXPORT miopenStatus_t miopenLRNBackward(miopenHandle_t handle,
2603  const miopenLRNDescriptor_t lrnDesc,
2604  const void* alpha,
2605  const miopenTensorDescriptor_t yDesc,
2606  const void* y,
2607  const miopenTensorDescriptor_t dyDesc,
2608  const void* dy,
2609  const miopenTensorDescriptor_t xDesc,
2610  const void* x,
2611  const void* beta,
2612  const miopenTensorDescriptor_t dxDesc,
2613  void* dx,
2614  const void* workSpace);
2615 
/*! Takes an LRN descriptor by value; presumably releases it -- confirm. */
2621 MIOPEN_EXPORT miopenStatus_t miopenDestroyLRNDescriptor(miopenLRNDescriptor_t lrnDesc);
2622 
2624 // CLOSEOUT LRN DOXYGEN GROUP
2625 
2626 #ifdef MIOPEN_BETA_API
2627 // LayerNorm APIs
/*!
 * Layer-normalization forward pass (declared only when MIOPEN_BETA_API is
 * defined). Read-only inputs: `x`, `weight`, `bias`. Writable outputs: `y`,
 * `mean`, `rstd`. Each buffer has its own tensor descriptor.
 * `epsilon` is a float and `normalized_dim` an int32_t, both by value.
 *
 * NOTE(review): `normalized_dim` semantics (first normalized axis vs. number
 * of normalized axes) are not visible here -- confirm.
 * NOTE(review): int32_t needs <stdint.h>, which is not in this header's
 * visible include list; presumably it arrives via the backend runtime header
 * -- confirm.
 */
2652 MIOPEN_EXPORT miopenStatus_t miopenLayerNormForward(miopenHandle_t handle,
2653  miopenNormMode_t mode,
2654  const miopenTensorDescriptor_t xDesc,
2655  const void* x,
2656  const miopenTensorDescriptor_t weightDesc,
2657  const void* weight,
2658  const miopenTensorDescriptor_t biasDesc,
2659  const void* bias,
2660  const float epsilon,
2661  const int32_t normalized_dim,
2662  const miopenTensorDescriptor_t yDesc,
2663  void* y,
2664  const miopenTensorDescriptor_t meanDesc,
2665  void* mean,
2666  const miopenTensorDescriptor_t rstdDesc,
2667  void* rstd);
2668 
2686 MIOPEN_EXPORT miopenStatus_t
2688  miopenNormMode_t mode,
2689  const miopenTensorDescriptor_t dyDesc,
2690  const miopenTensorDescriptor_t xDesc,
2691  const miopenTensorDescriptor_t weightDesc,
2692  const miopenTensorDescriptor_t meanDesc,
2693  const miopenTensorDescriptor_t rstdDesc,
2694  const int32_t normalized_dim,
2695  const miopenTensorDescriptor_t dxDesc,
2696  const miopenTensorDescriptor_t dwDesc,
2697  const miopenTensorDescriptor_t dbDesc,
2698  size_t* sizeInBytes);
2699 
/*!
 * Layer-normalization backward pass (declared only when MIOPEN_BETA_API is
 * defined). Read-only inputs: `dy`, `x`, `weight`, `mean`, `rstd`. Writable
 * outputs: `dx`, `dw`, `db`. A writable `workspace` with an explicit byte
 * size is passed right after `mode`.
 *
 * NOTE(review): whether any of `dx`/`dw`/`db` may be NULL to skip that
 * gradient is not visible here -- confirm.
 */
2725 MIOPEN_EXPORT miopenStatus_t miopenLayerNormBackward(miopenHandle_t handle,
2726  miopenNormMode_t mode,
2727  void* workspace,
2728  size_t workspaceSizeInBytes,
2729  const miopenTensorDescriptor_t dyDesc,
2730  const void* dy,
2731  const miopenTensorDescriptor_t xDesc,
2732  const void* x,
2733  const miopenTensorDescriptor_t weightDesc,
2734  const void* weight,
2735  const miopenTensorDescriptor_t meanDesc,
2736  const void* mean,
2737  const miopenTensorDescriptor_t rstdDesc,
2738  const void* rstd,
2739  const int32_t normalized_dim,
2740  const miopenTensorDescriptor_t dxDesc,
2741  void* dx,
2742  const miopenTensorDescriptor_t dwDesc,
2743  void* dw,
2744  const miopenTensorDescriptor_t dbDesc,
2745  void* db);
2746 
2748 // CLOSEOUT LAYERNORM DOXYGEN GROUP
2749 #endif
2750 
2751 #ifdef MIOPEN_BETA_API
2752 // Cat APIs
/*!
 * Concatenation forward pass (declared only when MIOPEN_BETA_API is defined).
 * `xDescs` is a read-only array of tensor descriptors and `xs` a read-only
 * array of read-only buffer pointers; `y` is the single writable output.
 * Presumably both arrays hold `xCount` entries and `dim` is the axis along
 * which inputs are joined -- confirm.
 */
2768 MIOPEN_EXPORT miopenStatus_t miopenCatForward(miopenHandle_t handle,
2769  const int32_t xCount,
2770  const miopenTensorDescriptor_t* xDescs,
2771  const void* const* xs,
2772  const miopenTensorDescriptor_t yDesc,
2773  void* y,
2774  const int32_t dim);
2775 
2777 // CLOSEOUT CAT DOXYGEN GROUP
2778 #endif
2779 
2780 // Batch-Normalization APIs
/*!
 * Takes an existing descriptor `derivedBnDesc` (by value, not by address),
 * an input descriptor `xDesc` and a batch-norm mode. Presumably fills
 * `derivedBnDesc` with the shape required for the batch-norm
 * scale/bias/mean/variance tensors matching `xDesc` under `bn_mode` --
 * confirm. Because the descriptor is not an out-pointer, the caller must have
 * created it beforehand.
 */
2802 MIOPEN_EXPORT miopenStatus_t miopenDeriveBNTensorDescriptor(miopenTensorDescriptor_t derivedBnDesc,
2803  const miopenTensorDescriptor_t xDesc,
2804  miopenBatchNormMode_t bn_mode);
2805 
2844 MIOPEN_EXPORT miopenStatus_t
2846  miopenBatchNormMode_t bn_mode,
2847  void* alpha,
2848  void* beta,
2849  const miopenTensorDescriptor_t xDesc,
2850  const void* x,
2851  const miopenTensorDescriptor_t yDesc,
2852  void* y,
2853  const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc,
2854  void* bnScale,
2855  void* bnBias,
2856  double expAvgFactor,
2857  void* resultRunningMean,
2858  void* resultRunningVariance,
2859  double epsilon,
2860  void* resultSaveMean,
2861  void* resultSaveInvVariance);
2902 MIOPEN_EXPORT miopenStatus_t
2904  miopenBatchNormMode_t bn_mode,
2905  void* alpha,
2906  void* beta,
2907  const miopenTensorDescriptor_t xDesc,
2908  const void* x,
2909  const miopenTensorDescriptor_t yDesc,
2910  void* y,
2911  const miopenTensorDescriptor_t scaleDesc,
2912  const miopenTensorDescriptor_t biasVarDesc,
2913  const miopenTensorDescriptor_t savedMeanDesc,
2914  const miopenTensorDescriptor_t savedVarDesc,
2915  void* bnScale,
2916  void* bnBias,
2917  double expAvgFactor,
2918  void* resultRunningMean,
2919  void* resultRunningVariance,
2920  double epsilon,
2921  void* resultSaveMean,
2922  void* resultSaveInvVariance);
2965 MIOPEN_EXPORT miopenStatus_t
2967  miopenBatchNormMode_t bn_mode,
2968  void* alpha,
2969  void* beta,
2970  const miopenTensorDescriptor_t xDesc,
2971  const void* x,
2972  const miopenTensorDescriptor_t yDesc,
2973  void* y,
2974  const miopenTensorDescriptor_t scaleDesc,
2975  const miopenTensorDescriptor_t biasVarDesc,
2976  const miopenTensorDescriptor_t savedMeanDesc,
2977  const miopenTensorDescriptor_t savedVarDesc,
2978  void* bnScale,
2979  void* bnBias,
2980  double expAvgFactor,
2981  const void* prevResultRunningMean,
2982  const void* prevResultRunningVariance,
2983  void* nextResultRunningMean,
2984  void* nextResultRunningVariance,
2985  double epsilon,
2986  void* resultSaveMean,
2987  void* resultSaveInvVariance);
3029 MIOPEN_EXPORT miopenStatus_t
3031  miopenBatchNormMode_t bn_mode,
3032  void* alpha,
3033  void* beta,
3034  const miopenTensorDescriptor_t xDesc,
3035  const void* x,
3036  const miopenTensorDescriptor_t yDesc,
3037  void* y,
3038  const miopenTensorDescriptor_t scaleDesc,
3039  const miopenTensorDescriptor_t biasVarDesc,
3040  const miopenTensorDescriptor_t savedMeanDesc,
3041  const miopenTensorDescriptor_t savedVarDesc,
3042  void* bnScale,
3043  void* bnBias,
3044  double expAvgFactor,
3045  void* resultRunningMean,
3046  void* resultRunningVariance,
3047  double epsilon,
3048  void* resultSaveMean,
3049  void* resultSaveInvVariance,
3050  const miopenActivationDescriptor_t activDesc);
3051 
3095 MIOPEN_EXPORT miopenStatus_t
3097  miopenBatchNormMode_t bn_mode,
3098  void* alpha,
3099  void* beta,
3100  const miopenTensorDescriptor_t xDesc,
3101  const void* x,
3102  const miopenTensorDescriptor_t yDesc,
3103  void* y,
3104  const miopenTensorDescriptor_t scaleDesc,
3105  const miopenTensorDescriptor_t biasVarDesc,
3106  const miopenTensorDescriptor_t savedMeanDesc,
3107  const miopenTensorDescriptor_t savedVarDesc,
3108  void* bnScale,
3109  void* bnBias,
3110  double expAvgFactor,
3111  const void* prevResultRunningMean,
3112  const void* prevResultRunningVariance,
3113  void* nextResultRunningMean,
3114  void* nextResultRunningVariance,
3115  double epsilon,
3116  void* resultSaveMean,
3117  void* resultSaveInvVariance,
3118  const miopenActivationDescriptor_t activDesc);
3119 
3149 MIOPEN_EXPORT miopenStatus_t
3151  miopenBatchNormMode_t bn_mode,
3152  void* alpha,
3153  void* beta,
3154  const miopenTensorDescriptor_t xDesc,
3155  const void* x,
3156  const miopenTensorDescriptor_t yDesc,
3157  void* y,
3158  const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc,
3159  void* bnScale,
3160  void* bnBias,
3161  void* estimatedMean,
3162  void* estimatedVariance,
3163  double epsilon);
3164 
3196 MIOPEN_EXPORT miopenStatus_t
3198  miopenBatchNormMode_t bn_mode,
3199  void* alpha,
3200  void* beta,
3201  const miopenTensorDescriptor_t xDesc,
3202  const void* x,
3203  const miopenTensorDescriptor_t yDesc,
3204  void* y,
3205  const miopenTensorDescriptor_t scaleDesc,
3206  const miopenTensorDescriptor_t biasDesc,
3207  const miopenTensorDescriptor_t estMeanDesc,
3208  const miopenTensorDescriptor_t estVarianceDesc,
3209  void* bnScale,
3210  void* bnBias,
3211  void* estimatedMean,
3212  void* estimatedVariance,
3213  double epsilon);
3214 
3244  miopenHandle_t handle,
3245  miopenBatchNormMode_t bn_mode,
3246  void* alpha,
3247  void* beta,
3248  const miopenTensorDescriptor_t xDesc,
3249  const void* x,
3250  const miopenTensorDescriptor_t yDesc,
3251  void* y,
3252  const miopenTensorDescriptor_t scaleDesc,
3253  const miopenTensorDescriptor_t biasDesc,
3254  const miopenTensorDescriptor_t estMeanDesc,
3255  const miopenTensorDescriptor_t estInvVarianceDesc,
3256  void* bnScale,
3257  void* bnBias,
3258  void* estimatedMean,
3259  void* estimatedInvVariance);
3260 
3292  miopenHandle_t handle,
3293  miopenBatchNormMode_t bn_mode,
3294  void* alpha,
3295  void* beta,
3296  const miopenTensorDescriptor_t xDesc,
3297  const void* x,
3298  const miopenTensorDescriptor_t yDesc,
3299  void* y,
3300  const miopenTensorDescriptor_t scaleDesc,
3301  const miopenTensorDescriptor_t biasDesc,
3302  const miopenTensorDescriptor_t estMeanDesc,
3303  const miopenTensorDescriptor_t estInvVarianceDesc,
3304  void* bnScale,
3305  void* bnBias,
3306  void* estimatedMean,
3307  void* estimatedInvVariance,
3308  const miopenActivationDescriptor_t activDesc);
3309 
3342 MIOPEN_EXPORT miopenStatus_t
3344  miopenBatchNormMode_t bn_mode,
3345  void* alpha,
3346  void* beta,
3347  const miopenTensorDescriptor_t xDesc,
3348  const void* x,
3349  const miopenTensorDescriptor_t yDesc,
3350  void* y,
3351  const miopenTensorDescriptor_t scaleDesc,
3352  const miopenTensorDescriptor_t biasDesc,
3353  const miopenTensorDescriptor_t estMeanDesc,
3354  const miopenTensorDescriptor_t estVarianceDesc,
3355  void* bnScale,
3356  void* bnBias,
3357  void* estimatedMean,
3358  void* estimatedVariance,
3359  double epsilon,
3360  const miopenActivationDescriptor_t activDesc);
3361 
// miopenBatchNormalizationBackward: exported prototype; by its name, the
// backward (gradient) pass of batch normalization -- semantics to be confirmed
// against the implementation.
// Signature facts: the four alpha/beta scaling pointers, x, dy, bnScale,
// savedMean and savedInvVariance are const void*; dx, resultBnScaleDiff and
// resultBnBiasDiff are non-const void* (presumably the outputs).
// bnScaleBiasDiffDesc is the only descriptor passed for the per-channel
// parameter tensors. Returns miopenStatus_t.
3396 MIOPEN_EXPORT miopenStatus_t
3397 miopenBatchNormalizationBackward(miopenHandle_t handle,
3398  miopenBatchNormMode_t bn_mode,
3399  const void* alphaDataDiff,
3400  const void* betaDataDiff,
3401  const void* alphaParamDiff,
3402  const void* betaParamDiff,
3403  const miopenTensorDescriptor_t xDesc,
3404  const void* x,
3405  const miopenTensorDescriptor_t dyDesc,
3406  const void* dy,
3407  const miopenTensorDescriptor_t dxDesc,
3408  void* dx,
3409  const miopenTensorDescriptor_t bnScaleBiasDiffDesc,
3410  const void* bnScale,
3411  void* resultBnScaleDiff,
3412  void* resultBnBiasDiff,
3413  double epsilon,
3414  const void* savedMean,
3415  const void* savedInvVariance);
3416 
3455 MIOPEN_EXPORT miopenStatus_t
3457  miopenBatchNormMode_t bn_mode,
3458  const void* alphaDataDiff,
3459  const void* betaDataDiff,
3460  const void* alphaParamDiff,
3461  const void* betaParamDiff,
3462  const miopenTensorDescriptor_t xDesc,
3463  const void* x,
3464  const miopenTensorDescriptor_t dyDesc,
3465  const void* dy,
3466  const miopenTensorDescriptor_t dxDesc,
3467  void* dx,
3468  const miopenTensorDescriptor_t scaleDesc,
3469  const miopenTensorDescriptor_t biasDesc,
3470  const miopenTensorDescriptor_t savedMeanDesc,
3471  const miopenTensorDescriptor_t savedVarDesc,
3472  const void* bnScale,
3473  void* resultBnScaleDiff,
3474  void* resultBnBiasDiff,
3475  double epsilon,
3476  const void* savedMean,
3477  const void* savedInvVariance);
3478 
3519 MIOPEN_EXPORT miopenStatus_t
3521  miopenBatchNormMode_t bn_mode,
3522  const void* alphaDataDiff,
3523  const void* betaDataDiff,
3524  const void* alphaParamDiff,
3525  const void* betaParamDiff,
3526  const miopenTensorDescriptor_t xDesc,
3527  const void* x,
3528  const miopenTensorDescriptor_t dyDesc,
3529  const void* dy,
3530  const miopenTensorDescriptor_t dxDesc,
3531  void* dx,
3532  const miopenTensorDescriptor_t scaleDesc,
3533  const miopenTensorDescriptor_t biasDesc,
3534  const miopenTensorDescriptor_t savedMeanDesc,
3535  const miopenTensorDescriptor_t savedVarianceDesc,
3536  const void* bnScale,
3537  const void* bnBias,
3538  void* resultBnScaleDiff,
3539  void* resultBnBiasDiff,
3540  double epsilon,
3541  const void* savedMean,
3542  const void* savedInvVariance,
3543  const miopenActivationDescriptor_t activDesc);
3545 // CLOSEOUT BATCHNORM DOXYGEN GROUP
3546 
3547 // Activation APIs
// miopenCreateActivationDescriptor: takes a pointer to an activation
// descriptor handle and returns miopenStatus_t.
// NOTE(review): presumably creates a descriptor and stores it in *activDesc.
3557 MIOPEN_EXPORT miopenStatus_t
3558 miopenCreateActivationDescriptor(miopenActivationDescriptor_t* activDesc);
3559 
3571 MIOPEN_EXPORT miopenStatus_t
3572 miopenSetActivationDescriptor(const miopenActivationDescriptor_t activDesc,
3574  double activAlpha,
3575  double activBeta,
3576  double activGamma);
3577 
// miopenGetActivationDescriptor: query counterpart of the activation setter.
// activDesc is the descriptor being read; mode, activAlpha, activBeta and
// activGamma are non-const pointers (presumably filled with the stored
// settings). Returns miopenStatus_t.
3589 MIOPEN_EXPORT miopenStatus_t
3590 miopenGetActivationDescriptor(const miopenActivationDescriptor_t activDesc,
3591  miopenActivationMode_t* mode,
3592  double* activAlpha,
3593  double* activBeta,
3594  double* activGamma);
3595 
// miopenActivationForward: by its name, applies the activation described by
// activDesc in the forward direction (confirm against the implementation).
// alpha, beta and x are const void*; y is the only non-const buffer
// (presumably the output described by yDesc). Returns miopenStatus_t.
3608 MIOPEN_EXPORT miopenStatus_t miopenActivationForward(miopenHandle_t handle,
3609  const miopenActivationDescriptor_t activDesc,
3610  const void* alpha,
3611  const miopenTensorDescriptor_t xDesc,
3612  const void* x,
3613  const void* beta,
3614  const miopenTensorDescriptor_t yDesc,
3615  void* y);
3616 
// miopenActivationBackward: by its name, the gradient pass of the activation
// described by activDesc (confirm against the implementation).
// y, dy, x, alpha and beta are const void*; dx is the only non-const buffer
// (presumably the output described by dxDesc). Returns miopenStatus_t.
3633 MIOPEN_EXPORT miopenStatus_t miopenActivationBackward(miopenHandle_t handle,
3634  const miopenActivationDescriptor_t activDesc,
3635  const void* alpha,
3636  const miopenTensorDescriptor_t yDesc,
3637  const void* y,
3638  const miopenTensorDescriptor_t dyDesc,
3639  const void* dy,
3640  const miopenTensorDescriptor_t xDesc,
3641  const void* x,
3642  const void* beta,
3643  const miopenTensorDescriptor_t dxDesc,
3644  void* dx);
3645 
// miopenDestroyActivationDescriptor: takes an activation descriptor by value
// and returns miopenStatus_t.
// NOTE(review): presumably releases the descriptor; it should not be used
// afterwards -- confirm.
3651 MIOPEN_EXPORT miopenStatus_t
3652 miopenDestroyActivationDescriptor(miopenActivationDescriptor_t activDesc);
3653 
3655 // CLOSEOUT ACTIVATION DOXYGEN GROUP
3656 
3657 #ifdef MIOPEN_BETA_API
// miopenGLUForward: only declared when MIOPEN_BETA_API is defined.
// input is const void*; output is non-const void* (presumably written).
// dim is a uint32_t -- presumably the dimension along which the input is
// split; confirm.
// NOTE(review): uint32_t comes from <stdint.h>, which the include list at the
// top of this header does not name directly; presumably provided
// transitively -- confirm.
3673 MIOPEN_EXPORT miopenStatus_t miopenGLUForward(miopenHandle_t handle,
3674  const miopenTensorDescriptor_t inputDesc,
3675  const void* input,
3676  const miopenTensorDescriptor_t outputDesc,
3677  void* output,
3678  const uint32_t dim);
3679 
// miopenGLUBackward: only declared when MIOPEN_BETA_API is defined.
// input and outputGrad are const void*; inputGrad is non-const void*
// (presumably the computed gradient). dim is a uint32_t, presumably the same
// dimension used by the forward call -- confirm. Returns miopenStatus_t.
3692 MIOPEN_EXPORT miopenStatus_t miopenGLUBackward(miopenHandle_t handle,
3693  const miopenTensorDescriptor_t inputDesc,
3694  const void* input,
3695  const miopenTensorDescriptor_t outputGradDesc,
3696  const void* outputGrad,
3697  const miopenTensorDescriptor_t inputGradDesc,
3698  void* inputGrad,
3699  const uint32_t dim);
3700 
3702 // CLOSEOUT ACTIVATION DOXYGEN GROUP
3703 #endif // MIOPEN_BETA_API
3704 
3705 // Softmax APIs
// miopenSoftmaxForward: by its name, the forward softmax (confirm against the
// implementation). alpha, beta and x are const void*; y is non-const void*
// (presumably the output described by yDesc). Unlike the _V2 variant, this
// prototype takes no algorithm or mode argument. Returns miopenStatus_t.
3723 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxForward(miopenHandle_t handle,
3724  const void* alpha,
3725  const miopenTensorDescriptor_t xDesc,
3726  const void* x,
3727  const void* beta,
3728  const miopenTensorDescriptor_t yDesc,
3729  void* y);
3730 
// miopenSoftmaxBackward: by its name, the gradient pass of softmax (confirm).
// y, dy, alpha and beta are const void*; dx is non-const void* (presumably
// the output described by dxDesc). Takes no algorithm or mode argument.
// Returns miopenStatus_t.
3746 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxBackward(miopenHandle_t handle,
3747  const void* alpha,
3748  const miopenTensorDescriptor_t yDesc,
3749  const void* y,
3750  const miopenTensorDescriptor_t dyDesc,
3751  const void* dy,
3752  const void* beta,
3753  const miopenTensorDescriptor_t dxDesc,
3754  void* dx);
3755 
// miopenSoftmaxForward_V2: same leading parameters as miopenSoftmaxForward,
// plus an explicit miopenSoftmaxAlgorithm_t and miopenSoftmaxMode_t passed by
// value. y is the only non-const buffer. Returns miopenStatus_t.
3769 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxForward_V2(miopenHandle_t handle,
3770  const void* alpha,
3771  const miopenTensorDescriptor_t xDesc,
3772  const void* x,
3773  const void* beta,
3774  const miopenTensorDescriptor_t yDesc,
3775  void* y,
3776  miopenSoftmaxAlgorithm_t algorithm,
3777  miopenSoftmaxMode_t mode);
3778 
// miopenSoftmaxBackward_V2: same leading parameters as miopenSoftmaxBackward,
// plus an explicit miopenSoftmaxAlgorithm_t and miopenSoftmaxMode_t passed by
// value. dx is the only non-const buffer. Returns miopenStatus_t.
3794 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxBackward_V2(miopenHandle_t handle,
3795  const void* alpha,
3796  const miopenTensorDescriptor_t yDesc,
3797  const void* y,
3798  const miopenTensorDescriptor_t dyDesc,
3799  const void* dy,
3800  const void* beta,
3801  const miopenTensorDescriptor_t dxDesc,
3802  void* dx,
3803  miopenSoftmaxAlgorithm_t algorithm,
3804  miopenSoftmaxMode_t mode);
3805 
3807 // CLOSEOUT SOFTMAX DOXYGEN GROUP
3808 
// Opaque handle types for the fusion API. Per the MIOPEN_DECLARE_OBJECT macro
// defined near the top of this header, each line declares an empty
// `struct name` and a pointer typedef `name_t`, giving
// miopenFusionPlanDescriptor_t, miopenOperatorDescriptor_t and
// miopenOperatorArgs_t.
3812 MIOPEN_DECLARE_OBJECT(miopenFusionPlanDescriptor);
3813 MIOPEN_DECLARE_OBJECT(miopenOperatorDescriptor);
3814 MIOPEN_DECLARE_OBJECT(miopenOperatorArgs);
3815 
3824 typedef enum
3825 {
3829 
// miopenCreateFusionPlan: takes a pointer to a fusion-plan handle, a fusion
// direction and an input tensor descriptor; returns miopenStatus_t.
// NOTE(review): presumably creates the plan and stores it in *fusePlanDesc.
3837 MIOPEN_EXPORT miopenStatus_t miopenCreateFusionPlan(miopenFusionPlanDescriptor_t* fusePlanDesc,
3838  const miopenFusionDirection_t fuseDirection,
3839  const miopenTensorDescriptor_t inputDesc);
3840 
// miopenDestroyFusionPlan: takes a fusion-plan handle by value and returns
// miopenStatus_t. NOTE(review): presumably releases the plan -- confirm.
3846 MIOPEN_EXPORT miopenStatus_t miopenDestroyFusionPlan(miopenFusionPlanDescriptor_t fusePlanDesc);
3847 
// miopenCompileFusionPlan: takes a library handle and a fusion plan; returns
// miopenStatus_t. NOTE(review): presumably must succeed before the plan is
// executed -- confirm against the fusion documentation.
3854 MIOPEN_EXPORT miopenStatus_t miopenCompileFusionPlan(miopenHandle_t handle,
3855  miopenFusionPlanDescriptor_t fusePlanDesc);
3856 
// miopenFusionPlanGetOp: looks up an operator of a fusion plan by integer
// index. op is a non-const pointer to an operator handle (presumably
// receives the op at position op_idx; valid index range not shown here).
// Returns miopenStatus_t.
3867 MIOPEN_EXPORT miopenStatus_t miopenFusionPlanGetOp(miopenFusionPlanDescriptor_t fusePlanDesc,
3868  const int op_idx,
3869  miopenFusionOpDescriptor_t* op);
3870 
3878 MIOPEN_EXPORT miopenStatus_t
3879 miopenFusionPlanGetWorkSpaceSize(miopenHandle_t handle,
3880  miopenFusionPlanDescriptor_t fusePlanDesc,
3881  size_t* workSpaceSize,
3883 
// miopenFusionPlanConvolutionGetAlgo: queries convolution forward algorithms
// for a fusion plan. requestAlgoCount is passed by value; returnedAlgoCount
// and returnedAlgos are non-const pointers.
// NOTE(review): presumably returnedAlgos must hold at least requestAlgoCount
// entries and *returnedAlgoCount receives how many were written -- confirm.
3901 MIOPEN_EXPORT miopenStatus_t
3902 miopenFusionPlanConvolutionGetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc,
3903  const int requestAlgoCount,
3904  int* returnedAlgoCount,
3905  miopenConvFwdAlgorithm_t* returnedAlgos);
3906 
3917  miopenFusionPlanDescriptor_t fusePlanDesc, miopenConvFwdAlgorithm_t algo);
3918 
// miopenCreateOpConvForward: takes a fusion plan, a pointer to an operator
// handle, a convolution descriptor and a weight tensor descriptor.
// NOTE(review): presumably adds a forward-convolution op to the plan and
// returns its handle through convOp -- confirm. Returns miopenStatus_t.
3927 MIOPEN_EXPORT miopenStatus_t miopenCreateOpConvForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3928  miopenFusionOpDescriptor_t* convOp,
3929  miopenConvolutionDescriptor_t convDesc,
3930  const miopenTensorDescriptor_t wDesc);
3931 
3932 //---
3933 
3934 // Activation forward create ops ---
// miopenCreateOpActivationForward: takes a fusion plan, a pointer to an
// operator handle and an activation mode. NOTE(review): presumably adds a
// forward-activation op to the plan and returns it through activFwdOp.
// Returns miopenStatus_t.
3942 MIOPEN_EXPORT miopenStatus_t
3943 miopenCreateOpActivationForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3944  miopenFusionOpDescriptor_t* activFwdOp,
3945  miopenActivationMode_t mode);
3946 
3947 // Activation backward create ops ---
// miopenCreateOpActivationBackward: backward counterpart of
// miopenCreateOpActivationForward, with the same parameter shape (plan,
// out-pointer for the op handle, activation mode). Returns miopenStatus_t.
3955 MIOPEN_EXPORT miopenStatus_t
3956 miopenCreateOpActivationBackward(miopenFusionPlanDescriptor_t fusePlanDesc,
3957  miopenFusionOpDescriptor_t* activBwdOp,
3958  miopenActivationMode_t mode);
3959 
3960 // Bias create ops ---
// miopenCreateOpBiasForward: takes a fusion plan, a pointer to an operator
// handle and a bias tensor descriptor. NOTE(review): presumably adds a
// bias-add op to the plan and returns it through biasOp -- confirm.
3968 MIOPEN_EXPORT miopenStatus_t miopenCreateOpBiasForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3969  miopenFusionOpDescriptor_t* biasOp,
3970  const miopenTensorDescriptor_t bDesc);
3971 
3972 // Batch normalization create ops ---
// miopenCreateOpBatchNormInference: takes a fusion plan, a pointer to an
// operator handle, a batch-norm mode and one descriptor
// (bnScaleBiasMeanVarDesc) -- presumably shared by the scale, bias, mean and
// variance tensors; confirm. Returns miopenStatus_t.
3981 MIOPEN_EXPORT miopenStatus_t
3982 miopenCreateOpBatchNormInference(miopenFusionPlanDescriptor_t fusePlanDesc,
3983  miopenFusionOpDescriptor_t* bnOp,
3984  const miopenBatchNormMode_t bn_mode,
3985  const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc);
3986 
// miopenCreateOpBatchNormForward: takes a fusion plan, a pointer to an
// operator handle, a batch-norm mode and a bool flag.
// NOTE(review): runningMeanVariance presumably selects whether running
// mean/variance are maintained -- confirm. Returns miopenStatus_t.
3996 MIOPEN_EXPORT miopenStatus_t
3997 miopenCreateOpBatchNormForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3998  miopenFusionOpDescriptor_t* bnFwdOp,
3999  const miopenBatchNormMode_t bn_mode,
4000  bool runningMeanVariance);
4001 
// miopenCreateOpBatchNormBackward: takes a fusion plan, a pointer to an
// operator handle and a batch-norm mode; no tensor descriptor is passed.
// Returns miopenStatus_t.
4009 MIOPEN_EXPORT miopenStatus_t
4010 miopenCreateOpBatchNormBackward(miopenFusionPlanDescriptor_t fusePlanDesc,
4011  miopenFusionOpDescriptor_t* bnBwdOp,
4012  const miopenBatchNormMode_t bn_mode);
4013 
4014 //---
// miopenCreateOperatorArgs: takes a pointer to an operator-args handle and
// returns miopenStatus_t. NOTE(review): presumably creates the args object
// and stores it in *args -- confirm.
4020 MIOPEN_EXPORT miopenStatus_t miopenCreateOperatorArgs(miopenOperatorArgs_t* args);
4021 
// miopenDestroyOperatorArgs: takes an operator-args handle by value and
// returns miopenStatus_t. NOTE(review): presumably releases it -- confirm.
4027 MIOPEN_EXPORT miopenStatus_t miopenDestroyOperatorArgs(miopenOperatorArgs_t args);
4028 
4029 // Convolution set arguments ---
// miopenSetOpArgsConvForward: records run-time arguments for a convolution op
// in an args object. alpha, beta and w are all const void*.
// NOTE(review): w is presumably the weight buffer -- confirm.
// Returns miopenStatus_t.
4039 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsConvForward(miopenOperatorArgs_t args,
4040  const miopenFusionOpDescriptor_t convOp,
4041  const void* alpha,
4042  const void* beta,
4043  const void* w);
4044 // Activation set arguments ---
// miopenSetOpArgsActivForward: records run-time arguments for a forward
// activation op. alpha and beta are const void*; activAlpha, activBeta and
// activGamma are doubles passed by value (same names as the activation
// descriptor parameters). Returns miopenStatus_t.
4056 MIOPEN_EXPORT miopenStatus_t
4057 miopenSetOpArgsActivForward(miopenOperatorArgs_t args,
4058  const miopenFusionOpDescriptor_t activFwdOp,
4059  const void* alpha,
4060  const void* beta,
4061  double activAlpha,
4062  double activBeta,
4063  double activGamma);
4064 
// miopenSetOpArgsActivBackward: records run-time arguments for a backward
// activation op. Besides alpha/beta it takes two const buffers, y and
// reserved, and the three activation doubles by value.
// NOTE(review): the role of `reserved` is not visible here -- confirm what
// callers are expected to pass. Returns miopenStatus_t.
4078 MIOPEN_EXPORT miopenStatus_t
4079 miopenSetOpArgsActivBackward(miopenOperatorArgs_t args,
4080  const miopenFusionOpDescriptor_t activBwdOp,
4081  const void* alpha,
4082  const void* beta,
4083  const void* y,
4084  const void* reserved,
4085  double activAlpha,
4086  double activBeta,
4087  double activGamma);
4088 
4089 // Batch Normalization set arguments ---
// miopenSetOpArgsBatchNormInference: records run-time arguments for a
// batch-norm inference op. Every buffer (alpha, beta, bnScale, bnBias,
// estimatedMean, estimatedVariance) is const void*; epsilon is a double
// passed by value. Returns miopenStatus_t.
4103 MIOPEN_EXPORT miopenStatus_t
4104 miopenSetOpArgsBatchNormInference(miopenOperatorArgs_t args,
4105  const miopenFusionOpDescriptor_t bnOp,
4106  const void* alpha,
4107  const void* beta,
4108  const void* bnScale,
4109  const void* bnBias,
4110  const void* estimatedMean,
4111  const void* estimatedVariance,
4112  double epsilon);
4113 
// miopenSetOpArgsBatchNormForward: records run-time arguments for a
// batch-norm forward op. alpha, beta, bnScale and bnBias are const void*;
// savedMean, savedInvVariance, runningMean and runningVariance are non-const
// void* (presumably written when the plan executes -- confirm).
// expAvgFactor and epsilon are doubles by value. Returns miopenStatus_t.
4130 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsBatchNormForward(miopenOperatorArgs_t args,
4131  const miopenFusionOpDescriptor_t bnOp,
4132  const void* alpha,
4133  const void* beta,
4134  const void* bnScale,
4135  const void* bnBias,
4136  void* savedMean,
4137  void* savedInvVariance,
4138  void* runningMean,
4139  void* runningVariance,
4140  double expAvgFactor,
4141  double epsilon);
4142 
// miopenSetOpArgsBatchNormBackward: records run-time arguments for a
// batch-norm backward op. x, bnScale, bnBias, savedMean and savedInvVariance
// are const void*; resultBnScaleDiff and resultBnBiasDiff are non-const void*
// (presumably the gradient outputs -- confirm). Returns miopenStatus_t.
4158 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsBatchNormBackward(miopenOperatorArgs_t args,
4159  const miopenFusionOpDescriptor_t bnOp,
4160  const void* alpha,
4161  const void* beta,
4162  const void* x,
4163  const void* bnScale,
4164  const void* bnBias,
4165  void* resultBnScaleDiff,
4166  void* resultBnBiasDiff,
4167  const void* savedMean,
4168  const void* savedInvVariance);
4169 
4170 // Bias forward set arguments ---
// miopenSetOpArgsBiasForward: records run-time arguments for a bias op.
// alpha, beta and bias are all const void*. Returns miopenStatus_t.
4180 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsBiasForward(miopenOperatorArgs_t args,
4181  const miopenFusionOpDescriptor_t biasOp,
4182  const void* alpha,
4183  const void* beta,
4184  const void* bias);
4185 
// miopenExecuteFusionPlan: runs a fusion plan. input is const void*; output
// is non-const void* (presumably written); args carries the per-operator
// arguments set through the miopenSetOpArgs* calls.
// This variant takes no workspace; see miopenExecuteFusionPlan_v2 for the
// prototype that does. Returns miopenStatus_t.
4200 MIOPEN_EXPORT miopenStatus_t
4201 miopenExecuteFusionPlan(const miopenHandle_t handle,
4202  const miopenFusionPlanDescriptor_t fusePlanDesc,
4203  const miopenTensorDescriptor_t inputDesc,
4204  const void* input,
4205  const miopenTensorDescriptor_t outputDesc,
4206  void* output,
4207  miopenOperatorArgs_t args);
4208 
// miopenExecuteFusionPlan_v2: same leading parameters as
// miopenExecuteFusionPlan, plus a non-const workspace pointer and its size as
// a size_t. NOTE(review): workspaceSize is presumably in bytes -- confirm.
// Returns miopenStatus_t.
4223 MIOPEN_EXPORT miopenStatus_t
4224 miopenExecuteFusionPlan_v2(const miopenHandle_t handle,
4225  const miopenFusionPlanDescriptor_t fusePlanDesc,
4226  const miopenTensorDescriptor_t inputDesc,
4227  const void* input,
4228  const miopenTensorDescriptor_t outputDesc,
4229  void* output,
4230  miopenOperatorArgs_t args,
4231  void* workspace,
4232  size_t workspaceSize);
4233 
4257 MIOPEN_EXPORT miopenStatus_t
4259  const void* alpha1,
4260  const miopenTensorDescriptor_t xDesc,
4261  const void* x,
4262  const miopenTensorDescriptor_t wDesc,
4263  const void* w,
4264  const miopenConvolutionDescriptor_t convDesc,
4266  void* workspace,
4267  size_t workspaceSizeInBytes,
4268  const void* alpha2,
4269  const miopenTensorDescriptor_t zDesc,
4270  const void* z,
4271  const miopenTensorDescriptor_t biasDesc,
4272  const void* bias,
4273  const miopenActivationDescriptor_t activationDesc,
4274  const miopenTensorDescriptor_t yDesc,
4275  void* y);
4277 // CLOSEOUT FUSION DOXYGEN GROUP
4278 
4287 typedef enum
4288 {
4293 } miopenRNNMode_t;
4294 
4298 typedef enum
4299 {
4303 
4307 typedef enum
4308 {
4309  miopenRNNdefault = 0,
4311  miopenRNNfundamental = 1,
4315 } miopenRNNAlgo_t;
4316 
4320 typedef enum
4321 {
4325 
4329 typedef enum
4330 {
4334 
4338 typedef enum
4339 {
4342 
4346 typedef enum
4347 {
4351 
4355 typedef enum
4356 {
4360 
4364 typedef enum
4365 {
4371 
// miopenCreateRNNDescriptor: takes a pointer to an RNN descriptor handle and
// returns miopenStatus_t. NOTE(review): presumably creates the descriptor and
// stores it in *rnnDesc -- confirm.
4378 MIOPEN_EXPORT miopenStatus_t miopenCreateRNNDescriptor(miopenRNNDescriptor_t* rnnDesc);
4379 
// miopenGetRNNDescriptor: query function for an RNN descriptor. All
// parameters after rnnDesc are non-const pointers (presumably filled with the
// stored settings). Unlike the _V2 variant, no dropout descriptor or data
// type is reported. Returns miopenStatus_t.
4392 MIOPEN_EXPORT miopenStatus_t miopenGetRNNDescriptor(miopenRNNDescriptor_t rnnDesc,
4393  miopenRNNMode_t* rnnMode,
4394  miopenRNNAlgo_t* algoMode,
4395  miopenRNNInputMode_t* inputMode,
4396  miopenRNNDirectionMode_t* dirMode,
4397  miopenRNNBiasMode_t* biasMode,
4398  int* hiddenSize,
4399  int* layer);
4400 
// miopenGetRNNDescriptor_V2: query function for an RNN descriptor with a
// different parameter order from miopenGetRNNDescriptor, adding dropoutDesc
// and dataType out-pointers. All parameters after rnnDesc are non-const
// pointers. Returns miopenStatus_t.
4417 MIOPEN_EXPORT miopenStatus_t miopenGetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc,
4418  int* hiddenSize,
4419  int* layer,
4420  miopenDropoutDescriptor_t* dropoutDesc,
4421  miopenRNNInputMode_t* inputMode,
4422  miopenRNNDirectionMode_t* dirMode,
4423  miopenRNNMode_t* rnnMode,
4424  miopenRNNBiasMode_t* biasMode,
4425  miopenRNNAlgo_t* algoMode,
4426  miopenDataType_t* dataType);
4427 
// miopenDestroyRNNDescriptor: takes an RNN descriptor by value and returns
// miopenStatus_t. NOTE(review): presumably releases it -- confirm.
4433 MIOPEN_EXPORT miopenStatus_t miopenDestroyRNNDescriptor(miopenRNNDescriptor_t rnnDesc);
4434 
// miopenSetRNNDescriptor: configures an RNN descriptor. All settings are
// passed by value: hsize and nlayers as ints, then input mode, direction,
// RNN mode, bias mode, algorithm and data type.
// NOTE(review): hsize/nlayers presumably mean hidden size and layer count.
// Returns miopenStatus_t.
4450 MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor(miopenRNNDescriptor_t rnnDesc,
4451  const int hsize,
4452  const int nlayers,
4453  miopenRNNInputMode_t inMode,
4454  miopenRNNDirectionMode_t direction,
4455  miopenRNNMode_t rnnMode,
4456  miopenRNNBiasMode_t biasMode,
4457  miopenRNNAlgo_t algo,
4458  miopenDataType_t dataType);
4459 
// miopenSetRNNDescriptor_V2: same parameters as miopenSetRNNDescriptor with
// an additional miopenDropoutDescriptor_t inserted after nlayers.
// Returns miopenStatus_t.
4478 MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc,
4479  const int hsize,
4480  const int nlayers,
4481  miopenDropoutDescriptor_t dropoutDesc,
4482  miopenRNNInputMode_t inMode,
4483  miopenRNNDirectionMode_t direction,
4484  miopenRNNMode_t rnnMode,
4485  miopenRNNBiasMode_t biasMode,
4486  miopenRNNAlgo_t algo,
4487  miopenDataType_t dataType);
4488 
// miopenSetRNNDataSeqTensorDescriptor: configures a sequence tensor
// descriptor from a data type, layout, three int extents and a const int
// array of sequence lengths.
// NOTE(review): sequenceLenArray presumably holds batchSize entries, and
// paddingMarker (non-const void*) presumably points to a fill value of type
// dataType -- neither is visible here; confirm. Returns miopenStatus_t.
4503 MIOPEN_EXPORT miopenStatus_t
4504 miopenSetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc,
4505  miopenDataType_t dataType,
4506  miopenRNNBaseLayout_t layout,
4507  int maxSequenceLen,
4508  int batchSize,
4509  int vectorSize,
4510  const int* sequenceLenArray,
4511  void* paddingMarker);
4512 
// miopenGetRNNDataSeqTensorDescriptor: query counterpart of
// miopenSetRNNDataSeqTensorDescriptor. The settings come back through
// non-const pointers; sequenceLenArrayLimit is an int passed by value.
// NOTE(review): sequenceLenArrayLimit presumably caps how many entries are
// written to sequenceLenArray -- confirm. Returns miopenStatus_t.
4531 MIOPEN_EXPORT miopenStatus_t
4532 miopenGetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc,
4533  miopenDataType_t* dataType,
4534  miopenRNNBaseLayout_t* layout,
4535  int* maxSequenceLen,
4536  int* batchSize,
4537  int* vectorSize,
4538  int sequenceLenArrayLimit,
4539  int* sequenceLenArray,
4540  void* paddingMarker);
4541 
// miopenGetRNNWorkspaceSize: size query; the result is returned through the
// size_t pointer numBytes. xDesc is a pointer to const tensor descriptors.
// NOTE(review): presumably an array of sequenceLen descriptors -- confirm.
// Returns miopenStatus_t.
4558 MIOPEN_EXPORT miopenStatus_t miopenGetRNNWorkspaceSize(miopenHandle_t handle,
4559  const miopenRNNDescriptor_t rnnDesc,
4560  const int sequenceLen,
4561  const miopenTensorDescriptor_t* xDesc,
4562  size_t* numBytes);
4563 
// miopenGetRNNTrainingReserveSize: size query with the same parameter shape
// as miopenGetRNNWorkspaceSize; the result is returned through numBytes.
// NOTE(review): presumably the reserve-space size needed for training, with
// xDesc an array of sequenceLen descriptors -- confirm.
4580 MIOPEN_EXPORT miopenStatus_t miopenGetRNNTrainingReserveSize(miopenHandle_t handle,
4581  miopenRNNDescriptor_t rnnDesc,
4582  const int sequenceLen,
4583  const miopenTensorDescriptor_t* xDesc,
4584  size_t* numBytes);
4585 
// miopenGetRNNTempSpaceSizes: size query for the sequence-tensor RNN API.
// Takes a single miopenSeqTensorDescriptor_t and a forward mode, and reports
// two sizes through workSpaceSize and reserveSpaceSize (size_t pointers).
// Returns miopenStatus_t.
4602 MIOPEN_EXPORT miopenStatus_t miopenGetRNNTempSpaceSizes(miopenHandle_t handle,
4603  miopenRNNDescriptor_t rnnDesc,
4604  miopenSeqTensorDescriptor_t xDesc,
4605  miopenRNNFWDMode_t fwdMode,
4606  size_t* workSpaceSize,
4607  size_t* reserveSpaceSize);
4608 
// miopenGetRNNParamsSize: size query; the result is returned through
// numBytes. Takes one input tensor descriptor and a data type by value.
// NOTE(review): presumably the total byte size of the RNN weights for dtype.
4621 MIOPEN_EXPORT miopenStatus_t miopenGetRNNParamsSize(miopenHandle_t handle,
4622  miopenRNNDescriptor_t rnnDesc,
4623  miopenTensorDescriptor_t xDesc,
4624  size_t* numBytes,
4625  miopenDataType_t dtype);
4626 
// miopenGetRNNParamsDescriptor: takes an input descriptor xDesc, a weight
// descriptor wDesc (a handle passed by value) and a data type.
// NOTE(review): presumably fills in wDesc to describe the packed weights.
// Returns miopenStatus_t.
4639 MIOPEN_EXPORT miopenStatus_t miopenGetRNNParamsDescriptor(miopenHandle_t handle,
4640  miopenRNNDescriptor_t rnnDesc,
4641  miopenTensorDescriptor_t xDesc,
4642  miopenTensorDescriptor_t wDesc,
4643  miopenDataType_t dtype);
4644 
// miopenGetRNNInputTensorSize: size query; the result is returned through
// numBytes. xDesc is a non-const pointer to tensor descriptors.
// NOTE(review): presumably an array of seqLen descriptors -- confirm.
4662 MIOPEN_EXPORT miopenStatus_t miopenGetRNNInputTensorSize(miopenHandle_t handle,
4663  miopenRNNDescriptor_t rnnDesc,
4664  const int seqLen,
4665  miopenTensorDescriptor_t* xDesc,
4666  size_t* numBytes);
4667 
// miopenGetRNNHiddenTensorSize: size query with the same parameter shape as
// miopenGetRNNInputTensorSize; the result is returned through numBytes.
// NOTE(review): presumably the byte size of the hidden-state tensor.
4680 MIOPEN_EXPORT miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle,
4681  miopenRNNDescriptor_t rnnDesc,
4682  const int seqLen,
4683  miopenTensorDescriptor_t* xDesc,
4684  size_t* numBytes);
4685 
// miopenGetRNNLayerParamSize: size query for one parameter of one layer,
// selected by the ints layer and paramID; the result is returned through
// numBytes. NOTE(review): the valid paramID range depends on the RNN mode and
// is not visible here -- consult the API reference.
4726 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle,
4727  miopenRNNDescriptor_t rnnDesc,
4728  const int layer,
4729  miopenTensorDescriptor_t xDesc,
4730  const int paramID,
4731  size_t* numBytes);
4732 
// miopenGetRNNLayerBiasSize: size query for one bias of one layer, selected
// by the ints layer and biasID; the result is returned through numBytes.
// Unlike miopenGetRNNLayerParamSize, no input descriptor is passed.
4770 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle,
4771  miopenRNNDescriptor_t rnnDesc,
4772  const int layer,
4773  const int biasID,
4774  size_t* numBytes);
4775 
// miopenGetRNNLayerParam: reads one layer parameter. w is const void* (the
// source weight buffer described by wDesc); layerParam is non-const void*
// (presumably receives the parameter selected by layer/paramID).
// NOTE(review): paramDesc is passed by value as a handle and is presumably
// updated to describe layerParam -- confirm. Returns miopenStatus_t.
4834 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle,
4835  miopenRNNDescriptor_t rnnDesc,
4836  const int layer,
4837  miopenTensorDescriptor_t xDesc,
4838  miopenTensorDescriptor_t wDesc,
4839  const void* w,
4840  const int paramID,
4841  miopenTensorDescriptor_t paramDesc,
4842  void* layerParam);
4843 
// miopenGetRNNLayerBias: bias counterpart of miopenGetRNNLayerParam. w is
// const void*; layerBias is non-const void* (presumably receives the bias
// selected by layer/biasID). Returns miopenStatus_t.
4901 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle,
4902  miopenRNNDescriptor_t rnnDesc,
4903  const int layer,
4904  miopenTensorDescriptor_t xDesc,
4905  miopenTensorDescriptor_t wDesc,
4906  const void* w,
4907  const int biasID,
4908  miopenTensorDescriptor_t biasDesc,
4909  void* layerBias);
4910 
// miopenGetRNNLayerParamOffset: takes no miopenHandle_t and no weight
// buffer; reports a size_t through layerParamOffset for the parameter
// selected by layer/paramID.
// NOTE(review): the unit of the offset (elements vs bytes) is not visible
// here -- confirm before using it for pointer arithmetic.
4965 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t rnnDesc,
4966  const int layer,
4967  miopenTensorDescriptor_t xDesc,
4968  const int paramID,
4969  miopenTensorDescriptor_t paramDesc,
4970  size_t* layerParamOffset);
4971 
// miopenGetRNNLayerBiasOffset: bias counterpart of
// miopenGetRNNLayerParamOffset; reports a size_t through layerBiasOffset for
// the bias selected by layer/biasID.
// NOTE(review): the unit of the offset (elements vs bytes) is not visible
// here -- confirm.
5022 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t rnnDesc,
5023  const int layer,
5024  miopenTensorDescriptor_t xDesc,
5025  const int biasID,
5026  miopenTensorDescriptor_t biasDesc,
5027  size_t* layerBiasOffset);
5028 
// miopenSetRNNLayerParam: write counterpart of miopenGetRNNLayerParam; the
// constness is reversed. w is non-const void* (presumably the weight buffer
// being updated) and layerParam is const void* (the source data for the
// parameter selected by layer/paramID). Returns miopenStatus_t.
5081 MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle,
5082  miopenRNNDescriptor_t rnnDesc,
5083  const int layer,
5084  miopenTensorDescriptor_t xDesc,
5085  miopenTensorDescriptor_t wDesc,
5086  void* w,
5087  const int paramID,
5088  miopenTensorDescriptor_t paramDesc,
5089  const void* layerParam);
5090 
// miopenSetRNNLayerBias: write counterpart of miopenGetRNNLayerBias. w is
// non-const void* (presumably the weight buffer being updated) and layerBias
// is const void* (the source data for the bias selected by layer/biasID).
// Returns miopenStatus_t.
5141 MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle,
5142  miopenRNNDescriptor_t rnnDesc,
5143  const int layer,
5144  miopenTensorDescriptor_t xDesc,
5145  miopenTensorDescriptor_t wDesc,
5146  void* w,
5147  const int biasID,
5148  miopenTensorDescriptor_t biasDesc,
5149  const void* layerBias);
5150 
// miopenSetRNNPaddingMode: takes an RNN descriptor and a
// miopenRNNPaddingMode_t by value; returns miopenStatus_t.
5162 MIOPEN_EXPORT miopenStatus_t miopenSetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc,
5163  miopenRNNPaddingMode_t paddingMode);
5164 
// miopenGetRNNPaddingMode: takes an RNN descriptor and a pointer to a
// miopenRNNPaddingMode_t (presumably receives the current mode); returns
// miopenStatus_t.
5172 MIOPEN_EXPORT miopenStatus_t miopenGetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc,
5173  miopenRNNPaddingMode_t* paddingMode);
5174 
// miopenRNNForward: forward pass of the sequence-tensor RNN API; fwdMode
// presumably selects training vs inference (confirm).
// Signature facts: x, hx, cx and w are const void*; hy, cy, y, workSpace and
// reserveSpace are non-const void*. One descriptor (hDesc) accompanies both
// hx and hy, and one (cDesc) accompanies both cx and cy. Each of w, workSpace
// and reserveSpace is followed by a size_t size argument.
// NOTE(review): whether hx/cx/hy/cy or reserveSpace may be NULL is not
// visible here -- confirm. Returns miopenStatus_t.
5225 MIOPEN_EXPORT miopenStatus_t miopenRNNForward(miopenHandle_t handle,
5226  const miopenRNNDescriptor_t rnnDesc,
5227  miopenRNNFWDMode_t fwdMode,
5228  const miopenSeqTensorDescriptor_t xDesc,
5229  const void* x,
5230  const miopenTensorDescriptor_t hDesc,
5231  const void* hx,
5232  void* hy,
5233  const miopenTensorDescriptor_t cDesc,
5234  const void* cx,
5235  void* cy,
5236  const miopenSeqTensorDescriptor_t yDesc,
5237  void* y,
5238  const void* w,
5239  size_t weightSpaceSize,
5240  void* workSpace,
5241  size_t workSpaceNumBytes,
5242  void* reserveSpace,
5243  size_t reserveSpaceNumBytes);
5244 
// miopenRNNBackwardSeqData: by its name, the data-gradient pass of the
// sequence-tensor RNN API (confirm).
// Signature facts: y, dy, hx, dhy, cx, dcy and w are const void*; dhx, dcx,
// dx, workSpace and reserveSpace are non-const void*. yDesc accompanies both
// y and dy; hDesc accompanies hx, dhy and dhx; cDesc accompanies cx, dcy and
// dcx; xDesc accompanies dx.
// NOTE(review): reserveSpace is non-const here, so presumably it is both
// read and updated -- confirm. Returns miopenStatus_t.
5294 MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardSeqData(miopenHandle_t handle,
5295  const miopenRNNDescriptor_t rnnDesc,
5296  const miopenSeqTensorDescriptor_t yDesc,
5297  const void* y,
5298  const void* dy,
5299  const miopenTensorDescriptor_t hDesc,
5300  const void* hx,
5301  const void* dhy,
5302  void* dhx,
5303  const miopenTensorDescriptor_t cDesc,
5304  const void* cx,
5305  const void* dcy,
5306  void* dcx,
5307  const miopenSeqTensorDescriptor_t xDesc,
5308  void* dx,
5309  const void* w,
5310  size_t weightSpaceSize,
5311  void* workSpace,
5312  size_t workSpaceNumBytes,
5313  void* reserveSpace,
5314  size_t reserveSpaceNumBytes);
5315 
5349 MIOPEN_EXPORT miopenStatus_t
5351  const miopenRNNDescriptor_t rnnDesc,
5352  const miopenSeqTensorDescriptor_t xDesc,
5353  const void* x,
5354  const miopenTensorDescriptor_t hDesc,
5355  const void* hx,
5356  const miopenSeqTensorDescriptor_t yDesc,
5357  const void* y,
5358  void* dw,
5359  size_t weightSpaceSize,
5360  void* workSpace,
5361  size_t workSpaceNumBytes,
5362  const void* reserveSpace,
5363  size_t reserveSpaceNumBytes);
5364 
// miopenRNNForwardTraining: forward pass of the per-time-step descriptor RNN
// API; by its name, the training variant (it takes a reserveSpace, which
// miopenRNNForwardInference does not).
// Signature facts: x, hx, cx and w are const void*; y, hy, cy, workSpace and
// reserveSpace are non-const void*. xDesc and yDesc are pointers to const
// descriptors -- presumably arrays of sequenceLen entries; confirm.
// Returns miopenStatus_t.
5422 MIOPEN_EXPORT miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle,
5423  const miopenRNNDescriptor_t rnnDesc,
5424  const int sequenceLen,
5425  const miopenTensorDescriptor_t* xDesc,
5426  const void* x,
5427  const miopenTensorDescriptor_t hxDesc,
5428  const void* hx,
5429  const miopenTensorDescriptor_t cxDesc,
5430  const void* cx,
5431  const miopenTensorDescriptor_t wDesc,
5432  const void* w,
5433  const miopenTensorDescriptor_t* yDesc,
5434  void* y,
5435  const miopenTensorDescriptor_t hyDesc,
5436  void* hy,
5437  const miopenTensorDescriptor_t cyDesc,
5438  void* cy,
5439  void* workSpace,
5440  size_t workSpaceNumBytes,
5441  void* reserveSpace,
5442  size_t reserveSpaceNumBytes);
5443 
// miopenRNNBackwardData: by its name, the data-gradient pass of the
// per-time-step descriptor RNN API (confirm).
// Signature facts: y, dy, dhy, dcy, w, hx and cx are const void*; dx, dhx,
// dcx, workSpace and reserveSpace are non-const void*. yDesc, dyDesc and
// dxDesc are pointers to const descriptors -- presumably arrays of
// sequenceLen entries; confirm.
// NOTE(review): reserveSpace is non-const here, so presumably it is both
// read and updated -- confirm. Returns miopenStatus_t.
5516 MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardData(miopenHandle_t handle,
5517  const miopenRNNDescriptor_t rnnDesc,
5518  const int sequenceLen,
5519  const miopenTensorDescriptor_t* yDesc,
5520  const void* y,
5521  const miopenTensorDescriptor_t* dyDesc,
5522  const void* dy,
5523  const miopenTensorDescriptor_t dhyDesc,
5524  const void* dhy,
5525  const miopenTensorDescriptor_t dcyDesc,
5526  const void* dcy,
5527  const miopenTensorDescriptor_t wDesc,
5528  const void* w,
5529  const miopenTensorDescriptor_t hxDesc,
5530  const void* hx,
5531  const miopenTensorDescriptor_t cxDesc,
5532  const void* cx,
5533  const miopenTensorDescriptor_t* dxDesc,
5534  void* dx,
5535  const miopenTensorDescriptor_t dhxDesc,
5536  void* dhx,
5537  const miopenTensorDescriptor_t dcxDesc,
5538  void* dcx,
5539  void* workSpace,
5540  size_t workSpaceNumBytes,
5541  void* reserveSpace,
5542  size_t reserveSpaceNumBytes);
5543 
// miopenRNNBackwardWeights: by its name, the weight-gradient pass of the
// per-time-step descriptor RNN API (confirm).
// Signature facts: x, hx, y and reserveSpace are const void*; dw and
// workSpace are non-const void*. reserveSpace is read-only here, unlike in
// miopenRNNBackwardData. xDesc and yDesc are pointers to const descriptors --
// presumably arrays of sequenceLen entries; confirm.
// NOTE(review): whether dw is overwritten or accumulated into is not visible
// here -- confirm. Returns miopenStatus_t.
5580 MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle,
5581  const miopenRNNDescriptor_t rnnDesc,
5582  const int sequenceLen,
5583  const miopenTensorDescriptor_t* xDesc,
5584  const void* x,
5585  const miopenTensorDescriptor_t hxDesc,
5586  const void* hx,
5587  const miopenTensorDescriptor_t* yDesc,
5588  const void* y,
5589  const miopenTensorDescriptor_t dwDesc,
5590  void* dw,
5591  void* workSpace,
5592  size_t workSpaceNumBytes,
5593  const void* reserveSpace,
5594  size_t reserveSpaceNumBytes);
5595 
// miopenRNNForwardInference: forward pass of the per-time-step descriptor RNN
// API without a reserveSpace argument (compare miopenRNNForwardTraining).
// Signature facts: x, hx, cx and w are const void*; y, hy, cy and workSpace
// are non-const void*. xDesc and yDesc are pointers to const descriptors --
// presumably arrays of sequenceLen entries; confirm. Returns miopenStatus_t.
5651 MIOPEN_EXPORT miopenStatus_t miopenRNNForwardInference(miopenHandle_t handle,
5652  miopenRNNDescriptor_t rnnDesc,
5653  const int sequenceLen,
5654  const miopenTensorDescriptor_t* xDesc,
5655  const void* x,
5656  const miopenTensorDescriptor_t hxDesc,
5657  const void* hx,
5658  const miopenTensorDescriptor_t cxDesc,
5659  const void* cx,
5660  const miopenTensorDescriptor_t wDesc,
5661  const void* w,
5662  const miopenTensorDescriptor_t* yDesc,
5663  void* y,
5664  const miopenTensorDescriptor_t hyDesc,
5665  void* hy,
5666  const miopenTensorDescriptor_t cyDesc,
5667  void* cy,
5668  void* workSpace,
5669  size_t workSpaceNumBytes);
5670 
5672 // CLOSEOUT RNN DOXYGEN GROUP
5673 
5682 typedef enum
5683 {
5686 
// miopenCreateCTCLossDescriptor: takes a pointer to a CTC-loss descriptor
// handle and returns miopenStatus_t. NOTE(review): presumably creates the
// descriptor and stores it in *ctcLossDesc -- confirm.
5693 MIOPEN_EXPORT miopenStatus_t miopenCreateCTCLossDescriptor(miopenCTCLossDescriptor_t* ctcLossDesc);
5694 
// miopenGetCTCLossDescriptor: query counterpart of
// miopenSetCTCLossDescriptor; dataType, blank_label_id and
// apply_softmax_layer are non-const pointers (presumably filled with the
// stored settings). Returns miopenStatus_t.
5704 MIOPEN_EXPORT miopenStatus_t miopenGetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc,
5705  miopenDataType_t* dataType,
5706  int* blank_label_id,
5707  bool* apply_softmax_layer);
5708 
// miopenDestroyCTCLossDescriptor: takes a CTC-loss descriptor by value and
// returns miopenStatus_t. NOTE(review): presumably releases it -- confirm.
5714 MIOPEN_EXPORT miopenStatus_t miopenDestroyCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc);
5715 
// miopenSetCTCLossDescriptor: configures a CTC-loss descriptor with a data
// type, an int blank_label_id and a bool apply_softmax_layer, all by value.
// NOTE(review): apply_softmax_layer presumably controls whether softmax is
// applied to the inputs inside the loss -- confirm. Returns miopenStatus_t.
5725 MIOPEN_EXPORT miopenStatus_t miopenSetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc,
5726  miopenDataType_t dataType,
5727  const int blank_label_id,
5728  bool apply_softmax_layer);
5729 
// miopenGetCTCLossWorkspaceSize: size query for miopenCTCLoss; the result is
// returned through the size_t pointer workSpaceSize.
// labels, labelLengths and inputLengths are const int arrays.
// NOTE(review): whether these arrays must be host or device memory, and their
// required lengths, are not visible here -- confirm.
5746 MIOPEN_EXPORT miopenStatus_t
5747 miopenGetCTCLossWorkspaceSize(miopenHandle_t handle,
5748  const miopenTensorDescriptor_t probsDesc,
5749  const miopenTensorDescriptor_t gradientsDesc,
5750  const int* labels,
5751  const int* labelLengths,
5752  const int* inputLengths,
5753  miopenCTCLossAlgo_t algo,
5754  const miopenCTCLossDescriptor_t ctcLossDesc,
5755  size_t* workSpaceSize);
5756 
// miopenCTCLoss: by its name, computes the CTC loss (confirm).
// Signature facts: probs and the three int arrays are const; losses,
// gradients and workSpace are non-const void* (presumably outputs/scratch).
// workSpaceSize is a size_t, presumably the value reported by
// miopenGetCTCLossWorkspaceSize. Returns miopenStatus_t.
5776 MIOPEN_EXPORT miopenStatus_t miopenCTCLoss(miopenHandle_t handle,
5777  const miopenTensorDescriptor_t probsDesc,
5778  const void* probs,
5779  const int* labels,
5780  const int* labelLengths,
5781  const int* inputLengths,
5782  void* losses,
5783  const miopenTensorDescriptor_t gradientsDesc,
5784  void* gradients,
5785  miopenCTCLossAlgo_t algo,
5786  const miopenCTCLossDescriptor_t ctcLossDesc,
5787  void* workSpace,
5788  size_t workSpaceSize);
5789 
5791 // CLOSEOUT LossFunction DOXYGEN GROUP
5792 
5793 // Dropout APIs
5802 typedef enum
5803 {
5805 } miopenRNGType_t;
5806 
// miopenCreateDropoutDescriptor: takes a pointer to a dropout descriptor
// handle and returns miopenStatus_t. NOTE(review): presumably creates the
// descriptor and stores it in *dropoutDesc -- confirm.
5812 MIOPEN_EXPORT miopenStatus_t miopenCreateDropoutDescriptor(miopenDropoutDescriptor_t* dropoutDesc);
5813 
// miopenDestroyDropoutDescriptor: takes a dropout descriptor by value and
// returns miopenStatus_t. NOTE(review): presumably releases it -- confirm.
5819 MIOPEN_EXPORT miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc);
5820 
// miopenDropoutGetReserveSpaceSize: size query that takes only a tensor
// descriptor (no library handle) and reports a size_t through
// reserveSpaceSizeInBytes. Returns miopenStatus_t.
5829 MIOPEN_EXPORT miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDescriptor_t xDesc,
5830  size_t* reserveSpaceSizeInBytes);
5831 
// miopenDropoutGetStatesSize: size query that takes a library handle and
// reports a size_t through stateSizeInBytes. Returns miopenStatus_t.
5840 MIOPEN_EXPORT miopenStatus_t miopenDropoutGetStatesSize(miopenHandle_t handle,
5841  size_t* stateSizeInBytes);
5842 
// miopenGetDropoutDescriptor: query function for a dropout descriptor. Every
// parameter after handle is a non-const pointer: dropout (float*), states
// (void**), seed (unsigned long long*), two bool flags and the RNG mode.
// NOTE(review): *states presumably receives the state-buffer pointer stored
// in the descriptor rather than a copy -- confirm. Returns miopenStatus_t.
5859 MIOPEN_EXPORT miopenStatus_t miopenGetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc,
5860  miopenHandle_t handle,
5861  float* dropout,
5862  void** states,
5863  unsigned long long* seed,
5864  bool* use_mask,
5865  bool* state_evo,
5866  miopenRNGType_t* rng_mode);
5867 
// miopenRestoreDropoutDescriptor: has exactly the same parameter list as
// miopenSetDropoutDescriptor.
// NOTE(review): presumably re-attaches an already initialised states buffer
// instead of initialising it again -- the difference from the Set call is not
// visible in the signature; confirm. Returns miopenStatus_t.
5890 MIOPEN_EXPORT miopenStatus_t miopenRestoreDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc,
5891  miopenHandle_t handle,
5892  float dropout,
5893  void* states,
5894  size_t stateSizeInBytes,
5895  unsigned long long seed,
5896  bool use_mask,
5897  bool state_evo,
5898  miopenRNGType_t rng_mode);
5899 
// miopenSetDropoutDescriptor: configures a dropout descriptor with a float
// dropout value, a non-const states buffer and its size in bytes, a seed, two
// bool flags and an RNG mode.
// NOTE(review): dropout is presumably the drop probability, and
// stateSizeInBytes presumably should be at least the value reported by
// miopenDropoutGetStatesSize -- confirm. Returns miopenStatus_t.
5919 MIOPEN_EXPORT miopenStatus_t miopenSetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc,
5920  miopenHandle_t handle,
5921  float dropout,
5922  void* states,
5923  size_t stateSizeInBytes,
5924  unsigned long long seed,
5925  bool use_mask,
5926  bool state_evo,
5927  miopenRNGType_t rng_mode);
5928 
// miopenDropoutForward: by its name, the forward dropout pass (confirm).
// x is const void*; y and reserveSpace are non-const void* (presumably
// written). noise_shape is a tensor descriptor passed alongside xDesc/yDesc;
// its exact role is not visible here. Returns miopenStatus_t.
5946 MIOPEN_EXPORT miopenStatus_t miopenDropoutForward(miopenHandle_t handle,
5947  const miopenDropoutDescriptor_t dropoutDesc,
5948  const miopenTensorDescriptor_t noise_shape,
5949  const miopenTensorDescriptor_t xDesc,
5950  const void* x,
5951  const miopenTensorDescriptor_t yDesc,
5952  void* y,
5953  void* reserveSpace,
5954  size_t reserveSpaceSizeInBytes);
5955 
// miopenDropoutBackward: by its name, the gradient pass of dropout (confirm).
// dy is const void*; dx and reserveSpace are non-const void*.
// NOTE(review): reserveSpace is presumably the buffer filled by
// miopenDropoutForward, even though it is not const-qualified here.
// Returns miopenStatus_t.
5973 MIOPEN_EXPORT miopenStatus_t miopenDropoutBackward(miopenHandle_t handle,
5974  const miopenDropoutDescriptor_t dropoutDesc,
5975  const miopenTensorDescriptor_t noise_shape,
5976  const miopenTensorDescriptor_t dyDesc,
5977  const void* dy,
5978  const miopenTensorDescriptor_t dxDesc,
5979  void* dx,
5980  void* reserveSpace,
5981  size_t reserveSpaceSizeInBytes);
5982 
5984 // CLOSEOUT DROPOUT DOXYGEN GROUP
5985 
5986 // TensorReduce APIs
// miopenCreateReduceTensorDescriptor: takes a pointer to a reduce-tensor
// descriptor handle and returns miopenStatus_t. NOTE(review): presumably
// creates the descriptor and stores it in *reduceTensorDesc -- confirm.
5997 MIOPEN_EXPORT miopenStatus_t
5998 miopenCreateReduceTensorDescriptor(miopenReduceTensorDescriptor_t* reduceTensorDesc);
5999 
// miopenDestroyReduceTensorDescriptor: takes a reduce-tensor descriptor by
// value and returns miopenStatus_t. NOTE(review): presumably releases it.
6005 MIOPEN_EXPORT miopenStatus_t
6006 miopenDestroyReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc);
6007 
// miopenSetReduceTensorDescriptor: configures a reduce-tensor descriptor.
// All five settings are passed by value: the reduction op, the compute data
// type, the NaN-propagation option, the indices option and the indices type.
// Returns miopenStatus_t.
6020 MIOPEN_EXPORT miopenStatus_t
6021 miopenSetReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc,
6022  miopenReduceTensorOp_t reduceTensorOp,
6023  miopenDataType_t reduceTensorCompType,
6024  miopenNanPropagation_t reduceTensorNanOpt,
6025  miopenReduceTensorIndices_t reduceTensorIndices,
6026  miopenIndicesType_t reduceTensorIndicesType);
6027 
// miopenGetReduceTensorDescriptor: query counterpart of
// miopenSetReduceTensorDescriptor; the same five settings are returned
// through non-const pointers. Returns miopenStatus_t.
6043 MIOPEN_EXPORT miopenStatus_t
6044 miopenGetReduceTensorDescriptor(const miopenReduceTensorDescriptor_t reduceTensorDesc,
6045  miopenReduceTensorOp_t* reduceTensorOp,
6046  miopenDataType_t* reduceTensorCompType,
6047  miopenNanPropagation_t* reduceTensorNanOpt,
6048  miopenReduceTensorIndices_t* reduceTensorIndices,
6049  miopenIndicesType_t* reduceTensorIndicesType);
6050 
// miopenGetReductionIndicesSize: size query for a reduction from the tensor
// described by aDesc to the one described by cDesc; reports a size_t through
// sizeInBytes. NOTE(review): presumably the size to allocate for the
// `indices` buffer of miopenReduceTensor -- confirm.
6060 MIOPEN_EXPORT miopenStatus_t
6061 miopenGetReductionIndicesSize(miopenHandle_t handle,
6062  const miopenReduceTensorDescriptor_t reduceTensorDesc,
6063  const miopenTensorDescriptor_t aDesc,
6064  const miopenTensorDescriptor_t cDesc,
6065  size_t* sizeInBytes);
6066 
6076 MIOPEN_EXPORT miopenStatus_t
6077 miopenGetReductionWorkspaceSize(miopenHandle_t handle,
6078  const miopenReduceTensorDescriptor_t reduceTensorDesc,
6079  const miopenTensorDescriptor_t aDesc,
6080  const miopenTensorDescriptor_t cDesc,
6081  size_t* sizeInBytes);
6082 
6106 MIOPEN_EXPORT miopenStatus_t
6107 miopenReduceTensor(miopenHandle_t handle,
6108  const miopenReduceTensorDescriptor_t reduceTensorDesc,
6109  void* indices,
6110  size_t indicesSizeInBytes,
6111  void* workspace,
6112  size_t workspaceSizeInBytes,
6113  const void* alpha,
6114  const miopenTensorDescriptor_t aDesc,
6115  const void* A,
6116  const void* beta,
6117  const miopenTensorDescriptor_t cDesc,
6118  void* C);
6119 
6121 // CLOSEOUT TensorReduce DOXYGEN GROUP
6122 
6123 // Find 2.0 API
6134 MIOPEN_DECLARE_OBJECT(miopenProblem);
6135 
6139 typedef enum
6140 {
6144 #ifdef MIOPEN_BETA_API
6146 #endif
6148 
6152 typedef enum
6153 {
6158 
6192 
6193 #ifdef MIOPEN_BETA_API
6219 #endif
6220 
6222 
6224 #ifdef MIOPEN_BETA_API
6227 #endif
6229 
6233 typedef enum
6234 {
6238 
6246 MIOPEN_EXPORT miopenStatus_t miopenCreateConvProblem(miopenProblem_t* problem,
6247  miopenConvolutionDescriptor_t operatorDesc,
6248  miopenProblemDirection_t direction);
6249 
6261 typedef enum
6262 {
6265 } miopenMhaMask_t;
6266 
6267 MIOPEN_EXPORT miopenStatus_t miopenCreateMhaProblem(miopenProblem_t* problem,
6268  miopenMhaDescriptor_t operatorDesc,
6269  miopenProblemDirection_t direction);
6270 
6277 MIOPEN_EXPORT miopenStatus_t miopenCreateMhaDescriptor(miopenMhaDescriptor_t* mhaDesc);
6278 
6288 MIOPEN_EXPORT miopenStatus_t miopenSetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float scale);
6289 
6299 MIOPEN_EXPORT miopenStatus_t miopenGetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float* scale);
6300 
6307 MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t* softmaxDesc);
6308 
6320 MIOPEN_EXPORT miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc,
6321  float alpha,
6322  float beta,
6323  miopenSoftmaxAlgorithm_t algorithm,
6324  miopenSoftmaxMode_t mode);
6325 
6337 MIOPEN_EXPORT miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc,
6338  float* alpha,
6339  float* beta,
6340  miopenSoftmaxAlgorithm_t* algorithm,
6341  miopenSoftmaxMode_t* mode);
6342 
6348 MIOPEN_EXPORT miopenStatus_t miopenDestroyProblem(miopenProblem_t problem);
6349 
6357 MIOPEN_EXPORT miopenStatus_t
6358 miopenSetProblemTensorDescriptor(miopenProblem_t problem,
6360  const miopenTensorDescriptor_t descriptor);
6361 
6364 MIOPEN_DECLARE_OBJECT(miopenFindOptions);
6365 
6371 MIOPEN_EXPORT miopenStatus_t miopenCreateFindOptions(miopenFindOptions_t* options);
6372 
6378 MIOPEN_EXPORT miopenStatus_t miopenDestroyFindOptions(miopenFindOptions_t options);
6379 
6386 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionTuning(miopenFindOptions_t options, int value);
6387 
6394 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionResultsOrder(miopenFindOptions_t options,
6395  miopenFindResultsOrder_t value);
6396 
6404 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionWorkspaceLimit(miopenFindOptions_t options,
6405  size_t value);
6406 
6414 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionPreallocatedWorkspace(miopenFindOptions_t options,
6415  void* buffer,
6416  size_t size);
6417 
6426 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionPreallocatedTensor(miopenFindOptions_t options,
6428  void* buffer);
6429 
6437 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionAttachBinaries(miopenFindOptions_t options,
6438  unsigned attach);
6439 
6442 MIOPEN_DECLARE_OBJECT(miopenSolution);
6443 
6455 MIOPEN_EXPORT miopenStatus_t miopenFindSolutions(miopenHandle_t handle,
6456  miopenProblem_t problem,
6457  miopenFindOptions_t options,
6458  miopenSolution_t* solutions,
6459  size_t* numSolutions,
6460  size_t maxSolutions);
6461 
6465 {
6466  /* @brief Identifier of the tensor argument.
6467  */
6469  /* @brief Tensor descriptor to override the value stored in the solution.
6470  *
6471  * Some solvers may support overriding input and output tensor descriptors, but right now there
6472  * is no way to tell from the API. Intended for future use.
6473  */
6474  miopenTensorDescriptor_t* descriptor;
6475  /* @brief Pointer to the device memory buffer to use for the operation or to the host memory if
6476  * the value is scalar.
6477  */
6478  void* buffer;
6479 };
6480 
6492 MIOPEN_EXPORT miopenStatus_t miopenRunSolution(miopenHandle_t handle,
6493  miopenSolution_t solution,
6494  size_t nInputs,
6495  const miopenTensorArgument_t* tensors,
6496  void* workspace,
6497  size_t workspaceSize);
6498 
6504 MIOPEN_EXPORT miopenStatus_t miopenDestroySolution(miopenSolution_t solution);
6505 
6513 MIOPEN_EXPORT miopenStatus_t miopenLoadSolution(miopenSolution_t* solution,
6514  const char* data,
6515  size_t size);
6516 
6523 MIOPEN_EXPORT miopenStatus_t miopenSaveSolution(miopenSolution_t solution, char* data);
6524 
6531 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t* size);
6532 
6539 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution,
6540  size_t* workspaceSize);
6541 
6548 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float* time);
6549 
6556 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution,
6557  uint64_t* solverId);
6558 
6565 MIOPEN_EXPORT miopenStatus_t miopenGetSolverIdConvAlgorithm(uint64_t solverId,
6566  miopenConvAlgorithm_t* result);
6567 
6568 #ifdef MIOPEN_BETA_API
6569 
6578 MIOPEN_EXPORT miopenStatus_t
6579 miopenCreateActivationProblem(miopenProblem_t* problem,
6580  miopenActivationDescriptor_t operatorDesc,
6581  miopenProblemDirection_t direction);
6582 
6591 MIOPEN_EXPORT miopenStatus_t miopenCreateBatchnormProblem(miopenProblem_t* problem,
6592  miopenBatchNormMode_t mode,
6593  bool runningMeanVariance,
6594  miopenProblemDirection_t direction);
6595 
6615 MIOPEN_EXPORT miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2);
6616 
6624 MIOPEN_EXPORT miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem,
6625  miopenProblemDirection_t direction);
6626 
6635 MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem,
6636  miopenSoftmaxDescriptor_t operatorDesc,
6637  miopenProblemDirection_t direction);
6638 
6639 #endif
6640 
6642 // CLOSEOUT find2 DOXYGEN GROUP
6643 
6644 #ifdef MIOPEN_BETA_API
6645 
6650 typedef enum
6651 {
6654  1,
6656 
6657 // ReduceCalculation APIs
6666 typedef enum
6667 {
6669  1,
6671  2,
6673 
6683 MIOPEN_EXPORT miopenStatus_t
6685  const miopenTensorDescriptor_t xDesc,
6686  const int32_t dim,
6687  const miopenReduceCalculationOp_t reduceCalculationOp,
6688  const miopenTensorDescriptor_t reduceDesc,
6689  size_t* sizeInBytes);
6690 
6704 MIOPEN_EXPORT miopenStatus_t
6705 miopenReduceCalculationForward(miopenHandle_t handle,
6707  void* workspace,
6708  size_t workspaceSizeInBytes,
6709  const miopenTensorDescriptor_t xDesc,
6710  const void* x,
6711  const int32_t dim,
6712  const miopenReduceCalculationOp_t reduceCalculationOp,
6713  const miopenTensorDescriptor_t reduceDesc,
6714  void* y);
6715 
6717 // CLOSEOUT REDUCE CALCULATION DOXYGEN GROUP
6718 #endif // MIOPEN_BETA_API
6719 
6720 #ifdef MIOPEN_BETA_API
6721 
6726 typedef enum
6727 {
6729  1,
6731  2,
6733  3,
6735  4,
6737 
6738 // ReduceExtreme APIs
6758 MIOPEN_EXPORT miopenStatus_t
6759 miopenReduceExtremeForward(miopenHandle_t handle,
6760  const miopenTensorDescriptor_t xDesc,
6761  const void* x,
6762  const int32_t dim,
6763  const miopenReduceExtremeOp_t reduceExtremeOp,
6764  const miopenTensorDescriptor_t yDesc,
6765  void* y,
6766  const miopenTensorDescriptor_t indiceDesc,
6767  void* indice);
6768 
6770 // CLOSEOUT REDUCEEXTREME DOXYGEN GROUP
6771 #endif // MIOPEN_BETA_API
6772 
6773 #ifdef MIOPEN_BETA_API
6774 // GroupNorm APIs
6799 MIOPEN_EXPORT miopenStatus_t miopenGroupNormForward(miopenHandle_t handle,
6800  miopenNormMode_t mode,
6801  const miopenTensorDescriptor_t xDesc,
6802  const void* x,
6803  const miopenTensorDescriptor_t weightDesc,
6804  const void* weight,
6805  const miopenTensorDescriptor_t biasDesc,
6806  const void* bias,
6807  const uint64_t num_groups,
6808  const float epsilon,
6809  const miopenTensorDescriptor_t yDesc,
6810  void* y,
6811  const miopenTensorDescriptor_t meanDesc,
6812  void* mean,
6813  const miopenTensorDescriptor_t rstdDesc,
6814  void* rstd);
6815 
6817 // CLOSEOUT groupnorm DOXYGEN GROUP
6818 #endif // MIOPEN_BETA_API
6819 
6820 #ifdef MIOPEN_BETA_API
6821 // LayerNorm APIs
6848 MIOPEN_EXPORT miopenStatus_t miopenAddLayerNormForward(miopenHandle_t handle,
6849  miopenNormMode_t mode,
6850  const miopenTensorDescriptor_t xDesc,
6851  const void* x,
6852  const miopenTensorDescriptor_t x2Desc,
6853  const void* x2,
6854  const miopenTensorDescriptor_t weightDesc,
6855  const void* weight,
6856  const miopenTensorDescriptor_t biasDesc,
6857  const void* bias,
6858  const float epsilon,
6859  const int32_t normalized_dim,
6860  const miopenTensorDescriptor_t yDesc,
6861  void* y,
6862  const miopenTensorDescriptor_t meanDesc,
6863  void* mean,
6864  const miopenTensorDescriptor_t rstdDesc,
6865  void* rstd);
6866 
6868 // CLOSEOUT LAYERNORM DOXYGEN GROUP
6869 #endif // MIOPEN_BETA_API
6870 
6871 #ifdef MIOPEN_BETA_API
6872 // LayerNorm APIs
6892 MIOPEN_EXPORT miopenStatus_t miopenT5LayerNormForward(miopenHandle_t handle,
6893  miopenNormMode_t mode,
6894  const miopenTensorDescriptor_t xDesc,
6895  const void* x,
6896  const miopenTensorDescriptor_t weightDesc,
6897  const void* weight,
6898  const float epsilon,
6899  const miopenTensorDescriptor_t yDesc,
6900  void* y,
6901  const miopenTensorDescriptor_t rstdDesc,
6902  void* rstd);
6903 
6918 MIOPEN_EXPORT miopenStatus_t
6920  miopenNormMode_t mode,
6921  const miopenTensorDescriptor_t dyDesc,
6922  const miopenTensorDescriptor_t xDesc,
6923  const miopenTensorDescriptor_t weightDesc,
6924  const miopenTensorDescriptor_t rstdDesc,
6925  const miopenTensorDescriptor_t dxDesc,
6926  const miopenTensorDescriptor_t dwDesc,
6927  size_t* sizeInBytes);
6928 
6949 MIOPEN_EXPORT miopenStatus_t miopenT5LayerNormBackward(miopenHandle_t handle,
6950  miopenNormMode_t mode,
6951  void* workspace,
6952  size_t workspaceSizeInBytes,
6953  const miopenTensorDescriptor_t dyDesc,
6954  const void* dy,
6955  const miopenTensorDescriptor_t xDesc,
6956  const void* x,
6957  const miopenTensorDescriptor_t weightDesc,
6958  const void* weight,
6959  const miopenTensorDescriptor_t rstdDesc,
6960  const void* rstd,
6961  const miopenTensorDescriptor_t dxDesc,
6962  void* dx,
6963  const miopenTensorDescriptor_t dwDesc,
6964  void* dw);
6966 // CLOSEOUT LAYERNORM DOXYGEN GROUP
6967 #endif // MIOPEN_BETA_API
6968 
6973 typedef enum
6974 {
6975  /* DEFAULT alpha = 1.0 and beta = 0.0 */
6976  /* SCALE alpha = 4.2 and beta = 0.0 */
6977  /* BILINEAR alpha = 3.2 and beta = 1.1 */
6978  /* ERROR_STATE alpha = 0.0 and beta = 3.1 */
6979 
6980  DEFAULT = 0, /* alpha = 1.0 and beta = 0.0.*/
6981  SCALE = 1, /* alpha with some value and beta 0.0*/
6982  BILINEAR = 2, /* both alpha and beta with some value*/
6983  ERROR_STATE = 3 /* alpha 0.0 and beta with some value, this should not occur.
6984  But used to check for errors.*/
6986 
6987 #ifdef MIOPEN_BETA_API
6988 // FusedAdam APIs
7131 MIOPEN_EXPORT miopenStatus_t miopenFusedAdam(miopenHandle_t handle,
7132  const miopenTensorDescriptor_t paramDesc,
7133  void* param,
7134  const miopenTensorDescriptor_t gradDesc,
7135  const void* grad,
7136  const miopenTensorDescriptor_t expAvgDesc,
7137  void* expAvg,
7138  const miopenTensorDescriptor_t expAvgSqDesc,
7139  void* expAvgSq,
7140  const miopenTensorDescriptor_t maxExpAvgSqDesc,
7141  void* maxExpAvgSq,
7142  const miopenTensorDescriptor_t stateStepDesc,
7143  void* stateStep,
7144  const unsigned int state_step,
7145  const float lr,
7146  const float beta1,
7147  const float beta2,
7148  const float weight_decay,
7149  const float eps,
7150  const bool amsgrad,
7151  const bool maximize,
7152  const bool adamw,
7153  const miopenTensorDescriptor_t gradScaleDesc,
7154  const void* gradScale,
7155  const miopenTensorDescriptor_t foundInfDesc,
7156  const void* foundInf);
7157 
7298 MIOPEN_EXPORT miopenStatus_t
7299 miopenFusedAdamWithOutput(miopenHandle_t handle,
7300  const miopenTensorDescriptor_t paramInDesc,
7301  void* paramIn,
7302  const miopenTensorDescriptor_t paramOutDesc,
7303  void* paramOut,
7304  const miopenTensorDescriptor_t paramOutFloat16Desc,
7305  void* paramOutFloat16,
7306  const miopenTensorDescriptor_t gradInDesc,
7307  const void* gradIn,
7308  const miopenTensorDescriptor_t expAvgInDesc,
7309  void* expAvgIn,
7310  const miopenTensorDescriptor_t expAvgOutDesc,
7311  void* expAvgOut,
7312  const miopenTensorDescriptor_t expAvgSqInDesc,
7313  void* expAvgSqIn,
7314  const miopenTensorDescriptor_t expAvgSqOutDesc,
7315  void* expAvgSqOut,
7316  const miopenTensorDescriptor_t maxExpAvgSqInDesc,
7317  void* maxExpAvgSqIn,
7318  const miopenTensorDescriptor_t maxExpAvgSqOutDesc,
7319  void* maxExpAvgSqOut,
7320  const miopenTensorDescriptor_t stateStepInDesc,
7321  void* stateStepIn,
7322  const miopenTensorDescriptor_t stateStepOutDesc,
7323  void* stateStepOut,
7324  const unsigned int state_step,
7325  const float lr,
7326  const float beta1,
7327  const float beta2,
7328  const float weight_decay,
7329  const float eps,
7330  const bool amsgrad,
7331  const bool maximize,
7332  const bool adamw,
7333  const miopenTensorDescriptor_t gradScaleDesc,
7334  const void* gradScale,
7335  const miopenTensorDescriptor_t foundInfDesc,
7336  const void* foundInf);
7337 
7339 // CLOSEOUT SGD DOXYGEN GROUP
7340 #endif // MIOPEN_BETA_API
7341 
7342 #ifdef MIOPEN_BETA_API
7343 // TransformersAdamW APIs
7434 MIOPEN_EXPORT miopenStatus_t miopenTransformersAdamW(miopenHandle_t handle,
7435  const miopenTensorDescriptor_t paramDesc,
7436  void* param,
7437  const miopenTensorDescriptor_t gradDesc,
7438  const void* grad,
7439  const miopenTensorDescriptor_t expAvgDesc,
7440  void* expAvg,
7441  const miopenTensorDescriptor_t expAvgSqDesc,
7442  void* expAvgSq,
7443  const miopenTensorDescriptor_t stateStepDesc,
7444  void* stateStep,
7445  const unsigned int state_step,
7446  const float lr,
7447  const float beta1,
7448  const float beta2,
7449  const float weight_decay,
7450  const float eps,
7451  const bool correct_bias,
7452  const miopenTensorDescriptor_t gradScaleDesc,
7453  const void* gradScale,
7454  const miopenTensorDescriptor_t foundInfDesc,
7455  const void* foundInf);
7456 
7583 MIOPEN_EXPORT miopenStatus_t
7585  const miopenTensorDescriptor_t paramInDesc,
7586  void* paramIn,
7587  const miopenTensorDescriptor_t paramOutDesc,
7588  void* paramOut,
7589  const miopenTensorDescriptor_t paramOutFloat16Desc,
7590  void* paramOutFloat16,
7591  const miopenTensorDescriptor_t gradInDesc,
7592  const void* gradIn,
7593  const miopenTensorDescriptor_t expAvgInDesc,
7594  void* expAvgIn,
7595  const miopenTensorDescriptor_t expAvgOutDesc,
7596  void* expAvgOut,
7597  const miopenTensorDescriptor_t expAvgSqInDesc,
7598  void* expAvgSqIn,
7599  const miopenTensorDescriptor_t expAvgSqOutDesc,
7600  void* expAvgSqOut,
7601  const miopenTensorDescriptor_t stateStepInDesc,
7602  void* stateStepIn,
7603  const miopenTensorDescriptor_t stateStepOutDesc,
7604  void* stateStepOut,
7605  const unsigned int state_step,
7606  const float lr,
7607  const float beta1,
7608  const float beta2,
7609  const float weight_decay,
7610  const float eps,
7611  const float step_size,
7612  const bool correct_bias,
7613  const miopenTensorDescriptor_t gradScaleDesc,
7614  const void* gradScale,
7615  const miopenTensorDescriptor_t foundInfDesc,
7616  const void* foundInf);
7617 
7619 // CLOSEOUT SGD DOXYGEN GROUP
7620 #endif // MIOPEN_BETA_API
7621 
7622 #ifdef MIOPEN_BETA_API
7623 // GetItem APIs
7636 MIOPEN_EXPORT miopenStatus_t
7637 miopenGetGetitemWorkspaceSize(miopenHandle_t handle,
7638  uint32_t indexCount,
7639  const miopenTensorDescriptor_t* indexDescs,
7640  size_t* sizeInBytes);
7641 
7666 MIOPEN_EXPORT miopenStatus_t miopenGetitemBackward(miopenHandle_t handle,
7667  void* workspace,
7668  size_t workspaceSizeInBytes,
7669  const miopenTensorDescriptor_t dyDesc,
7670  const void* dy,
7671  uint32_t indexCount,
7672  const miopenTensorDescriptor_t* indexDescs,
7673  const void* const* indexs,
7674  const miopenTensorDescriptor_t dxDesc,
7675  void* dx,
7676  const miopenTensorDescriptor_t errorDesc,
7677  void* error,
7678  uint32_t dimCount,
7679  const int32_t* dims,
7680  uint32_t sliceCount,
7681  const int32_t* slices,
7682  uint32_t offset);
7683 
7685 // CLOSEOUT GETITEM DOXYGEN GROUP
7686 #endif // MIOPEN_BETA_API
7687 
7688 #ifdef MIOPEN_BETA_API
7689 // RotaryPositionalEmbeddings APIs
7707 MIOPEN_EXPORT miopenStatus_t miopenRoPEForward(miopenHandle_t handle,
7708  const miopenTensorDescriptor_t xDesc,
7709  const void* x,
7710  const miopenTensorDescriptor_t cosDesc,
7711  const void* cos,
7712  const miopenTensorDescriptor_t sinDesc,
7713  const void* sin,
7714  const miopenTensorDescriptor_t yDesc,
7715  void* y);
7716 
7730 MIOPEN_EXPORT miopenStatus_t miopenRoPEBackward(miopenHandle_t handle,
7731  const miopenTensorDescriptor_t dyDesc,
7732  const void* dy,
7733  const miopenTensorDescriptor_t cosDesc,
7734  const void* cos,
7735  const miopenTensorDescriptor_t sinDesc,
7736  const void* sin,
7737  const miopenTensorDescriptor_t dxDesc,
7738  void* dx);
7740 // CLOSEOUT ROPE DOXYGEN GROUP
7741 // kthvalue APIs
7762 MIOPEN_EXPORT miopenStatus_t miopenKthvalueForward(miopenHandle_t handle,
7763  miopenTensorDescriptor_t inputDesc,
7764  const void* input,
7765  miopenTensorDescriptor_t outputDesc,
7766  void* output,
7767  miopenTensorDescriptor_t indicesDesc,
7768  size_t* indices,
7769  size_t k,
7770  int32_t dim = -1, // NOTE(review): default arguments are C++-only syntax; this prototype sits inside the extern "C" region, so a C translation unit cannot parse it - confirm C consumers are out of scope
7771  bool keepDim = false); // NOTE(review): same C++-only default-argument concern as `dim` above
7772 
7774 // CLOSEOUT kthvalue DOXYGEN GROUP
7775 #endif // MIOPEN_BETA_API
7776 
7777 #ifdef MIOPEN_BETA_API
7791 MIOPEN_EXPORT miopenStatus_t
7793  miopenTensorDescriptor_t inputDesc,
7794  miopenTensorDescriptor_t weightDesc,
7795  size_t* sizeInBytes);
7796 
7813 MIOPEN_EXPORT miopenStatus_t miopenPReLUBackward(miopenHandle_t handle,
7814  void* workspace,
7815  size_t workspaceSizeInBytes,
7816  miopenTensorDescriptor_t inputDesc,
7817  const void* input,
7818  miopenTensorDescriptor_t weightDesc,
7819  const void* weight,
7820  miopenTensorDescriptor_t doutputDesc,
7821  const void* doutput,
7822  miopenTensorDescriptor_t dinputDesc,
7823  void* dinput,
7824  miopenTensorDescriptor_t dweightDesc,
7825  void* dweight);
7826 
7828 // CLOSEOUT RELU DOXYGEN GROUP
7829 #endif // MIOPEN_BETA_API
7830 
7831 #ifdef MIOPEN_BETA_API
7832 
7837 typedef enum
7838 {
7844 
7845 // SoftMarginLoss APIs
7863 MIOPEN_EXPORT miopenStatus_t
7865  miopenTensorDescriptor_t inputDesc,
7866  miopenTensorDescriptor_t targetDesc,
7867  miopenTensorDescriptor_t outputDesc,
7868  miopenLossReductionMode_t reduction,
7869  size_t* sizeInBytes);
7870 
7887 MIOPEN_EXPORT miopenStatus_t miopenSoftMarginLossForward(miopenHandle_t handle,
7888  miopenTensorDescriptor_t inputDesc,
7889  const void* input,
7890  miopenTensorDescriptor_t targetDesc,
7891  const void* target,
7892  miopenTensorDescriptor_t outputDesc,
7893  void* output,
7894  miopenLossReductionMode_t reduction,
7895  void* workspace = nullptr, // NOTE(review): default arguments are C++-only, and `nullptr` needs C++11 (or C23); a C translation unit cannot parse this prototype - confirm C consumers are out of scope
7896  size_t workspaceSizeInBytes = 0); // NOTE(review): same C++-only default-argument concern as `workspace` above
7897 
7912 MIOPEN_EXPORT miopenStatus_t miopenSoftMarginLossBackward(miopenHandle_t handle,
7913  miopenTensorDescriptor_t inputDesc,
7914  const void* input,
7915  miopenTensorDescriptor_t targetDesc,
7916  const void* target,
7917  miopenTensorDescriptor_t doutputDesc,
7918  const void* doutput,
7919  miopenTensorDescriptor_t dinputDesc,
7920  void* dinput,
7921  miopenLossReductionMode_t reduction);
7922 
7924 // CLOSEOUT LossFunction DOXYGEN GROUP
7925 #endif
7926 
7927 #ifdef MIOPEN_BETA_API
7928 // MultiMarginLoss APIs
7952 MIOPEN_EXPORT miopenStatus_t
7954  miopenTensorDescriptor_t inputDesc,
7955  miopenTensorDescriptor_t targetDesc,
7956  miopenTensorDescriptor_t weightDesc,
7957  miopenTensorDescriptor_t outputDesc,
7958  long p,
7959  float margin,
7960  miopenLossReductionMode_t reduction,
7961  size_t* sizeInBytes);
7962 
7990 MIOPEN_EXPORT miopenStatus_t miopenMultiMarginLossForward(miopenHandle_t handle,
7991  miopenTensorDescriptor_t inputDesc,
7992  const void* input,
7993  miopenTensorDescriptor_t targetDesc,
7994  const void* target,
7995  miopenTensorDescriptor_t weightDesc,
7996  const void* weight,
7997  miopenTensorDescriptor_t outputDesc,
7998  void* output,
7999  long p,
8000  float margin,
8001  miopenLossReductionMode_t reduction,
8002  void* workspace,
8003  size_t workspaceSizeInBytes);
8004 
8006 // CLOSEOUT LossFunction DOXYGEN GROUP
8007 #endif // MIOPEN_BETA_API
8008 
8020 typedef enum
8021 {
8025  3,
8029 
8038 MIOPEN_EXPORT miopenStatus_t miopenSetTuningPolicy(miopenHandle_t handle,
8039  miopenTuningPolicy_t newValue);
8040 
8048 MIOPEN_EXPORT miopenStatus_t miopenGetTuningPolicy(miopenHandle_t handle,
8049  miopenTuningPolicy_t* value);
8050 
8051 #ifdef __cplusplus
8052 }
8053 #endif
8054 
8055 #ifdef __clang__
8056 #pragma clang diagnostic pop
8057 #endif
8058 
8059 #endif // MIOPEN_GUARD_MIOPEN_H_
miopenStatus_t miopenCreateOpActivationBackward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *activBwdOp, miopenActivationMode_t mode)
Creates a backward activation operator.
miopenStatus_t miopenCreateOpBatchNormForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *bnFwdOp, const miopenBatchNormMode_t bn_mode, bool runningMeanVariance)
Creates a forward training batch normalization operator.
miopenStatus_t miopenFusionPlanConvolutionGetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc, const int requestAlgoCount, int *returnedAlgoCount, miopenConvFwdAlgorithm_t *returnedAlgos)
Returns the supported algorithms for the convolution operator in the Fusion Plan.
miopenStatus_t miopenFusionPlanGetWorkSpaceSize(miopenHandle_t handle, miopenFusionPlanDescriptor_t fusePlanDesc, size_t *workSpaceSize, miopenConvFwdAlgorithm_t algo)
Query the workspace size required for the fusion plan.
miopenStatus_t miopenFusionPlanConvolutionSetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc, miopenConvFwdAlgorithm_t algo)
Requests the fusion runtime to choose a particular algorithm for the added convolution operation.
miopenStatus_t miopenCreateOpBatchNormInference(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *bnOp, const miopenBatchNormMode_t bn_mode, const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc)
Creates a forward inference batch normalization operator.
miopenStatus_t miopenSetOpArgsBiasForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t biasOp, const void *alpha, const void *beta, const void *bias)
Sets the arguments for forward bias op.
miopenStatus_t miopenCreateOpConvForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *convOp, miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t wDesc)
Creates forward convolution operator.
miopenStatus_t miopenSetOpArgsBatchNormInference(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t bnOp, const void *alpha, const void *beta, const void *bnScale, const void *bnBias, const void *estimatedMean, const void *estimatedVariance, double epsilon)
Sets the arguments for inference batch normalization op.
miopenStatus_t miopenSetOpArgsBatchNormForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t bnOp, const void *alpha, const void *beta, const void *bnScale, const void *bnBias, void *savedMean, void *savedInvVariance, void *runningMean, void *runningVariance, double expAvgFactor, double epsilon)
Sets the arguments for forward batch normalization op.
miopenStatus_t miopenExecuteFusionPlan(const miopenHandle_t handle, const miopenFusionPlanDescriptor_t fusePlanDesc, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputDesc, void *output, miopenOperatorArgs_t args)
Executes the fusion plan. Only compatible with NHWC/NDHWC tensor layouts.
miopenStatus_t miopenFusionPlanGetOp(miopenFusionPlanDescriptor_t fusePlanDesc, const int op_idx, miopenFusionOpDescriptor_t *op)
Allows access to the operators in a fusion plan.
miopenStatus_t miopenDestroyFusionPlan(miopenFusionPlanDescriptor_t fusePlanDesc)
Destroy the fusion plan descriptor object.
miopenStatus_t miopenCreateOpActivationForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *activFwdOp, miopenActivationMode_t mode)
Creates a forward activation operator.
miopenStatus_t miopenExecuteFusionPlan_v2(const miopenHandle_t handle, const miopenFusionPlanDescriptor_t fusePlanDesc, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputDesc, void *output, miopenOperatorArgs_t args, void *workspace, size_t workspaceSize)
Executes the fusion plan with a workspace buffer for layout transformations.
miopenFusionDirection_t
Kernel fusion direction in the network.
Definition: miopen.h:3825
miopenStatus_t miopenSetOpArgsActivBackward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t activBwdOp, const void *alpha, const void *beta, const void *y, const void *reserved, double activAlpha, double activBeta, double activGamma)
Sets the arguments for backward activation op.
miopenStatus_t miopenCompileFusionPlan(miopenHandle_t handle, miopenFusionPlanDescriptor_t fusePlanDesc)
Compiles the fusion plan.
miopenStatus_t miopenSetOpArgsBatchNormBackward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t bnOp, const void *alpha, const void *beta, const void *x, const void *bnScale, const void *bnBias, void *resultBnScaleDiff, void *resultBnBiasDiff, const void *savedMean, const void *savedInvVariance)
Sets the arguments for backward batch normalization op.
miopenStatus_t miopenSetOpArgsActivForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t activFwdOp, const void *alpha, const void *beta, double activAlpha, double activBeta, double activGamma)
Sets the arguments for forward activation op.
miopenStatus_t miopenDestroyOperatorArgs(miopenOperatorArgs_t args)
Destroys an operator argument object.
miopenStatus_t miopenCreateOpBatchNormBackward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *bnBwdOp, const miopenBatchNormMode_t bn_mode)
Creates a back propagation batch normalization operator.
miopenStatus_t miopenCreateOperatorArgs(miopenOperatorArgs_t *args)
Creates an operator argument object.
miopenStatus_t miopenSetOpArgsConvForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t convOp, const void *alpha, const void *beta, const void *w)
Sets the arguments for forward convolution op.
miopenStatus_t miopenConvolutionBiasActivationForward(miopenHandle_t handle, const void *alpha1, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, miopenConvFwdAlgorithm_t algo, void *workspace, size_t workspaceSizeInBytes, const void *alpha2, const miopenTensorDescriptor_t zDesc, const void *z, const miopenTensorDescriptor_t biasDesc, const void *bias, const miopenActivationDescriptor_t activationDesc, const miopenTensorDescriptor_t yDesc, void *y)
Prepares and executes the Convolution+Bias+Activation Fusion.
miopenStatus_t miopenCreateFusionPlan(miopenFusionPlanDescriptor_t *fusePlanDesc, const miopenFusionDirection_t fuseDirection, const miopenTensorDescriptor_t inputDesc)
Creates the kernel fusion plan descriptor object.
miopenStatus_t miopenCreateOpBiasForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *biasOp, const miopenTensorDescriptor_t bDesc)
Creates a forward bias operator.
@ miopenHorizontalFusion
Definition: miopen.h:3827
@ miopenVerticalFusion
Definition: miopen.h:3826
miopenStatus_t miopenSetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc, miopenLRNMode_t mode, unsigned int lrnN, double lrnAlpha, double lrnBeta, double lrnK)
Sets the details of an LRN layer descriptor.
miopenStatus_t miopenCreateLRNDescriptor(miopenLRNDescriptor_t *lrnDesc)
Creates a local response normalization (LRN) layer descriptor.
miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc, miopenLRNMode_t *mode, unsigned int *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK)
Gets the details of an LRN layer descriptor.
miopenStatus_t miopenLRNBackward(miopenHandle_t handle, const miopenLRNDescriptor_t lrnDesc, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, const void *workSpace)
Execute a LRN backward layer.
miopenStatus_t miopenLRNGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Determine the workspace requirements.
miopenLRNMode_t
Definition: miopen.h:461
miopenStatus_t miopenLRNForward(miopenHandle_t handle, const miopenLRNDescriptor_t lrnDesc, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, bool do_backward, void *workSpace)
Execute a LRN forward layer.
miopenStatus_t miopenDestroyLRNDescriptor(miopenLRNDescriptor_t lrnDesc)
Destroys the LRN descriptor object.
miopenStatus_t miopenGetCTCLossWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t probsDesc, const miopenTensorDescriptor_t gradientsDesc, const int *labels, const int *labelLengths, const int *inputLengths, miopenCTCLossAlgo_t algo, const miopenCTCLossDescriptor_t ctcLossDesc, size_t *workSpaceSize)
Query the amount of memory required to execute miopenCTCLoss.
miopenStatus_t miopenGetSoftMarginLossForwardWorkspaceSize(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, miopenTensorDescriptor_t targetDesc, miopenTensorDescriptor_t outputDesc, miopenLossReductionMode_t reduction, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the SoftMarginLossForward call.
miopenLossReductionMode_t
Definition: miopen.h:7838
miopenStatus_t miopenGetMultiMarginLossForwardWorkspaceSize(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, miopenTensorDescriptor_t targetDesc, miopenTensorDescriptor_t weightDesc, miopenTensorDescriptor_t outputDesc, long p, float margin, miopenLossReductionMode_t reduction, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the MultiMarginLossForward call.
miopenCTCLossAlgo_t
Definition: miopen.h:5683
miopenStatus_t miopenSetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc, miopenDataType_t dataType, const int blank_label_id, bool apply_softmax_layer)
Set the details of a CTC loss function descriptor.
miopenStatus_t miopenCTCLoss(miopenHandle_t handle, const miopenTensorDescriptor_t probsDesc, const void *probs, const int *labels, const int *labelLengths, const int *inputLengths, void *losses, const miopenTensorDescriptor_t gradientsDesc, void *gradients, miopenCTCLossAlgo_t algo, const miopenCTCLossDescriptor_t ctcLossDesc, void *workSpace, size_t workSpaceSize)
Execute forward inference for CTCLoss layer.
miopenStatus_t miopenGetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc, miopenDataType_t *dataType, int *blank_label_id, bool *apply_softmax_layer)
Retrieves a CTC loss function descriptor's details.
miopenStatus_t miopenCreateCTCLossDescriptor(miopenCTCLossDescriptor_t *ctcLossDesc)
Creates a CTC loss function descriptor.
miopenStatus_t miopenMultiMarginLossForward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t targetDesc, const void *target, miopenTensorDescriptor_t weightDesc, const void *weight, miopenTensorDescriptor_t outputDesc, void *output, long p, float margin, miopenLossReductionMode_t reduction, void *workspace, size_t workspaceSizeInBytes)
Execute a MultiMarginLoss forward layer.
miopenStatus_t miopenSoftMarginLossBackward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t targetDesc, const void *target, miopenTensorDescriptor_t doutputDesc, const void *doutput, miopenTensorDescriptor_t dinputDesc, void *dinput, miopenLossReductionMode_t reduction)
Execute a SoftMarginLoss backward layer.
miopenStatus_t miopenSoftMarginLossForward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t targetDesc, const void *target, miopenTensorDescriptor_t outputDesc, void *output, miopenLossReductionMode_t reduction, void *workspace=nullptr, size_t workspaceSizeInBytes=0)
Execute a SoftMarginLoss forward layer.
miopenStatus_t miopenDestroyCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc)
Destroys a CTC loss function descriptor object.
@ MIOPEN_CTC_LOSS_ALGO_DETERMINISTIC
Definition: miopen.h:5684
miopenRNNMode_t
Definition: miopen.h:4288
miopenStatus_t miopenGetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc, int *hiddenSize, int *layer, miopenDropoutDescriptor_t *dropoutDesc, miopenRNNInputMode_t *inputMode, miopenRNNDirectionMode_t *dirMode, miopenRNNMode_t *rnnMode, miopenRNNBiasMode_t *biasMode, miopenRNNAlgo_t *algoMode, miopenDataType_t *dataType)
Retrieves an RNN layer descriptor's details version 2. This version enables retrieving information of ...
miopenRNNInputMode_t
Definition: miopen.h:4299
miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, void *w, const int biasID, miopenTensorDescriptor_t biasDesc, const void *layerBias)
Sets a bias for a specific layer in an RNN stack.
miopenStatus_t miopenRNNBackwardWeightsSeqTensor(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const miopenSeqTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t hDesc, const void *hx, const miopenSeqTensorDescriptor_t yDesc, const void *y, void *dw, size_t weightSpaceSize, void *workSpace, size_t workSpaceNumBytes, const void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward weights for recurrent layer.
miopenStatus_t miopenGetRNNParamsSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, miopenTensorDescriptor_t xDesc, size_t *numBytes, miopenDataType_t dtype)
Query the amount of parameter memory required for RNN training.
miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, const int biasID, miopenTensorDescriptor_t biasDesc, size_t *layerBiasOffset)
Gets a bias index offset for a specific layer in an RNN stack.
miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, const int paramID, miopenTensorDescriptor_t paramDesc, size_t *layerParamOffset)
Gets an index offset for a specific weight matrix for a layer in the RNN stack.
miopenRNNDirectionMode_t
Definition: miopen.h:4321
miopenStatus_t miopenRNNBackwardSeqData(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const miopenSeqTensorDescriptor_t yDesc, const void *y, const void *dy, const miopenTensorDescriptor_t hDesc, const void *hx, const void *dhy, void *dhx, const miopenTensorDescriptor_t cDesc, const void *cx, const void *dcy, void *dcx, const miopenSeqTensorDescriptor_t xDesc, void *dx, const void *w, size_t weightSpaceSize, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward data for recurrent layer.
miopenRNNBiasMode_t
Definition: miopen.h:4330
miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, const void *w, const int biasID, miopenTensorDescriptor_t biasDesc, void *layerBias)
Gets a bias for a specific layer in an RNN stack.
miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, const void *x, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t *yDesc, const void *y, const miopenTensorDescriptor_t dwDesc, void *dw, void *workSpace, size_t workSpaceNumBytes, const void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward weights for recurrent layer.
miopenStatus_t miopenGetRNNTrainingReserveSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Query the amount of memory required for RNN training.
miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, const int biasID, size_t *numBytes)
Gets the number of bytes of a bias.
miopenStatus_t miopenSetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, const int hsize, const int nlayers, miopenRNNInputMode_t inMode, miopenRNNDirectionMode_t direction, miopenRNNMode_t rnnMode, miopenRNNBiasMode_t biasMode, miopenRNNAlgo_t algo, miopenDataType_t dataType)
Set the details of the RNN descriptor.
miopenRNNBaseLayout_t
Definition: miopen.h:4365
miopenStatus_t miopenRNNBackwardData(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *yDesc, const void *y, const miopenTensorDescriptor_t *dyDesc, const void *dy, const miopenTensorDescriptor_t dhyDesc, const void *dhy, const miopenTensorDescriptor_t dcyDesc, const void *dcy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t cxDesc, const void *cx, const miopenTensorDescriptor_t *dxDesc, void *dx, const miopenTensorDescriptor_t dhxDesc, void *dhx, const miopenTensorDescriptor_t dcxDesc, void *dcx, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward data for recurrent layer.
miopenStatus_t miopenGetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, miopenRNNMode_t *rnnMode, miopenRNNAlgo_t *algoMode, miopenRNNInputMode_t *inputMode, miopenRNNDirectionMode_t *dirMode, miopenRNNBiasMode_t *biasMode, int *hiddenSize, int *layer)
Retrieves an RNN layer descriptor's details.
miopenRNNAlgo_t
Definition: miopen.h:4308
miopenStatus_t miopenGetRNNTempSpaceSizes(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, miopenSeqTensorDescriptor_t xDesc, miopenRNNFWDMode_t fwdMode, size_t *workSpaceSize, size_t *reserveSpaceSize)
Query the amount of additional memory required for this RNN layer execution.
miopenStatus_t miopenGetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc, miopenRNNPaddingMode_t *paddingMode)
This function retrieves the RNN padding mode from the RNN descriptor.
miopenStatus_t miopenRNNForward(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, miopenRNNFWDMode_t fwdMode, const miopenSeqTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t hDesc, const void *hx, void *hy, const miopenTensorDescriptor_t cDesc, const void *cx, void *cy, const miopenSeqTensorDescriptor_t yDesc, void *y, const void *w, size_t weightSpaceSize, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute forward training or inference for recurrent layer, as selected by fwdMode.
miopenStatus_t miopenGetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc, miopenDataType_t *dataType, miopenRNNBaseLayout_t *layout, int *maxSequenceLen, int *batchSize, int *vectorSize, int sequenceLenArrayLimit, int *sequenceLenArray, void *paddingMarker)
Get shape of RNN seqData tensor.
miopenStatus_t miopenGetRNNInputTensorSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int seqLen, miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Obtain the size in bytes of the RNN input tensor.
miopenRNNFWDMode_t
Definition: miopen.h:4356
miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, void *w, const int paramID, miopenTensorDescriptor_t paramDesc, const void *layerParam)
Sets a weight matrix for a specific layer in an RNN stack.
miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, const int paramID, size_t *numBytes)
Gets the number of bytes of a parameter matrix.
miopenStatus_t miopenCreateRNNDescriptor(miopenRNNDescriptor_t *rnnDesc)
Create an RNN layer Descriptor.
miopenRNNGEMMalgoMode_t
Definition: miopen.h:4339
miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, const void *w, const int paramID, miopenTensorDescriptor_t paramDesc, void *layerParam)
Gets a weight matrix for a specific layer in an RNN stack.
miopenStatus_t miopenGetRNNWorkspaceSize(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Query the amount of memory required to execute the RNN layer.
miopenStatus_t miopenSetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc, miopenDataType_t dataType, miopenRNNBaseLayout_t layout, int maxSequenceLen, int batchSize, int vectorSize, const int *sequenceLenArray, void *paddingMarker)
Set shape of RNN seqData tensor.
miopenStatus_t miopenGetRNNParamsDescriptor(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, miopenDataType_t dtype)
Obtain a weight tensor descriptor for RNNs.
miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, const void *x, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t cxDesc, const void *cx, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t *yDesc, void *y, const miopenTensorDescriptor_t hyDesc, void *hy, const miopenTensorDescriptor_t cyDesc, void *cy, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute forward training for recurrent layer.
miopenRNNPaddingMode_t
Definition: miopen.h:4347
miopenStatus_t miopenSetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc, const int hsize, const int nlayers, miopenDropoutDescriptor_t dropoutDesc, miopenRNNInputMode_t inMode, miopenRNNDirectionMode_t direction, miopenRNNMode_t rnnMode, miopenRNNBiasMode_t biasMode, miopenRNNAlgo_t algo, miopenDataType_t dataType)
Set the details of the RNN descriptor version 2. This version enables the use of dropout in rnn.
miopenStatus_t miopenDestroyRNNDescriptor(miopenRNNDescriptor_t rnnDesc)
Destroys the RNN descriptor object.
miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int seqLen, miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Obtain the size in bytes of the RNN hidden tensor.
miopenStatus_t miopenSetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc, miopenRNNPaddingMode_t paddingMode)
This function sets the RNN padding mode in the RNN descriptor.
miopenStatus_t miopenRNNForwardInference(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, const void *x, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t cxDesc, const void *cx, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t *yDesc, void *y, const miopenTensorDescriptor_t hyDesc, void *hy, const miopenTensorDescriptor_t cyDesc, void *cy, void *workSpace, size_t workSpaceNumBytes)
Execute forward inference for RNN layer.
@ miopenRNNTANH
Definition: miopen.h:4290
@ miopenLSTM
Definition: miopen.h:4291
@ miopenGRU
Definition: miopen.h:4292
@ miopenRNNRELU
Definition: miopen.h:4289
@ miopenRNNlinear
Definition: miopen.h:4300
@ miopenRNNskip
Definition: miopen.h:4301
@ miopenRNNbidirection
Definition: miopen.h:4323
@ miopenRNNunidirection
Definition: miopen.h:4322
@ miopenRNNwithBias
Definition: miopen.h:4332
@ miopenRNNNoBias
Definition: miopen.h:4331
@ miopenRNNDataSeqMajorPadded
Definition: miopen.h:4368
@ miopenRNNDataSeqMajorNotPadded
Definition: miopen.h:4367
@ miopenRNNDataBatchMajorPadded
Definition: miopen.h:4369
@ miopenRNNDataUnknownLayout
Definition: miopen.h:4366
@ miopenRNNroundedDynamic
Definition: miopen.h:4313
@ miopenRNNfundamental
Definition: miopen.h:4311
@ miopenRNNdefault
Definition: miopen.h:4309
@ miopenRNNTraining
Definition: miopen.h:4357
@ miopenRNNInference
Definition: miopen.h:4358
@ miopenRNNAlgoGEMM
Definition: miopen.h:4340
@ miopenRNNIOWithPadding
Definition: miopen.h:4349
@ miopenRNNIONotPadded
Definition: miopen.h:4348
miopenStatus_t miopenGetPReLUBackwardWorkspaceSize(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, miopenTensorDescriptor_t weightDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the PReLU backward call.
miopenStatus_t miopenPReLUBackward(miopenHandle_t handle, void *workspace, size_t workspaceSizeInBytes, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t weightDesc, const void *weight, miopenTensorDescriptor_t doutputDesc, const void *doutput, miopenTensorDescriptor_t dinputDesc, void *dinput, miopenTensorDescriptor_t dweightDesc, void *dweight)
Execute a PReLU backward layer.
miopenStatus_t miopenReduceExtremeForward(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const void *x, const int32_t dim, const miopenReduceExtremeOp_t reduceExtremeOp, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t indiceDesc, void *indice)
Find the extreme (minimum, maximum) value and index of a tensor across a dimension.
miopenReduceExtremeOp_t
Definition: miopen.h:6727
miopenStatus_t miopenRoPEBackward(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t cosDesc, const void *cos, const miopenTensorDescriptor_t sinDesc, const void *sin, const miopenTensorDescriptor_t dxDesc, void *dx)
Execute a rope backward layer.
miopenStatus_t miopenRoPEForward(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t cosDesc, const void *cos, const miopenTensorDescriptor_t sinDesc, const void *sin, const miopenTensorDescriptor_t yDesc, void *y)
Execute a rope forward layer.
miopenStatus_t miopenFusedAdam(miopenHandle_t handle, const miopenTensorDescriptor_t paramDesc, void *param, const miopenTensorDescriptor_t gradDesc, const void *grad, const miopenTensorDescriptor_t expAvgDesc, void *expAvg, const miopenTensorDescriptor_t expAvgSqDesc, void *expAvgSq, const miopenTensorDescriptor_t maxExpAvgSqDesc, void *maxExpAvgSq, const miopenTensorDescriptor_t stateStepDesc, void *stateStep, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool amsgrad, const bool maximize, const bool adamw, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Perform Fused Adam optimization for a single tensor (Adaptive Moment Estimation).
miopenStatus_t miopenTransformersAdamW(miopenHandle_t handle, const miopenTensorDescriptor_t paramDesc, void *param, const miopenTensorDescriptor_t gradDesc, const void *grad, const miopenTensorDescriptor_t expAvgDesc, void *expAvg, const miopenTensorDescriptor_t expAvgSqDesc, void *expAvgSq, const miopenTensorDescriptor_t stateStepDesc, void *stateStep, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool correct_bias, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Implements Adam algorithm with weight decay fix as introduced in Decoupled Weight Decay Regularizatio...
miopenStatus_t miopenFusedAdamWithOutput(miopenHandle_t handle, const miopenTensorDescriptor_t paramInDesc, void *paramIn, const miopenTensorDescriptor_t paramOutDesc, void *paramOut, const miopenTensorDescriptor_t paramOutFloat16Desc, void *paramOutFloat16, const miopenTensorDescriptor_t gradInDesc, const void *gradIn, const miopenTensorDescriptor_t expAvgInDesc, void *expAvgIn, const miopenTensorDescriptor_t expAvgOutDesc, void *expAvgOut, const miopenTensorDescriptor_t expAvgSqInDesc, void *expAvgSqIn, const miopenTensorDescriptor_t expAvgSqOutDesc, void *expAvgSqOut, const miopenTensorDescriptor_t maxExpAvgSqInDesc, void *maxExpAvgSqIn, const miopenTensorDescriptor_t maxExpAvgSqOutDesc, void *maxExpAvgSqOut, const miopenTensorDescriptor_t stateStepInDesc, void *stateStepIn, const miopenTensorDescriptor_t stateStepOutDesc, void *stateStepOut, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool amsgrad, const bool maximize, const bool adamw, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Execute single tensor Adam optimization and receive the result in a separate output tensor.
miopenStatus_t miopenTransformersAdamWWithOutput(miopenHandle_t handle, const miopenTensorDescriptor_t paramInDesc, void *paramIn, const miopenTensorDescriptor_t paramOutDesc, void *paramOut, const miopenTensorDescriptor_t paramOutFloat16Desc, void *paramOutFloat16, const miopenTensorDescriptor_t gradInDesc, const void *gradIn, const miopenTensorDescriptor_t expAvgInDesc, void *expAvgIn, const miopenTensorDescriptor_t expAvgOutDesc, void *expAvgOut, const miopenTensorDescriptor_t expAvgSqInDesc, void *expAvgSqIn, const miopenTensorDescriptor_t expAvgSqOutDesc, void *expAvgSqOut, const miopenTensorDescriptor_t stateStepInDesc, void *stateStepIn, const miopenTensorDescriptor_t stateStepOutDesc, void *stateStepOut, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const float step_size, const bool correct_bias, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Execute single tensor Adam optimization and receive the result in a separate output tensor.
miopenReduceTensorIndices_t
Definition: miopen.h:589
miopenStatus_t miopenSetReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc, miopenReduceTensorOp_t reduceTensorOp, miopenDataType_t reduceTensorCompType, miopenNanPropagation_t reduceTensorNanOpt, miopenReduceTensorIndices_t reduceTensorIndices, miopenIndicesType_t reduceTensorIndicesType)
Initialize a ReduceTensor descriptor object.
miopenStatus_t miopenCreateReduceTensorDescriptor(miopenReduceTensorDescriptor_t *reduceTensorDesc)
Creates the ReduceTensor descriptor object.
miopenReduceTensorOp_t
Definition: miopen.h:554
miopenIndicesType_t
Definition: miopen.h:599
miopenStatus_t miopenReduceTensor(miopenHandle_t handle, const miopenReduceTensorDescriptor_t reduceTensorDesc, void *indices, size_t indicesSizeInBytes, void *workspace, size_t workspaceSizeInBytes, const void *alpha, const miopenTensorDescriptor_t aDesc, const void *A, const void *beta, const miopenTensorDescriptor_t cDesc, void *C)
TensorReduce function doing reduction on tensor A by implementing C = alpha * reduceOp(A) + beta * C
miopenStatus_t miopenGetReductionIndicesSize(miopenHandle_t handle, const miopenReduceTensorDescriptor_t reduceTensorDesc, const miopenTensorDescriptor_t aDesc, const miopenTensorDescriptor_t cDesc, size_t *sizeInBytes)
Helper function to query the minimum index space size required by the ReduceTensor call.
miopenStatus_t miopenDestroyReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc)
Destroy the ReduceTensor descriptor object.
miopenStatus_t miopenGetReductionWorkspaceSize(miopenHandle_t handle, const miopenReduceTensorDescriptor_t reduceTensorDesc, const miopenTensorDescriptor_t aDesc, const miopenTensorDescriptor_t cDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the ReduceTensor call.
miopenStatus_t miopenGetReduceTensorDescriptor(const miopenReduceTensorDescriptor_t reduceTensorDesc, miopenReduceTensorOp_t *reduceTensorOp, miopenDataType_t *reduceTensorCompType, miopenNanPropagation_t *reduceTensorNanOpt, miopenReduceTensorIndices_t *reduceTensorIndices, miopenIndicesType_t *reduceTensorIndicesType)
Query a ReduceTensor descriptor object.
miopenStatus_t miopenActivationForward(miopenHandle_t handle, const miopenActivationDescriptor_t activDesc, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Execute an activation forward layer.
miopenStatus_t miopenGLUBackward(miopenHandle_t handle, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputGradDesc, const void *outputGrad, const miopenTensorDescriptor_t inputGradDesc, void *inputGrad, const uint32_t dim)
Execute a GLU backward layer.
miopenStatus_t miopenActivationBackward(miopenHandle_t handle, const miopenActivationDescriptor_t activDesc, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx)
Execute an activation backwards layer.
miopenActivationMode_t
Definition: miopen.h:500
miopenStatus_t miopenSetActivationDescriptor(const miopenActivationDescriptor_t activDesc, miopenActivationMode_t mode, double activAlpha, double activBeta, double activGamma)
Sets the activation layer descriptor details.
miopenStatus_t miopenCreateActivationDescriptor(miopenActivationDescriptor_t *activDesc)
Creates the Activation descriptor object.
miopenStatus_t miopenGetActivationDescriptor(const miopenActivationDescriptor_t activDesc, miopenActivationMode_t *mode, double *activAlpha, double *activBeta, double *activGamma)
Gets the activation layer descriptor details.
miopenStatus_t miopenGLUForward(miopenHandle_t handle, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputDesc, void *output, const uint32_t dim)
Execute a GLU forward layer.
miopenStatus_t miopenDestroyActivationDescriptor(miopenActivationDescriptor_t activDesc)
Destroys the activation descriptor object.
miopenStatus_t miopenBatchNormalizationForwardInference(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedVariance, double epsilon)
Execute forward inference layer for batch normalization.
miopenStatus_t miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedVariance, double epsilon)
Execute forward inference layer for batch normalization.
miopenStatus_t miopenBatchNormForwardTrainingActivation(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, void *resultRunningMean, void *resultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance, const miopenActivationDescriptor_t activDesc)
Execute forward training layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormBackwardActivation(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, const void *alphaDataDiff, const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarianceDesc, const void *bnScale, const void *bnBias, void *resultBnScaleDiff, void *resultBnBiasDiff, double epsilon, const void *savedMean, const void *savedInvVariance, const miopenActivationDescriptor_t activDesc)
Execute backwards propagation layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormalizationForwardTraining_V3(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, const void *prevResultRunningMean, const void *prevResultRunningVariance, void *nextResultRunningMean, void *nextResultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance)
Execute forward training layer for batch normalization.
miopenStatus_t miopenBatchNormalizationForwardTraining(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc, void *bnScale, void *bnBias, double expAvgFactor, void *resultRunningMean, void *resultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance)
Execute forward training layer for batch normalization.
miopenStatus_t miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, void *resultRunningMean, void *resultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance)
Execute forward training layer for batch normalization.
miopenStatus_t miopenBatchNormalizationBackward_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, const void *alphaDataDiff, const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, const void *bnScale, void *resultBnScaleDiff, void *resultBnBiasDiff, double epsilon, const void *savedMean, const void *savedInvVariance)
Execute backwards propagation layer for batch normalization.
miopenStatus_t miopenBatchNormForwardTrainingActivation_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, const void *prevResultRunningMean, const void *prevResultRunningVariance, void *nextResultRunningMean, void *nextResultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance, const miopenActivationDescriptor_t activDesc)
Execute forward training layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormalizationForwardInferenceInvVariance(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estInvVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedInvVariance)
Execute forward inference layer for batch normalization using inverse variance.
miopenStatus_t miopenBatchNormalizationBackward(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, const void *alphaDataDiff, const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale, void *resultBnScaleDiff, void *resultBnBiasDiff, double epsilon, const void *savedMean, const void *savedInvVariance)
Execute backwards propagation layer for batch normalization.
miopenBatchNormMode_t
Definition: miopen.h:490
miopenStatus_t miopenBatchNormForwardInferenceActivation(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedVariance, double epsilon, const miopenActivationDescriptor_t activDesc)
Execute forward inference layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormForwardInferenceActivationInvVariance(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estInvVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedInvVariance, const miopenActivationDescriptor_t activDesc)
Execute forward inference layer for batch normalization with fused activation using inverse variance.
miopenStatus_t miopenDeriveBNTensorDescriptor(miopenTensorDescriptor_t derivedBnDesc, const miopenTensorDescriptor_t xDesc, miopenBatchNormMode_t bn_mode)
Derive tensor for gamma and beta from input tensor descriptor.
miopenStatus_t miopenCatForward(miopenHandle_t handle, const int32_t xCount, const miopenTensorDescriptor_t *xDescs, const void *const *xs, const miopenTensorDescriptor_t yDesc, void *y, const int32_t dim)
Execute a cat forward layer.
miopenStatus_t miopenInitConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc, int spatialDim, const int *padA, const int *strideA, const int *dilationA, miopenConvolutionMode_t c_mode)
Creates an N-dimensional convolution layer descriptor.
miopenStatus_t miopenConvolutionForwardImmediate(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, void *y, void *workSpace, size_t workSpaceSize, const uint64_t solution_id)
Executes the Forward convolution operation based on the provided solution ID.
miopenStatus_t miopenConvolutionBackwardDataCompileSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, const uint64_t solution_id)
Compiles the solution provided by the user, this solution may be acquired by the miopenConvolutionBac...
miopenStatus_t miopenConvolutionForwardGetSolutionWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, const uint64_t solution_id, size_t *workSpaceSize)
Returns the workspace size required for a particular solution id.
miopenStatus_t miopenConvolutionForward(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, miopenConvFwdAlgorithm_t algo, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, void *workSpace, size_t workSpaceSize)
Execute a forward convolution layer.
miopenStatus_t miopenGetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc, int *groupCount)
Get the number of groups to be used in Group/Depthwise convolution.
miopenStatus_t miopenGetConvolutionFindMode(const miopenConvolutionDescriptor_t convDesc, miopenConvolutionFindMode_t *findMode)
Reads the Find Mode attribute from the convolution descriptor.
miopenStatus_t miopenInitConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc, miopenConvolutionMode_t c_mode, int pad_h, int pad_w, int stride_h, int stride_w, int dilation_h, int dilation_w)
Creates a 2-D convolution layer descriptor.
miopenConvAlgorithm_t
Definition: miopen.h:1254
miopenStatus_t miopenConvolutionBackwardData(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, miopenConvBwdDataAlgorithm_t algo, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, void *workSpace, size_t workSpaceSize)
Execute a backward data convolution layer.
miopenStatus_t miopenGetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc, const miopenConvolutionAttrib_t attr, int *value)
Get the attribute of the convolution descriptor.
miopenStatus_t miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t inputTensorDesc, const miopenTensorDescriptor_t filterDesc, int *nDim, int *outputTensorDimA)
Get the shape of a resulting N-dimensional tensor from an (N-2)-dimensional convolution.
miopenStatus_t miopenConvolutionBackwardWeightsCompileSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, const uint64_t solution_id)
Compiles the solution provided by the user, this solution may be acquired by the miopenConvolutionBac...
miopenStatus_t miopenFindConvolutionBackwardWeightsAlgorithm(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, void *dw, const int requestAlgoCount, int *returnedAlgoCount, miopenConvAlgoPerf_t *perfResults, void *workSpace, size_t workSpaceSize, bool exhaustiveSearch)
Search and run the backwards weights convolutional algorithms and return a list of kernel times.
miopenStatus_t miopenGetConvolutionForwardOutputDim(miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t inputTensorDesc, const miopenTensorDescriptor_t filterDesc, int *n, int *c, int *h, int *w)
Get the shape of a resulting 4-D tensor from a 2-D convolution.
miopenConvFwdAlgorithm_t
Definition: miopen.h:1217
miopenStatus_t miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Query the workspace size required for a forward convolution algorithm.
miopenStatus_t miopenCreateConvolutionDescriptor(miopenConvolutionDescriptor_t *convDesc)
Creates a convolution layer descriptor.
miopenStatus_t miopenConvolutionBackwardWeightsGetSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, const size_t maxSolutionCount, size_t *solutionCount, miopenConvSolution_t *solutions)
Query the applicable solutions for a backward convolution w-r-t weights as described by input,...
miopenStatus_t miopenGetConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc, int requestedSpatialDim, int *spatialDim, int *padA, int *strideA, int *dilationA, miopenConvolutionMode_t *c_mode)
Retrieves an N-dimensional convolution layer descriptor's details.
miopenConvBwdDataAlgorithm_t
Definition: miopen.h:1240
miopenConvolutionAttrib_t
Definition: miopen.h:611
miopenStatus_t miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, size_t *workSpaceSize)
Get the GPU memory required for the backward weights convolution algorithm.
miopenStatus_t miopenConvolutionBackwardWeightsImmediate(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, void *dw, void *workSpace, size_t workSpaceSize, const uint64_t solution_id)
Executes the Backward convolution w-r-t weights operation based on the provided solution ID.
miopenStatus_t miopenFindConvolutionBackwardDataAlgorithm(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, void *dx, const int requestAlgoCount, int *returnedAlgoCount, miopenConvAlgoPerf_t *perfResults, void *workSpace, size_t workSpaceSize, bool exhaustiveSearch)
Search and run the backwards data convolution algorithms and return a list of kernel times.
miopenStatus_t miopenConvolutionBackwardDataGetSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, const size_t maxSolutionCount, size_t *solutionCount, miopenConvSolution_t *solutions)
Query the applicable solutions for a backward convolution w-r-t data as described by input,...
miopenStatus_t miopenConvolutionForwardBias(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t bDesc, const void *b, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Calculate element-wise scale and shift of a tensor via a bias tensor.
miopenConvolutionMode_t
Definition: miopen.h:415
miopenStatus_t miopenSetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc, const miopenConvolutionAttrib_t attr, int value)
Set the attribute of the convolution descriptor.
miopenStatus_t miopenConvolutionBackwardDataImmediate(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, void *dx, void *workSpace, size_t workSpaceSize, const uint64_t solution_id)
Executes the Backward convolution w-r-t data operation based on the provided solution ID.
miopenStatus_t miopenSetTransposeConvNdOutputPadding(miopenConvolutionDescriptor_t convDesc, int spatialDim, const int *adjA)
Set the output padding to be used in N-dimensional Transpose convolution.
miopenStatus_t miopenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, miopenConvolutionFindMode_t findMode)
Sets the Find Mode attribute in the convolution descriptor.
miopenStatus_t miopenSetTransposeConvOutputPadding(miopenConvolutionDescriptor_t convDesc, int adj_h, int adj_w)
Set the output padding to be used in 2-D Transpose convolution.
miopenConvolutionFindMode_t
Definition: miopen.h:640
miopenStatus_t miopenGetConvolutionSpatialDim(miopenConvolutionDescriptor_t convDesc, int *spatialDim)
Retrieves the spatial dimension of a convolution layer descriptor.
miopenConvBwdWeightsAlgorithm_t
Definition: miopen.h:1229
miopenStatus_t miopenConvolutionForwardCompileSolution(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, const uint64_t solution_id)
Compiles the solution provided by the user, this solution may be acquired by the miopenConvolutionFor...
miopenStatus_t miopenConvolutionBackwardDataGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, size_t *solutionCount)
Query the maximum number of solutions applicable for the given input/output and weights tensor descri...
miopenStatus_t miopenConvolutionBackwardWeights(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, miopenConvBwdWeightsAlgorithm_t algo, const void *beta, const miopenTensorDescriptor_t dwDesc, void *dw, void *workSpace, size_t workSpaceSize)
Execute a backward weights convolution layer.
miopenStatus_t miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, size_t *workSpaceSize)
Query the workspace size required for a backward data convolution algorithm.
miopenStatus_t miopenFindConvolutionForwardAlgorithm(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, void *y, const int requestAlgoCount, int *returnedAlgoCount, miopenConvAlgoPerf_t *perfResults, void *workSpace, size_t workSpaceSize, bool exhaustiveSearch)
Search and run the forward convolutional algorithms and return a list of kernel times.
miopenStatus_t miopenDestroyConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc)
Destroys the convolution descriptor object.
miopenStatus_t miopenConvolutionBackwardWeightsGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, size_t *solutionCount)
Query the maximum number of solutions applicable for the given input/output and weights tensor descri...
miopenStatus_t miopenSetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc, int groupCount)
Set the number of groups to be used in Group/Depthwise convolution.
miopenStatus_t miopenConvolutionBackwardWeightsGetSolutionWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, const uint64_t solution_id, size_t *workSpaceSize)
Returns the workspace size required for a particular solution id.
miopenStatus_t miopenConvolutionForwardGetSolution(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, const size_t maxSolutionCount, size_t *solutionCount, miopenConvSolution_t *solutions)
Query the applicable solutions for a convolution configuration described by input,...
miopenStatus_t miopenGetConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc, miopenConvolutionMode_t *c_mode, int *pad_h, int *pad_w, int *stride_h, int *stride_w, int *dilation_h, int *dilation_w)
Retrieves a 2-D convolution layer descriptor's details.
miopenStatus_t miopenConvolutionForwardGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, size_t *solutionCount)
Query the maximum number of solutions applicable for the given input/output and weights tensor descri...
miopenStatus_t miopenConvolutionBackwardDataGetSolutionWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, const uint64_t solution_id, size_t *workSpaceSize)
Returns the workspace size required for a particular solution id.
miopenStatus_t miopenConvolutionBackwardBias(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t dyDesc, const void *dy, const void *beta, const miopenTensorDescriptor_t dbDesc, void *db)
Calculates the gradient with respect to the bias.
@ miopenConvolutionAlgoDirect
Definition: miopen.h:1256
@ miopenConvolutionAlgoWinograd
Definition: miopen.h:1258
@ miopenConvolutionAlgoFFT
Definition: miopen.h:1257
@ miopenConvolutionAlgoImplicitGEMM
Definition: miopen.h:1259
@ miopenConvolutionAlgoGEMM
Definition: miopen.h:1255
@ miopenConvolutionFwdAlgoFFT
Definition: miopen.h:1220
@ miopenConvolutionFwdAlgoImplicitGEMM
Definition: miopen.h:1222
@ miopenConvolutionFwdAlgoGEMM
Definition: miopen.h:1218
@ miopenConvolutionFwdAlgoWinograd
Definition: miopen.h:1221
@ miopenConvolutionFwdAlgoDirect
Definition: miopen.h:1219
@ miopenTransposeBwdDataAlgoGEMM
Definition: miopen.h:1245
@ miopenConvolutionBwdDataAlgoDirect
Definition: miopen.h:1242
@ miopenConvolutionBwdDataAlgoGEMM
Definition: miopen.h:1241
@ miopenConvolutionBwdDataAlgoFFT
Definition: miopen.h:1243
@ miopenConvolutionBwdDataAlgoWinograd
Definition: miopen.h:1244
@ miopenConvolutionBwdDataAlgoImplicitGEMM
Definition: miopen.h:1247
@ miopenConvolutionBwdWeightsAlgoGEMM
Definition: miopen.h:1230
@ miopenConvolutionBwdWeightsAlgoWinograd
Definition: miopen.h:1232
@ miopenConvolutionBwdWeightsAlgoDirect
Definition: miopen.h:1231
@ miopenConvolutionBwdWeightsAlgoImplicitGEMM
Definition: miopen.h:1233
miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc)
Destroys the dropout descriptor object.
miopenRNGType_t
Definition: miopen.h:5803
miopenStatus_t miopenDropoutGetStatesSize(miopenHandle_t handle, size_t *stateSizeInBytes)
Query the amount of memory required to store the states of the random number generators.
miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDescriptor_t xDesc, size_t *reserveSpaceSizeInBytes)
Query the amount of memory required to run dropout.
miopenStatus_t miopenDropoutForward(miopenHandle_t handle, const miopenDropoutDescriptor_t dropoutDesc, const miopenTensorDescriptor_t noise_shape, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, void *reserveSpace, size_t reserveSpaceSizeInBytes)
Execute forward dropout operation.
miopenStatus_t miopenGetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc, miopenHandle_t handle, float *dropout, void **states, unsigned long long *seed, bool *use_mask, bool *state_evo, miopenRNGType_t *rng_mode)
Get the details of the dropout descriptor.
miopenStatus_t miopenCreateDropoutDescriptor(miopenDropoutDescriptor_t *dropoutDesc)
Creates the dropout descriptor object.
miopenStatus_t miopenDropoutBackward(miopenHandle_t handle, const miopenDropoutDescriptor_t dropoutDesc, const miopenTensorDescriptor_t noise_shape, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, void *reserveSpace, size_t reserveSpaceSizeInBytes)
Execute backward dropout operation.
miopenStatus_t miopenRestoreDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc, miopenHandle_t handle, float dropout, void *states, size_t stateSizeInBytes, unsigned long long seed, bool use_mask, bool state_evo, miopenRNGType_t rng_mode)
Restore the dropout descriptor to a saved state.
miopenStatus_t miopenSetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc, miopenHandle_t handle, float dropout, void *states, size_t stateSizeInBytes, unsigned long long seed, bool use_mask, bool state_evo, miopenRNGType_t rng_mode)
Initialize the dropout descriptor.
@ MIOPEN_RNG_PSEUDO_XORWOW
Definition: miopen.h:5804
miopenFindResultsOrder_t
Definition: miopen.h:6234
miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution, uint64_t *solverId)
Reads the id of the solver referred to by the solution.
miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc, float alpha, float beta, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode)
Sets the softmax descriptor details.
miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float *time)
Reads the time spent to execute the solution the last time it was run.
miopenStatus_t miopenGetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float *scale)
Gets the Mha descriptor details.
miopenMhaMask_t
Mask mode applied in a Mha operation (none or causal).
Definition: miopen.h:6262
miopenStatus_t miopenSetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float scale)
Sets the Mha descriptor details.
miopenStatus_t miopenSetFindOptionWorkspaceLimit(miopenFindOptions_t options, size_t value)
Sets the workspace limit find option. Default value is maximum of size_t.
miopenStatus_t miopenSetFindOptionPreallocatedTensor(miopenFindOptions_t options, miopenTensorArgumentId_t id, void *buffer)
Attaches a preallocated tensor to find options. If not used, buffers are allocated by MIOpen internal...
miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t *problem, miopenSoftmaxDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing a softmax operation.
miopenStatus_t miopenDestroyFindOptions(miopenFindOptions_t options)
Destroys miopenFindOptions object.
miopenStatus_t miopenSetFindOptionAttachBinaries(miopenFindOptions_t options, unsigned attach)
Forces library to attach kernel binaries to solutions for later saving. This allows zero lookup miope...
miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2)
Fuse two problems into a single one. Problems can be either regular, or fused. No problems are dispos...
miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t *size)
Reads the expected size of a solution.
miopenTensorArgumentId_t
Definition: miopen.h:6153
miopenStatus_t miopenFindSolutions(miopenHandle_t handle, miopenProblem_t problem, miopenFindOptions_t options, miopenSolution_t *solutions, size_t *numSolutions, size_t maxSolutions)
Finds solutions to a problem by running different applicable solutions. Memory is automatically alloc...
miopenStatus_t miopenSetFindOptionTuning(miopenFindOptions_t options, int value)
Sets the tuning find option. Default value is zero.
miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution, size_t *workspaceSize)
Reads the amount of workspace required to execute the solution.
miopenStatus_t miopenSetFindOptionResultsOrder(miopenFindOptions_t options, miopenFindResultsOrder_t value)
Sets the results order find option. Default value is miopenFindResultsOrderByTime.
miopenStatus_t miopenRunSolution(miopenHandle_t handle, miopenSolution_t solution, size_t nInputs, const miopenTensorArgument_t *tensors, void *workspace, size_t workspaceSize)
Runs the solution using the passed in buffers.
miopenStatus_t miopenCreateMhaProblem(miopenProblem_t *problem, miopenMhaDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing a Mha operation.
miopenProblemDirection_t
Definition: miopen.h:6140
miopenStatus_t miopenCreateBatchnormProblem(miopenProblem_t *problem, miopenBatchNormMode_t mode, bool runningMeanVariance, miopenProblemDirection_t direction)
Initializes a problem object describing a batch normalization operation.
miopenStatus_t miopenCreateMhaDescriptor(miopenMhaDescriptor_t *mhaDesc)
Creates the mha descriptor object.
miopenStatus_t miopenDestroySolution(miopenSolution_t solution)
Destroys solution object.
miopenStatus_t miopenLoadSolution(miopenSolution_t *solution, const char *data, size_t size)
Loads solution object from binary data.
miopenStatus_t miopenSaveSolution(miopenSolution_t solution, char *data)
Saves a solution object as binary data.
miopenStatus_t miopenGetSolverIdConvAlgorithm(uint64_t solverId, miopenConvAlgorithm_t *result)
Gets the convolution algorithm implemented by a solver.
miopenStatus_t miopenSetFindOptionPreallocatedWorkspace(miopenFindOptions_t options, void *buffer, size_t size)
Attaches the preallocated workspace to find options. Allocated by the library by default.
miopenStatus_t miopenCreateActivationProblem(miopenProblem_t *problem, miopenActivationDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing an activation operation.
miopenStatus_t miopenDestroyProblem(miopenProblem_t problem)
Destroys a problem object.
miopenStatus_t miopenCreateFindOptions(miopenFindOptions_t *options)
Initializes miopenFindOptions object.
miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc, float *alpha, float *beta, miopenSoftmaxAlgorithm_t *algorithm, miopenSoftmaxMode_t *mode)
Gets the softmax layer descriptor details.
miopenStatus_t miopenCreateConvProblem(miopenProblem_t *problem, miopenConvolutionDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing a convolution operation.
miopenStatus_t miopenCreateBiasProblem(miopenProblem_t *problem, miopenProblemDirection_t direction)
Initializes a problem object describing a bias operation.
miopenStatus_t miopenSetProblemTensorDescriptor(miopenProblem_t problem, miopenTensorArgumentId_t id, const miopenTensorDescriptor_t descriptor)
Sets a tensor descriptor for the specified argument.
miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t *softmaxDesc)
Creates the Softmax descriptor object.
@ miopenFindResultsOrderByWorkspaceSize
Definition: miopen.h:6236
@ miopenFindResultsOrderByTime
Definition: miopen.h:6235
@ miopenMhaMaskCausal
Definition: miopen.h:6264
@ miopenMhaMaskNone
Definition: miopen.h:6263
@ miopenTensorMhaDescaleS
Definition: miopen.h:6165
@ miopenTensorMhaO
Definition: miopen.h:6171
@ miopenTensorMhaAmaxDV
Definition: miopen.h:6189
@ miopenTensorBatchnormScaleDiff
Definition: miopen.h:6212
@ miopenTensorMhaMask
Definition: miopen.h:6223
@ miopenTensorMhaDescaleDS
Definition: miopen.h:6179
@ miopenTensorMhaDropoutSeed
Definition: miopen.h:6169
@ miopenTensorBatchnormSavedMean
Definition: miopen.h:6209
@ miopenTensorActivationDY
Definition: miopen.h:6197
@ miopenTensorBatchnormDX
Definition: miopen.h:6217
@ miopenTensorMhaDescaleV
Definition: miopen.h:6164
@ miopenTensorMhaK
Definition: miopen.h:6159
@ miopenTensorConvolutionX
Definition: miopen.h:6155
@ miopenTensorMhaAmaxDK
Definition: miopen.h:6188
@ miopenTensorMhaScaleDS
Definition: miopen.h:6180
@ miopenTensorBatchnormBias
Definition: miopen.h:6215
@ miopenTensorMhaM
Definition: miopen.h:6174
@ miopenTensorBatchnormRunningMean
Definition: miopen.h:6207
@ miopenTensorMhaDropoutProbability
Definition: miopen.h:6168
@ miopenTensorSoftmaxY
Definition: miopen.h:6202
@ miopenTensorSoftmaxDY
Definition: miopen.h:6204
@ miopenTensorMhaDescaleO
Definition: miopen.h:6177
@ miopenTensorMhaScaleO
Definition: miopen.h:6167
@ miopenScalarBatchnormExpAvgFactor
Definition: miopen.h:6225
@ miopenTensorBatchnormScale
Definition: miopen.h:6211
@ miopenTensorBatchnormRunningVariance
Definition: miopen.h:6208
@ miopenTensorActivationDX
Definition: miopen.h:6196
@ miopenTensorMhaScaleDK
Definition: miopen.h:6182
@ miopenTensorMhaV
Definition: miopen.h:6161
@ miopenTensorMhaAmaxS
Definition: miopen.h:6173
@ miopenTensorBatchnormBiasDiff
Definition: miopen.h:6216
@ miopenTensorBatchnormSavedVariance
Definition: miopen.h:6210
@ miopenTensorActivationX
Definition: miopen.h:6194
@ miopenTensorMhaScaleDV
Definition: miopen.h:6183
@ miopenTensorMhaAmaxO
Definition: miopen.h:6172
@ miopenScalarBatchnormEpsilon
Definition: miopen.h:6226
@ miopenTensorMhaScaleDQ
Definition: miopen.h:6181
@ miopenTensorSoftmaxDX
Definition: miopen.h:6203
@ miopenTensorMhaAmaxDS
Definition: miopen.h:6190
@ miopenTensorMhaDV
Definition: miopen.h:6186
@ miopenTensorMhaQ
Definition: miopen.h:6160
@ miopenTensorMhaAmaxDQ
Definition: miopen.h:6187
@ miopenTensorConvolutionY
Definition: miopen.h:6157
@ miopenTensorBatchnormEstimatedMean
Definition: miopen.h:6213
@ miopenTensorBatchnormDY
Definition: miopen.h:6218
@ miopenTensorMhaZInv
Definition: miopen.h:6175
@ miopenTensorMhaDescaleQ
Definition: miopen.h:6163
@ miopenTensorMhaBias
Definition: miopen.h:6191
@ miopenTensorBatchnormEstimatedVariance
Definition: miopen.h:6214
@ miopenTensorArgumentIsScalar
Definition: miopen.h:6221
@ miopenTensorArgumentIdInvalid
Definition: miopen.h:6154
@ miopenTensorMhaDO
Definition: miopen.h:6176
@ miopenTensorMhaDescaleDO
Definition: miopen.h:6178
@ miopenTensorConvolutionW
Definition: miopen.h:6156
@ miopenTensorMhaDescaleK
Definition: miopen.h:6162
@ miopenTensorMhaDropoutOffset
Definition: miopen.h:6170
@ miopenTensorBiasY
Definition: miopen.h:6199
@ miopenTensorMhaDQ
Definition: miopen.h:6184
@ miopenTensorSoftmaxX
Definition: miopen.h:6201
@ miopenTensorBatchnormY
Definition: miopen.h:6206
@ miopenTensorMhaScaleS
Definition: miopen.h:6166
@ miopenTensorBias
Definition: miopen.h:6200
@ miopenTensorActivationY
Definition: miopen.h:6195
@ miopenTensorBatchnormX
Definition: miopen.h:6205
@ miopenTensorBiasX
Definition: miopen.h:6198
@ miopenTensorMhaDK
Definition: miopen.h:6185
@ miopenProblemDirectionBackwardWeights
Definition: miopen.h:6143
@ miopenProblemDirectionInference
Definition: miopen.h:6145
@ miopenProblemDirectionForward
Definition: miopen.h:6141
@ miopenProblemDirectionBackward
Definition: miopen.h:6142
miopenStatus_t miopenGetGetitemWorkspaceSize(miopenHandle_t handle, uint32_t indexCount, const miopenTensorDescriptor_t *indexDescs, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the getitem call.
miopenStatus_t miopenGetitemBackward(miopenHandle_t handle, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t dyDesc, const void *dy, uint32_t indexCount, const miopenTensorDescriptor_t *indexDescs, const void *const *indexs, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t errorDesc, void *error, uint32_t dimCount, const int32_t *dims, uint32_t sliceCount, const int32_t *slices, uint32_t offset)
Execute a getitem backward layer.
miopenStatus_t miopenGroupNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t biasDesc, const void *bias, const uint64_t num_groups, const float epsilon, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t meanDesc, void *mean, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute a groupnorm forward layer.
miopenStatus_t miopenCreateWithStream(miopenHandle_t *handle, miopenAcceleratorQueue_t stream)
Create a MIOpen handle with an accelerator stream.
miopenStatus_t miopenDestroy(miopenHandle_t handle)
Destroys the MIOpen handle.
miopenTuningPolicy_t
Definition: miopen.h:8021
miopenStatus_t miopenGetTuningPolicy(miopenHandle_t handle, miopenTuningPolicy_t *value)
Get tuning policy from a handle.
void(* miopenDeallocatorFunction)(void *context, void *memory)
Custom deallocator function.
Definition: miopen.h:144
miopenMathType_t
Definition: miopen.h:102
miopenStatus_t miopenGetStream(miopenHandle_t handle, miopenAcceleratorQueue_t *streamID)
Get the previously created accelerator command queue.
miopenStatus_t miopenEnableProfiling(miopenHandle_t handle, bool enable)
Enable profiling to retrieve kernel time.
miopenStatus_t miopenGetVersion(size_t *major, size_t *minor, size_t *patch)
Method to return version of MIOpen.
miopenStatus_t miopenSetTuningPolicy(miopenHandle_t handle, miopenTuningPolicy_t newValue)
Update tuning policy for a specific handle. API alternative for MIOPEN_FIND_ENFORCE environment varia...
miopenStatus_t
Definition: miopen.h:87
miopenStatus_t miopenSetAllocator(miopenHandle_t handle, miopenAllocatorFunction allocator, miopenDeallocatorFunction deallocator, void *allocatorContext)
Set allocator for previously created miopenHandle.
miopenF8RoundingMode_t
Definition: miopen.h:111
void *(* miopenAllocatorFunction)(void *context, size_t sizeBytes)
Custom allocator function.
Definition: miopen.h:134
const char * miopenGetErrorString(miopenStatus_t error)
Get character string for an error code.
miopenStatus_t miopenCreate(miopenHandle_t *handle)
Method to create the MIOpen handle object.
miopenStatus_t miopenGetKernelTime(miopenHandle_t handle, float *time)
Get time for last kernel launched.
miopenStatus_t miopenSetStream(miopenHandle_t handle, miopenAcceleratorQueue_t streamID)
Set accelerator command queue previously created.
@ miopenMathDefault
Definition: miopen.h:104
@ miopenMathPedantic
Definition: miopen.h:105
@ miopenStatusUnsupportedOp
Definition: miopen.h:96
@ miopenStatusGpuOperationsSkipped
Definition: miopen.h:97
@ miopenStatusUnknownError
Definition: miopen.h:95
@ miopenStatusSuccess
Definition: miopen.h:88
@ miopenStatusVersionMismatch
Definition: miopen.h:98
@ miopenStatusAllocFailed
Definition: miopen.h:92
@ miopenStatusNotImplemented
Definition: miopen.h:94
@ miopenStatusBadParm
Definition: miopen.h:91
@ miopenStatusNotInitialized
Definition: miopen.h:89
@ miopenStatusInternalError
Definition: miopen.h:93
@ miopenStatusInvalidValue
Definition: miopen.h:90
@ miopenF8RoundingModeStandard
Definition: miopen.h:112
@ miopenF8RoundingModeStochastic
Definition: miopen.h:113
miopenStatus_t miopenKthvalueForward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t outputDesc, void *output, miopenTensorDescriptor_t indicesDesc, size_t *indices, size_t k, int32_t dim=-1, bool keepDim=false)
Execute a Kthvalue forward layer.
miopenStatus_t miopenLayerNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t biasDesc, const void *bias, const float epsilon, const int32_t normalized_dim, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t meanDesc, void *mean, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute a layernorm forward layer.
miopenStatus_t miopenT5LayerNormBackward(miopenHandle_t handle, miopenNormMode_t mode, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t rstdDesc, const void *rstd, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t dwDesc, void *dw)
Execute a T5layernorm backward layer.
miopenStatus_t miopenLayerNormBackward(miopenHandle_t handle, miopenNormMode_t mode, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t meanDesc, const void *mean, const miopenTensorDescriptor_t rstdDesc, const void *rstd, const int32_t normalized_dim, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t dwDesc, void *dw, const miopenTensorDescriptor_t dbDesc, void *db)
Execute a layernorm backward layer.
miopenStatus_t miopenT5LayerNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const float epsilon, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute a T5layernorm forward layer.
miopenNormMode_t
Definition: miopen.h:471
miopenStatus_t miopenGetLayerNormBackwardWorkspaceSize(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenTensorDescriptor_t weightDesc, const miopenTensorDescriptor_t meanDesc, const miopenTensorDescriptor_t rstdDesc, const int32_t normalized_dim, const miopenTensorDescriptor_t dxDesc, const miopenTensorDescriptor_t dwDesc, const miopenTensorDescriptor_t dbDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the layernorm backward call.
miopenStatus_t miopenGetT5LayerNormBackwardWorkspaceSize(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenTensorDescriptor_t weightDesc, const miopenTensorDescriptor_t rstdDesc, const miopenTensorDescriptor_t dxDesc, const miopenTensorDescriptor_t dwDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the T5layernorm backward call.
miopenStatus_t miopenAddLayerNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t x2Desc, const void *x2, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t biasDesc, const void *bias, const float epsilon, const int32_t normalized_dim, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t meanDesc, void *mean, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute an add and layernorm forward layer.
miopenStatus_t miopenSet2dPoolingDescriptor(miopenPoolingDescriptor_t poolDesc, miopenPoolingMode_t mode, int windowHeight, int windowWidth, int pad_h, int pad_w, int stride_h, int stride_w)
Sets a 2-D pooling layer descriptor details.
miopenStatus_t miopenSetPoolingWorkSpaceIndexMode(miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t workspace_index)
Set workspace index mode for pooling layer. The default mode is miopenPoolingWorkspaceIndexMask.
miopenStatus_t miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, const miopenTensorDescriptor_t tensorDesc, int *n, int *c, int *h, int *w)
Gets the shape of the output tensor for 2-D pooling.
miopenStatus_t miopenPoolingGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Get the amount of GPU memory required for pooling.
miopenStatus_t miopenSetNdPoolingDescriptor(miopenPoolingDescriptor_t poolDesc, const miopenPoolingMode_t mode, int nbDims, const int *windowDimA, const int *padA, const int *stridesA)
Set details of an N-D pooling layer descriptor.
miopenStatus_t miopenPoolingForward(miopenHandle_t handle, const miopenPoolingDescriptor_t poolDesc, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, bool do_backward, void *workSpace, size_t workSpaceSize)
Execute a forward pooling layer.
miopenPoolingMode_t
Definition: miopen.h:438
miopenStatus_t miopenPoolingGetWorkSpaceSizeV2(const miopenPoolingDescriptor_t poolDesc, const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Get the amount of GPU memory required for pooling.
miopenStatus_t miopenGetPoolingWorkSpaceIndexMode(miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t *workspace_index)
Get workspace index mode for pooling layer.
miopenIndexType_t
Definition: miopen.h:391
miopenStatus_t miopenGetPoolingIndexType(miopenPoolingDescriptor_t poolDesc, miopenIndexType_t *index_type)
Get the index data type for pooling layer. The index type can be any of the miopenIndexType_t sizes; 8,...
miopenPoolingWorkspaceIndexMode_t
Definition: miopen.h:451
miopenStatus_t miopenGetPoolingNdForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, const miopenTensorDescriptor_t tensorDesc, int dims, int *tensorDimArr)
Gets the shape of the output tensor for N-D pooling.
miopenStatus_t miopenGetNdPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc, int nbDimsRequested, miopenPoolingMode_t *mode, int *nbDims, int *windowDimA, int *padA, int *stridesA)
Get details of an N-D pooling layer descriptor.
miopenStatus_t miopenCreatePoolingDescriptor(miopenPoolingDescriptor_t *poolDesc)
Creates a pooling layer descriptor.
miopenStatus_t miopenSetPoolingIndexType(miopenPoolingDescriptor_t poolDesc, miopenIndexType_t index_type)
Set index data type for pooling layer. The default indexing type is uint8_t. Users can set the index ...
miopenStatus_t miopenGet2dPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc, miopenPoolingMode_t *mode, int *windowHeight, int *windowWidth, int *pad_h, int *pad_w, int *stride_h, int *stride_w)
Gets a 2-D pooling layer descriptor details.
miopenStatus_t miopenDestroyPoolingDescriptor(miopenPoolingDescriptor_t poolDesc)
Destroys the pooling descriptor object.
miopenStatus_t miopenPoolingBackward(miopenHandle_t handle, const miopenPoolingDescriptor_t poolDesc, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, void *workSpace)
Execute a backward pooling layer.
miopenStatus_t miopenReduceCalculationForward(miopenHandle_t handle, miopenReduceCalculationNanPropagation_t nanPropagation, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t xDesc, const void *x, const int32_t dim, const miopenReduceCalculationOp_t reduceCalculationOp, const miopenTensorDescriptor_t reduceDesc, void *y)
Execute a reducecalculation forward layer.
miopenReduceCalculationOp_t
Definition: miopen.h:6667
miopenStatus_t miopenGetReduceCalculationWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const int32_t dim, const miopenReduceCalculationOp_t reduceCalculationOp, const miopenTensorDescriptor_t reduceDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the ReduceCalculation forward call.
@ MIOPEN_REDUCE_CALCULATION_PROD
Definition: miopen.h:6668
@ MIOPEN_REDUCE_CALCULATION_SUM
Definition: miopen.h:6670
miopenStatus_t miopenSoftmaxBackward_V2(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode)
Execute a softmax backwards layer with expanded modes and algorithms.
miopenStatus_t miopenSoftmaxForward(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Execute a softmax forward layer.
miopenStatus_t miopenSoftmaxForward_V2(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode)
Execute a softmax forward layer with expanded modes and algorithms.
miopenStatus_t miopenSoftmaxBackward(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx)
Execute a softmax backwards layer.
miopenSoftmaxMode_t
Definition: miopen.h:534
miopenSoftmaxAlgorithm_t
Definition: miopen.h:523
miopenStatus_t miopenDestroySeqTensorDescriptor(miopenSeqTensorDescriptor_t tensorDesc)
Destroys the sequence data tensor descriptor.
miopenStatus_t miopenGetTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t *dataType, int *dimsA, int *stridesA)
Get the details of the N-dimensional tensor descriptor.
miopenStatus_t miopenSetNdTensorDescriptorWithLayout(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, miopenTensorLayout_t tensorLayout, const int *lens, int num_lens)
Set shape of ND tensor with specific layout.
miopenStatus_t miopenSetTensor(miopenHandle_t handle, const miopenTensorDescriptor_t yDesc, void *y, const void *alpha)
Fills a tensor with a single value.
miopenStatus_t miopenOpTensor(miopenHandle_t handle, miopenTensorOp_t tensorOp, const void *alpha1, const miopenTensorDescriptor_t aDesc, const void *A, const void *alpha2, const miopenTensorDescriptor_t bDesc, const void *B, const void *beta, const miopenTensorDescriptor_t cDesc, void *C)
Execute element-wise tensor operations.
miopenStatus_t miopenGetTensorNumBytes(miopenTensorDescriptor_t tensorDesc, size_t *numBytes)
Returns number of bytes associated with tensor descriptor.
miopenStatus_t miopenGet4dTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t *dataType, int *n, int *c, int *h, int *w, int *nStride, int *cStride, int *hStride, int *wStride)
Get the details of the tensor descriptor.
miopenTensorLayout_t
Definition: miopen.h:374
miopenStatus_t miopenTransformTensor(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Copies one tensor to another tensor with a different layout/scale.
miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *tensorDesc)
Create a Tensor Descriptor.
miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int nbDims, const size_t *dimsA, const size_t *stridesA)
Set shape of N-dimensional tensor.
miopenStatus_t miopenScaleTensor(miopenHandle_t handle, const miopenTensorDescriptor_t yDesc, void *y, const void *alpha)
Scales all elements in a tensor by a single value.
miopenStatus_t miopenSetTensorCastType(miopenTensorDescriptor_t tensorDesc, miopenDataType_t cast_type)
Set the tensor cast type.
miopenStatus_t miopenSet4dTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int n, int c, int h, int w)
Set shape of 4D tensor.
miopenStatus_t miopenSet4dTensorDescriptorEx(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int n, int c, int h, int w, int nStride, int cStride, int hStride, int wStride)
Set shape and stride of 4D tensor.
miopenDataType_t
Definition: miopen.h:354
miopenStatus_t miopenCreateSeqTensorDescriptor(miopenSeqTensorDescriptor_t *tensorDesc)
Create a Tensor Descriptor for sequence data.
miopenTensorOp_t
Definition: miopen.h:403
miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t tensorDesc, int *size)
Query the size of a tensor descriptor.
miopenStatus_t miopenDestroyTensorDescriptor(miopenTensorDescriptor_t tensorDesc)
Destroys the tensor descriptor.
miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int nbDims, const int *dimsA, const int *stridesA)
Set shape of N-dimensional tensor.
miopenPaddingMode_t
Definition: miopen.h:427
@ miopenPaddingDefault
Definition: miopen.h:428
@ miopenPaddingSame
Definition: miopen.h:429
@ miopenPaddingValid
Definition: miopen.h:430
miopenReduceCalculationNanPropagation_t
Definition: miopen.h:6651
@ MIOPEN_REDUCE_CALCULATION_PROPAGATE_NAN
Definition: miopen.h:6653
@ MIOPEN_REDUCE_CALCULATION_NOT_PROPAGATE_NAN
Definition: miopen.h:6652
miopenAlphaBetaCase_t
Enum for specifying the alpha-beta case for convolution operations.
Definition: miopen.h:6974
@ SCALE
Definition: miopen.h:6981
@ BILINEAR
Definition: miopen.h:6982
@ DEFAULT
Definition: miopen.h:6980
@ ERROR_STATE
Definition: miopen.h:6983
#define MIOPEN_DECLARE_OBJECT(name)
Definition: miopen.h:57
miopenNanPropagation_t
Definition: miopen.h:579
@ MIOPEN_PROPAGATE_NAN
Definition: miopen.h:581
@ MIOPEN_NOT_PROPAGATE_NAN
Definition: miopen.h:580
@ MIOPEN_LOSS_REDUCTION_MEAN
Definition: miopen.h:7841
@ MIOPEN_LOSS_REDUCTION_SUM
Definition: miopen.h:7840
@ MIOPEN_LOSS_REDUCTION_NONE
Definition: miopen.h:7839
@ miopenTuningPolicyDbClean
Definition: miopen.h:8027
@ miopenTuningPolicyDbUpdate
Definition: miopen.h:8023
@ miopenTuningPolicyNone
Definition: miopen.h:8022
@ miopenTuningPolicySearch
Definition: miopen.h:8024
@ miopenTuningPolicySearchDbUpdate
Definition: miopen.h:8026
@ miopenPoolingAverage
Definition: miopen.h:440
@ miopenPoolingAverageInclusive
Definition: miopen.h:441
@ miopenPoolingMax
Definition: miopen.h:439
@ miopenTensorCHWN
Definition: miopen.h:377
@ miopenTensorNHWC
Definition: miopen.h:376
@ miopenTensorNDHWC
Definition: miopen.h:383
@ miopenTensorNCHW
Definition: miopen.h:375
@ miopenTensorCHWNc4
Definition: miopen.h:380
@ miopenTensorNCHWc8
Definition: miopen.h:379
@ miopenTensorNCDHW
Definition: miopen.h:382
@ miopenTensorCHWNc8
Definition: miopen.h:381
@ miopenTensorNCHWc4
Definition: miopen.h:378
@ MIOPEN_REDUCE_TENSOR_FLATTENED_INDICES
Definition: miopen.h:591
@ MIOPEN_REDUCE_TENSOR_NO_INDICES
Definition: miopen.h:590
@ miopenActivationLEAKYRELU
Definition: miopen.h:510
@ miopenActivationPASTHRU
Definition: miopen.h:501
@ miopenActivationABS
Definition: miopen.h:506
@ miopenActivationLOGISTIC
Definition: miopen.h:502
@ miopenActivationCLIPPEDRELU
Definition: miopen.h:508
@ miopenActivationRELU
Definition: miopen.h:504
@ miopenActivationPOWER
Definition: miopen.h:507
@ miopenActivationELU
Definition: miopen.h:512
@ miopenActivationCLAMP
Definition: miopen.h:515
@ miopenActivationSOFTRELU
Definition: miopen.h:505
@ miopenActivationTANH
Definition: miopen.h:503
@ MIOPEN_REDUCE_TENSOR_MUL
Definition: miopen.h:556
@ MIOPEN_REDUCE_TENSOR_MAX
Definition: miopen.h:560
@ MIOPEN_REDUCE_TENSOR_AVG
Definition: miopen.h:564
@ MIOPEN_REDUCE_TENSOR_NORM1
Definition: miopen.h:566
@ MIOPEN_REDUCE_TENSOR_AMAX
Definition: miopen.h:562
@ MIOPEN_REDUCE_TENSOR_MIN
Definition: miopen.h:558
@ MIOPEN_REDUCE_TENSOR_ADD
Definition: miopen.h:555
@ MIOPEN_REDUCE_TENSOR_NORM2
Definition: miopen.h:568
@ MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC
Definition: miopen.h:616
@ MIOPEN_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE
Definition: miopen.h:620
@ MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE
Definition: miopen.h:629
@ MIOPEN_CONVOLUTION_ATTRIB_FP16_ALT_IMPL
Definition: miopen.h:612
@ miopenIndexUint16
Definition: miopen.h:393
@ miopenIndexUint64
Definition: miopen.h:395
@ miopenIndexUint32
Definition: miopen.h:394
@ miopenIndexUint8
Definition: miopen.h:392
@ miopenFloat
Definition: miopen.h:356
@ miopenBFloat8_fnuz
Definition: miopen.h:364
@ miopenBFloat16
Definition: miopen.h:360
@ miopenInt8
Definition: miopen.h:358
@ miopenInt32
Definition: miopen.h:357
@ miopenHalf
Definition: miopen.h:355
@ miopenInt64
Definition: miopen.h:365
@ miopenDouble
Definition: miopen.h:362
@ miopenFloat8_fnuz
Definition: miopen.h:363
@ MIOPEN_8BIT_INDICES
Definition: miopen.h:603
@ MIOPEN_32BIT_INDICES
Definition: miopen.h:600
@ MIOPEN_16BIT_INDICES
Definition: miopen.h:602
@ MIOPEN_64BIT_INDICES
Definition: miopen.h:601
@ miopenPoolingWorkspaceIndexImage
Definition: miopen.h:453
@ miopenPoolingWorkspaceIndexMask
Definition: miopen.h:452
@ miopenDepthwise
Definition: miopen.h:419
@ miopenGroupConv
Definition: miopen.h:418
@ miopenTranspose
Definition: miopen.h:417
@ miopenConvolution
Definition: miopen.h:416
@ MIOPEN_ELEMENTWISE_AFFINE_FUSED_ADD
Definition: miopen.h:475
@ MIOPEN_WEIGHT_BIAS
Definition: miopen.h:473
@ MIOPEN_ELEMENTWISE_AFFINE
Definition: miopen.h:472
@ MIOPEN_WEIGHT_BIAS_T5
Definition: miopen.h:481
@ MIOPEN_ELEMENTWISE_AFFINE_T5
Definition: miopen.h:479
@ MIOPEN_WEIGHT_BIAS_FUSED_ADD
Definition: miopen.h:477
@ MIOPEN_REDUCE_EXTREME_ARGMAX
Definition: miopen.h:6730
@ MIOPEN_REDUCE_EXTREME_MIN
Definition: miopen.h:6732
@ MIOPEN_REDUCE_EXTREME_MAX
Definition: miopen.h:6734
@ MIOPEN_REDUCE_EXTREME_ARGMIN
Definition: miopen.h:6728
@ miopenTensorOpMin
Definition: miopen.h:406
@ miopenTensorOpAdd
Definition: miopen.h:404
@ miopenTensorOpMul
Definition: miopen.h:405
@ miopenTensorOpMax
Definition: miopen.h:407
@ miopenBNSpatial
Definition: miopen.h:492
@ miopenBNPerActivation
Definition: miopen.h:491
@ miopenConvolutionFindModeDynamicHybrid
Definition: miopen.h:652
@ miopenConvolutionFindModeDefault
Definition: miopen.h:659
@ miopenConvolutionFindModeTrustVerifyFull
Definition: miopen.h:658
@ miopenConvolutionFindModeTrustVerify
Definition: miopen.h:657
@ miopenConvolutionFindModeHybrid
Definition: miopen.h:647
@ miopenConvolutionFindModeFast
Definition: miopen.h:643
@ miopenConvolutionFindModeNormal
Definition: miopen.h:641
@ miopenLRNWithinChannel
Definition: miopen.h:462
@ miopenLRNCrossChannel
Definition: miopen.h:463
@ MIOPEN_SOFTMAX_MODE_INSTANCE
Definition: miopen.h:535
@ MIOPEN_SOFTMAX_MODE_CHANNEL
Definition: miopen.h:536
@ MIOPEN_SOFTMAX_FAST
Definition: miopen.h:524
@ MIOPEN_SOFTMAX_ACCURATE
Definition: miopen.h:525
@ MIOPEN_SOFTMAX_LOG
Definition: miopen.h:526
Perf struct for forward, backward filter, or backward data algorithms.
Definition: miopen.h:1269
miopenConvFwdAlgorithm_t fwd_algo
Definition: miopen.h:1272
miopenConvBwdDataAlgorithm_t bwd_data_algo
Definition: miopen.h:1276
size_t memory
Definition: miopen.h:1280
miopenConvBwdWeightsAlgorithm_t bwd_weights_algo
Definition: miopen.h:1273
float time
Definition: miopen.h:1279
Performance struct for forward, backward filter, or backward data algorithms in immediate mode.
Definition: miopen.h:1293
miopenConvAlgorithm_t algorithm
Definition: miopen.h:1300
uint64_t solution_id
Definition: miopen.h:1299
size_t workspace_size
Definition: miopen.h:1297
float time
Definition: miopen.h:1294
Values of a tensor or scalar argument for the miopenRunSolution function.
Definition: miopen.h:6465
miopenTensorArgumentId_t id
Definition: miopen.h:6468
void * buffer
Definition: miopen.h:6478
miopenTensorDescriptor_t * descriptor
Definition: miopen.h:6474