/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-miopen/checkouts/develop/projects/miopen/include/miopen/miopen.h Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-miopen/checkouts/develop/projects/miopen/include/miopen/miopen.h Source File

MIOpen: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-miopen/checkouts/develop/projects/miopen/include/miopen/miopen.h Source File
miopen.h
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #ifndef MIOPEN_GUARD_MIOPEN_H_
5 #define MIOPEN_GUARD_MIOPEN_H_
6 
7 #ifdef __clang__
8 #pragma clang diagnostic push
9 #pragma clang diagnostic ignored "-Wextern-c-compat"
10 #endif
11 
12 #include <stddef.h>
13 #include <stdbool.h>
14 #include <miopen/config.h>
15 #include <miopen/export.h>
16 
17 #if MIOPEN_BACKEND_OPENCL
18 #define CL_TARGET_OPENCL_VERSION 120
19 #if defined(__APPLE__) || defined(__MACOSX)
20 #include <OpenCL/cl.h>
21 #else
22 #define CL_USE_DEPRECATED_OPENCL_1_2_APIS
23 #include <CL/cl.h>
24 #endif
25 
26 #elif MIOPEN_BACKEND_HIP
27 #include <hip/hip_runtime_api.h>
28 #endif
29 
30 /*
31  * @defgroup convolutions
32  * @defgroup pooling
33  * @defgroup handle
34  * @defgroup layernorm
35  * @defgroup LRN
36  * @defgroup batchnorm
37  * @defgroup activation
38  * @defgroup tensor
39  * @defgroup softmax
40  * @defgroup RNN
41  * @defgroup fusion
42  * @defgroup LossFunction
43  * @defgroup TensorReduce
44  * @defgroup find2
45  * @defgroup ReduceExtreme
46  * @defgroup groupnorm
47  * @defgroup cat
48  * @defgroup SGD
49  * @defgroup getitem
50  * @defgroup ReduceCalculation
51  * @defgroup RotaryPositionalEmbeddings
52  * @defgroup ReLU
53  *
54  */
55 
/*!
 * Declares an opaque object type. The expansion defines an empty `struct name`
 * and then `typedef struct name* name##_t`, so `name##_t` is a pointer type.
 * NOTE(review): an empty struct is not valid ISO C and has a different size in
 * C and C++; this is presumably why -Wextern-c-compat is silenced near the top
 * of the file - confirm before changing the struct body.
 */
57 #define MIOPEN_DECLARE_OBJECT(name) \
58  struct name \
59  { \
60  }; \
61  typedef struct name* name##_t;
62 
63 #ifdef __cplusplus
64 extern "C" {
65 #endif
66 
/*!
 * Backend-selected queue type: an alias of cl_command_queue when
 * MIOPEN_BACKEND_OPENCL is true, otherwise of hipStream_t when
 * MIOPEN_BACKEND_HIP is true. If neither macro is true no typedef is emitted,
 * and later uses of miopenAcceleratorQueue_t will not compile.
 */
67 #if MIOPEN_BACKEND_OPENCL
68 typedef cl_command_queue miopenAcceleratorQueue_t;
69 #elif MIOPEN_BACKEND_HIP
70 typedef hipStream_t miopenAcceleratorQueue_t;
71 #endif
72 
/*! Declares the empty `struct miopenHandle` and the pointer typedef
 * miopenHandle_t through the MIOPEN_DECLARE_OBJECT macro. */
76 MIOPEN_DECLARE_OBJECT(miopenHandle);
77 
86 typedef enum
87 {
100 
101 typedef enum
102 {
103  // TODO:(LYM) temporarily use Pedantic as default until TF32 is fully supported
106  1,
108 
109 #ifdef MIOPEN_BETA_API
110 typedef enum
111 {
115 #endif
116 
/*!
 * Exported function taking a miopenStatus_t by value and returning a
 * `const char*`.
 * NOTE(review): the name suggests it maps a status code to a readable string;
 * who owns the returned pointer is not visible here - confirm.
 *
 * @param error  Status value to look up
 * @return       Pointer to constant character data
 */
124 MIOPEN_EXPORT const char* miopenGetErrorString(miopenStatus_t error);
125 
/*!
 * Function-pointer type for a user-supplied allocator callback.
 *
 * @param context    Opaque caller-supplied pointer passed through to the callback
 * @param sizeBytes  Size argument (by its name, a byte count - confirm)
 * @return           `void*`; NOTE(review): presumably the allocated memory, the
 *                   failure convention is not visible here
 */
134 typedef void* (*miopenAllocatorFunction)(void* context, size_t sizeBytes);
135 
/*!
 * Function-pointer type for a user-supplied deallocator callback; it returns
 * nothing.
 *
 * @param context  Opaque caller-supplied pointer passed through to the callback
 * @param memory   Pointer argument (presumably memory obtained from the matching
 *                 miopenAllocatorFunction - confirm)
 */
144 typedef void (*miopenDeallocatorFunction)(void* context, void* memory);
145 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the three non-const size_t pointers are presumably outputs that
 * receive the library version components - confirm, including whether NULL is
 * accepted.
 *
 * @param major  Pointer to size_t
 * @param minor  Pointer to size_t
 * @param patch  Pointer to size_t
 * @return       miopenStatus_t
 */
159 MIOPEN_EXPORT miopenStatus_t miopenGetVersion(size_t* major, size_t* minor, size_t* patch);
160 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it creates a library handle and stores it
 * through `handle` - confirm.
 *
 * @param handle  Pointer to a miopenHandle_t (non-const, presumably an output)
 * @return        miopenStatus_t
 */
169 MIOPEN_EXPORT miopenStatus_t miopenCreate(miopenHandle_t* handle);
170 
/*!
 * Exported function returning a miopenStatus_t; like miopenCreate but also takes
 * a queue by value.
 * NOTE(review): presumably the new handle is bound to `stream`; whether the
 * handle takes ownership of the queue is not visible here - confirm.
 *
 * @param handle  Pointer to a miopenHandle_t (non-const, presumably an output)
 * @param stream  Queue of the backend-selected type miopenAcceleratorQueue_t
 * @return        miopenStatus_t
 */
182 MIOPEN_EXPORT miopenStatus_t miopenCreateWithStream(miopenHandle_t* handle,
183  miopenAcceleratorQueue_t stream);
184 
/*!
 * Exported function taking a handle by value and returning a miopenStatus_t.
 * NOTE(review): the name suggests it releases the handle; because the handle is
 * passed by value, the caller's copy cannot be reset by this call.
 *
 * @param handle  Handle value
 * @return        miopenStatus_t
 */
191 MIOPEN_EXPORT miopenStatus_t miopenDestroy(miopenHandle_t handle);
192 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it associates `streamID` with `handle` -
 * confirm.
 *
 * @param handle    Handle value
 * @param streamID  Queue of the backend-selected type miopenAcceleratorQueue_t
 * @return          miopenStatus_t
 */
200 MIOPEN_EXPORT miopenStatus_t miopenSetStream(miopenHandle_t handle,
201  miopenAcceleratorQueue_t streamID);
202 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSetStream.
 *
 * @param handle    Handle value
 * @param streamID  Pointer to a miopenAcceleratorQueue_t (non-const, presumably
 *                  receives the handle's queue - confirm)
 * @return          miopenStatus_t
 */
210 MIOPEN_EXPORT miopenStatus_t miopenGetStream(miopenHandle_t handle,
211  miopenAcceleratorQueue_t* streamID);
212 
/*!
 * Exported function returning a miopenStatus_t; takes an allocator callback, a
 * deallocator callback and an opaque context pointer.
 * NOTE(review): `allocatorContext` is presumably what the callbacks receive as
 * their `context` argument; whether NULL callbacks restore a default is not
 * visible here - confirm.
 *
 * @param handle            Handle value
 * @param allocator         Callback of type miopenAllocatorFunction
 * @param deallocator       Callback of type miopenDeallocatorFunction
 * @param allocatorContext  Opaque pointer supplied by the caller
 * @return                  miopenStatus_t
 */
229 MIOPEN_EXPORT miopenStatus_t miopenSetAllocator(miopenHandle_t handle,
230  miopenAllocatorFunction allocator,
231  miopenDeallocatorFunction deallocator,
232  void* allocatorContext);
233 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the unit of the value and whether profiling must be enabled
 * first are not visible here - confirm.
 *
 * @param handle  Handle value
 * @param time    Pointer to float (non-const, presumably receives a kernel time)
 * @return        miopenStatus_t
 */
245 MIOPEN_EXPORT miopenStatus_t miopenGetKernelTime(miopenHandle_t handle, float* time);
246 
/*!
 * Exported function returning a miopenStatus_t. `bool` comes from <stdbool.h>,
 * which this header includes.
 *
 * @param handle  Handle value
 * @param enable  Boolean flag (presumably true turns profiling on - confirm)
 * @return        miopenStatus_t
 */
254 MIOPEN_EXPORT miopenStatus_t miopenEnableProfiling(miopenHandle_t handle, bool enable);
256 // CLOSEOUT HANDLE DOXYGEN GROUP
257 
/*! Declares the empty `struct miopenFusionOpDescriptor` and the pointer typedef
 * miopenFusionOpDescriptor_t through MIOPEN_DECLARE_OBJECT. */
265 MIOPEN_DECLARE_OBJECT(miopenFusionOpDescriptor);
266 
/*! Declares the empty `struct miopenTensorDescriptor` and the pointer typedef
 * miopenTensorDescriptor_t through MIOPEN_DECLARE_OBJECT. */
274 MIOPEN_DECLARE_OBJECT(miopenTensorDescriptor);
275 
/*! Declares the empty `struct miopenSeqTensorDescriptor` and the pointer typedef
 * miopenSeqTensorDescriptor_t through MIOPEN_DECLARE_OBJECT. */
282 MIOPEN_DECLARE_OBJECT(miopenSeqTensorDescriptor);
283 
/*! Declares the empty `struct miopenConvolutionDescriptor` and the pointer
 * typedef miopenConvolutionDescriptor_t through MIOPEN_DECLARE_OBJECT. */
291 MIOPEN_DECLARE_OBJECT(miopenConvolutionDescriptor);
292 
/*! Declares the empty `struct miopenPoolingDescriptor` and the pointer typedef
 * miopenPoolingDescriptor_t through MIOPEN_DECLARE_OBJECT. */
300 MIOPEN_DECLARE_OBJECT(miopenPoolingDescriptor);
301 
/*! Declares the empty `struct miopenLRNDescriptor` and the pointer typedef
 * miopenLRNDescriptor_t through MIOPEN_DECLARE_OBJECT. */
309 MIOPEN_DECLARE_OBJECT(miopenLRNDescriptor);
310 
/*! Declares the empty `struct miopenActivationDescriptor` and the pointer
 * typedef miopenActivationDescriptor_t through MIOPEN_DECLARE_OBJECT. */
317 MIOPEN_DECLARE_OBJECT(miopenActivationDescriptor);
318 
/*! Declares the empty `struct miopenRNNDescriptor` and the pointer typedef
 * miopenRNNDescriptor_t through MIOPEN_DECLARE_OBJECT. */
322 MIOPEN_DECLARE_OBJECT(miopenRNNDescriptor);
323 
/*! Declares the empty `struct miopenCTCLossDescriptor` and the pointer typedef
 * miopenCTCLossDescriptor_t through MIOPEN_DECLARE_OBJECT. */
327 MIOPEN_DECLARE_OBJECT(miopenCTCLossDescriptor);
328 
/*! Declares the empty `struct miopenDropoutDescriptor` and the pointer typedef
 * miopenDropoutDescriptor_t through MIOPEN_DECLARE_OBJECT. */
332 MIOPEN_DECLARE_OBJECT(miopenDropoutDescriptor);
333 
/*! Declares the empty `struct miopenReduceTensorDescriptor` and the pointer
 * typedef miopenReduceTensorDescriptor_t through MIOPEN_DECLARE_OBJECT. */
337 MIOPEN_DECLARE_OBJECT(miopenReduceTensorDescriptor);
338 
/*! Declares the empty `struct miopenMhaDescriptor` and the pointer typedef
 * miopenMhaDescriptor_t through MIOPEN_DECLARE_OBJECT. */
342 MIOPEN_DECLARE_OBJECT(miopenMhaDescriptor);
343 
/*! Declares the empty `struct miopenSoftmaxDescriptor` and the pointer typedef
 * miopenSoftmaxDescriptor_t through MIOPEN_DECLARE_OBJECT. */
347 MIOPEN_DECLARE_OBJECT(miopenSoftmaxDescriptor);
348 
353 typedef enum
354 {
359  // miopenInt8x4 = 4, /*!< Pack of 4x Int8 in NCHW_VECT_C format (Support discontinued) */
360  miopenBFloat16 = 5,
367 
373 typedef enum
374 {
385 
390 typedef enum
391 {
397 
402 typedef enum
403 {
409 
414 typedef enum
415 {
421 
426 typedef enum
427 {
432 
437 typedef enum
438 {
443 
450 typedef enum
451 {
455 
460 typedef enum
461 {
465 #ifdef MIOPEN_BETA_API
470 typedef enum
471 {
474  1,
476  2,
480  4,
484 #endif
489 typedef enum
490 {
494 
499 typedef enum
500 {
509  7,
511  8,
513  9,
517 
522 typedef enum
523 {
528 
533 typedef enum
534 {
537  1,
539 
547 #define MIOPEN_API_VERSION_REDUCE_TENSOR 1
548 
553 typedef enum
554 {
557  1,
559  2,
561  3,
563  4,
565  5,
567  6,
570  // MIOPEN_REDUCE_TENSOR_MUL_NO_ZEROS =
571  // 8, /*!< the operation is same as MUL, but does not have the zero values considered */
573 
578 typedef enum
579 {
583 
588 typedef enum
589 {
593 
598 typedef enum
599 {
605 
610 typedef enum
611 {
613  0,
617  1,
619 #ifdef MIOPEN_BETA_API
621  2,
625 #else
626 // miopenReserved1 = 2,
627 #endif
628  // TODO:(LYM) temporarily use Pedantic as default until TF32 is fully supported
630  3,
632 
639 typedef enum
640 {
642  1,
644  2,
648  3,
651  // miopenConvolutionFindModeReserved_4 = 4, /*!< Reserved - do not use */
653  5,
662 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it creates a tensor descriptor and stores it
 * through `tensorDesc` - confirm.
 *
 * @param tensorDesc  Pointer to a miopenTensorDescriptor_t (non-const,
 *                    presumably an output)
 * @return            miopenStatus_t
 */
674 MIOPEN_EXPORT miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t* tensorDesc);
675 
689  miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int n, int c, int h, int w);
690 
/*!
 * Exported function returning a miopenStatus_t; configures a tensor descriptor
 * from a data type, a layout enum and an array of lengths.
 * NOTE(review): `num_lens` is presumably the element count of `lens`; how the
 * strides follow from `tensorLayout` is not visible here - confirm.
 *
 * @param tensorDesc    Tensor descriptor handle
 * @param dataType      miopenDataType_t value
 * @param tensorLayout  miopenTensorLayout_t value
 * @param lens          Pointer to const int, not written through this pointer
 * @param num_lens      int count
 * @return              miopenStatus_t
 */
701 MIOPEN_EXPORT miopenStatus_t
702 miopenSetNdTensorDescriptorWithLayout(miopenTensorDescriptor_t tensorDesc,
703  miopenDataType_t dataType,
704  miopenTensorLayout_t tensorLayout,
705  const int* lens,
706  int num_lens);
/*!
 * Exported function returning a miopenStatus_t; sets a tensor descriptor from a
 * data type, four int extents (n, c, h, w) and four int strides.
 * NOTE(review): the stride unit (elements or bytes) and any validation are not
 * visible here - confirm.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param dataType    miopenDataType_t value
 * @param n,c,h,w     int extents, one per named dimension
 * @param nStride,cStride,hStride,wStride  int strides, one per named dimension
 * @return            miopenStatus_t
 */
726 MIOPEN_EXPORT miopenStatus_t miopenSet4dTensorDescriptorEx(miopenTensorDescriptor_t tensorDesc,
727  miopenDataType_t dataType,
728  int n,
729  int c,
730  int h,
731  int w,
732  int nStride,
733  int cStride,
734  int hStride,
735  int wStride);
736 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSet4dTensorDescriptorEx. Every argument after `tensorDesc` is a
 * non-const pointer, presumably an output.
 * NOTE(review): whether NULL may be passed to skip a field is not visible here.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param dataType    Pointer to miopenDataType_t
 * @param n,c,h,w     Pointers to int, one per named dimension
 * @param nStride,cStride,hStride,wStride  Pointers to int, one per named dimension
 * @return            miopenStatus_t
 */
753 MIOPEN_EXPORT miopenStatus_t miopenGet4dTensorDescriptor(miopenTensorDescriptor_t tensorDesc,
754  miopenDataType_t* dataType,
755  int* n,
756  int* c,
757  int* h,
758  int* w,
759  int* nStride,
760  int* cStride,
761  int* hStride,
762  int* wStride);
763 
/*!
 * Exported function returning a miopenStatus_t; sets a tensor descriptor from a
 * data type, a dimension count and two const int arrays.
 * NOTE(review): both arrays presumably hold `nbDims` entries; whether
 * `stridesA` may be NULL is not visible here - confirm.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param dataType    miopenDataType_t value
 * @param nbDims      int dimension count
 * @param dimsA       Pointer to const int, not written through this pointer
 * @param stridesA    Pointer to const int, not written through this pointer
 * @return            miopenStatus_t
 */
774 MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t tensorDesc,
775  miopenDataType_t dataType,
776  int nbDims,
777  const int* dimsA,
778  const int* stridesA);
779 
/*!
 * Declared only when MIOPEN_BETA_API is defined. Same parameter list as
 * miopenSetTensorDescriptor, except that `dimsA` and `stridesA` point to const
 * size_t instead of const int; `nbDims` is still an int.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param dataType    miopenDataType_t value
 * @param nbDims      int dimension count
 * @param dimsA       Pointer to const size_t, not written through this pointer
 * @param stridesA    Pointer to const size_t, not written through this pointer
 * @return            miopenStatus_t
 */
780 #ifdef MIOPEN_BETA_API
783 MIOPEN_EXPORT miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t tensorDesc,
784  miopenDataType_t dataType,
785  int nbDims,
786  const size_t* dimsA,
787  const size_t* stridesA);
788 #endif
789 
/*!
 * Declared only when MIOPEN_BETA_API is defined. Exported function returning a
 * miopenStatus_t.
 * NOTE(review): the name suggests it records a cast data type on the
 * descriptor; how that differs from the descriptor's own data type is not
 * visible here - confirm.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param cast_type   miopenDataType_t value
 * @return            miopenStatus_t
 */
790 #ifdef MIOPEN_BETA_API
800 MIOPEN_EXPORT miopenStatus_t miopenSetTensorCastType(miopenTensorDescriptor_t tensorDesc,
801  miopenDataType_t cast_type);
802 #endif
803 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): "size" is presumably the number of dimensions, not a byte count
 * (miopenGetTensorNumBytes reports into a size_t instead) - confirm.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param size        Pointer to int (non-const, presumably an output)
 * @return            miopenStatus_t
 */
812 MIOPEN_EXPORT miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t tensorDesc,
813  int* size);
814 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSetTensorDescriptor.
 * NOTE(review): no capacity argument is passed for `dimsA` or `stridesA`, so
 * the caller must presumably size them beforehand (for example with
 * miopenGetTensorDescriptorSize) - confirm.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param dataType    Pointer to miopenDataType_t (non-const, presumably an output)
 * @param dimsA       Pointer to int (non-const, presumably an output array)
 * @param stridesA    Pointer to int (non-const, presumably an output array)
 * @return            miopenStatus_t
 */
823 MIOPEN_EXPORT miopenStatus_t miopenGetTensorDescriptor(miopenTensorDescriptor_t tensorDesc,
824  miopenDataType_t* dataType,
825  int* dimsA,
826  int* stridesA);
827 
/*!
 * Exported function taking a tensor descriptor handle by value and returning a
 * miopenStatus_t.
 * NOTE(review): the name suggests it releases the descriptor; the caller's
 * copy of the handle cannot be reset by this call.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @return            miopenStatus_t
 */
833 MIOPEN_EXPORT miopenStatus_t miopenDestroyTensorDescriptor(miopenTensorDescriptor_t tensorDesc);
834 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it creates a sequence tensor descriptor and
 * stores it through `tensorDesc` - confirm.
 *
 * @param tensorDesc  Pointer to a miopenSeqTensorDescriptor_t (non-const,
 *                    presumably an output)
 * @return            miopenStatus_t
 */
841 MIOPEN_EXPORT miopenStatus_t
842 miopenCreateSeqTensorDescriptor(miopenSeqTensorDescriptor_t* tensorDesc);
843 
/*!
 * Exported function taking a sequence tensor descriptor handle by value and
 * returning a miopenStatus_t.
 * NOTE(review): the name suggests it releases the descriptor - confirm.
 *
 * @param tensorDesc  Sequence tensor descriptor handle
 * @return            miopenStatus_t
 */
849 MIOPEN_EXPORT miopenStatus_t
850 miopenDestroySeqTensorDescriptor(miopenSeqTensorDescriptor_t tensorDesc);
851 
/*!
 * Exported function returning a miopenStatus_t. It takes an operation selector,
 * three scalar pointers (alpha1, alpha2, beta) and three descriptor/buffer
 * pairs (aDesc/A, bDesc/B, cDesc/C). A and B are pointers to const; C is the
 * only non-const buffer, so presumably the output.
 * NOTE(review): the exact formula combining the scalars and the memory space
 * of the buffers are not visible here - confirm.
 * The `const` on the descriptor parameters qualifies the handle value only
 * (the typedef is a pointer), not the object it refers to.
 *
 * @param handle          Library handle
 * @param tensorOp        miopenTensorOp_t selector
 * @param alpha1,alpha2   Pointers to const scalar data of unspecified type
 * @param beta            Pointer to const scalar data of unspecified type
 * @param aDesc,bDesc,cDesc  Tensor descriptor handles
 * @param A,B             Pointers to const buffer data
 * @param C               Pointer to non-const buffer data
 * @return                miopenStatus_t
 */
871 MIOPEN_EXPORT miopenStatus_t miopenOpTensor(miopenHandle_t handle,
872  miopenTensorOp_t tensorOp,
873  const void* alpha1,
874  const miopenTensorDescriptor_t aDesc,
875  const void* A,
876  const void* alpha2,
877  const miopenTensorDescriptor_t bDesc,
878  const void* B,
879  const void* beta,
880  const miopenTensorDescriptor_t cDesc,
881  void* C);
882 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests every element of `y` is set to the scalar at
 * `alpha`; the scalar's expected type is not visible here - confirm.
 *
 * @param handle  Library handle
 * @param yDesc   Tensor descriptor handle (const applies to the handle value only)
 * @param y       Pointer to non-const buffer data
 * @param alpha   Pointer to const scalar data of unspecified type
 * @return        miopenStatus_t
 */
893 MIOPEN_EXPORT miopenStatus_t miopenSetTensor(miopenHandle_t handle,
894  const miopenTensorDescriptor_t yDesc,
895  void* y,
896  const void* alpha);
897 
/*!
 * Exported function returning a miopenStatus_t; same parameter list as
 * miopenSetTensor.
 * NOTE(review): the name suggests `y` is multiplied in place by the scalar at
 * `alpha` - confirm.
 *
 * @param handle  Library handle
 * @param yDesc   Tensor descriptor handle (const applies to the handle value only)
 * @param y       Pointer to non-const buffer data
 * @param alpha   Pointer to const scalar data of unspecified type
 * @return        miopenStatus_t
 */
908 MIOPEN_EXPORT miopenStatus_t miopenScaleTensor(miopenHandle_t handle,
909  const miopenTensorDescriptor_t yDesc,
910  void* y,
911  const void* alpha);
912 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): whether the reported byte count accounts for strides or
 * padding is not visible here - confirm.
 *
 * @param tensorDesc  Tensor descriptor handle
 * @param numBytes    Pointer to size_t (non-const, presumably receives a byte
 *                    count)
 * @return            miopenStatus_t
 */
919 MIOPEN_EXPORT miopenStatus_t miopenGetTensorNumBytes(miopenTensorDescriptor_t tensorDesc,
920  size_t* numBytes);
921 
/*!
 * Exported function returning a miopenStatus_t. It takes two scalar pointers
 * (alpha, beta), a const source pair (xDesc/x) and a non-const destination
 * pair (yDesc/y).
 * NOTE(review): which transformations are supported and how alpha and beta
 * blend the result are not visible here - confirm.
 *
 * @param handle       Library handle
 * @param alpha,beta   Pointers to const scalar data of unspecified type
 * @param xDesc,yDesc  Tensor descriptor handles (const applies to the handle
 *                     value only)
 * @param x            Pointer to const buffer data
 * @param y            Pointer to non-const buffer data
 * @return             miopenStatus_t
 */
939 MIOPEN_EXPORT miopenStatus_t miopenTransformTensor(miopenHandle_t handle,
940  const void* alpha,
941  const miopenTensorDescriptor_t xDesc,
942  const void* x,
943  const void* beta,
944  const miopenTensorDescriptor_t yDesc,
945  void* y);
946 
948 // CLOSEOUT TENSOR DOXYGEN GROUP
949 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it creates a convolution descriptor and
 * stores it through `convDesc` - confirm.
 *
 * @param convDesc  Pointer to a miopenConvolutionDescriptor_t (non-const,
 *                  presumably an output)
 * @return          miopenStatus_t
 */
960 MIOPEN_EXPORT miopenStatus_t
961 miopenCreateConvolutionDescriptor(miopenConvolutionDescriptor_t* convDesc);
962 
978 MIOPEN_EXPORT miopenStatus_t miopenInitConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc,
980  int pad_h,
981  int pad_w,
982  int stride_h,
983  int stride_w,
984  int dilation_h,
985  int dilation_w);
986 
/*!
 * Exported function returning a miopenStatus_t; initialises a convolution
 * descriptor from a spatial dimension count, three const int arrays and a mode.
 * NOTE(review): each array presumably holds `spatialDim` entries - confirm.
 *
 * @param convDesc    Convolution descriptor handle
 * @param spatialDim  int count
 * @param padA        Pointer to const int, not written through this pointer
 * @param strideA     Pointer to const int, not written through this pointer
 * @param dilationA   Pointer to const int, not written through this pointer
 * @param c_mode      miopenConvolutionMode_t value
 * @return            miopenStatus_t
 */
997 MIOPEN_EXPORT miopenStatus_t
998 miopenInitConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc,
999  int spatialDim,
1000  const int* padA,
1001  const int* strideA,
1002  const int* dilationA,
1003  miopenConvolutionMode_t c_mode);
1004 
/*!
 * Exported function returning a miopenStatus_t.
 *
 * @param convDesc    Convolution descriptor handle
 * @param spatialDim  Pointer to int (non-const, presumably receives the
 *                    descriptor's spatial dimension count - confirm)
 * @return            miopenStatus_t
 */
1011 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionSpatialDim(miopenConvolutionDescriptor_t convDesc,
1012  int* spatialDim);
1013 
/*!
 * Exported function returning a miopenStatus_t. Every argument after
 * `convDesc` is a non-const pointer, presumably an output: the mode plus
 * height and width values for padding, stride and dilation.
 * NOTE(review): behaviour for descriptors with more than two spatial
 * dimensions is not visible here - confirm.
 *
 * @param convDesc               Convolution descriptor handle
 * @param c_mode                 Pointer to miopenConvolutionMode_t
 * @param pad_h,pad_w            Pointers to int
 * @param stride_h,stride_w      Pointers to int
 * @param dilation_h,dilation_w  Pointers to int
 * @return                       miopenStatus_t
 */
1029 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc,
1030  miopenConvolutionMode_t* c_mode,
1031  int* pad_h,
1032  int* pad_w,
1033  int* stride_h,
1034  int* stride_w,
1035  int* dilation_h,
1036  int* dilation_w);
1037 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenInitConvolutionNdDescriptor.
 * NOTE(review): `requestedSpatialDim` presumably bounds how many entries are
 * written to each array - confirm.
 *
 * @param convDesc             Convolution descriptor handle
 * @param requestedSpatialDim  int count passed by value
 * @param spatialDim           Pointer to int (non-const, presumably an output)
 * @param padA                 Pointer to int (non-const, presumably an output array)
 * @param strideA              Pointer to int (non-const, presumably an output array)
 * @param dilationA            Pointer to int (non-const, presumably an output array)
 * @param c_mode               Pointer to miopenConvolutionMode_t (presumably an output)
 * @return                     miopenStatus_t
 */
1049 MIOPEN_EXPORT miopenStatus_t
1050 miopenGetConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc,
1051  int requestedSpatialDim,
1052  int* spatialDim,
1053  int* padA,
1054  int* strideA,
1055  int* dilationA,
1056  miopenConvolutionMode_t* c_mode);
1057 
/*!
 * Exported function returning a miopenStatus_t.
 *
 * @param convDesc    Convolution descriptor handle
 * @param groupCount  Pointer to int (non-const, presumably receives the
 *                    descriptor's group count - confirm)
 * @return            miopenStatus_t
 */
1064 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc,
1065  int* groupCount);
1066 
/*!
 * Exported function returning a miopenStatus_t; the by-value counterpart of
 * miopenGetConvolutionGroupCount.
 * NOTE(review): valid values and any required ordering relative to descriptor
 * initialisation are not visible here - confirm.
 *
 * @param convDesc    Convolution descriptor handle
 * @param groupCount  int value
 * @return            miopenStatus_t
 */
1080 MIOPEN_EXPORT miopenStatus_t miopenSetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc,
1081  int groupCount);
1082 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests `adj_h` and `adj_w` are output-padding
 * adjustments for transpose convolution - confirm.
 *
 * @param convDesc  Convolution descriptor handle
 * @param adj_h     int value
 * @param adj_w     int value
 * @return          miopenStatus_t
 */
1095 MIOPEN_EXPORT miopenStatus_t
1096 miopenSetTransposeConvOutputPadding(miopenConvolutionDescriptor_t convDesc, int adj_h, int adj_w);
1097 
1111  miopenConvolutionDescriptor_t convDesc, int spatialDim, const int* adjA);
1112 
/*!
 * Exported function returning a miopenStatus_t. It takes a convolution
 * descriptor, an input tensor descriptor and a filter descriptor, plus four
 * non-const int pointers that presumably receive the forward output extents.
 *
 * @param convDesc         Convolution descriptor handle
 * @param inputTensorDesc  Tensor descriptor handle (const applies to the handle
 *                         value only)
 * @param filterDesc       Tensor descriptor handle (const applies to the handle
 *                         value only)
 * @param n,c,h,w          Pointers to int (presumably outputs - confirm)
 * @return                 miopenStatus_t
 */
1130 MIOPEN_EXPORT miopenStatus_t
1131 miopenGetConvolutionForwardOutputDim(miopenConvolutionDescriptor_t convDesc,
1132  const miopenTensorDescriptor_t inputTensorDesc,
1133  const miopenTensorDescriptor_t filterDesc,
1134  int* n,
1135  int* c,
1136  int* h,
1137  int* w);
1138 
/*!
 * Exported function returning a miopenStatus_t; an N-dimensional variant of
 * miopenGetConvolutionForwardOutputDim that reports through a count pointer
 * and an array pointer.
 * NOTE(review): no capacity is passed for `outputTensorDimA`, so the caller
 * must presumably size it beforehand - confirm.
 *
 * @param convDesc          Convolution descriptor handle
 * @param inputTensorDesc   Tensor descriptor handle (const applies to the
 *                          handle value only)
 * @param filterDesc        Tensor descriptor handle (const applies to the
 *                          handle value only)
 * @param nDim              Pointer to int (non-const, presumably an output)
 * @param outputTensorDimA  Pointer to int (non-const, presumably an output array)
 * @return                  miopenStatus_t
 */
1152 MIOPEN_EXPORT miopenStatus_t
1153 miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t convDesc,
1154  const miopenTensorDescriptor_t inputTensorDesc,
1155  const miopenTensorDescriptor_t filterDesc,
1156  int* nDim,
1157  int* outputTensorDimA);
1158 
/*!
 * Exported function taking a convolution descriptor handle by value and
 * returning a miopenStatus_t.
 * NOTE(review): the name suggests it releases the descriptor - confirm.
 *
 * @param convDesc  Convolution descriptor handle
 * @return          miopenStatus_t
 */
1164 MIOPEN_EXPORT miopenStatus_t
1165 miopenDestroyConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc);
1166 
/*!
 * Exported function returning a miopenStatus_t; sets an attribute, chosen by
 * an enum, to an int value.
 * NOTE(review): the valid values for each attribute are not visible here.
 *
 * @param convDesc  Convolution descriptor handle
 * @param attr      miopenConvolutionAttrib_t selector (by-value const)
 * @param value     int value
 * @return          miopenStatus_t
 */
1173 MIOPEN_EXPORT miopenStatus_t miopenSetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc,
1174  const miopenConvolutionAttrib_t attr,
1175  int value);
1176 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSetConvolutionAttribute.
 *
 * @param convDesc  Convolution descriptor handle
 * @param attr      miopenConvolutionAttrib_t selector (by-value const)
 * @param value     Pointer to int (non-const, presumably receives the attribute
 *                  value - confirm)
 * @return          miopenStatus_t
 */
1183 MIOPEN_EXPORT miopenStatus_t miopenGetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc,
1184  const miopenConvolutionAttrib_t attr,
1185  int* value);
1186 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it stores a find mode on the descriptor; the
 * effect on later find calls is not visible here - confirm.
 *
 * @param convDesc  Convolution descriptor handle
 * @param findMode  miopenConvolutionFindMode_t value
 * @return          miopenStatus_t
 */
1200 MIOPEN_EXPORT miopenStatus_t miopenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc,
1201  miopenConvolutionFindMode_t findMode);
1202 
1210  const miopenConvolutionDescriptor_t convDesc, miopenConvolutionFindMode_t* findMode);
1211 
1216 typedef enum
1217 {
1224 
1228 typedef enum
1229 {
1235 
1239 typedef enum
1240 {
1246  4,
1249 
1253 typedef enum
1254 {
1261 
1268 typedef struct
1269 {
1270  union
1271  {
1277  };
1278 
1279  float time;
1280  size_t memory;
1283 
1292 typedef struct
1293 {
1294  float time;
1299  uint64_t solution_id;
1303 
1319 MIOPEN_EXPORT miopenStatus_t
1321  const miopenTensorDescriptor_t wDesc,
1322  const miopenTensorDescriptor_t xDesc,
1323  const miopenConvolutionDescriptor_t convDesc,
1324  const miopenTensorDescriptor_t yDesc,
1325  size_t* solutionCount);
1326 
1350 MIOPEN_EXPORT miopenStatus_t
1352  const miopenTensorDescriptor_t wDesc,
1353  const miopenTensorDescriptor_t xDesc,
1354  const miopenConvolutionDescriptor_t convDesc,
1355  const miopenTensorDescriptor_t yDesc,
1356  const size_t maxSolutionCount,
1357  size_t* solutionCount,
1358  miopenConvSolution_t* solutions);
1359 
1377 MIOPEN_EXPORT miopenStatus_t
1379  const miopenTensorDescriptor_t wDesc,
1380  const miopenTensorDescriptor_t xDesc,
1381  const miopenConvolutionDescriptor_t convDesc,
1382  const miopenTensorDescriptor_t yDesc,
1383  const uint64_t solution_id,
1384  size_t* workSpaceSize);
1385 
1403 MIOPEN_EXPORT miopenStatus_t
1405  const miopenTensorDescriptor_t wDesc,
1406  const miopenTensorDescriptor_t xDesc,
1407  const miopenConvolutionDescriptor_t convDesc,
1408  const miopenTensorDescriptor_t yDesc,
1409  const uint64_t solution_id);
1410 
1428 MIOPEN_EXPORT miopenStatus_t
1430  const miopenTensorDescriptor_t wDesc,
1431  const void* w,
1432  const miopenTensorDescriptor_t xDesc,
1433  const void* x,
1434  const miopenConvolutionDescriptor_t convDesc,
1435  const miopenTensorDescriptor_t yDesc,
1436  void* y,
1437  void* workSpace,
1438  size_t workSpaceSize,
1439  const uint64_t solution_id);
1440 
1458 MIOPEN_EXPORT miopenStatus_t
1460  const miopenTensorDescriptor_t dyDesc,
1461  const miopenTensorDescriptor_t wDesc,
1462  const miopenConvolutionDescriptor_t convDesc,
1463  const miopenTensorDescriptor_t dxDesc,
1464  size_t* solutionCount);
1465 
1490 MIOPEN_EXPORT miopenStatus_t
1492  const miopenTensorDescriptor_t dyDesc,
1493  const miopenTensorDescriptor_t wDesc,
1494  const miopenConvolutionDescriptor_t convDesc,
1495  const miopenTensorDescriptor_t dxDesc,
1496  const size_t maxSolutionCount,
1497  size_t* solutionCount,
1498  miopenConvSolution_t* solutions);
1499 
1517 MIOPEN_EXPORT miopenStatus_t
1519  const miopenTensorDescriptor_t dyDesc,
1520  const miopenTensorDescriptor_t wDesc,
1521  const miopenConvolutionDescriptor_t convDesc,
1522  const miopenTensorDescriptor_t dxDesc,
1523  const uint64_t solution_id,
1524  size_t* workSpaceSize);
1525 
1544 MIOPEN_EXPORT miopenStatus_t
1546  const miopenTensorDescriptor_t dyDesc,
1547  const miopenTensorDescriptor_t wDesc,
1548  const miopenConvolutionDescriptor_t convDesc,
1549  const miopenTensorDescriptor_t dxDesc,
1550  const uint64_t solution_id);
1551 
1569 MIOPEN_EXPORT miopenStatus_t
1571  const miopenTensorDescriptor_t dyDesc,
1572  const void* dy,
1573  const miopenTensorDescriptor_t wDesc,
1574  const void* w,
1575  const miopenConvolutionDescriptor_t convDesc,
1576  const miopenTensorDescriptor_t dxDesc,
1577  void* dx,
1578  void* workSpace,
1579  size_t workSpaceSize,
1580  const uint64_t solution_id);
1581 
1599 MIOPEN_EXPORT miopenStatus_t
1601  const miopenTensorDescriptor_t dyDesc,
1602  const miopenTensorDescriptor_t xDesc,
1603  const miopenConvolutionDescriptor_t convDesc,
1604  const miopenTensorDescriptor_t dwDesc,
1605  size_t* solutionCount);
1606 
1630 MIOPEN_EXPORT miopenStatus_t
1632  const miopenTensorDescriptor_t dyDesc,
1633  const miopenTensorDescriptor_t xDesc,
1634  const miopenConvolutionDescriptor_t convDesc,
1635  const miopenTensorDescriptor_t dwDesc,
1636  const size_t maxSolutionCount,
1637  size_t* solutionCount,
1638  miopenConvSolution_t* solutions);
1639 
1658  miopenHandle_t handle,
1659  const miopenTensorDescriptor_t dyDesc,
1660  const miopenTensorDescriptor_t xDesc,
1661  const miopenConvolutionDescriptor_t convDesc,
1662  const miopenTensorDescriptor_t dwDesc,
1663  const uint64_t solution_id,
1664  size_t* workSpaceSize);
1665 
1683 MIOPEN_EXPORT miopenStatus_t
1685  const miopenTensorDescriptor_t dyDesc,
1686  const miopenTensorDescriptor_t xDesc,
1687  const miopenConvolutionDescriptor_t convDesc,
1688  const miopenTensorDescriptor_t dwDesc,
1689  const uint64_t solution_id);
1690 
1709 MIOPEN_EXPORT miopenStatus_t
1711  const miopenTensorDescriptor_t dyDesc,
1712  const void* dy,
1713  const miopenTensorDescriptor_t xDesc,
1714  const void* x,
1715  const miopenConvolutionDescriptor_t convDesc,
1716  const miopenTensorDescriptor_t dwDesc,
1717  void* dw,
1718  void* workSpace,
1719  size_t workSpaceSize,
1720  const uint64_t solution_id);
1721 
1748 MIOPEN_EXPORT miopenStatus_t
1750  const miopenTensorDescriptor_t wDesc,
1751  const miopenTensorDescriptor_t xDesc,
1752  const miopenConvolutionDescriptor_t convDesc,
1753  const miopenTensorDescriptor_t yDesc,
1754  size_t* workSpaceSize);
1755 
1799 MIOPEN_EXPORT miopenStatus_t
1801  const miopenTensorDescriptor_t xDesc,
1802  const void* x,
1803  const miopenTensorDescriptor_t wDesc,
1804  const void* w,
1805  const miopenConvolutionDescriptor_t convDesc,
1806  const miopenTensorDescriptor_t yDesc,
1807  void* y,
1808  const int requestAlgoCount,
1809  int* returnedAlgoCount,
1810  miopenConvAlgoPerf_t* perfResults,
1811  void* workSpace,
1812  size_t workSpaceSize,
1813  bool exhaustiveSearch);
1814 
1849 MIOPEN_EXPORT miopenStatus_t miopenConvolutionForward(miopenHandle_t handle,
1850  const void* alpha,
1851  const miopenTensorDescriptor_t xDesc,
1852  const void* x,
1853  const miopenTensorDescriptor_t wDesc,
1854  const void* w,
1855  const miopenConvolutionDescriptor_t convDesc,
1857  const void* beta,
1858  const miopenTensorDescriptor_t yDesc,
1859  void* y,
1860  void* workSpace,
1861  size_t workSpaceSize);
1862 
/*!
 * Exported function returning a miopenStatus_t. It takes two scalar pointers
 * (alpha, beta), a const bias pair (bDesc/b) and a non-const pair (yDesc/y).
 * NOTE(review): presumably applies the bias `b` to `y`; the blending formula
 * and broadcast rule are not visible here - confirm.
 *
 * @param handle       Library handle
 * @param alpha,beta   Pointers to const scalar data of unspecified type
 * @param bDesc,yDesc  Tensor descriptor handles (const applies to the handle
 *                     value only)
 * @param b            Pointer to const buffer data
 * @param y            Pointer to non-const buffer data
 * @return             miopenStatus_t
 */
1878 MIOPEN_EXPORT miopenStatus_t miopenConvolutionForwardBias(miopenHandle_t handle,
1879  const void* alpha,
1880  const miopenTensorDescriptor_t bDesc,
1881  const void* b,
1882  const void* beta,
1883  const miopenTensorDescriptor_t yDesc,
1884  void* y);
1885 
1913 MIOPEN_EXPORT miopenStatus_t
1915  const miopenTensorDescriptor_t dyDesc,
1916  const miopenTensorDescriptor_t wDesc,
1917  const miopenConvolutionDescriptor_t convDesc,
1918  const miopenTensorDescriptor_t dxDesc,
1919  size_t* workSpaceSize);
1920 
1964 MIOPEN_EXPORT miopenStatus_t
1966  const miopenTensorDescriptor_t dyDesc,
1967  const void* dy,
1968  const miopenTensorDescriptor_t wDesc,
1969  const void* w,
1970  const miopenConvolutionDescriptor_t convDesc,
1971  const miopenTensorDescriptor_t dxDesc,
1972  void* dx,
1973  const int requestAlgoCount,
1974  int* returnedAlgoCount,
1975  miopenConvAlgoPerf_t* perfResults,
1976  void* workSpace,
1977  size_t workSpaceSize,
1978  bool exhaustiveSearch);
1979 
2013 MIOPEN_EXPORT miopenStatus_t
2014 miopenConvolutionBackwardData(miopenHandle_t handle,
2015  const void* alpha,
2016  const miopenTensorDescriptor_t dyDesc,
2017  const void* dy,
2018  const miopenTensorDescriptor_t wDesc,
2019  const void* w,
2020  const miopenConvolutionDescriptor_t convDesc,
2022  const void* beta,
2023  const miopenTensorDescriptor_t dxDesc,
2024  void* dx,
2025  void* workSpace,
2026  size_t workSpaceSize);
2027 
2055 MIOPEN_EXPORT miopenStatus_t
2057  const miopenTensorDescriptor_t dyDesc,
2058  const miopenTensorDescriptor_t xDesc,
2059  const miopenConvolutionDescriptor_t convDesc,
2060  const miopenTensorDescriptor_t dwDesc,
2061  size_t* workSpaceSize);
2062 
2106 MIOPEN_EXPORT miopenStatus_t
2108  const miopenTensorDescriptor_t dyDesc,
2109  const void* dy,
2110  const miopenTensorDescriptor_t xDesc,
2111  const void* x,
2112  const miopenConvolutionDescriptor_t convDesc,
2113  const miopenTensorDescriptor_t dwDesc,
2114  void* dw,
2115  const int requestAlgoCount,
2116  int* returnedAlgoCount,
2117  miopenConvAlgoPerf_t* perfResults,
2118  void* workSpace,
2119  size_t workSpaceSize,
2120  bool exhaustiveSearch);
2121 
2155 MIOPEN_EXPORT miopenStatus_t
2156 miopenConvolutionBackwardWeights(miopenHandle_t handle,
2157  const void* alpha,
2158  const miopenTensorDescriptor_t dyDesc,
2159  const void* dy,
2160  const miopenTensorDescriptor_t xDesc,
2161  const void* x,
2162  const miopenConvolutionDescriptor_t convDesc,
2164  const void* beta,
2165  const miopenTensorDescriptor_t dwDesc,
2166  void* dw,
2167  void* workSpace,
2168  size_t workSpaceSize);
2169 
/*!
 * Exported function returning a miopenStatus_t. It takes two scalar pointers
 * (alpha, beta), a const pair (dyDesc/dy) and a non-const pair (dbDesc/db).
 * NOTE(review): presumably computes the bias gradient `db` from `dy`; the
 * reduction and blending rules are not visible here - confirm.
 *
 * @param handle         Library handle
 * @param alpha,beta     Pointers to const scalar data of unspecified type
 * @param dyDesc,dbDesc  Tensor descriptor handles (const applies to the handle
 *                       value only)
 * @param dy             Pointer to const buffer data
 * @param db             Pointer to non-const buffer data
 * @return               miopenStatus_t
 */
2185 MIOPEN_EXPORT miopenStatus_t miopenConvolutionBackwardBias(miopenHandle_t handle,
2186  const void* alpha,
2187  const miopenTensorDescriptor_t dyDesc,
2188  const void* dy,
2189  const void* beta,
2190  const miopenTensorDescriptor_t dbDesc,
2191  void* db);
2192 
2194 // CLOSEOUT CONVOLUTIONS DOXYGEN GROUP
2195 
2196 // Pooling APIs
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it creates a pooling descriptor and stores it
 * through `poolDesc` - confirm.
 *
 * @param poolDesc  Pointer to a miopenPoolingDescriptor_t (non-const,
 *                  presumably an output)
 * @return          miopenStatus_t
 */
2207 MIOPEN_EXPORT miopenStatus_t miopenCreatePoolingDescriptor(miopenPoolingDescriptor_t* poolDesc);
2208 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): what the index type governs (presumably the integer type of
 * stored pooling indices) is not visible here - confirm.
 *
 * @param poolDesc    Pooling descriptor handle
 * @param index_type  miopenIndexType_t value
 * @return            miopenStatus_t
 */
2217 MIOPEN_EXPORT miopenStatus_t miopenSetPoolingIndexType(miopenPoolingDescriptor_t poolDesc,
2218  miopenIndexType_t index_type);
2219 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSetPoolingIndexType.
 *
 * @param poolDesc    Pooling descriptor handle
 * @param index_type  Pointer to miopenIndexType_t (non-const, presumably an
 *                    output - confirm)
 * @return            miopenStatus_t
 */
2227 MIOPEN_EXPORT miopenStatus_t miopenGetPoolingIndexType(miopenPoolingDescriptor_t poolDesc,
2228  miopenIndexType_t* index_type);
2229 
2238  miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t workspace_index);
2239 
2247  miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t* workspace_index);
2248 
/*!
 * Exported function returning a miopenStatus_t; sets a pooling descriptor from
 * a mode and int window, padding and stride values for height and width.
 *
 * @param poolDesc                  Pooling descriptor handle
 * @param mode                      miopenPoolingMode_t value
 * @param windowHeight,windowWidth  int values
 * @param pad_h,pad_w               int values
 * @param stride_h,stride_w         int values
 * @return                          miopenStatus_t
 */
2263 MIOPEN_EXPORT miopenStatus_t miopenSet2dPoolingDescriptor(miopenPoolingDescriptor_t poolDesc,
2264  miopenPoolingMode_t mode,
2265  int windowHeight,
2266  int windowWidth,
2267  int pad_h,
2268  int pad_w,
2269  int stride_h,
2270  int stride_w);
2271 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSet2dPoolingDescriptor. Every argument after `poolDesc` is a
 * non-const pointer, presumably an output.
 *
 * @param poolDesc                  Pooling descriptor handle (const applies to
 *                                  the handle value only)
 * @param mode                      Pointer to miopenPoolingMode_t
 * @param windowHeight,windowWidth  Pointers to int
 * @param pad_h,pad_w               Pointers to int
 * @param stride_h,stride_w         Pointers to int
 * @return                          miopenStatus_t
 */
2286 MIOPEN_EXPORT miopenStatus_t miopenGet2dPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc,
2287  miopenPoolingMode_t* mode,
2288  int* windowHeight,
2289  int* windowWidth,
2290  int* pad_h,
2291  int* pad_w,
2292  int* stride_h,
2293  int* stride_w);
2294 
/*!
 * Exported function returning a miopenStatus_t. It takes a pooling descriptor
 * and a tensor descriptor, plus four non-const int pointers that presumably
 * receive the forward output extents.
 * NOTE(review): `tensorDesc` is presumably the input tensor - confirm.
 *
 * @param poolDesc    Pooling descriptor handle (const applies to the handle
 *                    value only)
 * @param tensorDesc  Tensor descriptor handle (const applies to the handle
 *                    value only)
 * @param n,c,h,w     Pointers to int (presumably outputs)
 * @return            miopenStatus_t
 */
2309 MIOPEN_EXPORT miopenStatus_t
2310 miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc,
2311  const miopenTensorDescriptor_t tensorDesc,
2312  int* n,
2313  int* c,
2314  int* h,
2315  int* w);
2316 
/*!
 * Exported function returning a miopenStatus_t; an N-dimensional variant of
 * miopenSet2dPoolingDescriptor that takes const int arrays.
 * NOTE(review): each array presumably holds `nbDims` entries - confirm.
 *
 * @param poolDesc    Pooling descriptor handle
 * @param mode        miopenPoolingMode_t value (by-value const)
 * @param nbDims      int count
 * @param windowDimA  Pointer to const int, not written through this pointer
 * @param padA        Pointer to const int, not written through this pointer
 * @param stridesA    Pointer to const int, not written through this pointer
 * @return            miopenStatus_t
 */
2332 MIOPEN_EXPORT miopenStatus_t miopenSetNdPoolingDescriptor(miopenPoolingDescriptor_t poolDesc,
2333  const miopenPoolingMode_t mode,
2334  int nbDims,
2335  const int* windowDimA,
2336  const int* padA,
2337  const int* stridesA);
2338 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSetNdPoolingDescriptor.
 * NOTE(review): `nbDimsRequested` presumably bounds how many entries are
 * written to each array - confirm.
 *
 * @param poolDesc         Pooling descriptor handle (const applies to the
 *                         handle value only)
 * @param nbDimsRequested  int count passed by value
 * @param mode             Pointer to miopenPoolingMode_t (presumably an output)
 * @param nbDims           Pointer to int (presumably an output)
 * @param windowDimA       Pointer to int (presumably an output array)
 * @param padA             Pointer to int (presumably an output array)
 * @param stridesA         Pointer to int (presumably an output array)
 * @return                 miopenStatus_t
 */
2355 MIOPEN_EXPORT miopenStatus_t miopenGetNdPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc,
2356  int nbDimsRequested,
2357  miopenPoolingMode_t* mode,
2358  int* nbDims,
2359  int* windowDimA,
2360  int* padA,
2361  int* stridesA);
2362 
/*!
 * Exported function returning a miopenStatus_t; an N-dimensional variant of
 * miopenGetPoolingForwardOutputDim.
 * NOTE(review): `dims` is presumably the number of entries to write into
 * `tensorDimArr` - confirm.
 *
 * @param poolDesc      Pooling descriptor handle (const applies to the handle
 *                      value only)
 * @param tensorDesc    Tensor descriptor handle (const applies to the handle
 *                      value only)
 * @param dims          int count passed by value
 * @param tensorDimArr  Pointer to int (non-const, presumably an output array)
 * @return              miopenStatus_t
 */
2375 MIOPEN_EXPORT miopenStatus_t
2376 miopenGetPoolingNdForwardOutputDim(const miopenPoolingDescriptor_t poolDesc,
2377  const miopenTensorDescriptor_t tensorDesc,
2378  int dims,
2379  int* tensorDimArr);
2380 
/*!
 * Exported function returning a miopenStatus_t. It takes only a tensor
 * descriptor; miopenPoolingGetWorkSpaceSizeV2 additionally takes the pooling
 * descriptor.
 * NOTE(review): the unit of the reported size (presumably bytes) is not visible
 * here - confirm.
 *
 * @param yDesc          Tensor descriptor handle (const applies to the handle
 *                       value only)
 * @param workSpaceSize  Pointer to size_t (non-const, presumably an output)
 * @return               miopenStatus_t
 */
2393 MIOPEN_EXPORT miopenStatus_t miopenPoolingGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc,
2394  size_t* workSpaceSize);
2395 
/*!
 * Exported function returning a miopenStatus_t. It has the same parameters as
 * miopenPoolingGetWorkSpaceSize plus a leading pooling descriptor.
 * NOTE(review): presumably the pooling configuration influences the reported
 * size - confirm.
 *
 * @param poolDesc       Pooling descriptor handle (const applies to the handle
 *                       value only)
 * @param yDesc          Tensor descriptor handle (const applies to the handle
 *                       value only)
 * @param workSpaceSize  Pointer to size_t (non-const, presumably an output)
 * @return               miopenStatus_t
 */
2408 MIOPEN_EXPORT miopenStatus_t
2409 miopenPoolingGetWorkSpaceSizeV2(const miopenPoolingDescriptor_t poolDesc,
2410  const miopenTensorDescriptor_t yDesc,
2411  size_t* workSpaceSize);
2412 
/*!
 * Exported function returning a miopenStatus_t. It takes two scalar pointers
 * (alpha, beta), a const input pair (xDesc/x), a non-const output pair
 * (yDesc/y), a boolean flag and a workspace pointer with its size.
 * NOTE(review): `do_backward` presumably requests that data needed by
 * miopenPoolingBackward be saved in `workSpace` - confirm.
 *
 * @param handle         Library handle
 * @param poolDesc       Pooling descriptor handle (const applies to the handle
 *                       value only)
 * @param alpha,beta     Pointers to const scalar data of unspecified type
 * @param xDesc,yDesc    Tensor descriptor handles
 * @param x              Pointer to const buffer data
 * @param y              Pointer to non-const buffer data
 * @param do_backward    Boolean flag
 * @param workSpace      Pointer to non-const buffer data
 * @param workSpaceSize  size_t passed by value (presumably the workspace
 *                       capacity - confirm)
 * @return               miopenStatus_t
 */
2433 MIOPEN_EXPORT miopenStatus_t miopenPoolingForward(miopenHandle_t handle,
2434  const miopenPoolingDescriptor_t poolDesc,
2435  const void* alpha,
2436  const miopenTensorDescriptor_t xDesc,
2437  const void* x,
2438  const void* beta,
2439  const miopenTensorDescriptor_t yDesc,
2440  void* y,
2441  bool do_backward,
2442  void* workSpace,
2443  size_t workSpaceSize);
2444 
/*!
 * Exported function returning a miopenStatus_t. It takes two scalar pointers
 * (alpha, beta), three const pairs (yDesc/y, dyDesc/dy, xDesc/x), a non-const
 * pair (dxDesc/dx) and a workspace pointer.
 * Unlike miopenPoolingForward, no workspace size is passed, and `workSpace` is
 * non-const here.
 * NOTE(review): presumably `workSpace` must be the buffer filled by a forward
 * call made with do_backward set - confirm.
 *
 * @param handle      Library handle
 * @param poolDesc    Pooling descriptor handle (const applies to the handle
 *                    value only)
 * @param alpha,beta  Pointers to const scalar data of unspecified type
 * @param yDesc,dyDesc,xDesc,dxDesc  Tensor descriptor handles
 * @param y,dy,x      Pointers to const buffer data
 * @param dx          Pointer to non-const buffer data
 * @param workSpace   Pointer to non-const buffer data
 * @return            miopenStatus_t
 */
2465 MIOPEN_EXPORT miopenStatus_t miopenPoolingBackward(miopenHandle_t handle,
2466  const miopenPoolingDescriptor_t poolDesc,
2467  const void* alpha,
2468  const miopenTensorDescriptor_t yDesc,
2469  const void* y,
2470  const miopenTensorDescriptor_t dyDesc,
2471  const void* dy,
2472  const miopenTensorDescriptor_t xDesc,
2473  const void* x,
2474  const void* beta,
2475  const miopenTensorDescriptor_t dxDesc,
2476  void* dx,
2477  void* workSpace);
2478 
/*!
 * Exported function taking a pooling descriptor handle by value and returning
 * a miopenStatus_t.
 * NOTE(review): the name suggests it releases the descriptor - confirm.
 *
 * @param poolDesc  Pooling descriptor handle
 * @return          miopenStatus_t
 */
2484 MIOPEN_EXPORT miopenStatus_t miopenDestroyPoolingDescriptor(miopenPoolingDescriptor_t poolDesc);
2485 
2487 // CLOSEOUT POOLING DOXYGEN GROUP
2488 
2489 // LRN APIs
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests it creates an LRN descriptor and stores it
 * through `lrnDesc` - confirm.
 *
 * @param lrnDesc  Pointer to a miopenLRNDescriptor_t (non-const, presumably an
 *                 output)
 * @return         miopenStatus_t
 */
2499 MIOPEN_EXPORT miopenStatus_t miopenCreateLRNDescriptor(miopenLRNDescriptor_t* lrnDesc);
2500 
/*!
 * Exported function returning a miopenStatus_t; sets an LRN descriptor from a
 * mode, an unsigned count and three doubles.
 * The `const` on `lrnDesc` qualifies the handle value only (the typedef is a
 * pointer), so it does not stop this setter from modifying the descriptor.
 * NOTE(review): the meaning and valid range of lrnN, lrnAlpha, lrnBeta and
 * lrnK are not visible here - confirm.
 *
 * @param lrnDesc   LRN descriptor handle
 * @param mode      miopenLRNMode_t value
 * @param lrnN      unsigned int value
 * @param lrnAlpha  double value
 * @param lrnBeta   double value
 * @param lrnK      double value
 * @return          miopenStatus_t
 */
2514 MIOPEN_EXPORT miopenStatus_t miopenSetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc,
2515  miopenLRNMode_t mode,
2516  unsigned int lrnN,
2517  double lrnAlpha,
2518  double lrnBeta,
2519  double lrnK);
2520 
/*!
 * Exported function returning a miopenStatus_t; the pointer-taking counterpart
 * of miopenSetLRNDescriptor. Every argument after `lrnDesc` is a non-const
 * pointer, presumably an output.
 *
 * @param lrnDesc   LRN descriptor handle (const applies to the handle value only)
 * @param mode      Pointer to miopenLRNMode_t
 * @param lrnN      Pointer to unsigned int
 * @param lrnAlpha  Pointer to double
 * @param lrnBeta   Pointer to double
 * @param lrnK      Pointer to double
 * @return          miopenStatus_t
 */
2533 MIOPEN_EXPORT miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc,
2534  miopenLRNMode_t* mode,
2535  unsigned int* lrnN,
2536  double* lrnAlpha,
2537  double* lrnBeta,
2538  double* lrnK);
2539 
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the unit of the reported size (presumably bytes) is not visible
 * here - confirm.
 *
 * @param yDesc          Tensor descriptor handle (const applies to the handle
 *                       value only)
 * @param workSpaceSize  Pointer to size_t (non-const, presumably an output)
 * @return               miopenStatus_t
 */
2549 MIOPEN_EXPORT miopenStatus_t miopenLRNGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc,
2550  size_t* workSpaceSize);
2551 
/*!
 * Exported function returning a miopenStatus_t. It takes two scalar pointers
 * (alpha, beta), a const input pair (xDesc/x), a non-const output pair
 * (yDesc/y), a boolean flag and a non-const workspace pointer. No workspace
 * size is passed.
 * NOTE(review): `do_backward` presumably requests that data needed by
 * miopenLRNBackward be saved in `workSpace` - confirm.
 *
 * @param handle       Library handle
 * @param lrnDesc      LRN descriptor handle (const applies to the handle value
 *                     only)
 * @param alpha,beta   Pointers to const scalar data of unspecified type
 * @param xDesc,yDesc  Tensor descriptor handles
 * @param x            Pointer to const buffer data
 * @param y            Pointer to non-const buffer data
 * @param do_backward  Boolean flag
 * @param workSpace    Pointer to non-const buffer data
 * @return             miopenStatus_t
 */
2570 MIOPEN_EXPORT miopenStatus_t miopenLRNForward(miopenHandle_t handle,
2571  const miopenLRNDescriptor_t lrnDesc,
2572  const void* alpha,
2573  const miopenTensorDescriptor_t xDesc,
2574  const void* x,
2575  const void* beta,
2576  const miopenTensorDescriptor_t yDesc,
2577  void* y,
2578  bool do_backward,
2579  void* workSpace);
2580 
/*!
 * Exported function returning a miopenStatus_t. It takes two scalar pointers
 * (alpha, beta), three const pairs (yDesc/y, dyDesc/dy, xDesc/x), a non-const
 * pair (dxDesc/dx) and a workspace pointer.
 * Here `workSpace` is a pointer to const, whereas miopenLRNForward takes it
 * non-const.
 * NOTE(review): presumably `workSpace` must be the buffer filled by a forward
 * call made with do_backward set - confirm.
 *
 * @param handle      Library handle
 * @param lrnDesc     LRN descriptor handle (const applies to the handle value
 *                    only)
 * @param alpha,beta  Pointers to const scalar data of unspecified type
 * @param yDesc,dyDesc,xDesc,dxDesc  Tensor descriptor handles
 * @param y,dy,x      Pointers to const buffer data
 * @param dx          Pointer to non-const buffer data
 * @param workSpace   Pointer to const buffer data
 * @return            miopenStatus_t
 */
2598 MIOPEN_EXPORT miopenStatus_t miopenLRNBackward(miopenHandle_t handle,
2599  const miopenLRNDescriptor_t lrnDesc,
2600  const void* alpha,
2601  const miopenTensorDescriptor_t yDesc,
2602  const void* y,
2603  const miopenTensorDescriptor_t dyDesc,
2604  const void* dy,
2605  const miopenTensorDescriptor_t xDesc,
2606  const void* x,
2607  const void* beta,
2608  const miopenTensorDescriptor_t dxDesc,
2609  void* dx,
2610  const void* workSpace);
2611 
/*!
 * Exported function taking an LRN descriptor handle by value and returning a
 * miopenStatus_t.
 * NOTE(review): the name suggests it releases the descriptor - confirm.
 *
 * @param lrnDesc  LRN descriptor handle
 * @return         miopenStatus_t
 */
2617 MIOPEN_EXPORT miopenStatus_t miopenDestroyLRNDescriptor(miopenLRNDescriptor_t lrnDesc);
2618 
2620 // CLOSEOUT LRN DOXYGEN GROUP
2621 
2622 #ifdef MIOPEN_BETA_API
2623 // LayerNorm APIs
/*!
 * Exported function returning a miopenStatus_t; it sits inside an
 * `#ifdef MIOPEN_BETA_API` region. It takes const pairs for x, weight and bias
 * and non-const pairs for y, mean and rstd, which are presumably outputs.
 * NOTE(review): `int32_t` needs <stdint.h>, but the visible include list has
 * only <stddef.h>, <stdbool.h> and the backend headers, so it presumably
 * arrives transitively - confirm.
 * NOTE(review): which axes `normalized_dim` selects, and whether weight, bias,
 * mean or rstd may be NULL, is not visible here.
 *
 * @param handle          Library handle
 * @param mode            miopenNormMode_t value
 * @param xDesc,weightDesc,biasDesc,yDesc,meanDesc,rstdDesc
 *                        Tensor descriptor handles (const applies to the handle
 *                        value only)
 * @param x,weight,bias   Pointers to const buffer data
 * @param epsilon         float value (by-value const)
 * @param normalized_dim  int32_t value (by-value const)
 * @param y,mean,rstd     Pointers to non-const buffer data
 * @return                miopenStatus_t
 */
2648 MIOPEN_EXPORT miopenStatus_t miopenLayerNormForward(miopenHandle_t handle,
2649  miopenNormMode_t mode,
2650  const miopenTensorDescriptor_t xDesc,
2651  const void* x,
2652  const miopenTensorDescriptor_t weightDesc,
2653  const void* weight,
2654  const miopenTensorDescriptor_t biasDesc,
2655  const void* bias,
2656  const float epsilon,
2657  const int32_t normalized_dim,
2658  const miopenTensorDescriptor_t yDesc,
2659  void* y,
2660  const miopenTensorDescriptor_t meanDesc,
2661  void* mean,
2662  const miopenTensorDescriptor_t rstdDesc,
2663  void* rstd);
2664 
2682 MIOPEN_EXPORT miopenStatus_t
2684  miopenNormMode_t mode,
2685  const miopenTensorDescriptor_t dyDesc,
2686  const miopenTensorDescriptor_t xDesc,
2687  const miopenTensorDescriptor_t weightDesc,
2688  const miopenTensorDescriptor_t meanDesc,
2689  const miopenTensorDescriptor_t rstdDesc,
2690  const int32_t normalized_dim,
2691  const miopenTensorDescriptor_t dxDesc,
2692  const miopenTensorDescriptor_t dwDesc,
2693  const miopenTensorDescriptor_t dbDesc,
2694  size_t* sizeInBytes);
2695 
/*!
 * Exported function returning a miopenStatus_t; it sits inside an
 * `#ifdef MIOPEN_BETA_API` region. It takes a workspace pointer with its size,
 * const pairs for dy, x, weight, mean and rstd, and non-const pairs for dx, dw
 * and db, which are presumably the gradient outputs.
 * NOTE(review): `workspaceSizeInBytes` is presumably the capacity of
 * `workspace`; how the required size is obtained is not shown in the intact
 * part of this listing - confirm.
 * NOTE(review): `int32_t` needs <stdint.h>, which is not in the visible include
 * list - presumably transitive.
 *
 * @param handle                Library handle
 * @param mode                  miopenNormMode_t value
 * @param workspace             Pointer to non-const buffer data
 * @param workspaceSizeInBytes  size_t value
 * @param dyDesc,xDesc,weightDesc,meanDesc,rstdDesc,dxDesc,dwDesc,dbDesc
 *                              Tensor descriptor handles (const applies to the
 *                              handle value only)
 * @param dy,x,weight,mean,rstd Pointers to const buffer data
 * @param normalized_dim        int32_t value (by-value const)
 * @param dx,dw,db              Pointers to non-const buffer data
 * @return                      miopenStatus_t
 */
2721 MIOPEN_EXPORT miopenStatus_t miopenLayerNormBackward(miopenHandle_t handle,
2722  miopenNormMode_t mode,
2723  void* workspace,
2724  size_t workspaceSizeInBytes,
2725  const miopenTensorDescriptor_t dyDesc,
2726  const void* dy,
2727  const miopenTensorDescriptor_t xDesc,
2728  const void* x,
2729  const miopenTensorDescriptor_t weightDesc,
2730  const void* weight,
2731  const miopenTensorDescriptor_t meanDesc,
2732  const void* mean,
2733  const miopenTensorDescriptor_t rstdDesc,
2734  const void* rstd,
2735  const int32_t normalized_dim,
2736  const miopenTensorDescriptor_t dxDesc,
2737  void* dx,
2738  const miopenTensorDescriptor_t dwDesc,
2739  void* dw,
2740  const miopenTensorDescriptor_t dbDesc,
2741  void* db);
2742 
2744 // CLOSEOUT LAYERNORM DOXYGEN GROUP
2745 #endif
2746 
2747 #ifdef MIOPEN_BETA_API
2748 // Cat APIs
/*!
 * Exported function returning a miopenStatus_t; it sits inside an
 * `#ifdef MIOPEN_BETA_API` region. It takes an input count, an array of tensor
 * descriptors, an array of const buffer pointers, a non-const output pair
 * (yDesc/y) and a dimension index.
 * NOTE(review): `xDescs` and `xs` presumably each hold `xCount` entries, and
 * `dim` is presumably the concatenation axis - confirm.
 * NOTE(review): `int32_t` needs <stdint.h>, which is not in the visible include
 * list - presumably transitive.
 *
 * @param handle  Library handle
 * @param xCount  int32_t count (by-value const)
 * @param xDescs  Pointer to const tensor descriptor handles
 * @param xs      Pointer to const pointers to const buffer data
 * @param yDesc   Tensor descriptor handle (const applies to the handle value only)
 * @param y       Pointer to non-const buffer data
 * @param dim     int32_t value (by-value const)
 * @return        miopenStatus_t
 */
2764 MIOPEN_EXPORT miopenStatus_t miopenCatForward(miopenHandle_t handle,
2765  const int32_t xCount,
2766  const miopenTensorDescriptor_t* xDescs,
2767  const void* const* xs,
2768  const miopenTensorDescriptor_t yDesc,
2769  void* y,
2770  const int32_t dim);
2771 
2773 // CLOSEOUT CAT DOXYGEN GROUP
2774 #endif
2775 
2776 // Batch-Normalization APIs
/*!
 * Exported function returning a miopenStatus_t.
 * NOTE(review): the name suggests `derivedBnDesc` is filled in from `xDesc`
 * according to `bn_mode`; it is passed by value, so it presumably must already
 * be a created descriptor - confirm.
 *
 * @param derivedBnDesc  Tensor descriptor handle (non-const, presumably the one
 *                       being configured)
 * @param xDesc          Tensor descriptor handle (const applies to the handle
 *                       value only)
 * @param bn_mode        miopenBatchNormMode_t value
 * @return               miopenStatus_t
 */
2798 MIOPEN_EXPORT miopenStatus_t miopenDeriveBNTensorDescriptor(miopenTensorDescriptor_t derivedBnDesc,
2799  const miopenTensorDescriptor_t xDesc,
2800  miopenBatchNormMode_t bn_mode);
2801 
2840 MIOPEN_EXPORT miopenStatus_t
2842  miopenBatchNormMode_t bn_mode,
2843  void* alpha,
2844  void* beta,
2845  const miopenTensorDescriptor_t xDesc,
2846  const void* x,
2847  const miopenTensorDescriptor_t yDesc,
2848  void* y,
2849  const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc,
2850  void* bnScale,
2851  void* bnBias,
2852  double expAvgFactor,
2853  void* resultRunningMean,
2854  void* resultRunningVariance,
2855  double epsilon,
2856  void* resultSaveMean,
2857  void* resultSaveInvVariance);
2898 MIOPEN_EXPORT miopenStatus_t
2900  miopenBatchNormMode_t bn_mode,
2901  void* alpha,
2902  void* beta,
2903  const miopenTensorDescriptor_t xDesc,
2904  const void* x,
2905  const miopenTensorDescriptor_t yDesc,
2906  void* y,
2907  const miopenTensorDescriptor_t scaleDesc,
2908  const miopenTensorDescriptor_t biasVarDesc,
2909  const miopenTensorDescriptor_t savedMeanDesc,
2910  const miopenTensorDescriptor_t savedVarDesc,
2911  void* bnScale,
2912  void* bnBias,
2913  double expAvgFactor,
2914  void* resultRunningMean,
2915  void* resultRunningVariance,
2916  double epsilon,
2917  void* resultSaveMean,
2918  void* resultSaveInvVariance);
2961 MIOPEN_EXPORT miopenStatus_t
2963  miopenBatchNormMode_t bn_mode,
2964  void* alpha,
2965  void* beta,
2966  const miopenTensorDescriptor_t xDesc,
2967  const void* x,
2968  const miopenTensorDescriptor_t yDesc,
2969  void* y,
2970  const miopenTensorDescriptor_t scaleDesc,
2971  const miopenTensorDescriptor_t biasVarDesc,
2972  const miopenTensorDescriptor_t savedMeanDesc,
2973  const miopenTensorDescriptor_t savedVarDesc,
2974  void* bnScale,
2975  void* bnBias,
2976  double expAvgFactor,
2977  const void* prevResultRunningMean,
2978  const void* prevResultRunningVariance,
2979  void* nextResultRunningMean,
2980  void* nextResultRunningVariance,
2981  double epsilon,
2982  void* resultSaveMean,
2983  void* resultSaveInvVariance);
3025 MIOPEN_EXPORT miopenStatus_t
3027  miopenBatchNormMode_t bn_mode,
3028  void* alpha,
3029  void* beta,
3030  const miopenTensorDescriptor_t xDesc,
3031  const void* x,
3032  const miopenTensorDescriptor_t yDesc,
3033  void* y,
3034  const miopenTensorDescriptor_t scaleDesc,
3035  const miopenTensorDescriptor_t biasVarDesc,
3036  const miopenTensorDescriptor_t savedMeanDesc,
3037  const miopenTensorDescriptor_t savedVarDesc,
3038  void* bnScale,
3039  void* bnBias,
3040  double expAvgFactor,
3041  void* resultRunningMean,
3042  void* resultRunningVariance,
3043  double epsilon,
3044  void* resultSaveMean,
3045  void* resultSaveInvVariance,
3046  const miopenActivationDescriptor_t activDesc);
3047 
3091 MIOPEN_EXPORT miopenStatus_t
3093  miopenBatchNormMode_t bn_mode,
3094  void* alpha,
3095  void* beta,
3096  const miopenTensorDescriptor_t xDesc,
3097  const void* x,
3098  const miopenTensorDescriptor_t yDesc,
3099  void* y,
3100  const miopenTensorDescriptor_t scaleDesc,
3101  const miopenTensorDescriptor_t biasVarDesc,
3102  const miopenTensorDescriptor_t savedMeanDesc,
3103  const miopenTensorDescriptor_t savedVarDesc,
3104  void* bnScale,
3105  void* bnBias,
3106  double expAvgFactor,
3107  const void* prevResultRunningMean,
3108  const void* prevResultRunningVariance,
3109  void* nextResultRunningMean,
3110  void* nextResultRunningVariance,
3111  double epsilon,
3112  void* resultSaveMean,
3113  void* resultSaveInvVariance,
3114  const miopenActivationDescriptor_t activDesc);
3115 
3145 MIOPEN_EXPORT miopenStatus_t
3147  miopenBatchNormMode_t bn_mode,
3148  void* alpha,
3149  void* beta,
3150  const miopenTensorDescriptor_t xDesc,
3151  const void* x,
3152  const miopenTensorDescriptor_t yDesc,
3153  void* y,
3154  const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc,
3155  void* bnScale,
3156  void* bnBias,
3157  void* estimatedMean,
3158  void* estimatedVariance,
3159  double epsilon);
3160 
3192 MIOPEN_EXPORT miopenStatus_t
3194  miopenBatchNormMode_t bn_mode,
3195  void* alpha,
3196  void* beta,
3197  const miopenTensorDescriptor_t xDesc,
3198  const void* x,
3199  const miopenTensorDescriptor_t yDesc,
3200  void* y,
3201  const miopenTensorDescriptor_t scaleDesc,
3202  const miopenTensorDescriptor_t biasDesc,
3203  const miopenTensorDescriptor_t estMeanDesc,
3204  const miopenTensorDescriptor_t estVarianceDesc,
3205  void* bnScale,
3206  void* bnBias,
3207  void* estimatedMean,
3208  void* estimatedVariance,
3209  double epsilon);
3210 
3240  miopenHandle_t handle,
3241  miopenBatchNormMode_t bn_mode,
3242  void* alpha,
3243  void* beta,
3244  const miopenTensorDescriptor_t xDesc,
3245  const void* x,
3246  const miopenTensorDescriptor_t yDesc,
3247  void* y,
3248  const miopenTensorDescriptor_t scaleDesc,
3249  const miopenTensorDescriptor_t biasDesc,
3250  const miopenTensorDescriptor_t estMeanDesc,
3251  const miopenTensorDescriptor_t estInvVarianceDesc,
3252  void* bnScale,
3253  void* bnBias,
3254  void* estimatedMean,
3255  void* estimatedInvVariance);
3256 
3288  miopenHandle_t handle,
3289  miopenBatchNormMode_t bn_mode,
3290  void* alpha,
3291  void* beta,
3292  const miopenTensorDescriptor_t xDesc,
3293  const void* x,
3294  const miopenTensorDescriptor_t yDesc,
3295  void* y,
3296  const miopenTensorDescriptor_t scaleDesc,
3297  const miopenTensorDescriptor_t biasDesc,
3298  const miopenTensorDescriptor_t estMeanDesc,
3299  const miopenTensorDescriptor_t estInvVarianceDesc,
3300  void* bnScale,
3301  void* bnBias,
3302  void* estimatedMean,
3303  void* estimatedInvVariance,
3304  const miopenActivationDescriptor_t activDesc);
3305 
3338 MIOPEN_EXPORT miopenStatus_t
3340  miopenBatchNormMode_t bn_mode,
3341  void* alpha,
3342  void* beta,
3343  const miopenTensorDescriptor_t xDesc,
3344  const void* x,
3345  const miopenTensorDescriptor_t yDesc,
3346  void* y,
3347  const miopenTensorDescriptor_t scaleDesc,
3348  const miopenTensorDescriptor_t biasDesc,
3349  const miopenTensorDescriptor_t estMeanDesc,
3350  const miopenTensorDescriptor_t estVarianceDesc,
3351  void* bnScale,
3352  void* bnBias,
3353  void* estimatedMean,
3354  void* estimatedVariance,
3355  double epsilon,
3356  const miopenActivationDescriptor_t activDesc);
3357 
// Batch-normalization backward pass (per the function name); returns a miopenStatus_t.
// Read-only (const void*) arguments: alphaDataDiff, betaDataDiff, alphaParamDiff, betaParamDiff,
// x, dy, bnScale, savedMean and savedInvVariance.
// Writable (void*) arguments: dx, resultBnScaleDiff and resultBnBiasDiff.
// NOTE(review): a single descriptor, bnScaleBiasDiffDesc, accompanies the scale/bias/saved
// buffers -- presumably they all share that shape; confirm against the implementation.
3392 MIOPEN_EXPORT miopenStatus_t
3393 miopenBatchNormalizationBackward(miopenHandle_t handle,
3394  miopenBatchNormMode_t bn_mode,
3395  const void* alphaDataDiff,
3396  const void* betaDataDiff,
3397  const void* alphaParamDiff,
3398  const void* betaParamDiff,
3399  const miopenTensorDescriptor_t xDesc,
3400  const void* x,
3401  const miopenTensorDescriptor_t dyDesc,
3402  const void* dy,
3403  const miopenTensorDescriptor_t dxDesc,
3404  void* dx,
3405  const miopenTensorDescriptor_t bnScaleBiasDiffDesc,
3406  const void* bnScale,
3407  void* resultBnScaleDiff,
3408  void* resultBnBiasDiff,
3409  double epsilon,
3410  const void* savedMean,
3411  const void* savedInvVariance);
3412 
3451 MIOPEN_EXPORT miopenStatus_t
3453  miopenBatchNormMode_t bn_mode,
3454  const void* alphaDataDiff,
3455  const void* betaDataDiff,
3456  const void* alphaParamDiff,
3457  const void* betaParamDiff,
3458  const miopenTensorDescriptor_t xDesc,
3459  const void* x,
3460  const miopenTensorDescriptor_t dyDesc,
3461  const void* dy,
3462  const miopenTensorDescriptor_t dxDesc,
3463  void* dx,
3464  const miopenTensorDescriptor_t scaleDesc,
3465  const miopenTensorDescriptor_t biasDesc,
3466  const miopenTensorDescriptor_t savedMeanDesc,
3467  const miopenTensorDescriptor_t savedVarDesc,
3468  const void* bnScale,
3469  void* resultBnScaleDiff,
3470  void* resultBnBiasDiff,
3471  double epsilon,
3472  const void* savedMean,
3473  const void* savedInvVariance);
3474 
3515 MIOPEN_EXPORT miopenStatus_t
3517  miopenBatchNormMode_t bn_mode,
3518  const void* alphaDataDiff,
3519  const void* betaDataDiff,
3520  const void* alphaParamDiff,
3521  const void* betaParamDiff,
3522  const miopenTensorDescriptor_t xDesc,
3523  const void* x,
3524  const miopenTensorDescriptor_t dyDesc,
3525  const void* dy,
3526  const miopenTensorDescriptor_t dxDesc,
3527  void* dx,
3528  const miopenTensorDescriptor_t scaleDesc,
3529  const miopenTensorDescriptor_t biasDesc,
3530  const miopenTensorDescriptor_t savedMeanDesc,
3531  const miopenTensorDescriptor_t savedVarianceDesc,
3532  const void* bnScale,
3533  const void* bnBias,
3534  void* resultBnScaleDiff,
3535  void* resultBnBiasDiff,
3536  double epsilon,
3537  const void* savedMean,
3538  const void* savedInvVariance,
3539  const miopenActivationDescriptor_t activDesc);
3541 // CLOSEOUT BATCHNORM DOXYGEN GROUP
3542 
3543 // Activation APIs
// Creates an activation descriptor. activDesc is a pointer to the descriptor handle, so the
// new handle is presumably stored through it (out-parameter -- confirm). Returns miopenStatus_t.
3553 MIOPEN_EXPORT miopenStatus_t
3554 miopenCreateActivationDescriptor(miopenActivationDescriptor_t* activDesc);
3555 
3567 MIOPEN_EXPORT miopenStatus_t
3568 miopenSetActivationDescriptor(const miopenActivationDescriptor_t activDesc,
3570  double activAlpha,
3571  double activBeta,
3572  double activGamma);
3573 
// Queries an activation descriptor. The mode and the three double coefficients
// (activAlpha, activBeta, activGamma) are reported through pointer out-parameters.
// Returns miopenStatus_t.
3585 MIOPEN_EXPORT miopenStatus_t
3586 miopenGetActivationDescriptor(const miopenActivationDescriptor_t activDesc,
3587  miopenActivationMode_t* mode,
3588  double* activAlpha,
3589  double* activBeta,
3590  double* activGamma);
3591 
// Activation forward pass. Read-only inputs: alpha, x (described by xDesc) and beta.
// y (described by yDesc) is the only writable buffer. Returns miopenStatus_t.
// NOTE(review): alpha/beta look like blending scalars; the exact formula is not visible
// in this header -- confirm before documenting it.
3604 MIOPEN_EXPORT miopenStatus_t miopenActivationForward(miopenHandle_t handle,
3605  const miopenActivationDescriptor_t activDesc,
3606  const void* alpha,
3607  const miopenTensorDescriptor_t xDesc,
3608  const void* x,
3609  const void* beta,
3610  const miopenTensorDescriptor_t yDesc,
3611  void* y);
3612 
// Activation backward pass. Read-only inputs: alpha, y, dy, x and beta, each tensor paired
// with its own descriptor. dx (described by dxDesc) is the only writable buffer.
// Returns miopenStatus_t.
3629 MIOPEN_EXPORT miopenStatus_t miopenActivationBackward(miopenHandle_t handle,
3630  const miopenActivationDescriptor_t activDesc,
3631  const void* alpha,
3632  const miopenTensorDescriptor_t yDesc,
3633  const void* y,
3634  const miopenTensorDescriptor_t dyDesc,
3635  const void* dy,
3636  const miopenTensorDescriptor_t xDesc,
3637  const void* x,
3638  const void* beta,
3639  const miopenTensorDescriptor_t dxDesc,
3640  void* dx);
3641 
// Destroys an activation descriptor. The handle is passed by value, so the caller's own
// variable is left unchanged by the call. Returns miopenStatus_t.
3647 MIOPEN_EXPORT miopenStatus_t
3648 miopenDestroyActivationDescriptor(miopenActivationDescriptor_t activDesc);
3649 
3651 // CLOSEOUT ACTIVATION DOXYGEN GROUP
3652 
3653 #ifdef MIOPEN_BETA_API
// GLU forward pass; only declared when MIOPEN_BETA_API is defined. Reads `input`, writes
// `output`; `dim` is a dimension index (presumably the dimension that is split -- confirm).
// NOTE(review): uint32_t requires <stdint.h>, but the only standard headers this file visibly
// includes are <stddef.h> and <stdbool.h>; the type must be arriving transitively -- confirm.
3669 MIOPEN_EXPORT miopenStatus_t miopenGLUForward(miopenHandle_t handle,
3670  const miopenTensorDescriptor_t inputDesc,
3671  const void* input,
3672  const miopenTensorDescriptor_t outputDesc,
3673  void* output,
3674  const uint32_t dim);
3675 
// GLU backward pass; only declared when MIOPEN_BETA_API is defined. Reads `input` and
// `outputGrad`, writes `inputGrad`; `dim` is a dimension index.
// NOTE(review): uint32_t requires <stdint.h>, which this header does not visibly include.
3688 MIOPEN_EXPORT miopenStatus_t miopenGLUBackward(miopenHandle_t handle,
3689  const miopenTensorDescriptor_t inputDesc,
3690  const void* input,
3691  const miopenTensorDescriptor_t outputGradDesc,
3692  const void* outputGrad,
3693  const miopenTensorDescriptor_t inputGradDesc,
3694  void* inputGrad,
3695  const uint32_t dim);
3696 
3698 // CLOSEOUT ACTIVATION DOXYGEN GROUP
3699 #endif // MIOPEN_BETA_API
3700 
3701 // Softmax APIs
// Softmax forward pass. Read-only inputs: alpha, x (xDesc) and beta; writable output: y (yDesc).
// Takes no algorithm or mode argument, unlike miopenSoftmaxForward_V2. Returns miopenStatus_t.
3719 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxForward(miopenHandle_t handle,
3720  const void* alpha,
3721  const miopenTensorDescriptor_t xDesc,
3722  const void* x,
3723  const void* beta,
3724  const miopenTensorDescriptor_t yDesc,
3725  void* y);
3726 
// Softmax backward pass. Read-only inputs: alpha, y (yDesc), dy (dyDesc) and beta;
// writable output: dx (dxDesc). Returns miopenStatus_t.
3742 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxBackward(miopenHandle_t handle,
3743  const void* alpha,
3744  const miopenTensorDescriptor_t yDesc,
3745  const void* y,
3746  const miopenTensorDescriptor_t dyDesc,
3747  const void* dy,
3748  const void* beta,
3749  const miopenTensorDescriptor_t dxDesc,
3750  void* dx);
3751 
// Softmax forward pass with the same tensor arguments as miopenSoftmaxForward plus two
// trailing selectors: `algorithm` (miopenSoftmaxAlgorithm_t) and `mode` (miopenSoftmaxMode_t).
// Returns miopenStatus_t.
3765 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxForward_V2(miopenHandle_t handle,
3766  const void* alpha,
3767  const miopenTensorDescriptor_t xDesc,
3768  const void* x,
3769  const void* beta,
3770  const miopenTensorDescriptor_t yDesc,
3771  void* y,
3772  miopenSoftmaxAlgorithm_t algorithm,
3773  miopenSoftmaxMode_t mode);
3774 
// Softmax backward pass with the same tensor arguments as miopenSoftmaxBackward plus two
// trailing selectors: `algorithm` and `mode`. dx is the only writable buffer.
// Returns miopenStatus_t.
3790 MIOPEN_EXPORT miopenStatus_t miopenSoftmaxBackward_V2(miopenHandle_t handle,
3791  const void* alpha,
3792  const miopenTensorDescriptor_t yDesc,
3793  const void* y,
3794  const miopenTensorDescriptor_t dyDesc,
3795  const void* dy,
3796  const void* beta,
3797  const miopenTensorDescriptor_t dxDesc,
3798  void* dx,
3799  miopenSoftmaxAlgorithm_t algorithm,
3800  miopenSoftmaxMode_t mode);
3801 
3803 // CLOSEOUT SOFTMAX DOXYGEN GROUP
3804 
// Handle types used by the fusion functions declared below. Per the MIOPEN_DECLARE_OBJECT
// macro, each line defines an empty `struct <name>` and a pointer typedef `<name>_t`.
// Because `<name>_t` is itself a pointer type, `const <name>_t` in a prototype makes the
// pointer const, not the object it points to.
3808 MIOPEN_DECLARE_OBJECT(miopenFusionPlanDescriptor);
3809 MIOPEN_DECLARE_OBJECT(miopenOperatorDescriptor);
3810 MIOPEN_DECLARE_OBJECT(miopenOperatorArgs);
3811 
3820 typedef enum
3821 {
3825 
// Creates a fusion plan for the given fusion direction and input tensor descriptor.
// fusePlanDesc is a pointer to the plan handle (presumably the out-parameter -- confirm).
// Returns miopenStatus_t.
3833 MIOPEN_EXPORT miopenStatus_t miopenCreateFusionPlan(miopenFusionPlanDescriptor_t* fusePlanDesc,
3834  const miopenFusionDirection_t fuseDirection,
3835  const miopenTensorDescriptor_t inputDesc);
3836 
// Destroys a fusion plan; the handle is passed by value. Returns miopenStatus_t.
3842 MIOPEN_EXPORT miopenStatus_t miopenDestroyFusionPlan(miopenFusionPlanDescriptor_t fusePlanDesc);
3843 
// Compiles the given fusion plan using the supplied library handle. Returns miopenStatus_t.
// NOTE(review): what "compile" produces (kernels, cached state) is not visible here -- confirm.
3850 MIOPEN_EXPORT miopenStatus_t miopenCompileFusionPlan(miopenHandle_t handle,
3851  miopenFusionPlanDescriptor_t fusePlanDesc);
3852 
// Looks up an operator of a fusion plan by index. `op_idx` selects the operator and `op`
// is a pointer through which its descriptor is reported. Returns miopenStatus_t.
// NOTE(review): the valid range of op_idx is not visible here -- confirm.
3863 MIOPEN_EXPORT miopenStatus_t miopenFusionPlanGetOp(miopenFusionPlanDescriptor_t fusePlanDesc,
3864  const int op_idx,
3865  miopenFusionOpDescriptor_t* op);
3866 
3874 MIOPEN_EXPORT miopenStatus_t
3875 miopenFusionPlanGetWorkSpaceSize(miopenHandle_t handle,
3876  miopenFusionPlanDescriptor_t fusePlanDesc,
3877  size_t* workSpaceSize,
3879 
// Reports convolution forward algorithms for a fusion plan. The caller passes the requested
// count; the function reports a count through returnedAlgoCount and algorithm values through
// returnedAlgos (presumably a caller-provided array of at least requestAlgoCount -- confirm).
// Returns miopenStatus_t.
3897 MIOPEN_EXPORT miopenStatus_t
3898 miopenFusionPlanConvolutionGetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc,
3899  const int requestAlgoCount,
3900  int* returnedAlgoCount,
3901  miopenConvFwdAlgorithm_t* returnedAlgos);
3902 
3913  miopenFusionPlanDescriptor_t fusePlanDesc, miopenConvFwdAlgorithm_t algo);
3914 
// Creates a convolution-forward operator in a fusion plan from a convolution descriptor and
// a weight tensor descriptor; the operator descriptor is reported through convOp.
// Returns miopenStatus_t.
3923 MIOPEN_EXPORT miopenStatus_t miopenCreateOpConvForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3924  miopenFusionOpDescriptor_t* convOp,
3925  miopenConvolutionDescriptor_t convDesc,
3926  const miopenTensorDescriptor_t wDesc);
3927 
3928 //---
3929 
3930 // Activation forward create ops ---
// Creates an activation-forward operator in a fusion plan for the given activation mode;
// the operator descriptor is reported through activFwdOp. Returns miopenStatus_t.
3938 MIOPEN_EXPORT miopenStatus_t
3939 miopenCreateOpActivationForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3940  miopenFusionOpDescriptor_t* activFwdOp,
3941  miopenActivationMode_t mode);
3942 
3943 // Activation backward create ops ---
// Creates an activation-backward operator in a fusion plan for the given activation mode;
// the operator descriptor is reported through activBwdOp. Returns miopenStatus_t.
3951 MIOPEN_EXPORT miopenStatus_t
3952 miopenCreateOpActivationBackward(miopenFusionPlanDescriptor_t fusePlanDesc,
3953  miopenFusionOpDescriptor_t* activBwdOp,
3954  miopenActivationMode_t mode);
3955 
3956 // Bias create ops ---
// Creates a bias-forward operator in a fusion plan from the bias tensor descriptor bDesc;
// the operator descriptor is reported through biasOp. Returns miopenStatus_t.
3964 MIOPEN_EXPORT miopenStatus_t miopenCreateOpBiasForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3965  miopenFusionOpDescriptor_t* biasOp,
3966  const miopenTensorDescriptor_t bDesc);
3967 
3968 // Batch normalization create ops ---
// Creates a batch-norm inference operator in a fusion plan. Takes the batch-norm mode and
// one combined descriptor (bnScaleBiasMeanVarDesc); the operator descriptor is reported
// through bnOp. Returns miopenStatus_t.
3977 MIOPEN_EXPORT miopenStatus_t
3978 miopenCreateOpBatchNormInference(miopenFusionPlanDescriptor_t fusePlanDesc,
3979  miopenFusionOpDescriptor_t* bnOp,
3980  const miopenBatchNormMode_t bn_mode,
3981  const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc);
3982 
// Creates a batch-norm forward operator in a fusion plan; the operator descriptor is
// reported through bnFwdOp. Returns miopenStatus_t.
// NOTE(review): runningMeanVariance presumably toggles tracking of running statistics --
// confirm; no tensor descriptor is passed here, unlike the inference variant.
3992 MIOPEN_EXPORT miopenStatus_t
3993 miopenCreateOpBatchNormForward(miopenFusionPlanDescriptor_t fusePlanDesc,
3994  miopenFusionOpDescriptor_t* bnFwdOp,
3995  const miopenBatchNormMode_t bn_mode,
3996  bool runningMeanVariance);
3997 
// Creates a batch-norm backward operator in a fusion plan for the given batch-norm mode;
// the operator descriptor is reported through bnBwdOp. Returns miopenStatus_t.
4005 MIOPEN_EXPORT miopenStatus_t
4006 miopenCreateOpBatchNormBackward(miopenFusionPlanDescriptor_t fusePlanDesc,
4007  miopenFusionOpDescriptor_t* bnBwdOp,
4008  const miopenBatchNormMode_t bn_mode);
4009 
4010 //---
// Creates an operator-arguments object; `args` is a pointer to the handle (presumably the
// out-parameter -- confirm). Returns miopenStatus_t.
4016 MIOPEN_EXPORT miopenStatus_t miopenCreateOperatorArgs(miopenOperatorArgs_t* args);
4017 
// Destroys an operator-arguments object; the handle is passed by value. Returns miopenStatus_t.
4023 MIOPEN_EXPORT miopenStatus_t miopenDestroyOperatorArgs(miopenOperatorArgs_t args);
4024 
4025 // Convolution set arguments ---
// Records the arguments for a convolution-forward fusion operator in `args`:
// the alpha/beta pointers and the weight buffer w, all read-only. Returns miopenStatus_t.
4035 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsConvForward(miopenOperatorArgs_t args,
4036  const miopenFusionOpDescriptor_t convOp,
4037  const void* alpha,
4038  const void* beta,
4039  const void* w);
4040 // Activation set arguments ---
// Records the arguments for an activation-forward fusion operator in `args`: the alpha/beta
// pointers plus the three double coefficients activAlpha, activBeta and activGamma.
// Returns miopenStatus_t.
4052 MIOPEN_EXPORT miopenStatus_t
4053 miopenSetOpArgsActivForward(miopenOperatorArgs_t args,
4054  const miopenFusionOpDescriptor_t activFwdOp,
4055  const void* alpha,
4056  const void* beta,
4057  double activAlpha,
4058  double activBeta,
4059  double activGamma);
4060 
// Records the arguments for an activation-backward fusion operator in `args`: alpha/beta,
// the read-only buffer y, and the three double activation coefficients. Returns miopenStatus_t.
// NOTE(review): the purpose of the `reserved` pointer is not visible here -- confirm what
// callers are expected to pass.
4074 MIOPEN_EXPORT miopenStatus_t
4075 miopenSetOpArgsActivBackward(miopenOperatorArgs_t args,
4076  const miopenFusionOpDescriptor_t activBwdOp,
4077  const void* alpha,
4078  const void* beta,
4079  const void* y,
4080  const void* reserved,
4081  double activAlpha,
4082  double activBeta,
4083  double activGamma);
4084 
4085 // Batch Normalization set arguments ---
// Records the arguments for a batch-norm inference fusion operator in `args`: alpha/beta,
// the read-only buffers bnScale, bnBias, estimatedMean and estimatedVariance, and epsilon.
// Returns miopenStatus_t.
4099 MIOPEN_EXPORT miopenStatus_t
4100 miopenSetOpArgsBatchNormInference(miopenOperatorArgs_t args,
4101  const miopenFusionOpDescriptor_t bnOp,
4102  const void* alpha,
4103  const void* beta,
4104  const void* bnScale,
4105  const void* bnBias,
4106  const void* estimatedMean,
4107  const void* estimatedVariance,
4108  double epsilon);
4109 
// Records the arguments for a batch-norm forward fusion operator in `args`. bnScale and
// bnBias are read-only; savedMean, savedInvVariance, runningMean and runningVariance are
// writable pointers. expAvgFactor and epsilon are passed by value. Returns miopenStatus_t.
4126 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsBatchNormForward(miopenOperatorArgs_t args,
4127  const miopenFusionOpDescriptor_t bnOp,
4128  const void* alpha,
4129  const void* beta,
4130  const void* bnScale,
4131  const void* bnBias,
4132  void* savedMean,
4133  void* savedInvVariance,
4134  void* runningMean,
4135  void* runningVariance,
4136  double expAvgFactor,
4137  double epsilon);
4138 
// Records the arguments for a batch-norm backward fusion operator in `args`. Read-only:
// alpha, beta, x, bnScale, bnBias, savedMean, savedInvVariance. Writable: resultBnScaleDiff
// and resultBnBiasDiff. Returns miopenStatus_t.
4154 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsBatchNormBackward(miopenOperatorArgs_t args,
4155  const miopenFusionOpDescriptor_t bnOp,
4156  const void* alpha,
4157  const void* beta,
4158  const void* x,
4159  const void* bnScale,
4160  const void* bnBias,
4161  void* resultBnScaleDiff,
4162  void* resultBnBiasDiff,
4163  const void* savedMean,
4164  const void* savedInvVariance);
4165 
4166 // Bias forward set arguments ---
// Records the arguments for a bias-forward fusion operator in `args`: the alpha/beta
// pointers and the read-only bias buffer. Returns miopenStatus_t.
4176 MIOPEN_EXPORT miopenStatus_t miopenSetOpArgsBiasForward(miopenOperatorArgs_t args,
4177  const miopenFusionOpDescriptor_t biasOp,
4178  const void* alpha,
4179  const void* beta,
4180  const void* bias);
4181 
// Executes a fusion plan: reads `input` (inputDesc), writes `output` (outputDesc), and
// takes the operator arguments in `args`. Has no workspace parameters, unlike
// miopenExecuteFusionPlan_v2. Returns miopenStatus_t.
4196 MIOPEN_EXPORT miopenStatus_t
4197 miopenExecuteFusionPlan(const miopenHandle_t handle,
4198  const miopenFusionPlanDescriptor_t fusePlanDesc,
4199  const miopenTensorDescriptor_t inputDesc,
4200  const void* input,
4201  const miopenTensorDescriptor_t outputDesc,
4202  void* output,
4203  miopenOperatorArgs_t args);
4204 
// Executes a fusion plan with the same arguments as miopenExecuteFusionPlan plus a
// caller-supplied workspace pointer and its size. Returns miopenStatus_t.
// NOTE(review): workspaceSize is presumably in bytes -- confirm.
4219 MIOPEN_EXPORT miopenStatus_t
4220 miopenExecuteFusionPlan_v2(const miopenHandle_t handle,
4221  const miopenFusionPlanDescriptor_t fusePlanDesc,
4222  const miopenTensorDescriptor_t inputDesc,
4223  const void* input,
4224  const miopenTensorDescriptor_t outputDesc,
4225  void* output,
4226  miopenOperatorArgs_t args,
4227  void* workspace,
4228  size_t workspaceSize);
4229 
4253 MIOPEN_EXPORT miopenStatus_t
4255  const void* alpha1,
4256  const miopenTensorDescriptor_t xDesc,
4257  const void* x,
4258  const miopenTensorDescriptor_t wDesc,
4259  const void* w,
4260  const miopenConvolutionDescriptor_t convDesc,
4262  void* workspace,
4263  size_t workspaceSizeInBytes,
4264  const void* alpha2,
4265  const miopenTensorDescriptor_t zDesc,
4266  const void* z,
4267  const miopenTensorDescriptor_t biasDesc,
4268  const void* bias,
4269  const miopenActivationDescriptor_t activationDesc,
4270  const miopenTensorDescriptor_t yDesc,
4271  void* y);
4273 // CLOSEOUT FUSION DOXYGEN GROUP
4274 
4283 typedef enum
4284 {
4289 } miopenRNNMode_t;
4290 
4294 typedef enum
4295 {
4299 
4303 typedef enum
4304 {
4305  miopenRNNdefault = 0,
4307  miopenRNNfundamental = 1,
4311 } miopenRNNAlgo_t;
4312 
4316 typedef enum
4317 {
4321 
4325 typedef enum
4326 {
4330 
4334 typedef enum
4335 {
4338 
4342 typedef enum
4343 {
4347 
4351 typedef enum
4352 {
4356 
4360 typedef enum
4361 {
4367 
// Creates an RNN descriptor; rnnDesc is a pointer to the handle (presumably the
// out-parameter -- confirm). Returns miopenStatus_t.
4374 MIOPEN_EXPORT miopenStatus_t miopenCreateRNNDescriptor(miopenRNNDescriptor_t* rnnDesc);
4375 
// Queries an RNN descriptor. Every setting is reported through a pointer out-parameter:
// rnnMode, algoMode, inputMode, dirMode, biasMode, hiddenSize and layer.
// Returns miopenStatus_t.
4388 MIOPEN_EXPORT miopenStatus_t miopenGetRNNDescriptor(miopenRNNDescriptor_t rnnDesc,
4389  miopenRNNMode_t* rnnMode,
4390  miopenRNNAlgo_t* algoMode,
4391  miopenRNNInputMode_t* inputMode,
4392  miopenRNNDirectionMode_t* dirMode,
4393  miopenRNNBiasMode_t* biasMode,
4394  int* hiddenSize,
4395  int* layer);
4396 
// Queries an RNN descriptor through pointer out-parameters. Compared with
// miopenGetRNNDescriptor it additionally reports dropoutDesc and dataType, and the
// parameter order differs (hiddenSize and layer come first). Returns miopenStatus_t.
4413 MIOPEN_EXPORT miopenStatus_t miopenGetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc,
4414  int* hiddenSize,
4415  int* layer,
4416  miopenDropoutDescriptor_t* dropoutDesc,
4417  miopenRNNInputMode_t* inputMode,
4418  miopenRNNDirectionMode_t* dirMode,
4419  miopenRNNMode_t* rnnMode,
4420  miopenRNNBiasMode_t* biasMode,
4421  miopenRNNAlgo_t* algoMode,
4422  miopenDataType_t* dataType);
4423 
// Destroys an RNN descriptor; the handle is passed by value. Returns miopenStatus_t.
4429 MIOPEN_EXPORT miopenStatus_t miopenDestroyRNNDescriptor(miopenRNNDescriptor_t rnnDesc);
4430 
// Configures an RNN descriptor: hidden size, layer count, input mode, direction, RNN mode,
// bias mode, algorithm and data type, all passed by value. Returns miopenStatus_t.
4446 MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor(miopenRNNDescriptor_t rnnDesc,
4447  const int hsize,
4448  const int nlayers,
4449  miopenRNNInputMode_t inMode,
4450  miopenRNNDirectionMode_t direction,
4451  miopenRNNMode_t rnnMode,
4452  miopenRNNBiasMode_t biasMode,
4453  miopenRNNAlgo_t algo,
4454  miopenDataType_t dataType);
4455 
// Configures an RNN descriptor with the same settings as miopenSetRNNDescriptor plus a
// dropout descriptor, inserted after nlayers. Returns miopenStatus_t.
4474 MIOPEN_EXPORT miopenStatus_t miopenSetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc,
4475  const int hsize,
4476  const int nlayers,
4477  miopenDropoutDescriptor_t dropoutDesc,
4478  miopenRNNInputMode_t inMode,
4479  miopenRNNDirectionMode_t direction,
4480  miopenRNNMode_t rnnMode,
4481  miopenRNNBiasMode_t biasMode,
4482  miopenRNNAlgo_t algo,
4483  miopenDataType_t dataType);
4484 
// Configures a sequence-tensor descriptor for RNN data: data type, layout, maximum sequence
// length, batch size, vector size, a read-only array of sequence lengths, and a padding
// marker. Returns miopenStatus_t.
// NOTE(review): sequenceLenArray presumably holds batchSize entries -- confirm. paddingMarker
// is a non-const void* even though this is a setter; confirm whether it is ever written.
4499 MIOPEN_EXPORT miopenStatus_t
4500 miopenSetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc,
4501  miopenDataType_t dataType,
4502  miopenRNNBaseLayout_t layout,
4503  int maxSequenceLen,
4504  int batchSize,
4505  int vectorSize,
4506  const int* sequenceLenArray,
4507  void* paddingMarker);
4508 
// Queries a sequence-tensor descriptor through pointer out-parameters: data type, layout,
// maximum sequence length, batch size, vector size, sequence lengths and padding marker.
// Returns miopenStatus_t.
// NOTE(review): sequenceLenArrayLimit presumably caps how many entries are written to
// sequenceLenArray -- confirm.
4527 MIOPEN_EXPORT miopenStatus_t
4528 miopenGetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc,
4529  miopenDataType_t* dataType,
4530  miopenRNNBaseLayout_t* layout,
4531  int* maxSequenceLen,
4532  int* batchSize,
4533  int* vectorSize,
4534  int sequenceLenArrayLimit,
4535  int* sequenceLenArray,
4536  void* paddingMarker);
4537 
// Reports an RNN workspace size through numBytes, given the RNN descriptor, a sequence
// length and a pointer to tensor descriptors (presumably an array of sequenceLen entries --
// confirm). Returns miopenStatus_t.
// NOTE(review): rnnDesc is const-qualified here but not in miopenGetRNNTrainingReserveSize.
4554 MIOPEN_EXPORT miopenStatus_t miopenGetRNNWorkspaceSize(miopenHandle_t handle,
4555  const miopenRNNDescriptor_t rnnDesc,
4556  const int sequenceLen,
4557  const miopenTensorDescriptor_t* xDesc,
4558  size_t* numBytes);
4559 
// Reports an RNN training reserve-space size through numBytes, given the RNN descriptor,
// a sequence length and a pointer to tensor descriptors (presumably an array of sequenceLen
// entries -- confirm). Returns miopenStatus_t.
4576 MIOPEN_EXPORT miopenStatus_t miopenGetRNNTrainingReserveSize(miopenHandle_t handle,
4577  miopenRNNDescriptor_t rnnDesc,
4578  const int sequenceLen,
4579  const miopenTensorDescriptor_t* xDesc,
4580  size_t* numBytes);
4581 
// Reports both the workspace size and the reserve-space size in a single call, for a
// sequence-tensor input descriptor and a forward mode. Returns miopenStatus_t.
// NOTE(review): whether either out-pointer may be NULL is not visible here -- confirm.
4598 MIOPEN_EXPORT miopenStatus_t miopenGetRNNTempSpaceSizes(miopenHandle_t handle,
4599  miopenRNNDescriptor_t rnnDesc,
4600  miopenSeqTensorDescriptor_t xDesc,
4601  miopenRNNFWDMode_t fwdMode,
4602  size_t* workSpaceSize,
4603  size_t* reserveSpaceSize);
4604 
// Reports the size of the RNN parameters through numBytes, for the given input tensor
// descriptor and data type. Returns miopenStatus_t.
4617 MIOPEN_EXPORT miopenStatus_t miopenGetRNNParamsSize(miopenHandle_t handle,
4618  miopenRNNDescriptor_t rnnDesc,
4619  miopenTensorDescriptor_t xDesc,
4620  size_t* numBytes,
4621  miopenDataType_t dtype);
4622 
// Takes an input tensor descriptor (xDesc), a weight tensor descriptor (wDesc) and a data
// type. Returns miopenStatus_t.
// NOTE(review): wDesc is passed by value as a handle and is presumably the descriptor that
// gets filled in (per the "Get...Descriptor" name) -- confirm.
4635 MIOPEN_EXPORT miopenStatus_t miopenGetRNNParamsDescriptor(miopenHandle_t handle,
4636  miopenRNNDescriptor_t rnnDesc,
4637  miopenTensorDescriptor_t xDesc,
4638  miopenTensorDescriptor_t wDesc,
4639  miopenDataType_t dtype);
4640 
// Reports the RNN input tensor size through numBytes for a sequence of seqLen steps.
// Returns miopenStatus_t.
// NOTE(review): xDesc is a non-const descriptor pointer here, whereas
// miopenGetRNNWorkspaceSize takes `const miopenTensorDescriptor_t*`.
4658 MIOPEN_EXPORT miopenStatus_t miopenGetRNNInputTensorSize(miopenHandle_t handle,
4659  miopenRNNDescriptor_t rnnDesc,
4660  const int seqLen,
4661  miopenTensorDescriptor_t* xDesc,
4662  size_t* numBytes);
4663 
// Reports the RNN hidden tensor size through numBytes; same parameter list as
// miopenGetRNNInputTensorSize. Returns miopenStatus_t.
4676 MIOPEN_EXPORT miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle,
4677  miopenRNNDescriptor_t rnnDesc,
4678  const int seqLen,
4679  miopenTensorDescriptor_t* xDesc,
4680  size_t* numBytes);
4681 
// Reports, through numBytes, the size of one parameter selected by (layer, paramID).
// Returns miopenStatus_t.
// NOTE(review): the meaning and valid range of paramID are not visible here -- confirm.
4722 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle,
4723  miopenRNNDescriptor_t rnnDesc,
4724  const int layer,
4725  miopenTensorDescriptor_t xDesc,
4726  const int paramID,
4727  size_t* numBytes);
4728 
// Reports, through numBytes, the size of one bias selected by (layer, biasID). Unlike
// miopenGetRNNLayerParamSize it takes no input tensor descriptor. Returns miopenStatus_t.
4766 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle,
4767  miopenRNNDescriptor_t rnnDesc,
4768  const int layer,
4769  const int biasID,
4770  size_t* numBytes);
4771 
// Retrieves one layer parameter selected by (layer, paramID). The weight buffer w is
// read-only; layerParam is the writable destination and paramDesc is its descriptor handle.
// Returns miopenStatus_t.
// NOTE(review): paramDesc is presumably filled in as well -- confirm.
4830 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle,
4831  miopenRNNDescriptor_t rnnDesc,
4832  const int layer,
4833  miopenTensorDescriptor_t xDesc,
4834  miopenTensorDescriptor_t wDesc,
4835  const void* w,
4836  const int paramID,
4837  miopenTensorDescriptor_t paramDesc,
4838  void* layerParam);
4839 
// Retrieves one layer bias selected by (layer, biasID). The weight buffer w is read-only;
// layerBias is the writable destination and biasDesc is its descriptor handle.
// Returns miopenStatus_t.
4897 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle,
4898  miopenRNNDescriptor_t rnnDesc,
4899  const int layer,
4900  miopenTensorDescriptor_t xDesc,
4901  miopenTensorDescriptor_t wDesc,
4902  const void* w,
4903  const int biasID,
4904  miopenTensorDescriptor_t biasDesc,
4905  void* layerBias);
4906 
// Reports, through layerParamOffset, an offset for the parameter selected by
// (layer, paramID). Takes no miopenHandle_t. Returns miopenStatus_t.
// NOTE(review): the unit of the offset (bytes vs. elements) is not visible here -- confirm.
4961 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t rnnDesc,
4962  const int layer,
4963  miopenTensorDescriptor_t xDesc,
4964  const int paramID,
4965  miopenTensorDescriptor_t paramDesc,
4966  size_t* layerParamOffset);
4967 
// Reports, through layerBiasOffset, an offset for the bias selected by (layer, biasID).
// Takes no miopenHandle_t. Returns miopenStatus_t.
// NOTE(review): the unit of the offset (bytes vs. elements) is not visible here -- confirm.
5018 MIOPEN_EXPORT miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t rnnDesc,
5019  const int layer,
5020  miopenTensorDescriptor_t xDesc,
5021  const int biasID,
5022  miopenTensorDescriptor_t biasDesc,
5023  size_t* layerBiasOffset);
5024 
// Stores one layer parameter selected by (layer, paramID). Mirror image of
// miopenGetRNNLayerParam: here the weight buffer w is writable and layerParam is the
// read-only source. Returns miopenStatus_t.
5077 MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle,
5078  miopenRNNDescriptor_t rnnDesc,
5079  const int layer,
5080  miopenTensorDescriptor_t xDesc,
5081  miopenTensorDescriptor_t wDesc,
5082  void* w,
5083  const int paramID,
5084  miopenTensorDescriptor_t paramDesc,
5085  const void* layerParam);
5086 
// Stores one layer bias selected by (layer, biasID). Mirror image of miopenGetRNNLayerBias:
// here the weight buffer w is writable and layerBias is the read-only source.
// Returns miopenStatus_t.
5137 MIOPEN_EXPORT miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle,
5138  miopenRNNDescriptor_t rnnDesc,
5139  const int layer,
5140  miopenTensorDescriptor_t xDesc,
5141  miopenTensorDescriptor_t wDesc,
5142  void* w,
5143  const int biasID,
5144  miopenTensorDescriptor_t biasDesc,
5145  const void* layerBias);
5146 
// Sets the padding mode of an RNN descriptor; the mode is passed by value.
// Returns miopenStatus_t.
5158 MIOPEN_EXPORT miopenStatus_t miopenSetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc,
5159  miopenRNNPaddingMode_t paddingMode);
5160 
// Reads the padding mode of an RNN descriptor into *paddingMode. Returns miopenStatus_t.
5168 MIOPEN_EXPORT miopenStatus_t miopenGetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc,
5169  miopenRNNPaddingMode_t* paddingMode);
5170 
// RNN forward pass using sequence-tensor descriptors for x and y. fwdMode selects the
// forward mode. Read-only: x, hx, cx, w. Writable: hy, cy, y, workSpace, reserveSpace.
// One descriptor (hDesc) covers both hx and hy, and one (cDesc) covers both cx and cy.
// Each of w, workSpace and reserveSpace is followed by a size_t size argument.
// Returns miopenStatus_t.
// NOTE(review): which pointers may be NULL (e.g. cx/cy, reserveSpace) is not visible here.
5221 MIOPEN_EXPORT miopenStatus_t miopenRNNForward(miopenHandle_t handle,
5222  const miopenRNNDescriptor_t rnnDesc,
5223  miopenRNNFWDMode_t fwdMode,
5224  const miopenSeqTensorDescriptor_t xDesc,
5225  const void* x,
5226  const miopenTensorDescriptor_t hDesc,
5227  const void* hx,
5228  void* hy,
5229  const miopenTensorDescriptor_t cDesc,
5230  const void* cx,
5231  void* cy,
5232  const miopenSeqTensorDescriptor_t yDesc,
5233  void* y,
5234  const void* w,
5235  size_t weightSpaceSize,
5236  void* workSpace,
5237  size_t workSpaceNumBytes,
5238  void* reserveSpace,
5239  size_t reserveSpaceNumBytes);
5240 
// RNN backward-data pass using sequence-tensor descriptors. Read-only: y, dy, hx, dhy, cx,
// dcy, w. Writable: dhx, dcx, dx, workSpace, reserveSpace. yDesc covers y and dy, hDesc
// covers hx/dhy/dhx, cDesc covers cx/dcy/dcx, and xDesc describes dx.
// Returns miopenStatus_t.
5290 MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardSeqData(miopenHandle_t handle,
5291  const miopenRNNDescriptor_t rnnDesc,
5292  const miopenSeqTensorDescriptor_t yDesc,
5293  const void* y,
5294  const void* dy,
5295  const miopenTensorDescriptor_t hDesc,
5296  const void* hx,
5297  const void* dhy,
5298  void* dhx,
5299  const miopenTensorDescriptor_t cDesc,
5300  const void* cx,
5301  const void* dcy,
5302  void* dcx,
5303  const miopenSeqTensorDescriptor_t xDesc,
5304  void* dx,
5305  const void* w,
5306  size_t weightSpaceSize,
5307  void* workSpace,
5308  size_t workSpaceNumBytes,
5309  void* reserveSpace,
5310  size_t reserveSpaceNumBytes);
5311 
5345 MIOPEN_EXPORT miopenStatus_t
5347  const miopenRNNDescriptor_t rnnDesc,
5348  const miopenSeqTensorDescriptor_t xDesc,
5349  const void* x,
5350  const miopenTensorDescriptor_t hDesc,
5351  const void* hx,
5352  const miopenSeqTensorDescriptor_t yDesc,
5353  const void* y,
5354  void* dw,
5355  size_t weightSpaceSize,
5356  void* workSpace,
5357  size_t workSpaceNumBytes,
5358  const void* reserveSpace,
5359  size_t reserveSpaceNumBytes);
5360 
// RNN forward training pass. xDesc and yDesc are pointers to tensor descriptors
// (presumably arrays of sequenceLen entries -- confirm). Read-only: x, hx, cx, w.
// Writable: y, hy, cy, workSpace, reserveSpace. Every tensor has its own descriptor.
// Returns miopenStatus_t.
5418 MIOPEN_EXPORT miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle,
5419  const miopenRNNDescriptor_t rnnDesc,
5420  const int sequenceLen,
5421  const miopenTensorDescriptor_t* xDesc,
5422  const void* x,
5423  const miopenTensorDescriptor_t hxDesc,
5424  const void* hx,
5425  const miopenTensorDescriptor_t cxDesc,
5426  const void* cx,
5427  const miopenTensorDescriptor_t wDesc,
5428  const void* w,
5429  const miopenTensorDescriptor_t* yDesc,
5430  void* y,
5431  const miopenTensorDescriptor_t hyDesc,
5432  void* hy,
5433  const miopenTensorDescriptor_t cyDesc,
5434  void* cy,
5435  void* workSpace,
5436  size_t workSpaceNumBytes,
5437  void* reserveSpace,
5438  size_t reserveSpaceNumBytes);
5439 
// RNN backward-data pass. yDesc, dyDesc and dxDesc are pointers to tensor descriptors
// (presumably arrays of sequenceLen entries -- confirm). Read-only: y, dy, dhy, dcy, w,
// hx, cx. Writable: dx, dhx, dcx, workSpace, reserveSpace. Returns miopenStatus_t.
5512 MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardData(miopenHandle_t handle,
5513  const miopenRNNDescriptor_t rnnDesc,
5514  const int sequenceLen,
5515  const miopenTensorDescriptor_t* yDesc,
5516  const void* y,
5517  const miopenTensorDescriptor_t* dyDesc,
5518  const void* dy,
5519  const miopenTensorDescriptor_t dhyDesc,
5520  const void* dhy,
5521  const miopenTensorDescriptor_t dcyDesc,
5522  const void* dcy,
5523  const miopenTensorDescriptor_t wDesc,
5524  const void* w,
5525  const miopenTensorDescriptor_t hxDesc,
5526  const void* hx,
5527  const miopenTensorDescriptor_t cxDesc,
5528  const void* cx,
5529  const miopenTensorDescriptor_t* dxDesc,
5530  void* dx,
5531  const miopenTensorDescriptor_t dhxDesc,
5532  void* dhx,
5533  const miopenTensorDescriptor_t dcxDesc,
5534  void* dcx,
5535  void* workSpace,
5536  size_t workSpaceNumBytes,
5537  void* reserveSpace,
5538  size_t reserveSpaceNumBytes);
5539 
// RNN backward-weights pass. Read-only: x, hx, y and reserveSpace. Writable: dw and
// workSpace. Returns miopenStatus_t.
// NOTE(review): reserveSpace is `const void*` here but `void*` in miopenRNNBackwardData.
5576 MIOPEN_EXPORT miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle,
5577  const miopenRNNDescriptor_t rnnDesc,
5578  const int sequenceLen,
5579  const miopenTensorDescriptor_t* xDesc,
5580  const void* x,
5581  const miopenTensorDescriptor_t hxDesc,
5582  const void* hx,
5583  const miopenTensorDescriptor_t* yDesc,
5584  const void* y,
5585  const miopenTensorDescriptor_t dwDesc,
5586  void* dw,
5587  void* workSpace,
5588  size_t workSpaceNumBytes,
5589  const void* reserveSpace,
5590  size_t reserveSpaceNumBytes);
5591 
// RNN forward inference pass. Same tensor arguments as miopenRNNForwardTraining, but with
// no reserveSpace parameters. Read-only: x, hx, cx, w. Writable: y, hy, cy, workSpace.
// Returns miopenStatus_t.
// NOTE(review): rnnDesc is not const-qualified here, unlike in miopenRNNForwardTraining.
5647 MIOPEN_EXPORT miopenStatus_t miopenRNNForwardInference(miopenHandle_t handle,
5648  miopenRNNDescriptor_t rnnDesc,
5649  const int sequenceLen,
5650  const miopenTensorDescriptor_t* xDesc,
5651  const void* x,
5652  const miopenTensorDescriptor_t hxDesc,
5653  const void* hx,
5654  const miopenTensorDescriptor_t cxDesc,
5655  const void* cx,
5656  const miopenTensorDescriptor_t wDesc,
5657  const void* w,
5658  const miopenTensorDescriptor_t* yDesc,
5659  void* y,
5660  const miopenTensorDescriptor_t hyDesc,
5661  void* hy,
5662  const miopenTensorDescriptor_t cyDesc,
5663  void* cy,
5664  void* workSpace,
5665  size_t workSpaceNumBytes);
5666 
5668 // CLOSEOUT RNN DOXYGEN GROUP
5669 
5678 typedef enum
5679 {
5682 
// Creates a CTC loss descriptor; ctcLossDesc is a pointer to the handle (presumably the
// out-parameter -- confirm). Returns miopenStatus_t.
5689 MIOPEN_EXPORT miopenStatus_t miopenCreateCTCLossDescriptor(miopenCTCLossDescriptor_t* ctcLossDesc);
5690 
// Queries a CTC loss descriptor through pointer out-parameters: data type, blank label id
// and the apply-softmax flag. Returns miopenStatus_t.
5700 MIOPEN_EXPORT miopenStatus_t miopenGetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc,
5701  miopenDataType_t* dataType,
5702  int* blank_label_id,
5703  bool* apply_softmax_layer);
5704 
// Destroys a CTC loss descriptor; the handle is passed by value. Returns miopenStatus_t.
5710 MIOPEN_EXPORT miopenStatus_t miopenDestroyCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc);
5711 
// Configures a CTC loss descriptor with a data type, a blank label id and an apply-softmax
// flag, all passed by value. Returns miopenStatus_t.
5721 MIOPEN_EXPORT miopenStatus_t miopenSetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc,
5722  miopenDataType_t dataType,
5723  const int blank_label_id,
5724  bool apply_softmax_layer);
5725 
// Reports the CTC loss workspace size through workSpaceSize. Inputs are the probability
// and gradient tensor descriptors, three read-only int arrays (labels, labelLengths,
// inputLengths), the algorithm and the CTC loss descriptor. Returns miopenStatus_t.
// NOTE(review): whether the int arrays live in host or device memory is not visible here.
5742 MIOPEN_EXPORT miopenStatus_t
5743 miopenGetCTCLossWorkspaceSize(miopenHandle_t handle,
5744  const miopenTensorDescriptor_t probsDesc,
5745  const miopenTensorDescriptor_t gradientsDesc,
5746  const int* labels,
5747  const int* labelLengths,
5748  const int* inputLengths,
5749  miopenCTCLossAlgo_t algo,
5750  const miopenCTCLossDescriptor_t ctcLossDesc,
5751  size_t* workSpaceSize);
5752 
// Runs the CTC loss. Read-only: probs, labels, labelLengths, inputLengths.
// Writable: losses, gradients and workSpace (with its size in workSpaceSize).
// Returns miopenStatus_t.
// NOTE(review): `losses` has no accompanying descriptor; its element type and length are
// not visible here -- confirm.
5772 MIOPEN_EXPORT miopenStatus_t miopenCTCLoss(miopenHandle_t handle,
5773  const miopenTensorDescriptor_t probsDesc,
5774  const void* probs,
5775  const int* labels,
5776  const int* labelLengths,
5777  const int* inputLengths,
5778  void* losses,
5779  const miopenTensorDescriptor_t gradientsDesc,
5780  void* gradients,
5781  miopenCTCLossAlgo_t algo,
5782  const miopenCTCLossDescriptor_t ctcLossDesc,
5783  void* workSpace,
5784  size_t workSpaceSize);
5785 
5787 // CLOSEOUT LossFunction DOXYGEN GROUP
5788 
5789 // Dropout APIs
5798 typedef enum
5799 {
5801 } miopenRNGType_t;
5802 
// Creates a dropout descriptor; dropoutDesc is a pointer to the handle (presumably the
// out-parameter -- confirm). Returns miopenStatus_t.
5808 MIOPEN_EXPORT miopenStatus_t miopenCreateDropoutDescriptor(miopenDropoutDescriptor_t* dropoutDesc);
5809 
// Destroys a dropout descriptor; the handle is passed by value. Returns miopenStatus_t.
5815 MIOPEN_EXPORT miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc);
5816 
// Reports the dropout reserve-space size, in bytes per the parameter name, for the tensor
// described by xDesc. Takes no miopenHandle_t, unlike miopenDropoutGetStatesSize.
// Returns miopenStatus_t.
5825 MIOPEN_EXPORT miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDescriptor_t xDesc,
5826  size_t* reserveSpaceSizeInBytes);
5827 
// Reports the dropout state size, in bytes per the parameter name, through
// stateSizeInBytes. Returns miopenStatus_t.
5836 MIOPEN_EXPORT miopenStatus_t miopenDropoutGetStatesSize(miopenHandle_t handle,
5837  size_t* stateSizeInBytes);
5838 
// Queries a dropout descriptor through pointer out-parameters: dropout rate, states
// pointer, seed, the use_mask and state_evo flags, and the RNG mode. Returns miopenStatus_t.
// NOTE(review): `states` is void**, so a pointer value is returned rather than a copy of
// the state buffer -- confirm who owns that memory.
5855 MIOPEN_EXPORT miopenStatus_t miopenGetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc,
5856  miopenHandle_t handle,
5857  float* dropout,
5858  void** states,
5859  unsigned long long* seed,
5860  bool* use_mask,
5861  bool* state_evo,
5862  miopenRNGType_t* rng_mode);
5863 
// Restores a dropout descriptor from the given settings: dropout rate, states buffer and
// its size in bytes, seed, use_mask, state_evo and RNG mode. Returns miopenStatus_t.
// NOTE(review): the parameter list is identical to miopenSetDropoutDescriptor; the
// behavioural difference (presumably reusing existing states rather than initialising
// them) is not visible here -- confirm.
5886 MIOPEN_EXPORT miopenStatus_t miopenRestoreDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc,
5887  miopenHandle_t handle,
5888  float dropout,
5889  void* states,
5890  size_t stateSizeInBytes,
5891  unsigned long long seed,
5892  bool use_mask,
5893  bool state_evo,
5894  miopenRNGType_t rng_mode);
5895 
// Configures a dropout descriptor: dropout rate, states buffer and its size in bytes, seed,
// use_mask, state_evo and RNG mode. Returns miopenStatus_t.
// NOTE(review): `states` is a writable void*; presumably the call initialises it -- confirm.
5915 MIOPEN_EXPORT miopenStatus_t miopenSetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc,
5916  miopenHandle_t handle,
5917  float dropout,
5918  void* states,
5919  size_t stateSizeInBytes,
5920  unsigned long long seed,
5921  bool use_mask,
5922  bool state_evo,
5923  miopenRNGType_t rng_mode);
5924 
// Dropout forward pass. Reads x (xDesc); writes y (yDesc) and reserveSpace, whose size is
// given in bytes. noise_shape is an additional tensor descriptor. Returns miopenStatus_t.
// NOTE(review): how noise_shape relates to xDesc is not visible here -- confirm.
5942 MIOPEN_EXPORT miopenStatus_t miopenDropoutForward(miopenHandle_t handle,
5943  const miopenDropoutDescriptor_t dropoutDesc,
5944  const miopenTensorDescriptor_t noise_shape,
5945  const miopenTensorDescriptor_t xDesc,
5946  const void* x,
5947  const miopenTensorDescriptor_t yDesc,
5948  void* y,
5949  void* reserveSpace,
5950  size_t reserveSpaceSizeInBytes);
5951 
// Dropout backward pass. Reads dy (dyDesc) and writes dx (dxDesc). reserveSpace is passed
// as a non-const void* with its size in bytes. Returns miopenStatus_t.
// NOTE(review): reserveSpace is presumably only read here (produced by the forward pass),
// although the pointer is not const -- confirm.
5969 MIOPEN_EXPORT miopenStatus_t miopenDropoutBackward(miopenHandle_t handle,
5970  const miopenDropoutDescriptor_t dropoutDesc,
5971  const miopenTensorDescriptor_t noise_shape,
5972  const miopenTensorDescriptor_t dyDesc,
5973  const void* dy,
5974  const miopenTensorDescriptor_t dxDesc,
5975  void* dx,
5976  void* reserveSpace,
5977  size_t reserveSpaceSizeInBytes);
5978 
5980 // CLOSEOUT DROPOUT DOXYGEN GROUP
5981 
5982 // TensorReduce APIs
// Creates a reduce-tensor descriptor; reduceTensorDesc is a pointer to the handle
// (presumably the out-parameter -- confirm). Returns miopenStatus_t.
5993 MIOPEN_EXPORT miopenStatus_t
5994 miopenCreateReduceTensorDescriptor(miopenReduceTensorDescriptor_t* reduceTensorDesc);
5995 
// Destroys a reduce-tensor descriptor; the handle is passed by value. Returns miopenStatus_t.
6001 MIOPEN_EXPORT miopenStatus_t
6002 miopenDestroyReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc);
6003 
// Configures a reduce-tensor descriptor with five by-value settings: the reduction op,
// the compute data type, the NaN-propagation option, the indices option and the indices
// type. Returns miopenStatus_t.
6016 MIOPEN_EXPORT miopenStatus_t
6017 miopenSetReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc,
6018  miopenReduceTensorOp_t reduceTensorOp,
6019  miopenDataType_t reduceTensorCompType,
6020  miopenNanPropagation_t reduceTensorNanOpt,
6021  miopenReduceTensorIndices_t reduceTensorIndices,
6022  miopenIndicesType_t reduceTensorIndicesType);
6023 
// Queries a reduce-tensor descriptor. The same five settings accepted by
// miopenSetReduceTensorDescriptor are reported through pointer out-parameters.
// Returns miopenStatus_t.
6039 MIOPEN_EXPORT miopenStatus_t
6040 miopenGetReduceTensorDescriptor(const miopenReduceTensorDescriptor_t reduceTensorDesc,
6041  miopenReduceTensorOp_t* reduceTensorOp,
6042  miopenDataType_t* reduceTensorCompType,
6043  miopenNanPropagation_t* reduceTensorNanOpt,
6044  miopenReduceTensorIndices_t* reduceTensorIndices,
6045  miopenIndicesType_t* reduceTensorIndicesType);
6046 
// Reports, through sizeInBytes, the size of the indices buffer for a reduction described
// by reduceTensorDesc with tensor descriptors aDesc and cDesc. Returns miopenStatus_t.
// NOTE(review): aDesc/cDesc presumably describe the reduction input and output, matching
// the A and C buffers of miopenReduceTensor -- confirm.
6056 MIOPEN_EXPORT miopenStatus_t
6057 miopenGetReductionIndicesSize(miopenHandle_t handle,
6058  const miopenReduceTensorDescriptor_t reduceTensorDesc,
6059  const miopenTensorDescriptor_t aDesc,
6060  const miopenTensorDescriptor_t cDesc,
6061  size_t* sizeInBytes);
6062 
6072 MIOPEN_EXPORT miopenStatus_t
6073 miopenGetReductionWorkspaceSize(miopenHandle_t handle,
6074  const miopenReduceTensorDescriptor_t reduceTensorDesc,
6075  const miopenTensorDescriptor_t aDesc,
6076  const miopenTensorDescriptor_t cDesc,
6077  size_t* sizeInBytes);
6078 
6102 MIOPEN_EXPORT miopenStatus_t
6103 miopenReduceTensor(miopenHandle_t handle,
6104  const miopenReduceTensorDescriptor_t reduceTensorDesc,
6105  void* indices,
6106  size_t indicesSizeInBytes,
6107  void* workspace,
6108  size_t workspaceSizeInBytes,
6109  const void* alpha,
6110  const miopenTensorDescriptor_t aDesc,
6111  const void* A,
6112  const void* beta,
6113  const miopenTensorDescriptor_t cDesc,
6114  void* C);
6115 
6117 // CLOSEOUT TensorReduce DOXYGEN GROUP
6118 
6119 // Find 2.0 API
6130 MIOPEN_DECLARE_OBJECT(miopenProblem);
6131 
6135 typedef enum
6136 {
6140 #ifdef MIOPEN_BETA_API
6142 #endif
6144 
6148 typedef enum
6149 {
6154 
6188 
6189 #ifdef MIOPEN_BETA_API
6215 #endif
6216 
6218 
6220 #ifdef MIOPEN_BETA_API
6223 #endif
6225 
6229 typedef enum
6230 {
6234 
6242 MIOPEN_EXPORT miopenStatus_t miopenCreateConvProblem(miopenProblem_t* problem,
6243  miopenConvolutionDescriptor_t operatorDesc,
6244  miopenProblemDirection_t direction);
6245 
6257 typedef enum
6258 {
6261 } miopenMhaMask_t;
6262 
6263 MIOPEN_EXPORT miopenStatus_t miopenCreateMhaProblem(miopenProblem_t* problem,
6264  miopenMhaDescriptor_t operatorDesc,
6265  miopenProblemDirection_t direction);
6266 
6273 MIOPEN_EXPORT miopenStatus_t miopenCreateMhaDescriptor(miopenMhaDescriptor_t* mhaDesc);
6274 
6284 MIOPEN_EXPORT miopenStatus_t miopenSetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float scale);
6285 
6295 MIOPEN_EXPORT miopenStatus_t miopenGetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float* scale);
6296 
6303 MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t* softmaxDesc);
6304 
6316 MIOPEN_EXPORT miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc,
6317  float alpha,
6318  float beta,
6319  miopenSoftmaxAlgorithm_t algorithm,
6320  miopenSoftmaxMode_t mode);
6321 
6333 MIOPEN_EXPORT miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc,
6334  float* alpha,
6335  float* beta,
6336  miopenSoftmaxAlgorithm_t* algorithm,
6337  miopenSoftmaxMode_t* mode);
6338 
6344 MIOPEN_EXPORT miopenStatus_t miopenDestroyProblem(miopenProblem_t problem);
6345 
6353 MIOPEN_EXPORT miopenStatus_t
6354 miopenSetProblemTensorDescriptor(miopenProblem_t problem,
6356  const miopenTensorDescriptor_t descriptor);
6357 
6360 MIOPEN_DECLARE_OBJECT(miopenFindOptions);
6361 
6367 MIOPEN_EXPORT miopenStatus_t miopenCreateFindOptions(miopenFindOptions_t* options);
6368 
6374 MIOPEN_EXPORT miopenStatus_t miopenDestroyFindOptions(miopenFindOptions_t options);
6375 
6382 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionTuning(miopenFindOptions_t options, int value);
6383 
6390 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionResultsOrder(miopenFindOptions_t options,
6391  miopenFindResultsOrder_t value);
6392 
6400 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionWorkspaceLimit(miopenFindOptions_t options,
6401  size_t value);
6402 
6410 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionPreallocatedWorkspace(miopenFindOptions_t options,
6411  void* buffer,
6412  size_t size);
6413 
6422 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionPreallocatedTensor(miopenFindOptions_t options,
6424  void* buffer);
6425 
6433 MIOPEN_EXPORT miopenStatus_t miopenSetFindOptionAttachBinaries(miopenFindOptions_t options,
6434  unsigned attach);
6435 
6438 MIOPEN_DECLARE_OBJECT(miopenSolution);
6439 
6451 MIOPEN_EXPORT miopenStatus_t miopenFindSolutions(miopenHandle_t handle,
6452  miopenProblem_t problem,
6453  miopenFindOptions_t options,
6454  miopenSolution_t* solutions,
6455  size_t* numSolutions,
6456  size_t maxSolutions);
6457 
6461 {
6462  /* @brief Identifier of the tensor argument.
6463  */
6465  /* @brief Tensor descriptor to override the value stored in the solution.
6466  *
6467  * Some solvers may support overriding input and output tensor descriptors, but right now there
6468  * is no way to tell from the API. Intended for future use.
6469  */
6470  miopenTensorDescriptor_t* descriptor;
6471  /* @brief Pointer to the device memory buffer to use for the operation or to the host memory if
6472  * the value is scalar.
6473  */
6474  void* buffer;
6475 };
6476 
6488 MIOPEN_EXPORT miopenStatus_t miopenRunSolution(miopenHandle_t handle,
6489  miopenSolution_t solution,
6490  size_t nInputs,
6491  const miopenTensorArgument_t* tensors,
6492  void* workspace,
6493  size_t workspaceSize);
6494 
6500 MIOPEN_EXPORT miopenStatus_t miopenDestroySolution(miopenSolution_t solution);
6501 
6509 MIOPEN_EXPORT miopenStatus_t miopenLoadSolution(miopenSolution_t* solution,
6510  const char* data,
6511  size_t size);
6512 
6519 MIOPEN_EXPORT miopenStatus_t miopenSaveSolution(miopenSolution_t solution, char* data);
6520 
6527 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t* size);
6528 
6535 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution,
6536  size_t* workspaceSize);
6537 
6544 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float* time);
6545 
6552 MIOPEN_EXPORT miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution,
6553  uint64_t* solverId);
6554 
6561 MIOPEN_EXPORT miopenStatus_t miopenGetSolverIdConvAlgorithm(uint64_t solverId,
6562  miopenConvAlgorithm_t* result);
6563 
6564 #ifdef MIOPEN_BETA_API
6565 
6574 MIOPEN_EXPORT miopenStatus_t
6575 miopenCreateActivationProblem(miopenProblem_t* problem,
6576  miopenActivationDescriptor_t operatorDesc,
6577  miopenProblemDirection_t direction);
6578 
6587 MIOPEN_EXPORT miopenStatus_t miopenCreateBatchnormProblem(miopenProblem_t* problem,
6588  miopenBatchNormMode_t mode,
6589  bool runningMeanVariance,
6590  miopenProblemDirection_t direction);
6591 
6611 MIOPEN_EXPORT miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2);
6612 
6620 MIOPEN_EXPORT miopenStatus_t miopenCreateBiasProblem(miopenProblem_t* problem,
6621  miopenProblemDirection_t direction);
6622 
6631 MIOPEN_EXPORT miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t* problem,
6632  miopenSoftmaxDescriptor_t operatorDesc,
6633  miopenProblemDirection_t direction);
6634 
6635 #endif
6636 
6638 // CLOSEOUT find2 DOXYGEN GROUP
6639 
6640 #ifdef MIOPEN_BETA_API
6641 
6646 typedef enum
6647 {
6650  1,
6652 
6653 // ReduceCalculation APIs
6662 typedef enum
6663 {
6665  1,
6667  2,
6669 
6679 MIOPEN_EXPORT miopenStatus_t
6681  const miopenTensorDescriptor_t xDesc,
6682  const int32_t dim,
6683  const miopenReduceCalculationOp_t reduceCalculationOp,
6684  const miopenTensorDescriptor_t reduceDesc,
6685  size_t* sizeInBytes);
6686 
6700 MIOPEN_EXPORT miopenStatus_t
6701 miopenReduceCalculationForward(miopenHandle_t handle,
6703  void* workspace,
6704  size_t workspaceSizeInBytes,
6705  const miopenTensorDescriptor_t xDesc,
6706  const void* x,
6707  const int32_t dim,
6708  const miopenReduceCalculationOp_t reduceCalculationOp,
6709  const miopenTensorDescriptor_t reduceDesc,
6710  void* y);
6711 
6713 // CLOSEOUT REDUCE CALCULATION DOXYGEN GROUP
6714 #endif // MIOPEN_BETA_API
6715 
6716 #ifdef MIOPEN_BETA_API
6717 
6722 typedef enum
6723 {
6725  1,
6727  2,
6729  3,
6731  4,
6733 
6734 // ReduceExtreme APIs
6754 MIOPEN_EXPORT miopenStatus_t
6755 miopenReduceExtremeForward(miopenHandle_t handle,
6756  const miopenTensorDescriptor_t xDesc,
6757  const void* x,
6758  const int32_t dim,
6759  const miopenReduceExtremeOp_t reduceExtremeOp,
6760  const miopenTensorDescriptor_t yDesc,
6761  void* y,
6762  const miopenTensorDescriptor_t indiceDesc,
6763  void* indice);
6764 
6766 // CLOSEOUT REDUCEEXTREME DOXYGEN GROUP
6767 #endif // MIOPEN_BETA_API
6768 
6769 #ifdef MIOPEN_BETA_API
6770 // GroupNorm APIs
6795 MIOPEN_EXPORT miopenStatus_t miopenGroupNormForward(miopenHandle_t handle,
6796  miopenNormMode_t mode,
6797  const miopenTensorDescriptor_t xDesc,
6798  const void* x,
6799  const miopenTensorDescriptor_t weightDesc,
6800  const void* weight,
6801  const miopenTensorDescriptor_t biasDesc,
6802  const void* bias,
6803  const uint64_t num_groups,
6804  const float epsilon,
6805  const miopenTensorDescriptor_t yDesc,
6806  void* y,
6807  const miopenTensorDescriptor_t meanDesc,
6808  void* mean,
6809  const miopenTensorDescriptor_t rstdDesc,
6810  void* rstd);
6811 
6813 // CLOSEOUT groupnorm DOXYGEN GROUP
6814 #endif // MIOPEN_BETA_API
6815 
6816 #ifdef MIOPEN_BETA_API
6817 // LayerNorm APIs
6844 MIOPEN_EXPORT miopenStatus_t miopenAddLayerNormForward(miopenHandle_t handle,
6845  miopenNormMode_t mode,
6846  const miopenTensorDescriptor_t xDesc,
6847  const void* x,
6848  const miopenTensorDescriptor_t x2Desc,
6849  const void* x2,
6850  const miopenTensorDescriptor_t weightDesc,
6851  const void* weight,
6852  const miopenTensorDescriptor_t biasDesc,
6853  const void* bias,
6854  const float epsilon,
6855  const int32_t normalized_dim,
6856  const miopenTensorDescriptor_t yDesc,
6857  void* y,
6858  const miopenTensorDescriptor_t meanDesc,
6859  void* mean,
6860  const miopenTensorDescriptor_t rstdDesc,
6861  void* rstd);
6862 
6864 // CLOSEOUT LAYERNORM DOXYGEN GROUP
6865 #endif // MIOPEN_BETA_API
6866 
6867 #ifdef MIOPEN_BETA_API
6868 // LayerNorm APIs
6888 MIOPEN_EXPORT miopenStatus_t miopenT5LayerNormForward(miopenHandle_t handle,
6889  miopenNormMode_t mode,
6890  const miopenTensorDescriptor_t xDesc,
6891  const void* x,
6892  const miopenTensorDescriptor_t weightDesc,
6893  const void* weight,
6894  const float epsilon,
6895  const miopenTensorDescriptor_t yDesc,
6896  void* y,
6897  const miopenTensorDescriptor_t rstdDesc,
6898  void* rstd);
6899 
6914 MIOPEN_EXPORT miopenStatus_t
6916  miopenNormMode_t mode,
6917  const miopenTensorDescriptor_t dyDesc,
6918  const miopenTensorDescriptor_t xDesc,
6919  const miopenTensorDescriptor_t weightDesc,
6920  const miopenTensorDescriptor_t rstdDesc,
6921  const miopenTensorDescriptor_t dxDesc,
6922  const miopenTensorDescriptor_t dwDesc,
6923  size_t* sizeInBytes);
6924 
6945 MIOPEN_EXPORT miopenStatus_t miopenT5LayerNormBackward(miopenHandle_t handle,
6946  miopenNormMode_t mode,
6947  void* workspace,
6948  size_t workspaceSizeInBytes,
6949  const miopenTensorDescriptor_t dyDesc,
6950  const void* dy,
6951  const miopenTensorDescriptor_t xDesc,
6952  const void* x,
6953  const miopenTensorDescriptor_t weightDesc,
6954  const void* weight,
6955  const miopenTensorDescriptor_t rstdDesc,
6956  const void* rstd,
6957  const miopenTensorDescriptor_t dxDesc,
6958  void* dx,
6959  const miopenTensorDescriptor_t dwDesc,
6960  void* dw);
6962 // CLOSEOUT LAYERNORM DOXYGEN GROUP
6963 #endif // MIOPEN_BETA_API
6964 
6969 typedef enum
6970 {
6971  /* DEFAULT alpha = 1.0 and beta = 0.0 */
6972  /* SCALE alpha = 4.2 and beta = 0.0 */
6973  /* BILINEAR alpha = 3.2 and beta = 1.1 */
6974  /* ERROR_STATE alpha = 0.0 and beta = 3.1 */
6975 
6976  DEFAULT = 0, /* alpha = 1.0 and beta = 0.0.*/
6977  SCALE = 1, /* alpha with some value and beta 0.0*/
6978  BILINEAR = 2, /* both alpha and beta with some value*/
6979  ERROR_STATE = 3 /* alpha 0.0 and beta with some value, this should not occur.
6980  But used to check for errors.*/
6982 
6983 #ifdef MIOPEN_BETA_API
6984 // FusedAdam APIs
7127 MIOPEN_EXPORT miopenStatus_t miopenFusedAdam(miopenHandle_t handle,
7128  const miopenTensorDescriptor_t paramDesc,
7129  void* param,
7130  const miopenTensorDescriptor_t gradDesc,
7131  const void* grad,
7132  const miopenTensorDescriptor_t expAvgDesc,
7133  void* expAvg,
7134  const miopenTensorDescriptor_t expAvgSqDesc,
7135  void* expAvgSq,
7136  const miopenTensorDescriptor_t maxExpAvgSqDesc,
7137  void* maxExpAvgSq,
7138  const miopenTensorDescriptor_t stateStepDesc,
7139  void* stateStep,
7140  const unsigned int state_step,
7141  const float lr,
7142  const float beta1,
7143  const float beta2,
7144  const float weight_decay,
7145  const float eps,
7146  const bool amsgrad,
7147  const bool maximize,
7148  const bool adamw,
7149  const miopenTensorDescriptor_t gradScaleDesc,
7150  const void* gradScale,
7151  const miopenTensorDescriptor_t foundInfDesc,
7152  const void* foundInf);
7153 
7294 MIOPEN_EXPORT miopenStatus_t
7295 miopenFusedAdamWithOutput(miopenHandle_t handle,
7296  const miopenTensorDescriptor_t paramInDesc,
7297  void* paramIn,
7298  const miopenTensorDescriptor_t paramOutDesc,
7299  void* paramOut,
7300  const miopenTensorDescriptor_t paramOutFloat16Desc,
7301  void* paramOutFloat16,
7302  const miopenTensorDescriptor_t gradInDesc,
7303  const void* gradIn,
7304  const miopenTensorDescriptor_t expAvgInDesc,
7305  void* expAvgIn,
7306  const miopenTensorDescriptor_t expAvgOutDesc,
7307  void* expAvgOut,
7308  const miopenTensorDescriptor_t expAvgSqInDesc,
7309  void* expAvgSqIn,
7310  const miopenTensorDescriptor_t expAvgSqOutDesc,
7311  void* expAvgSqOut,
7312  const miopenTensorDescriptor_t maxExpAvgSqInDesc,
7313  void* maxExpAvgSqIn,
7314  const miopenTensorDescriptor_t maxExpAvgSqOutDesc,
7315  void* maxExpAvgSqOut,
7316  const miopenTensorDescriptor_t stateStepInDesc,
7317  void* stateStepIn,
7318  const miopenTensorDescriptor_t stateStepOutDesc,
7319  void* stateStepOut,
7320  const unsigned int state_step,
7321  const float lr,
7322  const float beta1,
7323  const float beta2,
7324  const float weight_decay,
7325  const float eps,
7326  const bool amsgrad,
7327  const bool maximize,
7328  const bool adamw,
7329  const miopenTensorDescriptor_t gradScaleDesc,
7330  const void* gradScale,
7331  const miopenTensorDescriptor_t foundInfDesc,
7332  const void* foundInf);
7333 
7335 // CLOSEOUT SGD DOXYGEN GROUP
7336 #endif // MIOPEN_BETA_API
7337 
7338 #ifdef MIOPEN_BETA_API
7339 // TransformersAdamW APIs
7430 MIOPEN_EXPORT miopenStatus_t miopenTransformersAdamW(miopenHandle_t handle,
7431  const miopenTensorDescriptor_t paramDesc,
7432  void* param,
7433  const miopenTensorDescriptor_t gradDesc,
7434  const void* grad,
7435  const miopenTensorDescriptor_t expAvgDesc,
7436  void* expAvg,
7437  const miopenTensorDescriptor_t expAvgSqDesc,
7438  void* expAvgSq,
7439  const miopenTensorDescriptor_t stateStepDesc,
7440  void* stateStep,
7441  const unsigned int state_step,
7442  const float lr,
7443  const float beta1,
7444  const float beta2,
7445  const float weight_decay,
7446  const float eps,
7447  const bool correct_bias,
7448  const miopenTensorDescriptor_t gradScaleDesc,
7449  const void* gradScale,
7450  const miopenTensorDescriptor_t foundInfDesc,
7451  const void* foundInf);
7452 
7579 MIOPEN_EXPORT miopenStatus_t
7581  const miopenTensorDescriptor_t paramInDesc,
7582  void* paramIn,
7583  const miopenTensorDescriptor_t paramOutDesc,
7584  void* paramOut,
7585  const miopenTensorDescriptor_t paramOutFloat16Desc,
7586  void* paramOutFloat16,
7587  const miopenTensorDescriptor_t gradInDesc,
7588  const void* gradIn,
7589  const miopenTensorDescriptor_t expAvgInDesc,
7590  void* expAvgIn,
7591  const miopenTensorDescriptor_t expAvgOutDesc,
7592  void* expAvgOut,
7593  const miopenTensorDescriptor_t expAvgSqInDesc,
7594  void* expAvgSqIn,
7595  const miopenTensorDescriptor_t expAvgSqOutDesc,
7596  void* expAvgSqOut,
7597  const miopenTensorDescriptor_t stateStepInDesc,
7598  void* stateStepIn,
7599  const miopenTensorDescriptor_t stateStepOutDesc,
7600  void* stateStepOut,
7601  const unsigned int state_step,
7602  const float lr,
7603  const float beta1,
7604  const float beta2,
7605  const float weight_decay,
7606  const float eps,
7607  const float step_size,
7608  const bool correct_bias,
7609  const miopenTensorDescriptor_t gradScaleDesc,
7610  const void* gradScale,
7611  const miopenTensorDescriptor_t foundInfDesc,
7612  const void* foundInf);
7613 
7615 // CLOSEOUT SGD DOXYGEN GROUP
7616 #endif // MIOPEN_BETA_API
7617 
7618 #ifdef MIOPEN_BETA_API
7619 // GetItem APIs
7632 MIOPEN_EXPORT miopenStatus_t
7633 miopenGetGetitemWorkspaceSize(miopenHandle_t handle,
7634  uint32_t indexCount,
7635  const miopenTensorDescriptor_t* indexDescs,
7636  size_t* sizeInBytes);
7637 
7662 MIOPEN_EXPORT miopenStatus_t miopenGetitemBackward(miopenHandle_t handle,
7663  void* workspace,
7664  size_t workspaceSizeInBytes,
7665  const miopenTensorDescriptor_t dyDesc,
7666  const void* dy,
7667  uint32_t indexCount,
7668  const miopenTensorDescriptor_t* indexDescs,
7669  const void* const* indexs,
7670  const miopenTensorDescriptor_t dxDesc,
7671  void* dx,
7672  const miopenTensorDescriptor_t errorDesc,
7673  void* error,
7674  uint32_t dimCount,
7675  const int32_t* dims,
7676  uint32_t sliceCount,
7677  const int32_t* slices,
7678  uint32_t offset);
7679 
7681 // CLOSEOUT GETITEM DOXYGEN GROUP
7682 #endif // MIOPEN_BETA_API
7683 
7684 #ifdef MIOPEN_BETA_API
7685 // RotaryPositionalEmbeddings APIs
7703 MIOPEN_EXPORT miopenStatus_t miopenRoPEForward(miopenHandle_t handle,
7704  const miopenTensorDescriptor_t xDesc,
7705  const void* x,
7706  const miopenTensorDescriptor_t cosDesc,
7707  const void* cos,
7708  const miopenTensorDescriptor_t sinDesc,
7709  const void* sin,
7710  const miopenTensorDescriptor_t yDesc,
7711  void* y);
7712 
7726 MIOPEN_EXPORT miopenStatus_t miopenRoPEBackward(miopenHandle_t handle,
7727  const miopenTensorDescriptor_t dyDesc,
7728  const void* dy,
7729  const miopenTensorDescriptor_t cosDesc,
7730  const void* cos,
7731  const miopenTensorDescriptor_t sinDesc,
7732  const void* sin,
7733  const miopenTensorDescriptor_t dxDesc,
7734  void* dx);
7736 // CLOSEOUT ROPE DOXYGEN GROUP
7737 // kthvalue APIs
7758 MIOPEN_EXPORT miopenStatus_t miopenKthvalueForward(miopenHandle_t handle,
7759  miopenTensorDescriptor_t inputDesc,
7760  const void* input,
7761  miopenTensorDescriptor_t outputDesc,
7762  void* output,
7763  miopenTensorDescriptor_t indicesDesc,
7764  size_t* indices,
7765  size_t k,
7766  int32_t dim = -1,
7767  bool keepDim = false);
7768 
7770 // CLOSEOUT kthvalue DOXYGEN GROUP
7771 #endif // MIOPEN_BETA_API
7772 
7773 #ifdef MIOPEN_BETA_API
7787 MIOPEN_EXPORT miopenStatus_t
7789  miopenTensorDescriptor_t inputDesc,
7790  miopenTensorDescriptor_t weightDesc,
7791  size_t* sizeInBytes);
7792 
7809 MIOPEN_EXPORT miopenStatus_t miopenPReLUBackward(miopenHandle_t handle,
7810  void* workspace,
7811  size_t workspaceSizeInBytes,
7812  miopenTensorDescriptor_t inputDesc,
7813  const void* input,
7814  miopenTensorDescriptor_t weightDesc,
7815  const void* weight,
7816  miopenTensorDescriptor_t doutputDesc,
7817  const void* doutput,
7818  miopenTensorDescriptor_t dinputDesc,
7819  void* dinput,
7820  miopenTensorDescriptor_t dweightDesc,
7821  void* dweight);
7822 
7824 // CLOSEOUT RELU DOXYGEN GROUP
7825 #endif // MIOPEN_BETA_API
7826 
7827 #ifdef MIOPEN_BETA_API
7828 
7833 typedef enum
7834 {
7840 
7841 // SoftMarginLoss APIs
7859 MIOPEN_EXPORT miopenStatus_t
7861  miopenTensorDescriptor_t inputDesc,
7862  miopenTensorDescriptor_t targetDesc,
7863  miopenTensorDescriptor_t outputDesc,
7864  miopenLossReductionMode_t reduction,
7865  size_t* sizeInBytes);
7866 
7883 MIOPEN_EXPORT miopenStatus_t miopenSoftMarginLossForward(miopenHandle_t handle,
7884  miopenTensorDescriptor_t inputDesc,
7885  const void* input,
7886  miopenTensorDescriptor_t targetDesc,
7887  const void* target,
7888  miopenTensorDescriptor_t outputDesc,
7889  void* output,
7890  miopenLossReductionMode_t reduction,
7891  void* workspace = nullptr,
7892  size_t workspaceSizeInBytes = 0);
7893 
7908 MIOPEN_EXPORT miopenStatus_t miopenSoftMarginLossBackward(miopenHandle_t handle,
7909  miopenTensorDescriptor_t inputDesc,
7910  const void* input,
7911  miopenTensorDescriptor_t targetDesc,
7912  const void* target,
7913  miopenTensorDescriptor_t doutputDesc,
7914  const void* doutput,
7915  miopenTensorDescriptor_t dinputDesc,
7916  void* dinput,
7917  miopenLossReductionMode_t reduction);
7918 
7920 // CLOSEOUT LossFunction DOXYGEN GROUP
7921 #endif
7922 
7923 #ifdef MIOPEN_BETA_API
7924 // MultiMarginLoss APIs
7948 MIOPEN_EXPORT miopenStatus_t
7950  miopenTensorDescriptor_t inputDesc,
7951  miopenTensorDescriptor_t targetDesc,
7952  miopenTensorDescriptor_t weightDesc,
7953  miopenTensorDescriptor_t outputDesc,
7954  long p,
7955  float margin,
7956  miopenLossReductionMode_t reduction,
7957  size_t* sizeInBytes);
7958 
7986 MIOPEN_EXPORT miopenStatus_t miopenMultiMarginLossForward(miopenHandle_t handle,
7987  miopenTensorDescriptor_t inputDesc,
7988  const void* input,
7989  miopenTensorDescriptor_t targetDesc,
7990  const void* target,
7991  miopenTensorDescriptor_t weightDesc,
7992  const void* weight,
7993  miopenTensorDescriptor_t outputDesc,
7994  void* output,
7995  long p,
7996  float margin,
7997  miopenLossReductionMode_t reduction,
7998  void* workspace,
7999  size_t workspaceSizeInBytes);
8000 
8002 // CLOSEOUT LossFunction DOXYGEN GROUP
8003 #endif // MIOPEN_BETA_API
8004 
8016 typedef enum
8017 {
8021  3,
8025 
8034 MIOPEN_EXPORT miopenStatus_t miopenSetTuningPolicy(miopenHandle_t handle,
8035  miopenTuningPolicy_t newValue);
8036 
8044 MIOPEN_EXPORT miopenStatus_t miopenGetTuningPolicy(miopenHandle_t handle,
8045  miopenTuningPolicy_t* value);
8046 
8047 #ifdef __cplusplus
8048 }
8049 #endif
8050 
8051 #ifdef __clang__
8052 #pragma clang diagnostic pop
8053 #endif
8054 
8055 #endif // MIOPEN_GUARD_MIOPEN_H_
miopenStatus_t miopenCreateOpActivationBackward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *activBwdOp, miopenActivationMode_t mode)
Creates a backward activation operator.
miopenStatus_t miopenCreateOpBatchNormForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *bnFwdOp, const miopenBatchNormMode_t bn_mode, bool runningMeanVariance)
Creates a forward training batch normalization operator.
miopenStatus_t miopenFusionPlanConvolutionGetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc, const int requestAlgoCount, int *returnedAlgoCount, miopenConvFwdAlgorithm_t *returnedAlgos)
Returns the supported algorithms for the convolution operator in the Fusion Plan.
miopenStatus_t miopenFusionPlanGetWorkSpaceSize(miopenHandle_t handle, miopenFusionPlanDescriptor_t fusePlanDesc, size_t *workSpaceSize, miopenConvFwdAlgorithm_t algo)
Query the workspace size required for the fusion plan.
miopenStatus_t miopenFusionPlanConvolutionSetAlgo(miopenFusionPlanDescriptor_t fusePlanDesc, miopenConvFwdAlgorithm_t algo)
Requests the fusion runtime to choose a particular algorithm for the added convolution operation.
miopenStatus_t miopenCreateOpBatchNormInference(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *bnOp, const miopenBatchNormMode_t bn_mode, const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc)
Creates a forward inference batch normalization operator.
miopenStatus_t miopenSetOpArgsBiasForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t biasOp, const void *alpha, const void *beta, const void *bias)
Sets the arguments for forward bias op.
miopenStatus_t miopenCreateOpConvForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *convOp, miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t wDesc)
Creates forward convolution operator.
miopenStatus_t miopenSetOpArgsBatchNormInference(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t bnOp, const void *alpha, const void *beta, const void *bnScale, const void *bnBias, const void *estimatedMean, const void *estimatedVariance, double epsilon)
Sets the arguments for inference batch normalization op.
miopenStatus_t miopenSetOpArgsBatchNormForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t bnOp, const void *alpha, const void *beta, const void *bnScale, const void *bnBias, void *savedMean, void *savedInvVariance, void *runningMean, void *runningVariance, double expAvgFactor, double epsilon)
Sets the arguments for forward batch normalization op.
miopenStatus_t miopenExecuteFusionPlan(const miopenHandle_t handle, const miopenFusionPlanDescriptor_t fusePlanDesc, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputDesc, void *output, miopenOperatorArgs_t args)
Executes the fusion plan. Only compatible with NHWC/NDHWC tensor layouts.
miopenStatus_t miopenFusionPlanGetOp(miopenFusionPlanDescriptor_t fusePlanDesc, const int op_idx, miopenFusionOpDescriptor_t *op)
Allows access to the operators in a fusion plan.
miopenStatus_t miopenDestroyFusionPlan(miopenFusionPlanDescriptor_t fusePlanDesc)
Destroy the fusion plan descriptor object.
miopenStatus_t miopenCreateOpActivationForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *activFwdOp, miopenActivationMode_t mode)
Creates a forward activation operator.
miopenStatus_t miopenExecuteFusionPlan_v2(const miopenHandle_t handle, const miopenFusionPlanDescriptor_t fusePlanDesc, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputDesc, void *output, miopenOperatorArgs_t args, void *workspace, size_t workspaceSize)
Executes the fusion plan with a workspace buffer for layout transformations.
miopenFusionDirection_t
Kernel fusion direction in the network.
Definition: miopen.h:3821
miopenStatus_t miopenSetOpArgsActivBackward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t activBwdOp, const void *alpha, const void *beta, const void *y, const void *reserved, double activAlpha, double activBeta, double activGamma)
Sets the arguments for backward activation op.
miopenStatus_t miopenCompileFusionPlan(miopenHandle_t handle, miopenFusionPlanDescriptor_t fusePlanDesc)
Compiles the fusion plan.
miopenStatus_t miopenSetOpArgsBatchNormBackward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t bnOp, const void *alpha, const void *beta, const void *x, const void *bnScale, const void *bnBias, void *resultBnScaleDiff, void *resultBnBiasDiff, const void *savedMean, const void *savedInvVariance)
Sets the arguments for backward batch normalization op.
miopenStatus_t miopenSetOpArgsActivForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t activFwdOp, const void *alpha, const void *beta, double activAlpha, double activBeta, double activGamma)
Sets the arguments for forward activation op.
miopenStatus_t miopenDestroyOperatorArgs(miopenOperatorArgs_t args)
Destroys an operator argument object.
miopenStatus_t miopenCreateOpBatchNormBackward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *bnBwdOp, const miopenBatchNormMode_t bn_mode)
Creates a back propagation batch normalization operator.
miopenStatus_t miopenCreateOperatorArgs(miopenOperatorArgs_t *args)
Creates an operator argument object.
miopenStatus_t miopenSetOpArgsConvForward(miopenOperatorArgs_t args, const miopenFusionOpDescriptor_t convOp, const void *alpha, const void *beta, const void *w)
Sets the arguments for forward convolution op.
miopenStatus_t miopenConvolutionBiasActivationForward(miopenHandle_t handle, const void *alpha1, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, miopenConvFwdAlgorithm_t algo, void *workspace, size_t workspaceSizeInBytes, const void *alpha2, const miopenTensorDescriptor_t zDesc, const void *z, const miopenTensorDescriptor_t biasDesc, const void *bias, const miopenActivationDescriptor_t activationDesc, const miopenTensorDescriptor_t yDesc, void *y)
Prepares and executes the Convolution+Bias+Activation Fusion.
miopenStatus_t miopenCreateFusionPlan(miopenFusionPlanDescriptor_t *fusePlanDesc, const miopenFusionDirection_t fuseDirection, const miopenTensorDescriptor_t inputDesc)
Creates the kernel fusion plan descriptor object.
miopenStatus_t miopenCreateOpBiasForward(miopenFusionPlanDescriptor_t fusePlanDesc, miopenFusionOpDescriptor_t *biasOp, const miopenTensorDescriptor_t bDesc)
Creates a forward bias operator.
@ miopenHorizontalFusion
Definition: miopen.h:3823
@ miopenVerticalFusion
Definition: miopen.h:3822
miopenStatus_t miopenSetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc, miopenLRNMode_t mode, unsigned int lrnN, double lrnAlpha, double lrnBeta, double lrnK)
Sets a LRN layer descriptor details.
miopenStatus_t miopenCreateLRNDescriptor(miopenLRNDescriptor_t *lrnDesc)
Creates a local response normalization (LRN) layer descriptor.
miopenStatus_t miopenGetLRNDescriptor(const miopenLRNDescriptor_t lrnDesc, miopenLRNMode_t *mode, unsigned int *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK)
Gets a LRN layer descriptor details.
miopenStatus_t miopenLRNBackward(miopenHandle_t handle, const miopenLRNDescriptor_t lrnDesc, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, const void *workSpace)
Execute a LRN backward layer.
miopenStatus_t miopenLRNGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Determine the workspace requirements.
miopenLRNMode_t
Definition: miopen.h:461
miopenStatus_t miopenLRNForward(miopenHandle_t handle, const miopenLRNDescriptor_t lrnDesc, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, bool do_backward, void *workSpace)
Execute a LRN forward layer.
miopenStatus_t miopenDestroyLRNDescriptor(miopenLRNDescriptor_t lrnDesc)
Destroys the LRN descriptor object.
miopenStatus_t miopenGetCTCLossWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t probsDesc, const miopenTensorDescriptor_t gradientsDesc, const int *labels, const int *labelLengths, const int *inputLengths, miopenCTCLossAlgo_t algo, const miopenCTCLossDescriptor_t ctcLossDesc, size_t *workSpaceSize)
Query the amount of memory required to execute miopenCTCLoss.
miopenStatus_t miopenGetSoftMarginLossForwardWorkspaceSize(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, miopenTensorDescriptor_t targetDesc, miopenTensorDescriptor_t outputDesc, miopenLossReductionMode_t reduction, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the SoftMarginLossForward call.
miopenLossReductionMode_t
Definition: miopen.h:7834
miopenStatus_t miopenGetMultiMarginLossForwardWorkspaceSize(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, miopenTensorDescriptor_t targetDesc, miopenTensorDescriptor_t weightDesc, miopenTensorDescriptor_t outputDesc, long p, float margin, miopenLossReductionMode_t reduction, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the MultiMarginLossForward call.
miopenCTCLossAlgo_t
Definition: miopen.h:5679
miopenStatus_t miopenSetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc, miopenDataType_t dataType, const int blank_label_id, bool apply_softmax_layer)
Set the details of a CTC loss function descriptor.
miopenStatus_t miopenCTCLoss(miopenHandle_t handle, const miopenTensorDescriptor_t probsDesc, const void *probs, const int *labels, const int *labelLengths, const int *inputLengths, void *losses, const miopenTensorDescriptor_t gradientsDesc, void *gradients, miopenCTCLossAlgo_t algo, const miopenCTCLossDescriptor_t ctcLossDesc, void *workSpace, size_t workSpaceSize)
Execute forward inference for CTCLoss layer.
miopenStatus_t miopenGetCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc, miopenDataType_t *dataType, int *blank_label_id, bool *apply_softmax_layer)
Retrieves a CTC loss function descriptor's details.
miopenStatus_t miopenCreateCTCLossDescriptor(miopenCTCLossDescriptor_t *ctcLossDesc)
Create a CTC loss function Descriptor.
miopenStatus_t miopenMultiMarginLossForward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t targetDesc, const void *target, miopenTensorDescriptor_t weightDesc, const void *weight, miopenTensorDescriptor_t outputDesc, void *output, long p, float margin, miopenLossReductionMode_t reduction, void *workspace, size_t workspaceSizeInBytes)
Execute a MultiMarginLoss forward layer.
miopenStatus_t miopenSoftMarginLossBackward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t targetDesc, const void *target, miopenTensorDescriptor_t doutputDesc, const void *doutput, miopenTensorDescriptor_t dinputDesc, void *dinput, miopenLossReductionMode_t reduction)
Execute a SoftMarginLoss backward layer.
miopenStatus_t miopenSoftMarginLossForward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t targetDesc, const void *target, miopenTensorDescriptor_t outputDesc, void *output, miopenLossReductionMode_t reduction, void *workspace=nullptr, size_t workspaceSizeInBytes=0)
Execute a SoftMarginLoss forward layer.
miopenStatus_t miopenDestroyCTCLossDescriptor(miopenCTCLossDescriptor_t ctcLossDesc)
Destroys a CTC loss function descriptor object.
@ MIOPEN_CTC_LOSS_ALGO_DETERMINISTIC
Definition: miopen.h:5680
miopenRNNMode_t
Definition: miopen.h:4284
miopenStatus_t miopenGetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc, int *hiddenSize, int *layer, miopenDropoutDescriptor_t *dropoutDesc, miopenRNNInputMode_t *inputMode, miopenRNNDirectionMode_t *dirMode, miopenRNNMode_t *rnnMode, miopenRNNBiasMode_t *biasMode, miopenRNNAlgo_t *algoMode, miopenDataType_t *dataType)
Retrieves a RNN layer descriptor's details version 2. This version enables retrieving information of ...
miopenRNNInputMode_t
Definition: miopen.h:4295
miopenStatus_t miopenSetRNNLayerBias(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, void *w, const int biasID, miopenTensorDescriptor_t biasDesc, const void *layerBias)
Sets a bias for a specific layer in an RNN stack.
miopenStatus_t miopenRNNBackwardWeightsSeqTensor(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const miopenSeqTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t hDesc, const void *hx, const miopenSeqTensorDescriptor_t yDesc, const void *y, void *dw, size_t weightSpaceSize, void *workSpace, size_t workSpaceNumBytes, const void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward weights for recurrent layer.
miopenStatus_t miopenGetRNNParamsSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, miopenTensorDescriptor_t xDesc, size_t *numBytes, miopenDataType_t dtype)
Query the amount of parameter memory required for RNN training.
miopenStatus_t miopenGetRNNLayerBiasOffset(miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, const int biasID, miopenTensorDescriptor_t biasDesc, size_t *layerBiasOffset)
Gets a bias index offset for a specific layer in an RNN stack.
miopenStatus_t miopenGetRNNLayerParamOffset(miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, const int paramID, miopenTensorDescriptor_t paramDesc, size_t *layerParamOffset)
Gets an index offset for a specific weight matrix for a layer in the RNN stack.
miopenRNNDirectionMode_t
Definition: miopen.h:4317
miopenStatus_t miopenRNNBackwardSeqData(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const miopenSeqTensorDescriptor_t yDesc, const void *y, const void *dy, const miopenTensorDescriptor_t hDesc, const void *hx, const void *dhy, void *dhx, const miopenTensorDescriptor_t cDesc, const void *cx, const void *dcy, void *dcx, const miopenSeqTensorDescriptor_t xDesc, void *dx, const void *w, size_t weightSpaceSize, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward data for recurrent layer.
miopenRNNBiasMode_t
Definition: miopen.h:4326
miopenStatus_t miopenGetRNNLayerBias(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, const void *w, const int biasID, miopenTensorDescriptor_t biasDesc, void *layerBias)
Gets a bias for a specific layer in an RNN stack.
miopenStatus_t miopenRNNBackwardWeights(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, const void *x, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t *yDesc, const void *y, const miopenTensorDescriptor_t dwDesc, void *dw, void *workSpace, size_t workSpaceNumBytes, const void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward weights for recurrent layer.
miopenStatus_t miopenGetRNNTrainingReserveSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Query the amount of memory required for RNN training.
miopenStatus_t miopenGetRNNLayerBiasSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, const int biasID, size_t *numBytes)
Gets the number of bytes of a bias.
miopenStatus_t miopenSetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, const int hsize, const int nlayers, miopenRNNInputMode_t inMode, miopenRNNDirectionMode_t direction, miopenRNNMode_t rnnMode, miopenRNNBiasMode_t biasMode, miopenRNNAlgo_t algo, miopenDataType_t dataType)
Set the details of the RNN descriptor.
miopenRNNBaseLayout_t
Definition: miopen.h:4361
miopenStatus_t miopenRNNBackwardData(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *yDesc, const void *y, const miopenTensorDescriptor_t *dyDesc, const void *dy, const miopenTensorDescriptor_t dhyDesc, const void *dhy, const miopenTensorDescriptor_t dcyDesc, const void *dcy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t cxDesc, const void *cx, const miopenTensorDescriptor_t *dxDesc, void *dx, const miopenTensorDescriptor_t dhxDesc, void *dhx, const miopenTensorDescriptor_t dcxDesc, void *dcx, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute backward data for recurrent layer.
miopenStatus_t miopenGetRNNDescriptor(miopenRNNDescriptor_t rnnDesc, miopenRNNMode_t *rnnMode, miopenRNNAlgo_t *algoMode, miopenRNNInputMode_t *inputMode, miopenRNNDirectionMode_t *dirMode, miopenRNNBiasMode_t *biasMode, int *hiddenSize, int *layer)
Retrieves an RNN layer descriptor's details.
miopenRNNAlgo_t
Definition: miopen.h:4304
miopenStatus_t miopenGetRNNTempSpaceSizes(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, miopenSeqTensorDescriptor_t xDesc, miopenRNNFWDMode_t fwdMode, size_t *workSpaceSize, size_t *reserveSpaceSize)
Query the amount of additional memory required for this RNN layer execution.
miopenStatus_t miopenGetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc, miopenRNNPaddingMode_t *paddingMode)
This function retrieves the RNN padding mode from the RNN descriptor.
miopenStatus_t miopenRNNForward(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, miopenRNNFWDMode_t fwdMode, const miopenSeqTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t hDesc, const void *hx, void *hy, const miopenTensorDescriptor_t cDesc, const void *cx, void *cy, const miopenSeqTensorDescriptor_t yDesc, void *y, const void *w, size_t weightSpaceSize, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute forward training or inference (selected by fwdMode) for recurrent layer.
miopenStatus_t miopenGetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc, miopenDataType_t *dataType, miopenRNNBaseLayout_t *layout, int *maxSequenceLen, int *batchSize, int *vectorSize, int sequenceLenArrayLimit, int *sequenceLenArray, void *paddingMarker)
Get shape of RNN seqData tensor.
miopenStatus_t miopenGetRNNInputTensorSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int seqLen, miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Obtain the size in bytes of the RNN input tensor.
miopenRNNFWDMode_t
Definition: miopen.h:4352
miopenStatus_t miopenSetRNNLayerParam(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, void *w, const int paramID, miopenTensorDescriptor_t paramDesc, const void *layerParam)
Sets a weight matrix for a specific layer in an RNN stack.
miopenStatus_t miopenGetRNNLayerParamSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, const int paramID, size_t *numBytes)
Gets the number of bytes of a parameter matrix.
miopenStatus_t miopenCreateRNNDescriptor(miopenRNNDescriptor_t *rnnDesc)
Create an RNN layer Descriptor.
miopenRNNGEMMalgoMode_t
Definition: miopen.h:4335
miopenStatus_t miopenGetRNNLayerParam(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int layer, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, const void *w, const int paramID, miopenTensorDescriptor_t paramDesc, void *layerParam)
Gets a weight matrix for a specific layer in an RNN stack.
miopenStatus_t miopenGetRNNWorkspaceSize(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Query the amount of memory required to execute the RNN layer.
miopenStatus_t miopenSetRNNDataSeqTensorDescriptor(miopenSeqTensorDescriptor_t seqTensorDesc, miopenDataType_t dataType, miopenRNNBaseLayout_t layout, int maxSequenceLen, int batchSize, int vectorSize, const int *sequenceLenArray, void *paddingMarker)
Set shape of RNN seqData tensor.
miopenStatus_t miopenGetRNNParamsDescriptor(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, miopenTensorDescriptor_t xDesc, miopenTensorDescriptor_t wDesc, miopenDataType_t dtype)
Obtain a weight tensor descriptor for RNNs.
miopenStatus_t miopenRNNForwardTraining(miopenHandle_t handle, const miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, const void *x, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t cxDesc, const void *cx, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t *yDesc, void *y, const miopenTensorDescriptor_t hyDesc, void *hy, const miopenTensorDescriptor_t cyDesc, void *cy, void *workSpace, size_t workSpaceNumBytes, void *reserveSpace, size_t reserveSpaceNumBytes)
Execute forward training for recurrent layer.
miopenRNNPaddingMode_t
Definition: miopen.h:4343
miopenStatus_t miopenSetRNNDescriptor_V2(miopenRNNDescriptor_t rnnDesc, const int hsize, const int nlayers, miopenDropoutDescriptor_t dropoutDesc, miopenRNNInputMode_t inMode, miopenRNNDirectionMode_t direction, miopenRNNMode_t rnnMode, miopenRNNBiasMode_t biasMode, miopenRNNAlgo_t algo, miopenDataType_t dataType)
Set the details of the RNN descriptor version 2. This version enables the use of dropout in rnn.
miopenStatus_t miopenDestroyRNNDescriptor(miopenRNNDescriptor_t rnnDesc)
Destroys the RNN descriptor object.
miopenStatus_t miopenGetRNNHiddenTensorSize(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int seqLen, miopenTensorDescriptor_t *xDesc, size_t *numBytes)
Obtain the size in bytes of the RNN hidden tensor.
miopenStatus_t miopenSetRNNPaddingMode(miopenRNNDescriptor_t rnnDesc, miopenRNNPaddingMode_t paddingMode)
This function sets the RNN padding mode in the RNN descriptor.
miopenStatus_t miopenRNNForwardInference(miopenHandle_t handle, miopenRNNDescriptor_t rnnDesc, const int sequenceLen, const miopenTensorDescriptor_t *xDesc, const void *x, const miopenTensorDescriptor_t hxDesc, const void *hx, const miopenTensorDescriptor_t cxDesc, const void *cx, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t *yDesc, void *y, const miopenTensorDescriptor_t hyDesc, void *hy, const miopenTensorDescriptor_t cyDesc, void *cy, void *workSpace, size_t workSpaceNumBytes)
Execute forward inference for RNN layer.
@ miopenRNNTANH
Definition: miopen.h:4286
@ miopenLSTM
Definition: miopen.h:4287
@ miopenGRU
Definition: miopen.h:4288
@ miopenRNNRELU
Definition: miopen.h:4285
@ miopenRNNlinear
Definition: miopen.h:4296
@ miopenRNNskip
Definition: miopen.h:4297
@ miopenRNNbidirection
Definition: miopen.h:4319
@ miopenRNNunidirection
Definition: miopen.h:4318
@ miopenRNNwithBias
Definition: miopen.h:4328
@ miopenRNNNoBias
Definition: miopen.h:4327
@ miopenRNNDataSeqMajorPadded
Definition: miopen.h:4364
@ miopenRNNDataSeqMajorNotPadded
Definition: miopen.h:4363
@ miopenRNNDataBatchMajorPadded
Definition: miopen.h:4365
@ miopenRNNDataUnknownLayout
Definition: miopen.h:4362
@ miopenRNNroundedDynamic
Definition: miopen.h:4309
@ miopenRNNfundamental
Definition: miopen.h:4307
@ miopenRNNdefault
Definition: miopen.h:4305
@ miopenRNNTraining
Definition: miopen.h:4353
@ miopenRNNInference
Definition: miopen.h:4354
@ miopenRNNAlgoGEMM
Definition: miopen.h:4336
@ miopenRNNIOWithPadding
Definition: miopen.h:4345
@ miopenRNNIONotPadded
Definition: miopen.h:4344
miopenStatus_t miopenGetPReLUBackwardWorkspaceSize(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, miopenTensorDescriptor_t weightDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the PReLU backward call.
miopenStatus_t miopenPReLUBackward(miopenHandle_t handle, void *workspace, size_t workspaceSizeInBytes, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t weightDesc, const void *weight, miopenTensorDescriptor_t doutputDesc, const void *doutput, miopenTensorDescriptor_t dinputDesc, void *dinput, miopenTensorDescriptor_t dweightDesc, void *dweight)
Execute a PReLU backward layer.
miopenStatus_t miopenReduceExtremeForward(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const void *x, const int32_t dim, const miopenReduceExtremeOp_t reduceExtremeOp, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t indiceDesc, void *indice)
Find the extreme (minimum, maximum) value and index of a tensor across a dimension.
miopenReduceExtremeOp_t
Definition: miopen.h:6723
miopenStatus_t miopenRoPEBackward(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t cosDesc, const void *cos, const miopenTensorDescriptor_t sinDesc, const void *sin, const miopenTensorDescriptor_t dxDesc, void *dx)
Execute a rope backward layer.
miopenStatus_t miopenRoPEForward(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t cosDesc, const void *cos, const miopenTensorDescriptor_t sinDesc, const void *sin, const miopenTensorDescriptor_t yDesc, void *y)
Execute a rope forward layer.
miopenStatus_t miopenFusedAdam(miopenHandle_t handle, const miopenTensorDescriptor_t paramDesc, void *param, const miopenTensorDescriptor_t gradDesc, const void *grad, const miopenTensorDescriptor_t expAvgDesc, void *expAvg, const miopenTensorDescriptor_t expAvgSqDesc, void *expAvgSq, const miopenTensorDescriptor_t maxExpAvgSqDesc, void *maxExpAvgSq, const miopenTensorDescriptor_t stateStepDesc, void *stateStep, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool amsgrad, const bool maximize, const bool adamw, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Perform Fused Adam optimization for a single tensor (Adaptive Moment Estimation).
miopenStatus_t miopenTransformersAdamW(miopenHandle_t handle, const miopenTensorDescriptor_t paramDesc, void *param, const miopenTensorDescriptor_t gradDesc, const void *grad, const miopenTensorDescriptor_t expAvgDesc, void *expAvg, const miopenTensorDescriptor_t expAvgSqDesc, void *expAvgSq, const miopenTensorDescriptor_t stateStepDesc, void *stateStep, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool correct_bias, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Implements Adam algorithm with weight decay fix as introduced in Decoupled Weight Decay Regularizatio...
miopenStatus_t miopenFusedAdamWithOutput(miopenHandle_t handle, const miopenTensorDescriptor_t paramInDesc, void *paramIn, const miopenTensorDescriptor_t paramOutDesc, void *paramOut, const miopenTensorDescriptor_t paramOutFloat16Desc, void *paramOutFloat16, const miopenTensorDescriptor_t gradInDesc, const void *gradIn, const miopenTensorDescriptor_t expAvgInDesc, void *expAvgIn, const miopenTensorDescriptor_t expAvgOutDesc, void *expAvgOut, const miopenTensorDescriptor_t expAvgSqInDesc, void *expAvgSqIn, const miopenTensorDescriptor_t expAvgSqOutDesc, void *expAvgSqOut, const miopenTensorDescriptor_t maxExpAvgSqInDesc, void *maxExpAvgSqIn, const miopenTensorDescriptor_t maxExpAvgSqOutDesc, void *maxExpAvgSqOut, const miopenTensorDescriptor_t stateStepInDesc, void *stateStepIn, const miopenTensorDescriptor_t stateStepOutDesc, void *stateStepOut, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool amsgrad, const bool maximize, const bool adamw, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Execute single tensor Adam optimization and receive the result in a separate output tensor.
miopenStatus_t miopenTransformersAdamWWithOutput(miopenHandle_t handle, const miopenTensorDescriptor_t paramInDesc, void *paramIn, const miopenTensorDescriptor_t paramOutDesc, void *paramOut, const miopenTensorDescriptor_t paramOutFloat16Desc, void *paramOutFloat16, const miopenTensorDescriptor_t gradInDesc, const void *gradIn, const miopenTensorDescriptor_t expAvgInDesc, void *expAvgIn, const miopenTensorDescriptor_t expAvgOutDesc, void *expAvgOut, const miopenTensorDescriptor_t expAvgSqInDesc, void *expAvgSqIn, const miopenTensorDescriptor_t expAvgSqOutDesc, void *expAvgSqOut, const miopenTensorDescriptor_t stateStepInDesc, void *stateStepIn, const miopenTensorDescriptor_t stateStepOutDesc, void *stateStepOut, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const float step_size, const bool correct_bias, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
Execute single tensor Adam optimization and receive the result in a separate output tensor.
miopenReduceTensorIndices_t
Definition: miopen.h:589
miopenStatus_t miopenSetReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc, miopenReduceTensorOp_t reduceTensorOp, miopenDataType_t reduceTensorCompType, miopenNanPropagation_t reduceTensorNanOpt, miopenReduceTensorIndices_t reduceTensorIndices, miopenIndicesType_t reduceTensorIndicesType)
Initialize a ReduceTensor descriptor object.
miopenStatus_t miopenCreateReduceTensorDescriptor(miopenReduceTensorDescriptor_t *reduceTensorDesc)
Creates the ReduceTensor descriptor object.
miopenReduceTensorOp_t
Definition: miopen.h:554
miopenIndicesType_t
Definition: miopen.h:599
miopenStatus_t miopenReduceTensor(miopenHandle_t handle, const miopenReduceTensorDescriptor_t reduceTensorDesc, void *indices, size_t indicesSizeInBytes, void *workspace, size_t workspaceSizeInBytes, const void *alpha, const miopenTensorDescriptor_t aDesc, const void *A, const void *beta, const miopenTensorDescriptor_t cDesc, void *C)
TensorReduce function doing reduction on tensor A by implementing C = alpha * reduceOp(A) + beta * C.
miopenStatus_t miopenGetReductionIndicesSize(miopenHandle_t handle, const miopenReduceTensorDescriptor_t reduceTensorDesc, const miopenTensorDescriptor_t aDesc, const miopenTensorDescriptor_t cDesc, size_t *sizeInBytes)
Helper function to query the minimum index space size required by the ReduceTensor call.
miopenStatus_t miopenDestroyReduceTensorDescriptor(miopenReduceTensorDescriptor_t reduceTensorDesc)
Destroy the ReduceTensor descriptor object.
miopenStatus_t miopenGetReductionWorkspaceSize(miopenHandle_t handle, const miopenReduceTensorDescriptor_t reduceTensorDesc, const miopenTensorDescriptor_t aDesc, const miopenTensorDescriptor_t cDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the ReduceTensor call.
miopenStatus_t miopenGetReduceTensorDescriptor(const miopenReduceTensorDescriptor_t reduceTensorDesc, miopenReduceTensorOp_t *reduceTensorOp, miopenDataType_t *reduceTensorCompType, miopenNanPropagation_t *reduceTensorNanOpt, miopenReduceTensorIndices_t *reduceTensorIndices, miopenIndicesType_t *reduceTensorIndicesType)
Query a ReduceTensor descriptor object.
miopenStatus_t miopenActivationForward(miopenHandle_t handle, const miopenActivationDescriptor_t activDesc, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Execute an activation forward layer.
miopenStatus_t miopenGLUBackward(miopenHandle_t handle, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputGradDesc, const void *outputGrad, const miopenTensorDescriptor_t inputGradDesc, void *inputGrad, const uint32_t dim)
Execute a GLU backward layer.
miopenStatus_t miopenActivationBackward(miopenHandle_t handle, const miopenActivationDescriptor_t activDesc, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx)
Execute an activation backward layer.
miopenActivationMode_t
Definition: miopen.h:500
miopenStatus_t miopenSetActivationDescriptor(const miopenActivationDescriptor_t activDesc, miopenActivationMode_t mode, double activAlpha, double activBeta, double activGamma)
Sets the activation layer descriptor details.
miopenStatus_t miopenCreateActivationDescriptor(miopenActivationDescriptor_t *activDesc)
Creates the Activation descriptor object.
miopenStatus_t miopenGetActivationDescriptor(const miopenActivationDescriptor_t activDesc, miopenActivationMode_t *mode, double *activAlpha, double *activBeta, double *activGamma)
Gets the activation layer descriptor details.
miopenStatus_t miopenGLUForward(miopenHandle_t handle, const miopenTensorDescriptor_t inputDesc, const void *input, const miopenTensorDescriptor_t outputDesc, void *output, const uint32_t dim)
Execute a GLU forward layer.
miopenStatus_t miopenDestroyActivationDescriptor(miopenActivationDescriptor_t activDesc)
Destroys the activation descriptor object.
miopenStatus_t miopenBatchNormalizationForwardInference(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedVariance, double epsilon)
Execute forward inference layer for batch normalization.
miopenStatus_t miopenBatchNormalizationForwardInference_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedVariance, double epsilon)
Execute forward inference layer for batch normalization.
miopenStatus_t miopenBatchNormForwardTrainingActivation(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, void *resultRunningMean, void *resultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance, const miopenActivationDescriptor_t activDesc)
Execute forward training layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormBackwardActivation(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, const void *alphaDataDiff, const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarianceDesc, const void *bnScale, const void *bnBias, void *resultBnScaleDiff, void *resultBnBiasDiff, double epsilon, const void *savedMean, const void *savedInvVariance, const miopenActivationDescriptor_t activDesc)
Execute backwards propagation layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormalizationForwardTraining_V3(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, const void *prevResultRunningMean, const void *prevResultRunningVariance, void *nextResultRunningMean, void *nextResultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance)
Execute forward training layer for batch normalization.
miopenStatus_t miopenBatchNormalizationForwardTraining(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t bnScaleBiasMeanVarDesc, void *bnScale, void *bnBias, double expAvgFactor, void *resultRunningMean, void *resultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance)
Execute forward training layer for batch normalization.
miopenStatus_t miopenBatchNormalizationForwardTraining_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, void *resultRunningMean, void *resultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance)
Execute forward training layer for batch normalization.
miopenStatus_t miopenBatchNormalizationBackward_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, const void *alphaDataDiff, const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, const void *bnScale, void *resultBnScaleDiff, void *resultBnBiasDiff, double epsilon, const void *savedMean, const void *savedInvVariance)
Execute backwards propagation layer for batch normalization.
miopenStatus_t miopenBatchNormForwardTrainingActivation_V2(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasVarDesc, const miopenTensorDescriptor_t savedMeanDesc, const miopenTensorDescriptor_t savedVarDesc, void *bnScale, void *bnBias, double expAvgFactor, const void *prevResultRunningMean, const void *prevResultRunningVariance, void *nextResultRunningMean, void *nextResultRunningVariance, double epsilon, void *resultSaveMean, void *resultSaveInvVariance, const miopenActivationDescriptor_t activDesc)
Execute forward training layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormalizationForwardInferenceInvVariance(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estInvVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedInvVariance)
Execute forward inference layer for batch normalization using inverse variance.
miopenStatus_t miopenBatchNormalizationBackward(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, const void *alphaDataDiff, const void *betaDataDiff, const void *alphaParamDiff, const void *betaParamDiff, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t bnScaleBiasDiffDesc, const void *bnScale, void *resultBnScaleDiff, void *resultBnBiasDiff, double epsilon, const void *savedMean, const void *savedInvVariance)
Execute backwards propagation layer for batch normalization.
miopenBatchNormMode_t
Definition: miopen.h:490
miopenStatus_t miopenBatchNormForwardInferenceActivation(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedVariance, double epsilon, const miopenActivationDescriptor_t activDesc)
Execute forward inference layer for batch normalization with fused activation.
miopenStatus_t miopenBatchNormForwardInferenceActivationInvVariance(miopenHandle_t handle, miopenBatchNormMode_t bn_mode, void *alpha, void *beta, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t scaleDesc, const miopenTensorDescriptor_t biasDesc, const miopenTensorDescriptor_t estMeanDesc, const miopenTensorDescriptor_t estInvVarianceDesc, void *bnScale, void *bnBias, void *estimatedMean, void *estimatedInvVariance, const miopenActivationDescriptor_t activDesc)
Execute forward inference layer for batch normalization with fused activation using inverse variance.
miopenStatus_t miopenDeriveBNTensorDescriptor(miopenTensorDescriptor_t derivedBnDesc, const miopenTensorDescriptor_t xDesc, miopenBatchNormMode_t bn_mode)
Derive tensor for gamma and beta from input tensor descriptor.
miopenStatus_t miopenCatForward(miopenHandle_t handle, const int32_t xCount, const miopenTensorDescriptor_t *xDescs, const void *const *xs, const miopenTensorDescriptor_t yDesc, void *y, const int32_t dim)
Execute a cat forward layer.
miopenStatus_t miopenInitConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc, int spatialDim, const int *padA, const int *strideA, const int *dilationA, miopenConvolutionMode_t c_mode)
Creates an N-dimensional convolution layer descriptor.
miopenStatus_t miopenConvolutionForwardImmediate(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const void *w, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, void *y, void *workSpace, size_t workSpaceSize, const uint64_t solution_id)
Executes the Forward convolution operation based on the provided solution ID.
miopenStatus_t miopenConvolutionBackwardDataCompileSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, const uint64_t solution_id)
Compiles the solution provided by the user, this solution may be acquired by the miopenConvolutionBac...
miopenStatus_t miopenConvolutionForwardGetSolutionWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, const uint64_t solution_id, size_t *workSpaceSize)
Returns the workspace size required for a particular solution id.
miopenStatus_t miopenConvolutionForward(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, miopenConvFwdAlgorithm_t algo, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, void *workSpace, size_t workSpaceSize)
Execute a forward convolution layer.
miopenStatus_t miopenGetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc, int *groupCount)
Get the number of groups to be used in Group/Depthwise convolution.
miopenStatus_t miopenGetConvolutionFindMode(const miopenConvolutionDescriptor_t convDesc, miopenConvolutionFindMode_t *findMode)
Reads the Find Mode attribute from the convolution descriptor.
miopenStatus_t miopenInitConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc, miopenConvolutionMode_t c_mode, int pad_h, int pad_w, int stride_h, int stride_w, int dilation_h, int dilation_w)
Creates a 2-D convolution layer descriptor.
miopenConvAlgorithm_t
Definition: miopen.h:1254
miopenStatus_t miopenConvolutionBackwardData(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, miopenConvBwdDataAlgorithm_t algo, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, void *workSpace, size_t workSpaceSize)
Execute a backward data convolution layer.
miopenStatus_t miopenGetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc, const miopenConvolutionAttrib_t attr, int *value)
Get the attribute of the convolution descriptor.
miopenStatus_t miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t inputTensorDesc, const miopenTensorDescriptor_t filterDesc, int *nDim, int *outputTensorDimA)
Get the shape of a resulting N-dimensional tensor from a (N-2)-dimensional convolution.
miopenStatus_t miopenConvolutionBackwardWeightsCompileSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, const uint64_t solution_id)
Compiles the solution provided by the user, this solution may be acquired by the miopenConvolutionBac...
miopenStatus_t miopenFindConvolutionBackwardWeightsAlgorithm(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, void *dw, const int requestAlgoCount, int *returnedAlgoCount, miopenConvAlgoPerf_t *perfResults, void *workSpace, size_t workSpaceSize, bool exhaustiveSearch)
Search and run the backwards weights convolutional algorithms and return a list of kernel times.
miopenStatus_t miopenGetConvolutionForwardOutputDim(miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t inputTensorDesc, const miopenTensorDescriptor_t filterDesc, int *n, int *c, int *h, int *w)
Get the shape of a resulting 4-D tensor from a 2-D convolution.
miopenConvFwdAlgorithm_t
Definition: miopen.h:1217
miopenStatus_t miopenConvolutionForwardGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Query the workspace size required for a forward convolution algorithm.
miopenStatus_t miopenCreateConvolutionDescriptor(miopenConvolutionDescriptor_t *convDesc)
Creates a convolution layer descriptor.
miopenStatus_t miopenConvolutionBackwardWeightsGetSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, const size_t maxSolutionCount, size_t *solutionCount, miopenConvSolution_t *solutions)
Query the applicable solutions for a backward convolution w-r-t weights as described by input,...
miopenStatus_t miopenGetConvolutionNdDescriptor(miopenConvolutionDescriptor_t convDesc, int requestedSpatialDim, int *spatialDim, int *padA, int *strideA, int *dilationA, miopenConvolutionMode_t *c_mode)
Retrieves an N-dimensional convolution layer descriptor's details.
miopenConvBwdDataAlgorithm_t
Definition: miopen.h:1240
miopenConvolutionAttrib_t
Definition: miopen.h:611
miopenStatus_t miopenConvolutionBackwardWeightsGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, size_t *workSpaceSize)
Get the GPU memory required for the backward weights convolution algorithm.
miopenStatus_t miopenConvolutionBackwardWeightsImmediate(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, void *dw, void *workSpace, size_t workSpaceSize, const uint64_t solution_id)
Executes the Backward convolution w-r-t weights operation based on the provided solution ID.
miopenStatus_t miopenFindConvolutionBackwardDataAlgorithm(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, void *dx, const int requestAlgoCount, int *returnedAlgoCount, miopenConvAlgoPerf_t *perfResults, void *workSpace, size_t workSpaceSize, bool exhaustiveSearch)
Search and run the backwards data convolution algorithms and return a list of kernel times.
miopenStatus_t miopenConvolutionBackwardDataGetSolution(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, const size_t maxSolutionCount, size_t *solutionCount, miopenConvSolution_t *solutions)
Query the applicable solutions for a backward convolution w-r-t data as described by input,...
miopenStatus_t miopenConvolutionForwardBias(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t bDesc, const void *b, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Calculate element-wise scale and shift of a tensor via a bias tensor.
miopenConvolutionMode_t
Definition: miopen.h:415
miopenStatus_t miopenSetConvolutionAttribute(miopenConvolutionDescriptor_t convDesc, const miopenConvolutionAttrib_t attr, int value)
Set the attribute of the convolution descriptor.
miopenStatus_t miopenConvolutionBackwardDataImmediate(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, void *dx, void *workSpace, size_t workSpaceSize, const uint64_t solution_id)
Executes the Backward convolution w-r-t data operation based on the provided solution ID.
miopenStatus_t miopenSetTransposeConvNdOutputPadding(miopenConvolutionDescriptor_t convDesc, int spatialDim, const int *adjA)
Set the output padding to be used in N-dimensional Transpose convolution.
miopenStatus_t miopenSetConvolutionFindMode(miopenConvolutionDescriptor_t convDesc, miopenConvolutionFindMode_t findMode)
Sets the Find Mode attribute in the convolution descriptor.
miopenStatus_t miopenSetTransposeConvOutputPadding(miopenConvolutionDescriptor_t convDesc, int adj_h, int adj_w)
Set the output padding to be used in 2-D Transpose convolution.
miopenConvolutionFindMode_t
Definition: miopen.h:640
miopenStatus_t miopenGetConvolutionSpatialDim(miopenConvolutionDescriptor_t convDesc, int *spatialDim)
Retrieves the spatial dimension of a convolution layer descriptor.
miopenConvBwdWeightsAlgorithm_t
Definition: miopen.h:1229
miopenStatus_t miopenConvolutionForwardCompileSolution(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, const uint64_t solution_id)
Compiles the solution provided by the user, this solution may be acquired by the miopenConvolutionFor...
miopenStatus_t miopenConvolutionBackwardDataGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, size_t *solutionCount)
Query the maximum number of solutions applicable for the given input/output and weights tensor descri...
miopenStatus_t miopenConvolutionBackwardWeights(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenConvolutionDescriptor_t convDesc, miopenConvBwdWeightsAlgorithm_t algo, const void *beta, const miopenTensorDescriptor_t dwDesc, void *dw, void *workSpace, size_t workSpaceSize)
Execute a backward weights convolution layer.
miopenStatus_t miopenConvolutionBackwardDataGetWorkSpaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, size_t *workSpaceSize)
Query the workspace size required for a backward data convolution algorithm.
miopenStatus_t miopenFindConvolutionForwardAlgorithm(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t wDesc, const void *w, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, void *y, const int requestAlgoCount, int *returnedAlgoCount, miopenConvAlgoPerf_t *perfResults, void *workSpace, size_t workSpaceSize, bool exhaustiveSearch)
Search and run the forward convolutional algorithms and return a list of kernel times.
miopenStatus_t miopenDestroyConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc)
Destroys the convolution descriptor object.
miopenStatus_t miopenConvolutionBackwardWeightsGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, size_t *solutionCount)
Query the maximum number of solutions applicable for the given input/output and weights tensor descri...
miopenStatus_t miopenSetConvolutionGroupCount(miopenConvolutionDescriptor_t convDesc, int groupCount)
Set the number of groups to be used in Group/Depthwise convolution.
miopenStatus_t miopenConvolutionBackwardWeightsGetSolutionWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dwDesc, const uint64_t solution_id, size_t *workSpaceSize)
Returns the workspace size required for a particular solution id.
miopenStatus_t miopenConvolutionForwardGetSolution(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, const size_t maxSolutionCount, size_t *solutionCount, miopenConvSolution_t *solutions)
Query the applicable solutions for a convolution configuration described by input,...
miopenStatus_t miopenGetConvolutionDescriptor(miopenConvolutionDescriptor_t convDesc, miopenConvolutionMode_t *c_mode, int *pad_h, int *pad_w, int *stride_h, int *stride_w, int *dilation_h, int *dilation_w)
Retrieves a 2-D convolution layer descriptor's details.
miopenStatus_t miopenConvolutionForwardGetSolutionCount(miopenHandle_t handle, const miopenTensorDescriptor_t wDesc, const miopenTensorDescriptor_t xDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t yDesc, size_t *solutionCount)
Query the maximum number of solutions applicable for the given input/output and weights tensor descri...
miopenStatus_t miopenConvolutionBackwardDataGetSolutionWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t wDesc, const miopenConvolutionDescriptor_t convDesc, const miopenTensorDescriptor_t dxDesc, const uint64_t solution_id, size_t *workSpaceSize)
Returns the workspace size required for a particular solution id.
miopenStatus_t miopenConvolutionBackwardBias(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t dyDesc, const void *dy, const void *beta, const miopenTensorDescriptor_t dbDesc, void *db)
Calculates the gradient with respect to the bias.
@ miopenConvolutionAlgoDirect
Definition: miopen.h:1256
@ miopenConvolutionAlgoWinograd
Definition: miopen.h:1258
@ miopenConvolutionAlgoFFT
Definition: miopen.h:1257
@ miopenConvolutionAlgoImplicitGEMM
Definition: miopen.h:1259
@ miopenConvolutionAlgoGEMM
Definition: miopen.h:1255
@ miopenConvolutionFwdAlgoFFT
Definition: miopen.h:1220
@ miopenConvolutionFwdAlgoImplicitGEMM
Definition: miopen.h:1222
@ miopenConvolutionFwdAlgoGEMM
Definition: miopen.h:1218
@ miopenConvolutionFwdAlgoWinograd
Definition: miopen.h:1221
@ miopenConvolutionFwdAlgoDirect
Definition: miopen.h:1219
@ miopenTransposeBwdDataAlgoGEMM
Definition: miopen.h:1245
@ miopenConvolutionBwdDataAlgoDirect
Definition: miopen.h:1242
@ miopenConvolutionBwdDataAlgoGEMM
Definition: miopen.h:1241
@ miopenConvolutionBwdDataAlgoFFT
Definition: miopen.h:1243
@ miopenConvolutionBwdDataAlgoWinograd
Definition: miopen.h:1244
@ miopenConvolutionBwdDataAlgoImplicitGEMM
Definition: miopen.h:1247
@ miopenConvolutionBwdWeightsAlgoGEMM
Definition: miopen.h:1230
@ miopenConvolutionBwdWeightsAlgoWinograd
Definition: miopen.h:1232
@ miopenConvolutionBwdWeightsAlgoDirect
Definition: miopen.h:1231
@ miopenConvolutionBwdWeightsAlgoImplicitGEMM
Definition: miopen.h:1233
miopenStatus_t miopenDestroyDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc)
Destroys the dropout descriptor object.
miopenRNGType_t
Definition: miopen.h:5799
miopenStatus_t miopenDropoutGetStatesSize(miopenHandle_t handle, size_t *stateSizeInBytes)
Query the amount of memory required to store the states of the random number generators.
miopenStatus_t miopenDropoutGetReserveSpaceSize(const miopenTensorDescriptor_t xDesc, size_t *reserveSpaceSizeInBytes)
Query the amount of memory required to run dropout.
miopenStatus_t miopenDropoutForward(miopenHandle_t handle, const miopenDropoutDescriptor_t dropoutDesc, const miopenTensorDescriptor_t noise_shape, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t yDesc, void *y, void *reserveSpace, size_t reserveSpaceSizeInBytes)
Execute forward dropout operation.
miopenStatus_t miopenGetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc, miopenHandle_t handle, float *dropout, void **states, unsigned long long *seed, bool *use_mask, bool *state_evo, miopenRNGType_t *rng_mode)
Get the details of the dropout descriptor.
miopenStatus_t miopenCreateDropoutDescriptor(miopenDropoutDescriptor_t *dropoutDesc)
Creates the dropout descriptor object.
miopenStatus_t miopenDropoutBackward(miopenHandle_t handle, const miopenDropoutDescriptor_t dropoutDesc, const miopenTensorDescriptor_t noise_shape, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t dxDesc, void *dx, void *reserveSpace, size_t reserveSpaceSizeInBytes)
Execute backward dropout operation.
miopenStatus_t miopenRestoreDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc, miopenHandle_t handle, float dropout, void *states, size_t stateSizeInBytes, unsigned long long seed, bool use_mask, bool state_evo, miopenRNGType_t rng_mode)
Restore the dropout descriptor to a saved state.
miopenStatus_t miopenSetDropoutDescriptor(miopenDropoutDescriptor_t dropoutDesc, miopenHandle_t handle, float dropout, void *states, size_t stateSizeInBytes, unsigned long long seed, bool use_mask, bool state_evo, miopenRNGType_t rng_mode)
Initialize the dropout descriptor.
@ MIOPEN_RNG_PSEUDO_XORWOW
Definition: miopen.h:5800
miopenFindResultsOrder_t
Definition: miopen.h:6230
miopenStatus_t miopenGetSolutionSolverId(miopenSolution_t solution, uint64_t *solverId)
Reads id of the solver referred by the solution.
miopenStatus_t miopenSetSoftmaxDescriptor(miopenSoftmaxDescriptor_t softmaxDesc, float alpha, float beta, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode)
Sets the softmax descriptor details.
miopenStatus_t miopenGetSolutionTime(miopenSolution_t solution, float *time)
Reads the time spent to execute the solution the last time it was run.
miopenStatus_t miopenGetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float *scale)
Gets the Mha descriptor details.
miopenMhaMask_t
Mask modes for a Mha operation (none or causal).
Definition: miopen.h:6258
miopenStatus_t miopenSetMhaDescriptor(miopenMhaDescriptor_t mhaDesc, float scale)
Sets the Mha descriptor details.
miopenStatus_t miopenSetFindOptionWorkspaceLimit(miopenFindOptions_t options, size_t value)
Sets the workspace limit find option. Default value is maximum of size_t.
miopenStatus_t miopenSetFindOptionPreallocatedTensor(miopenFindOptions_t options, miopenTensorArgumentId_t id, void *buffer)
Attaches a preallocated tensor to find options. If not used, buffers are allocated by MIOpen internal...
miopenStatus_t miopenCreateSoftmaxProblem(miopenProblem_t *problem, miopenSoftmaxDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing a softmax operation.
miopenStatus_t miopenDestroyFindOptions(miopenFindOptions_t options)
Destroys miopenFindOptions object.
miopenStatus_t miopenSetFindOptionAttachBinaries(miopenFindOptions_t options, unsigned attach)
Forces library to attach kernel binaries to solutions for later saving. This allows zero lookup miope...
miopenStatus_t miopenFuseProblems(miopenProblem_t problem1, miopenProblem_t problem2)
Fuse two problems into a single one. Problems can be either regular, or fused. No problems are dispos...
miopenStatus_t miopenGetSolutionSize(miopenSolution_t solution, size_t *size)
Reads the expected size of a solution.
miopenTensorArgumentId_t
Definition: miopen.h:6149
miopenStatus_t miopenFindSolutions(miopenHandle_t handle, miopenProblem_t problem, miopenFindOptions_t options, miopenSolution_t *solutions, size_t *numSolutions, size_t maxSolutions)
Finds solutions to a problem by running different applicable solutions. Memory is automatically alloc...
miopenStatus_t miopenSetFindOptionTuning(miopenFindOptions_t options, int value)
Sets the tuning find option. Default value is zero.
miopenStatus_t miopenGetSolutionWorkspaceSize(miopenSolution_t solution, size_t *workspaceSize)
Reads the amount of workspace required to execute the solution.
miopenStatus_t miopenSetFindOptionResultsOrder(miopenFindOptions_t options, miopenFindResultsOrder_t value)
Sets the results order find option. Default value is miopenFindResultsOrderByTime.
miopenStatus_t miopenRunSolution(miopenHandle_t handle, miopenSolution_t solution, size_t nInputs, const miopenTensorArgument_t *tensors, void *workspace, size_t workspaceSize)
Runs the solution using the passed in buffers.
miopenStatus_t miopenCreateMhaProblem(miopenProblem_t *problem, miopenMhaDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing a Mha operation.
miopenProblemDirection_t
Definition: miopen.h:6136
miopenStatus_t miopenCreateBatchnormProblem(miopenProblem_t *problem, miopenBatchNormMode_t mode, bool runningMeanVariance, miopenProblemDirection_t direction)
Initializes a problem object describing a batch normalization operation.
miopenStatus_t miopenCreateMhaDescriptor(miopenMhaDescriptor_t *mhaDesc)
Creates the mha descriptor object.
miopenStatus_t miopenDestroySolution(miopenSolution_t solution)
Destroys solution object.
miopenStatus_t miopenLoadSolution(miopenSolution_t *solution, const char *data, size_t size)
Loads solution object from binary data.
miopenStatus_t miopenSaveSolution(miopenSolution_t solution, char *data)
Saves a solution object as binary data.
miopenStatus_t miopenGetSolverIdConvAlgorithm(uint64_t solverId, miopenConvAlgorithm_t *result)
Gets the convolution algorithm implemented by a solver.
miopenStatus_t miopenSetFindOptionPreallocatedWorkspace(miopenFindOptions_t options, void *buffer, size_t size)
Attaches the preallocated workspace to find options. Allocated by the library by default.
miopenStatus_t miopenCreateActivationProblem(miopenProblem_t *problem, miopenActivationDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing an activation operation.
miopenStatus_t miopenDestroyProblem(miopenProblem_t problem)
Destroys a problem object.
miopenStatus_t miopenCreateFindOptions(miopenFindOptions_t *options)
Initializes miopenFindOptions object.
miopenStatus_t miopenGetSoftmaxDescriptor(const miopenSoftmaxDescriptor_t softmaxDesc, float *alpha, float *beta, miopenSoftmaxAlgorithm_t *algorithm, miopenSoftmaxMode_t *mode)
Gets the softmax layer descriptor details.
miopenStatus_t miopenCreateConvProblem(miopenProblem_t *problem, miopenConvolutionDescriptor_t operatorDesc, miopenProblemDirection_t direction)
Initializes a problem object describing a convolution operation.
miopenStatus_t miopenCreateBiasProblem(miopenProblem_t *problem, miopenProblemDirection_t direction)
Initializes a problem object describing a bias operation.
miopenStatus_t miopenSetProblemTensorDescriptor(miopenProblem_t problem, miopenTensorArgumentId_t id, const miopenTensorDescriptor_t descriptor)
Sets a tensor descriptor for the specified argument.
miopenStatus_t miopenCreateSoftmaxDescriptor(miopenSoftmaxDescriptor_t *softmaxDesc)
Creates the Softmax descriptor object.
@ miopenFindResultsOrderByWorkspaceSize
Definition: miopen.h:6232
@ miopenFindResultsOrderByTime
Definition: miopen.h:6231
@ miopenMhaMaskCausal
Definition: miopen.h:6260
@ miopenMhaMaskNone
Definition: miopen.h:6259
@ miopenTensorMhaDescaleS
Definition: miopen.h:6161
@ miopenTensorMhaO
Definition: miopen.h:6167
@ miopenTensorMhaAmaxDV
Definition: miopen.h:6185
@ miopenTensorBatchnormScaleDiff
Definition: miopen.h:6208
@ miopenTensorMhaMask
Definition: miopen.h:6219
@ miopenTensorMhaDescaleDS
Definition: miopen.h:6175
@ miopenTensorMhaDropoutSeed
Definition: miopen.h:6165
@ miopenTensorBatchnormSavedMean
Definition: miopen.h:6205
@ miopenTensorActivationDY
Definition: miopen.h:6193
@ miopenTensorBatchnormDX
Definition: miopen.h:6213
@ miopenTensorMhaDescaleV
Definition: miopen.h:6160
@ miopenTensorMhaK
Definition: miopen.h:6155
@ miopenTensorConvolutionX
Definition: miopen.h:6151
@ miopenTensorMhaAmaxDK
Definition: miopen.h:6184
@ miopenTensorMhaScaleDS
Definition: miopen.h:6176
@ miopenTensorBatchnormBias
Definition: miopen.h:6211
@ miopenTensorMhaM
Definition: miopen.h:6170
@ miopenTensorBatchnormRunningMean
Definition: miopen.h:6203
@ miopenTensorMhaDropoutProbability
Definition: miopen.h:6164
@ miopenTensorSoftmaxY
Definition: miopen.h:6198
@ miopenTensorSoftmaxDY
Definition: miopen.h:6200
@ miopenTensorMhaDescaleO
Definition: miopen.h:6173
@ miopenTensorMhaScaleO
Definition: miopen.h:6163
@ miopenScalarBatchnormExpAvgFactor
Definition: miopen.h:6221
@ miopenTensorBatchnormScale
Definition: miopen.h:6207
@ miopenTensorBatchnormRunningVariance
Definition: miopen.h:6204
@ miopenTensorActivationDX
Definition: miopen.h:6192
@ miopenTensorMhaScaleDK
Definition: miopen.h:6178
@ miopenTensorMhaV
Definition: miopen.h:6157
@ miopenTensorMhaAmaxS
Definition: miopen.h:6169
@ miopenTensorBatchnormBiasDiff
Definition: miopen.h:6212
@ miopenTensorBatchnormSavedVariance
Definition: miopen.h:6206
@ miopenTensorActivationX
Definition: miopen.h:6190
@ miopenTensorMhaScaleDV
Definition: miopen.h:6179
@ miopenTensorMhaAmaxO
Definition: miopen.h:6168
@ miopenScalarBatchnormEpsilon
Definition: miopen.h:6222
@ miopenTensorMhaScaleDQ
Definition: miopen.h:6177
@ miopenTensorSoftmaxDX
Definition: miopen.h:6199
@ miopenTensorMhaAmaxDS
Definition: miopen.h:6186
@ miopenTensorMhaDV
Definition: miopen.h:6182
@ miopenTensorMhaQ
Definition: miopen.h:6156
@ miopenTensorMhaAmaxDQ
Definition: miopen.h:6183
@ miopenTensorConvolutionY
Definition: miopen.h:6153
@ miopenTensorBatchnormEstimatedMean
Definition: miopen.h:6209
@ miopenTensorBatchnormDY
Definition: miopen.h:6214
@ miopenTensorMhaZInv
Definition: miopen.h:6171
@ miopenTensorMhaDescaleQ
Definition: miopen.h:6159
@ miopenTensorMhaBias
Definition: miopen.h:6187
@ miopenTensorBatchnormEstimatedVariance
Definition: miopen.h:6210
@ miopenTensorArgumentIsScalar
Definition: miopen.h:6217
@ miopenTensorArgumentIdInvalid
Definition: miopen.h:6150
@ miopenTensorMhaDO
Definition: miopen.h:6172
@ miopenTensorMhaDescaleDO
Definition: miopen.h:6174
@ miopenTensorConvolutionW
Definition: miopen.h:6152
@ miopenTensorMhaDescaleK
Definition: miopen.h:6158
@ miopenTensorMhaDropoutOffset
Definition: miopen.h:6166
@ miopenTensorBiasY
Definition: miopen.h:6195
@ miopenTensorMhaDQ
Definition: miopen.h:6180
@ miopenTensorSoftmaxX
Definition: miopen.h:6197
@ miopenTensorBatchnormY
Definition: miopen.h:6202
@ miopenTensorMhaScaleS
Definition: miopen.h:6162
@ miopenTensorBias
Definition: miopen.h:6196
@ miopenTensorActivationY
Definition: miopen.h:6191
@ miopenTensorBatchnormX
Definition: miopen.h:6201
@ miopenTensorBiasX
Definition: miopen.h:6194
@ miopenTensorMhaDK
Definition: miopen.h:6181
@ miopenProblemDirectionBackwardWeights
Definition: miopen.h:6139
@ miopenProblemDirectionInference
Definition: miopen.h:6141
@ miopenProblemDirectionForward
Definition: miopen.h:6137
@ miopenProblemDirectionBackward
Definition: miopen.h:6138
miopenStatus_t miopenGetGetitemWorkspaceSize(miopenHandle_t handle, uint32_t indexCount, const miopenTensorDescriptor_t *indexDescs, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the getitem call.
miopenStatus_t miopenGetitemBackward(miopenHandle_t handle, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t dyDesc, const void *dy, uint32_t indexCount, const miopenTensorDescriptor_t *indexDescs, const void *const *indexs, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t errorDesc, void *error, uint32_t dimCount, const int32_t *dims, uint32_t sliceCount, const int32_t *slices, uint32_t offset)
Execute a getitem backward layer.
miopenStatus_t miopenGroupNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t biasDesc, const void *bias, const uint64_t num_groups, const float epsilon, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t meanDesc, void *mean, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute a groupnorm forward layer.
miopenStatus_t miopenCreateWithStream(miopenHandle_t *handle, miopenAcceleratorQueue_t stream)
Create a MIOpen handle with an accelerator stream.
miopenStatus_t miopenDestroy(miopenHandle_t handle)
Destroys the MIOpen handle.
miopenTuningPolicy_t
Definition: miopen.h:8017
miopenStatus_t miopenGetTuningPolicy(miopenHandle_t handle, miopenTuningPolicy_t *value)
Get tuning policy from a handle.
void(* miopenDeallocatorFunction)(void *context, void *memory)
Custom deallocator function.
Definition: miopen.h:144
miopenMathType_t
Definition: miopen.h:102
miopenStatus_t miopenGetStream(miopenHandle_t handle, miopenAcceleratorQueue_t *streamID)
Get the previously created accelerator command queue.
miopenStatus_t miopenEnableProfiling(miopenHandle_t handle, bool enable)
Enable profiling to retrieve kernel time.
miopenStatus_t miopenGetVersion(size_t *major, size_t *minor, size_t *patch)
Method to return version of MIOpen.
miopenStatus_t miopenSetTuningPolicy(miopenHandle_t handle, miopenTuningPolicy_t newValue)
Update tuning policy for a specific handle. API alternative for MIOPEN_FIND_ENFORCE environment varia...
miopenStatus_t
Definition: miopen.h:87
miopenStatus_t miopenSetAllocator(miopenHandle_t handle, miopenAllocatorFunction allocator, miopenDeallocatorFunction deallocator, void *allocatorContext)
Set allocator for previously created miopenHandle.
miopenF8RoundingMode_t
Definition: miopen.h:111
void *(* miopenAllocatorFunction)(void *context, size_t sizeBytes)
Custom allocator function.
Definition: miopen.h:134
const char * miopenGetErrorString(miopenStatus_t error)
Get character string for an error code.
miopenStatus_t miopenCreate(miopenHandle_t *handle)
Method to create the MIOpen handle object.
miopenStatus_t miopenGetKernelTime(miopenHandle_t handle, float *time)
Get time for last kernel launched.
miopenStatus_t miopenSetStream(miopenHandle_t handle, miopenAcceleratorQueue_t streamID)
Set accelerator command queue previously created.
@ miopenMathDefault
Definition: miopen.h:104
@ miopenMathPedantic
Definition: miopen.h:105
@ miopenStatusUnsupportedOp
Definition: miopen.h:96
@ miopenStatusGpuOperationsSkipped
Definition: miopen.h:97
@ miopenStatusUnknownError
Definition: miopen.h:95
@ miopenStatusSuccess
Definition: miopen.h:88
@ miopenStatusVersionMismatch
Definition: miopen.h:98
@ miopenStatusAllocFailed
Definition: miopen.h:92
@ miopenStatusNotImplemented
Definition: miopen.h:94
@ miopenStatusBadParm
Definition: miopen.h:91
@ miopenStatusNotInitialized
Definition: miopen.h:89
@ miopenStatusInternalError
Definition: miopen.h:93
@ miopenStatusInvalidValue
Definition: miopen.h:90
@ miopenF8RoundingModeStandard
Definition: miopen.h:112
@ miopenF8RoundingModeStochastic
Definition: miopen.h:113
miopenStatus_t miopenKthvalueForward(miopenHandle_t handle, miopenTensorDescriptor_t inputDesc, const void *input, miopenTensorDescriptor_t outputDesc, void *output, miopenTensorDescriptor_t indicesDesc, size_t *indices, size_t k, int32_t dim=-1, bool keepDim=false)
Execute a Kthvalue forward layer.
miopenStatus_t miopenLayerNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t biasDesc, const void *bias, const float epsilon, const int32_t normalized_dim, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t meanDesc, void *mean, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute a layernorm forward layer.
miopenStatus_t miopenT5LayerNormBackward(miopenHandle_t handle, miopenNormMode_t mode, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t rstdDesc, const void *rstd, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t dwDesc, void *dw)
Execute a T5layernorm backward layer.
miopenStatus_t miopenLayerNormBackward(miopenHandle_t handle, miopenNormMode_t mode, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t meanDesc, const void *mean, const miopenTensorDescriptor_t rstdDesc, const void *rstd, const int32_t normalized_dim, const miopenTensorDescriptor_t dxDesc, void *dx, const miopenTensorDescriptor_t dwDesc, void *dw, const miopenTensorDescriptor_t dbDesc, void *db)
Execute a layernorm backward layer.
miopenStatus_t miopenT5LayerNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t weightDesc, const void *weight, const float epsilon, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute a T5layernorm forward layer.
miopenNormMode_t
Definition: miopen.h:471
miopenStatus_t miopenGetLayerNormBackwardWorkspaceSize(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenTensorDescriptor_t weightDesc, const miopenTensorDescriptor_t meanDesc, const miopenTensorDescriptor_t rstdDesc, const int32_t normalized_dim, const miopenTensorDescriptor_t dxDesc, const miopenTensorDescriptor_t dwDesc, const miopenTensorDescriptor_t dbDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the layernorm backward call.
miopenStatus_t miopenGetT5LayerNormBackwardWorkspaceSize(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t dyDesc, const miopenTensorDescriptor_t xDesc, const miopenTensorDescriptor_t weightDesc, const miopenTensorDescriptor_t rstdDesc, const miopenTensorDescriptor_t dxDesc, const miopenTensorDescriptor_t dwDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the T5layernorm backward call.
miopenStatus_t miopenAddLayerNormForward(miopenHandle_t handle, miopenNormMode_t mode, const miopenTensorDescriptor_t xDesc, const void *x, const miopenTensorDescriptor_t x2Desc, const void *x2, const miopenTensorDescriptor_t weightDesc, const void *weight, const miopenTensorDescriptor_t biasDesc, const void *bias, const float epsilon, const int32_t normalized_dim, const miopenTensorDescriptor_t yDesc, void *y, const miopenTensorDescriptor_t meanDesc, void *mean, const miopenTensorDescriptor_t rstdDesc, void *rstd)
Execute an add and layernorm forward layer.
miopenStatus_t miopenSet2dPoolingDescriptor(miopenPoolingDescriptor_t poolDesc, miopenPoolingMode_t mode, int windowHeight, int windowWidth, int pad_h, int pad_w, int stride_h, int stride_w)
Sets the details of a 2-D pooling layer descriptor.
miopenStatus_t miopenSetPoolingWorkSpaceIndexMode(miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t workspace_index)
Set workspace index mode for pooling layer. The default mode is miopenPoolingWorkspaceIndexMask.
miopenStatus_t miopenGetPoolingForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, const miopenTensorDescriptor_t tensorDesc, int *n, int *c, int *h, int *w)
Gets the shape of the output tensor for 2-D pooling.
miopenStatus_t miopenPoolingGetWorkSpaceSize(const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Get the amount of GPU memory required for pooling.
miopenStatus_t miopenSetNdPoolingDescriptor(miopenPoolingDescriptor_t poolDesc, const miopenPoolingMode_t mode, int nbDims, const int *windowDimA, const int *padA, const int *stridesA)
Set details of an N-D pooling layer descriptor.
miopenStatus_t miopenPoolingForward(miopenHandle_t handle, const miopenPoolingDescriptor_t poolDesc, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, bool do_backward, void *workSpace, size_t workSpaceSize)
Execute a forward pooling layer.
miopenPoolingMode_t
Definition: miopen.h:438
miopenStatus_t miopenPoolingGetWorkSpaceSizeV2(const miopenPoolingDescriptor_t poolDesc, const miopenTensorDescriptor_t yDesc, size_t *workSpaceSize)
Get the amount of GPU memory required for pooling.
miopenStatus_t miopenGetPoolingWorkSpaceIndexMode(miopenPoolingDescriptor_t poolDesc, miopenPoolingWorkspaceIndexMode_t *workspace_index)
Get workspace index mode for pooling layer.
miopenIndexType_t
Definition: miopen.h:391
miopenStatus_t miopenGetPoolingIndexType(miopenPoolingDescriptor_t poolDesc, miopenIndexType_t *index_type)
Get the index data type for pooling layer. The index type can be any of the miopenIndexType_t sizes: 8-, 16-, 32-, or 64-bit unsigned integers.
miopenPoolingWorkspaceIndexMode_t
Definition: miopen.h:451
miopenStatus_t miopenGetPoolingNdForwardOutputDim(const miopenPoolingDescriptor_t poolDesc, const miopenTensorDescriptor_t tensorDesc, int dims, int *tensorDimArr)
Gets the shape of the output tensor for N-D pooling.
miopenStatus_t miopenGetNdPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc, int nbDimsRequested, miopenPoolingMode_t *mode, int *nbDims, int *windowDimA, int *padA, int *stridesA)
Get details of an N-D pooling layer descriptor.
miopenStatus_t miopenCreatePoolingDescriptor(miopenPoolingDescriptor_t *poolDesc)
Creates a pooling layer descriptor.
miopenStatus_t miopenSetPoolingIndexType(miopenPoolingDescriptor_t poolDesc, miopenIndexType_t index_type)
Set index data type for pooling layer. The default indexing type is uint8_t. Users can set the index type to any of the miopenIndexType_t sizes: 8-, 16-, 32-, or 64-bit unsigned integers.
miopenStatus_t miopenGet2dPoolingDescriptor(const miopenPoolingDescriptor_t poolDesc, miopenPoolingMode_t *mode, int *windowHeight, int *windowWidth, int *pad_h, int *pad_w, int *stride_h, int *stride_w)
Gets the details of a 2-D pooling layer descriptor.
miopenStatus_t miopenDestroyPoolingDescriptor(miopenPoolingDescriptor_t poolDesc)
Destroys the pooling descriptor object.
miopenStatus_t miopenPoolingBackward(miopenHandle_t handle, const miopenPoolingDescriptor_t poolDesc, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, void *workSpace)
Execute a backward pooling layer.
miopenStatus_t miopenReduceCalculationForward(miopenHandle_t handle, miopenReduceCalculationNanPropagation_t nanPropagation, void *workspace, size_t workspaceSizeInBytes, const miopenTensorDescriptor_t xDesc, const void *x, const int32_t dim, const miopenReduceCalculationOp_t reduceCalculationOp, const miopenTensorDescriptor_t reduceDesc, void *y)
Execute a reducecalculation forward layer.
miopenReduceCalculationOp_t
Definition: miopen.h:6663
miopenStatus_t miopenGetReduceCalculationWorkspaceSize(miopenHandle_t handle, const miopenTensorDescriptor_t xDesc, const int32_t dim, const miopenReduceCalculationOp_t reduceCalculationOp, const miopenTensorDescriptor_t reduceDesc, size_t *sizeInBytes)
Helper function to query the minimum workspace size required by the ReduceCalculation call.
@ MIOPEN_REDUCE_CALCULATION_PROD
Definition: miopen.h:6664
@ MIOPEN_REDUCE_CALCULATION_SUM
Definition: miopen.h:6666
miopenStatus_t miopenSoftmaxBackward_V2(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode)
Execute a softmax backwards layer with expanded modes and algorithms.
miopenStatus_t miopenSoftmaxForward(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Execute a softmax forward layer.
miopenStatus_t miopenSoftmaxForward_V2(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y, miopenSoftmaxAlgorithm_t algorithm, miopenSoftmaxMode_t mode)
Execute a softmax forward layer with expanded modes and algorithms.
miopenStatus_t miopenSoftmaxBackward(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t yDesc, const void *y, const miopenTensorDescriptor_t dyDesc, const void *dy, const void *beta, const miopenTensorDescriptor_t dxDesc, void *dx)
Execute a softmax backwards layer.
miopenSoftmaxMode_t
Definition: miopen.h:534
miopenSoftmaxAlgorithm_t
Definition: miopen.h:523
miopenStatus_t miopenDestroySeqTensorDescriptor(miopenSeqTensorDescriptor_t tensorDesc)
Destroys the sequence data tensor descriptor.
miopenStatus_t miopenGetTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t *dataType, int *dimsA, int *stridesA)
Get the details of the N-dimensional tensor descriptor.
miopenStatus_t miopenSetNdTensorDescriptorWithLayout(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, miopenTensorLayout_t tensorLayout, const int *lens, int num_lens)
Set shape of ND tensor with specific layout.
miopenStatus_t miopenSetTensor(miopenHandle_t handle, const miopenTensorDescriptor_t yDesc, void *y, const void *alpha)
Fills a tensor with a single value.
miopenStatus_t miopenOpTensor(miopenHandle_t handle, miopenTensorOp_t tensorOp, const void *alpha1, const miopenTensorDescriptor_t aDesc, const void *A, const void *alpha2, const miopenTensorDescriptor_t bDesc, const void *B, const void *beta, const miopenTensorDescriptor_t cDesc, void *C)
Execute element-wise tensor operations.
miopenStatus_t miopenGetTensorNumBytes(miopenTensorDescriptor_t tensorDesc, size_t *numBytes)
Returns number of bytes associated with tensor descriptor.
miopenStatus_t miopenGet4dTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t *dataType, int *n, int *c, int *h, int *w, int *nStride, int *cStride, int *hStride, int *wStride)
Get the details of the tensor descriptor.
miopenTensorLayout_t
Definition: miopen.h:374
miopenStatus_t miopenTransformTensor(miopenHandle_t handle, const void *alpha, const miopenTensorDescriptor_t xDesc, const void *x, const void *beta, const miopenTensorDescriptor_t yDesc, void *y)
Copies one tensor to another tensor with a different layout/scale.
miopenStatus_t miopenCreateTensorDescriptor(miopenTensorDescriptor_t *tensorDesc)
Create a Tensor Descriptor.
miopenStatus_t miopenSetTensorDescriptorV2(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int nbDims, const size_t *dimsA, const size_t *stridesA)
Set shape of N-dimensional tensor.
miopenStatus_t miopenScaleTensor(miopenHandle_t handle, const miopenTensorDescriptor_t yDesc, void *y, const void *alpha)
Scales all elements in a tensor by a single value.
miopenStatus_t miopenSetTensorCastType(miopenTensorDescriptor_t tensorDesc, miopenDataType_t cast_type)
Set the tensor cast type.
miopenStatus_t miopenSet4dTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int n, int c, int h, int w)
Set shape of 4D tensor.
miopenStatus_t miopenSet4dTensorDescriptorEx(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int n, int c, int h, int w, int nStride, int cStride, int hStride, int wStride)
Set shape and stride of 4D tensor.
miopenDataType_t
Definition: miopen.h:354
miopenStatus_t miopenCreateSeqTensorDescriptor(miopenSeqTensorDescriptor_t *tensorDesc)
Create a Tensor Descriptor for sequence data.
miopenTensorOp_t
Definition: miopen.h:403
miopenStatus_t miopenGetTensorDescriptorSize(miopenTensorDescriptor_t tensorDesc, int *size)
Get the size of the tensor described by the tensor descriptor.
miopenStatus_t miopenDestroyTensorDescriptor(miopenTensorDescriptor_t tensorDesc)
Destroys the tensor descriptor.
miopenStatus_t miopenSetTensorDescriptor(miopenTensorDescriptor_t tensorDesc, miopenDataType_t dataType, int nbDims, const int *dimsA, const int *stridesA)
Set shape of N-dimensional tensor.
miopenPaddingMode_t
Definition: miopen.h:427
@ miopenPaddingDefault
Definition: miopen.h:428
@ miopenPaddingSame
Definition: miopen.h:429
@ miopenPaddingValid
Definition: miopen.h:430
miopenReduceCalculationNanPropagation_t
Definition: miopen.h:6647
@ MIOPEN_REDUCE_CALCULATION_PROPAGATE_NAN
Definition: miopen.h:6649
@ MIOPEN_REDUCE_CALCULATION_NOT_PROPAGATE_NAN
Definition: miopen.h:6648
miopenAlphaBetaCase_t
Enum for specifying the alpha-beta case for convolution operations.
Definition: miopen.h:6970
@ SCALE
Definition: miopen.h:6977
@ BILINEAR
Definition: miopen.h:6978
@ DEFAULT
Definition: miopen.h:6976
@ ERROR_STATE
Definition: miopen.h:6979
#define MIOPEN_DECLARE_OBJECT(name)
Definition: miopen.h:57
miopenNanPropagation_t
Definition: miopen.h:579
@ MIOPEN_PROPAGATE_NAN
Definition: miopen.h:581
@ MIOPEN_NOT_PROPAGATE_NAN
Definition: miopen.h:580
@ MIOPEN_LOSS_REDUCTION_MEAN
Definition: miopen.h:7837
@ MIOPEN_LOSS_REDUCTION_SUM
Definition: miopen.h:7836
@ MIOPEN_LOSS_REDUCTION_NONE
Definition: miopen.h:7835
@ miopenTuningPolicyDbClean
Definition: miopen.h:8023
@ miopenTuningPolicyDbUpdate
Definition: miopen.h:8019
@ miopenTuningPolicyNone
Definition: miopen.h:8018
@ miopenTuningPolicySearch
Definition: miopen.h:8020
@ miopenTuningPolicySearchDbUpdate
Definition: miopen.h:8022
@ miopenPoolingAverage
Definition: miopen.h:440
@ miopenPoolingAverageInclusive
Definition: miopen.h:441
@ miopenPoolingMax
Definition: miopen.h:439
@ miopenTensorCHWN
Definition: miopen.h:377
@ miopenTensorNHWC
Definition: miopen.h:376
@ miopenTensorNDHWC
Definition: miopen.h:383
@ miopenTensorNCHW
Definition: miopen.h:375
@ miopenTensorCHWNc4
Definition: miopen.h:380
@ miopenTensorNCHWc8
Definition: miopen.h:379
@ miopenTensorNCDHW
Definition: miopen.h:382
@ miopenTensorCHWNc8
Definition: miopen.h:381
@ miopenTensorNCHWc4
Definition: miopen.h:378
@ MIOPEN_REDUCE_TENSOR_FLATTENED_INDICES
Definition: miopen.h:591
@ MIOPEN_REDUCE_TENSOR_NO_INDICES
Definition: miopen.h:590
@ miopenActivationLEAKYRELU
Definition: miopen.h:510
@ miopenActivationPASTHRU
Definition: miopen.h:501
@ miopenActivationABS
Definition: miopen.h:506
@ miopenActivationLOGISTIC
Definition: miopen.h:502
@ miopenActivationCLIPPEDRELU
Definition: miopen.h:508
@ miopenActivationRELU
Definition: miopen.h:504
@ miopenActivationPOWER
Definition: miopen.h:507
@ miopenActivationELU
Definition: miopen.h:512
@ miopenActivationCLAMP
Definition: miopen.h:515
@ miopenActivationSOFTRELU
Definition: miopen.h:505
@ miopenActivationTANH
Definition: miopen.h:503
@ MIOPEN_REDUCE_TENSOR_MUL
Definition: miopen.h:556
@ MIOPEN_REDUCE_TENSOR_MAX
Definition: miopen.h:560
@ MIOPEN_REDUCE_TENSOR_AVG
Definition: miopen.h:564
@ MIOPEN_REDUCE_TENSOR_NORM1
Definition: miopen.h:566
@ MIOPEN_REDUCE_TENSOR_AMAX
Definition: miopen.h:562
@ MIOPEN_REDUCE_TENSOR_MIN
Definition: miopen.h:558
@ MIOPEN_REDUCE_TENSOR_ADD
Definition: miopen.h:555
@ MIOPEN_REDUCE_TENSOR_NORM2
Definition: miopen.h:568
@ MIOPEN_CONVOLUTION_ATTRIB_DETERMINISTIC
Definition: miopen.h:616
@ MIOPEN_CONVOLUTION_ATTRIB_FP8_ROUNDING_MODE
Definition: miopen.h:620
@ MIOPEN_CONVOLUTION_ATTRIB_MATH_TYPE
Definition: miopen.h:629
@ MIOPEN_CONVOLUTION_ATTRIB_FP16_ALT_IMPL
Definition: miopen.h:612
@ miopenIndexUint16
Definition: miopen.h:393
@ miopenIndexUint64
Definition: miopen.h:395
@ miopenIndexUint32
Definition: miopen.h:394
@ miopenIndexUint8
Definition: miopen.h:392
@ miopenFloat
Definition: miopen.h:356
@ miopenBFloat8_fnuz
Definition: miopen.h:364
@ miopenBFloat16
Definition: miopen.h:360
@ miopenInt8
Definition: miopen.h:358
@ miopenInt32
Definition: miopen.h:357
@ miopenHalf
Definition: miopen.h:355
@ miopenInt64
Definition: miopen.h:365
@ miopenDouble
Definition: miopen.h:362
@ miopenFloat8_fnuz
Definition: miopen.h:363
@ MIOPEN_8BIT_INDICES
Definition: miopen.h:603
@ MIOPEN_32BIT_INDICES
Definition: miopen.h:600
@ MIOPEN_16BIT_INDICES
Definition: miopen.h:602
@ MIOPEN_64BIT_INDICES
Definition: miopen.h:601
@ miopenPoolingWorkspaceIndexImage
Definition: miopen.h:453
@ miopenPoolingWorkspaceIndexMask
Definition: miopen.h:452
@ miopenDepthwise
Definition: miopen.h:419
@ miopenGroupConv
Definition: miopen.h:418
@ miopenTranspose
Definition: miopen.h:417
@ miopenConvolution
Definition: miopen.h:416
@ MIOPEN_ELEMENTWISE_AFFINE_FUSED_ADD
Definition: miopen.h:475
@ MIOPEN_WEIGHT_BIAS
Definition: miopen.h:473
@ MIOPEN_ELEMENTWISE_AFFINE
Definition: miopen.h:472
@ MIOPEN_WEIGHT_BIAS_T5
Definition: miopen.h:481
@ MIOPEN_ELEMENTWISE_AFFINE_T5
Definition: miopen.h:479
@ MIOPEN_WEIGHT_BIAS_FUSED_ADD
Definition: miopen.h:477
@ MIOPEN_REDUCE_EXTREME_ARGMAX
Definition: miopen.h:6726
@ MIOPEN_REDUCE_EXTREME_MIN
Definition: miopen.h:6728
@ MIOPEN_REDUCE_EXTREME_MAX
Definition: miopen.h:6730
@ MIOPEN_REDUCE_EXTREME_ARGMIN
Definition: miopen.h:6724
@ miopenTensorOpMin
Definition: miopen.h:406
@ miopenTensorOpAdd
Definition: miopen.h:404
@ miopenTensorOpMul
Definition: miopen.h:405
@ miopenTensorOpMax
Definition: miopen.h:407
@ miopenBNSpatial
Definition: miopen.h:492
@ miopenBNPerActivation
Definition: miopen.h:491
@ miopenConvolutionFindModeDynamicHybrid
Definition: miopen.h:652
@ miopenConvolutionFindModeDefault
Definition: miopen.h:659
@ miopenConvolutionFindModeTrustVerifyFull
Definition: miopen.h:658
@ miopenConvolutionFindModeTrustVerify
Definition: miopen.h:657
@ miopenConvolutionFindModeHybrid
Definition: miopen.h:647
@ miopenConvolutionFindModeFast
Definition: miopen.h:643
@ miopenConvolutionFindModeNormal
Definition: miopen.h:641
@ miopenLRNWithinChannel
Definition: miopen.h:462
@ miopenLRNCrossChannel
Definition: miopen.h:463
@ MIOPEN_SOFTMAX_MODE_INSTANCE
Definition: miopen.h:535
@ MIOPEN_SOFTMAX_MODE_CHANNEL
Definition: miopen.h:536
@ MIOPEN_SOFTMAX_FAST
Definition: miopen.h:524
@ MIOPEN_SOFTMAX_ACCURATE
Definition: miopen.h:525
@ MIOPEN_SOFTMAX_LOG
Definition: miopen.h:526
Perf struct for forward, backward filter, or backward data algorithms.
Definition: miopen.h:1269
miopenConvFwdAlgorithm_t fwd_algo
Definition: miopen.h:1272
miopenConvBwdDataAlgorithm_t bwd_data_algo
Definition: miopen.h:1276
size_t memory
Definition: miopen.h:1280
miopenConvBwdWeightsAlgorithm_t bwd_weights_algo
Definition: miopen.h:1273
float time
Definition: miopen.h:1279
Performance struct for forward, backward filter, or backward data algorithms in immediate mode.
Definition: miopen.h:1293
miopenConvAlgorithm_t algorithm
Definition: miopen.h:1300
uint64_t solution_id
Definition: miopen.h:1299
size_t workspace_size
Definition: miopen.h:1297
float time
Definition: miopen.h:1294
Values of a tensor or scalar argument for the miopenRunSolution function.
Definition: miopen.h:6461
miopenTensorArgumentId_t id
Definition: miopen.h:6464
void * buffer
Definition: miopen.h:6474
miopenTensorDescriptor_t * descriptor
Definition: miopen.h:6470