SGD
Functions

- miopenStatus_t miopenFusedAdam(...)
  Perform Fused Adam optimization for a single tensor (Adaptive Moment Estimation).
- miopenStatus_t miopenFusedAdamWithOutput(...)
  Execute single tensor Adam optimization and receive the result in a separate output tensor.
- miopenStatus_t miopenTransformersAdamW(...)
  Implements the Adam algorithm with the weight decay fix introduced in "Decoupled Weight Decay Regularization". This is the fused kernel version of the AdamW optimizer included in the Hugging Face Transformers module.
- miopenStatus_t miopenTransformersAdamWWithOutput(...)
  Execute single tensor AdamW optimization and receive the result in a separate output tensor.

Full signatures are listed in the function documentation below.
Function Documentation
◆ miopenFusedAdam()
miopenStatus_t miopenFusedAdam(
    miopenHandle_t handle,
    const miopenTensorDescriptor_t paramDesc,
    void *param,
    const miopenTensorDescriptor_t gradDesc,
    const void *grad,
    const miopenTensorDescriptor_t expAvgDesc,
    void *expAvg,
    const miopenTensorDescriptor_t expAvgSqDesc,
    void *expAvgSq,
    const miopenTensorDescriptor_t maxExpAvgSqDesc,
    void *maxExpAvgSq,
    const miopenTensorDescriptor_t stateStepDesc,
    void *stateStep,
    const unsigned int state_step,
    const float lr,
    const float beta1,
    const float beta2,
    const float weight_decay,
    const float eps,
    const bool amsgrad,
    const bool maximize,
    const bool adamw,
    const miopenTensorDescriptor_t gradScaleDesc,
    const void *gradScale,
    const miopenTensorDescriptor_t foundInfDesc,
    const void *foundInf)
Perform Fused Adam optimization for a single tensor (Adaptive Moment Estimation).
This function implements the Fused Adam optimization algorithm. Adam, short for Adaptive Moment Estimation, extends the RMSProp optimizer: it combines the advantages of AdaGrad and RMSProp by adaptively adjusting the learning rate for each parameter using the first and second moments of the gradients. The fused implementation combines the optimizer's multiple elementwise operations into a single kernel, reducing memory access overhead and improving performance.
Fused Adam can also be used in both AdamW and Automatic Mixed Precision (AMP) modes, enabling accelerated model training and reduced memory consumption. AMP supports FP16 computation, optimizing model calculations with a mixture of FP32 and FP16 precision to enhance training speed. When AMP is used, the foundInf, gradScale, and step tensors should be supplied; in AMP mode, whether the Adam update executes is determined by the foundInf value. The state step is accepted either as an int value or as an int tensor: if a step tensor is supplied, the step passed as an int is ignored, and when the Adam update executes, the step tensor is incremented by 1.
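For reference, the per-element update of the standard Adam formulation (Kingma & Ba, 2015) that this function fuses is sketched below; expAvg holds m, expAvgSq holds v, and with amsgrad a running maximum of the second moment (maxExpAvgSq) replaces it in the denominator. The exact in-kernel arithmetic and ordering are implementation details.

```latex
% Standard Adam update at step t, applied elementwise.
% g_t is the gradient (negated when maximize is set); with adamw,
% weight decay is applied to \theta directly rather than folded into g_t.
\begin{aligned}
m_t       &= \beta_1 m_{t-1} + (1 - \beta_1)\, g_t          && \text{(expAvg)}   \\
v_t       &= \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2        && \text{(expAvgSq)} \\
\hat{m}_t &= m_t / (1 - \beta_1^t), \quad
\hat{v}_t  = v_t / (1 - \beta_2^t)                                               \\
\theta_t  &= \theta_{t-1} - \mathrm{lr}\,\hat{m}_t / \big(\sqrt{\hat{v}_t} + \epsilon\big)
\end{aligned}
```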
- Parameters
  - handle: MIOpen handle (input)
  - paramDesc: Tensor descriptor for the input parameter tensor (input)
  - param: Input parameter tensor (input)
  - gradDesc: Tensor descriptor for the input gradient tensor (input)
  - grad: Input gradient tensor (input)
  - expAvgDesc: Tensor descriptor for the input exponential moving average tensor (input)
  - expAvg: Input exponential moving average tensor (input)
  - expAvgSqDesc: Tensor descriptor for the input exponential moving average squared tensor (input)
  - expAvgSq: Input exponential moving average squared tensor (input)
  - maxExpAvgSqDesc: Tensor descriptor for the input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
  - maxExpAvgSq: Input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
  - stateStepDesc: Tensor descriptor for the input state step tensor (input)
  - stateStep: Input state step tensor (input)
  - state_step: Input state step; used when the step tensor is null (input)
  - lr: Learning rate (input)
  - beta1: Coefficient used for computing the running average of the gradient (first moment) (input)
  - beta2: Coefficient used for computing the running average of the squared gradient (second moment) (input)
  - weight_decay: Weight decay (input)
  - eps: Term added to the denominator to improve numerical stability (input)
  - amsgrad: Flag indicating whether to use the AMSGrad variant of Adam (input)
  - maximize: Flag indicating whether to maximize the objective with respect to the parameters (input)
  - adamw: If true, the operation becomes AdamW (input)
  - gradScaleDesc: Tensor descriptor for the input grad scale tensor (input, optional)
  - gradScale: Input grad scale tensor (input, optional)
  - foundInfDesc: Tensor descriptor for the input found-inf tensor (input, optional)
  - foundInf: Tensor indicating the presence of inf or NaN in the gradients; if true, the operation and step update are skipped (input, optional)
- Returns
- miopenStatus_t
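As a usage reference, here is a minimal sketch of one non-AMP optimizer step on a single contiguous fp32 tensor. The MIOpen calls (miopenCreateTensorDescriptor, miopenSetTensorDescriptor, miopenFusedAdam, miopenDestroyTensorDescriptor) are from the MIOpen C API; the helper function, buffer names, and hyperparameter values are illustrative placeholders, and error checking is elided.

```c
#include <stdbool.h>
#include <miopen/miopen.h>

/* Hypothetical helper: applies one Fused Adam step to n fp32 elements.
 * param/grad/expAvg/expAvgSq are device buffers allocated elsewhere. */
void fused_adam_step(miopenHandle_t handle, int n,
                     void *param, const void *grad,
                     void *expAvg, void *expAvgSq,
                     unsigned int step)
{
    /* One contiguous 1-D descriptor shared by all four tensors. */
    miopenTensorDescriptor_t desc;
    int dims[1]    = {n};
    int strides[1] = {1};
    miopenCreateTensorDescriptor(&desc);
    miopenSetTensorDescriptor(desc, miopenFloat, 1, dims, strides);

    /* No step tensor is supplied, so the integer state_step drives the
     * bias correction; AMP tensors (gradScale/foundInf) are omitted. */
    miopenFusedAdam(handle,
                    desc, param,
                    desc, grad,
                    desc, expAvg,
                    desc, expAvgSq,
                    NULL, NULL,     /* maxExpAvgSq: only used when amsgrad */
                    NULL, NULL,     /* stateStep tensor: the int below is used */
                    step,           /* state_step */
                    1e-3f,          /* lr */
                    0.9f, 0.999f,   /* beta1, beta2 */
                    0.0f,           /* weight_decay */
                    1e-8f,          /* eps */
                    false,          /* amsgrad */
                    false,          /* maximize */
                    false,          /* adamw */
                    NULL, NULL,     /* gradScale (AMP only) */
                    NULL, NULL);    /* foundInf (AMP only) */

    miopenDestroyTensorDescriptor(desc);
}
```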
◆ miopenFusedAdamWithOutput()
miopenStatus_t miopenFusedAdamWithOutput(
    miopenHandle_t handle,
    const miopenTensorDescriptor_t paramInDesc,
    void *paramIn,
    const miopenTensorDescriptor_t paramOutDesc,
    void *paramOut,
    const miopenTensorDescriptor_t paramOutFloat16Desc,
    void *paramOutFloat16,
    const miopenTensorDescriptor_t gradInDesc,
    const void *gradIn,
    const miopenTensorDescriptor_t expAvgInDesc,
    void *expAvgIn,
    const miopenTensorDescriptor_t expAvgOutDesc,
    void *expAvgOut,
    const miopenTensorDescriptor_t expAvgSqInDesc,
    void *expAvgSqIn,
    const miopenTensorDescriptor_t expAvgSqOutDesc,
    void *expAvgSqOut,
    const miopenTensorDescriptor_t maxExpAvgSqInDesc,
    void *maxExpAvgSqIn,
    const miopenTensorDescriptor_t maxExpAvgSqOutDesc,
    void *maxExpAvgSqOut,
    const miopenTensorDescriptor_t stateStepInDesc,
    void *stateStepIn,
    const miopenTensorDescriptor_t stateStepOutDesc,
    void *stateStepOut,
    const unsigned int state_step,
    const float lr,
    const float beta1,
    const float beta2,
    const float weight_decay,
    const float eps,
    const bool amsgrad,
    const bool maximize,
    const bool adamw,
    const miopenTensorDescriptor_t gradScaleDesc,
    const void *gradScale,
    const miopenTensorDescriptor_t foundInfDesc,
    const void *foundInf)
Execute single tensor Adam optimization and receive the result in a separate output tensor.
This function is equivalent to miopenFusedAdam, but the results are written to separate output tensors.
- See also
- miopenFusedAdam
- Parameters
  - handle: MIOpen handle (input)
  - paramInDesc: Tensor descriptor for the input parameter tensor (input)
  - paramIn: Input parameter tensor (input)
  - paramOutDesc: Tensor descriptor for the output parameter tensor (input)
  - paramOut: Output parameter tensor (output)
  - paramOutFloat16Desc: Tensor descriptor for the float16 output parameter tensor (input, optional)
  - paramOutFloat16: Float16 output parameter tensor (output, optional)
  - gradInDesc: Tensor descriptor for the input gradient tensor (input)
  - gradIn: Input gradient tensor (input)
  - expAvgInDesc: Tensor descriptor for the input exponential moving average tensor (input)
  - expAvgIn: Input exponential moving average tensor (input)
  - expAvgOutDesc: Tensor descriptor for the output exponential moving average tensor (input)
  - expAvgOut: Output exponential moving average tensor (output)
  - expAvgSqInDesc: Tensor descriptor for the input exponential moving average squared tensor (input)
  - expAvgSqIn: Input exponential moving average squared tensor (input)
  - expAvgSqOutDesc: Tensor descriptor for the output exponential moving average squared tensor (input)
  - expAvgSqOut: Output exponential moving average squared tensor (output)
  - maxExpAvgSqInDesc: Tensor descriptor for the input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
  - maxExpAvgSqIn: Input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
  - maxExpAvgSqOutDesc: Tensor descriptor for the output maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
  - maxExpAvgSqOut: Output maximum exponential moving average squared tensor; used when amsgrad is true (output, optional)
  - stateStepInDesc: Tensor descriptor for the input state step tensor (input, optional)
  - stateStepIn: Input state step tensor (input, optional)
  - stateStepOutDesc: Tensor descriptor for the output state step tensor (input, optional)
  - stateStepOut: Output state step tensor that stores the updated step value (output, optional)
  - state_step: Input state step; used when the step tensor is null (input)
  - lr: Learning rate (input)
  - beta1: Coefficient used for computing the running average of the gradient (first moment) (input)
  - beta2: Coefficient used for computing the running average of the squared gradient (second moment) (input)
  - weight_decay: Weight decay (input)
  - eps: Term added to the denominator to improve numerical stability (input)
  - amsgrad: Flag indicating whether to use the AMSGrad variant of Adam (input)
  - maximize: Flag indicating whether to maximize the objective with respect to the parameters (input)
  - adamw: If true, the operation becomes AdamW (input)
  - gradScaleDesc: Tensor descriptor for the input grad scale tensor (input, optional)
  - gradScale: Input grad scale tensor (input, optional)
  - foundInfDesc: Tensor descriptor for the input found-inf tensor (input, optional)
  - foundInf: Tensor indicating the presence of inf or NaN in the gradients; if true, the operation and step update are skipped (input, optional)
- Returns
- miopenStatus_t
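Building on the sketch above, the following fragment shows how the out-of-place variant might be wired into an AMP loss-scaling loop: fp32 master weights in and out, an optional fp16 shadow copy of the updated weights, and the gradScale/foundInf tensors produced by the scaler. All descriptor and buffer names (fp32Desc, fp16Desc, stepDesc, scaleDesc, infDesc, and the data pointers) are placeholders assumed to be created as in the previous example; treating the incoming gradients as fp16 is likewise an assumption of this sketch.

```c
miopenFusedAdamWithOutput(handle,
    fp32Desc, paramIn,            /* fp32 master weights (input) */
    fp32Desc, paramOut,           /* updated fp32 weights (output) */
    fp16Desc, paramOutFp16,       /* optional fp16 copy of the new weights */
    fp16Desc, gradIn,             /* scaled gradients (fp16 here by assumption) */
    fp32Desc, expAvgIn,   fp32Desc, expAvgOut,
    fp32Desc, expAvgSqIn, fp32Desc, expAvgSqOut,
    NULL, NULL, NULL, NULL,       /* maxExpAvgSq in/out: amsgrad only */
    stepDesc, stepIn,             /* step tensor supplied, so the int  */
    stepDesc, stepOut,            /* state_step below is ignored       */
    0,                            /* state_step (unused here) */
    1e-3f, 0.9f, 0.999f,          /* lr, beta1, beta2 */
    1e-2f, 1e-8f,                 /* weight_decay, eps */
    false, false,                 /* amsgrad, maximize */
    true,                         /* adamw */
    scaleDesc, gradScale,         /* loss scale used to unscale gradients */
    infDesc, foundInf);           /* nonzero skips the update and step bump */
```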
◆ miopenTransformersAdamW()
miopenStatus_t miopenTransformersAdamW(
    miopenHandle_t handle,
    const miopenTensorDescriptor_t paramDesc,
    void *param,
    const miopenTensorDescriptor_t gradDesc,
    const void *grad,
    const miopenTensorDescriptor_t expAvgDesc,
    void *expAvg,
    const miopenTensorDescriptor_t expAvgSqDesc,
    void *expAvgSq,
    const miopenTensorDescriptor_t stateStepDesc,
    void *stateStep,
    const unsigned int state_step,
    const float lr,
    const float beta1,
    const float beta2,
    const float weight_decay,
    const float eps,
    const bool correct_bias,
    const miopenTensorDescriptor_t gradScaleDesc,
    const void *gradScale,
    const miopenTensorDescriptor_t foundInfDesc,
    const void *foundInf)
Implements the Adam algorithm with the weight decay fix introduced in "Decoupled Weight Decay Regularization". This is the fused kernel version of the AdamW optimizer included in the Hugging Face Transformers module.
- See also
- miopenFusedAdam
- Parameters
  - handle: MIOpen handle (input)
  - paramDesc: Tensor descriptor for the input parameter tensor (input)
  - param: Input parameter tensor (input)
  - gradDesc: Tensor descriptor for the input gradient tensor (input)
  - grad: Input gradient tensor (input)
  - expAvgDesc: Tensor descriptor for the input exponential moving average tensor (input)
  - expAvg: Input exponential moving average tensor (input)
  - expAvgSqDesc: Tensor descriptor for the input exponential moving average squared tensor (input)
  - expAvgSq: Input exponential moving average squared tensor (input)
  - stateStepDesc: Tensor descriptor for the input state step tensor (input)
  - stateStep: Input state step tensor (input)
  - state_step: Input state step; used when the step tensor is null (input)
  - lr: Learning rate (input)
  - beta1: Coefficient used for computing the running average of the gradient (first moment) (input)
  - beta2: Coefficient used for computing the running average of the squared gradient (second moment) (input)
  - weight_decay: Weight decay (input)
  - eps: Term added to the denominator to improve numerical stability (input)
  - correct_bias: Flag indicating whether to correct the bias in Adam (for instance, the BERT TF repository uses False) (input)
  - gradScaleDesc: Tensor descriptor for the input grad scale tensor (input, optional)
  - gradScale: Input grad scale tensor (input, optional)
  - foundInfDesc: Tensor descriptor for the input found-inf tensor (input, optional)
  - foundInf: Tensor indicating the presence of inf or NaN in the gradients; if true, the operation and step update are skipped (input, optional)
- Returns
- miopenStatus_t
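As a sketch (not a verified sample), a call mirroring the default hyperparameters of the Transformers AdamW optimizer (lr=1e-3, betas=(0.9, 0.999), eps=1e-6, weight_decay=0.0, correct_bias=true) could look like the following; desc, the buffer pointers, and step are placeholders set up as in the miopenFusedAdam example above.

```c
miopenTransformersAdamW(handle,
    desc, param,
    desc, grad,
    desc, expAvg,
    desc, expAvgSq,
    NULL, NULL,        /* no step tensor: the int below is used */
    step,              /* state_step */
    1e-3f,             /* lr */
    0.9f, 0.999f,      /* beta1, beta2 */
    0.0f,              /* weight_decay */
    1e-6f,             /* eps */
    true,              /* correct_bias (the BERT TF repository used false) */
    NULL, NULL,        /* gradScale (AMP only) */
    NULL, NULL);       /* foundInf (AMP only) */
```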
◆ miopenTransformersAdamWWithOutput()
miopenStatus_t miopenTransformersAdamWWithOutput(
    miopenHandle_t handle,
    const miopenTensorDescriptor_t paramInDesc,
    void *paramIn,
    const miopenTensorDescriptor_t paramOutDesc,
    void *paramOut,
    const miopenTensorDescriptor_t paramOutFloat16Desc,
    void *paramOutFloat16,
    const miopenTensorDescriptor_t gradInDesc,
    const void *gradIn,
    const miopenTensorDescriptor_t expAvgInDesc,
    void *expAvgIn,
    const miopenTensorDescriptor_t expAvgOutDesc,
    void *expAvgOut,
    const miopenTensorDescriptor_t expAvgSqInDesc,
    void *expAvgSqIn,
    const miopenTensorDescriptor_t expAvgSqOutDesc,
    void *expAvgSqOut,
    const miopenTensorDescriptor_t stateStepInDesc,
    void *stateStepIn,
    const miopenTensorDescriptor_t stateStepOutDesc,
    void *stateStepOut,
    const unsigned int state_step,
    const float lr,
    const float beta1,
    const float beta2,
    const float weight_decay,
    const float eps,
    const float step_size,
    const bool correct_bias,
    const miopenTensorDescriptor_t gradScaleDesc,
    const void *gradScale,
    const miopenTensorDescriptor_t foundInfDesc,
    const void *foundInf)
Execute single tensor Adam optimization and receive the result in a separate output tensor.
This function is equivalent to miopenTransformersAdamW, but the results are written to separate output tensors.
- Parameters
  - handle: MIOpen handle (input)
  - paramInDesc: Tensor descriptor for the input parameter tensor (input)
  - paramIn: Input parameter tensor (input)
  - paramOutDesc: Tensor descriptor for the output parameter tensor (input)
  - paramOut: Output parameter tensor (output)
  - paramOutFloat16Desc: Tensor descriptor for the float16 output parameter tensor (input, optional)
  - paramOutFloat16: Float16 output parameter tensor (output, optional)
  - gradInDesc: Tensor descriptor for the input gradient tensor (input)
  - gradIn: Input gradient tensor (input)
  - expAvgInDesc: Tensor descriptor for the input exponential moving average tensor (input)
  - expAvgIn: Input exponential moving average tensor (input)
  - expAvgOutDesc: Tensor descriptor for the output exponential moving average tensor (input)
  - expAvgOut: Output exponential moving average tensor (output)
  - expAvgSqInDesc: Tensor descriptor for the input exponential moving average squared tensor (input)
  - expAvgSqIn: Input exponential moving average squared tensor (input)
  - expAvgSqOutDesc: Tensor descriptor for the output exponential moving average squared tensor (input)
  - expAvgSqOut: Output exponential moving average squared tensor (output)
  - stateStepInDesc: Tensor descriptor for the input state step tensor (input, optional)
  - stateStepIn: Input state step tensor (input, optional)
  - stateStepOutDesc: Tensor descriptor for the output state step tensor (input, optional)
  - stateStepOut: Output state step tensor that stores the updated step value (output, optional)
  - state_step: Input state step; used when the step tensor is null (input)
  - lr: Learning rate (input)
  - beta1: Coefficient used for computing the running average of the gradient (first moment) (input)
  - beta2: Coefficient used for computing the running average of the squared gradient (second moment) (input)
  - weight_decay: Weight decay (input)
  - eps: Term added to the denominator to improve numerical stability (input)
  - step_size: Pre-calculated step size, used for performance enhancement (input)
  - correct_bias: Flag indicating whether to correct the bias in Adam (for instance, the BERT TF repository uses False) (input)
  - gradScaleDesc: Tensor descriptor for the input grad scale tensor (input, optional)
  - gradScale: Input grad scale tensor (input, optional)
  - foundInfDesc: Tensor descriptor for the input found-inf tensor (input, optional)
  - foundInf: Tensor indicating the presence of inf or NaN in the gradients; if true, the operation and step update are skipped (input, optional)
- Returns
- miopenStatus_t
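Finally, a sketch of the out-of-place Transformers variant. The one new argument is step_size; this page only describes it as a pre-calculated step size used for performance, so computing it host-side as the usual bias-corrected Adam step size is an assumption of this sketch, not documented behavior. The fragment requires <math.h> for sqrtf/powf; all other names are placeholders set up as in the earlier examples.

```c
/* Assumed (not documented here): step_size is the bias-corrected Adam
 * step size, precomputed on the host to spare the kernel the work. */
float t = (float)step;
float step_size = lr * sqrtf(1.0f - powf(beta2, t)) / (1.0f - powf(beta1, t));

miopenTransformersAdamWWithOutput(handle,
    desc, paramIn,   desc, paramOut,
    NULL, NULL,                           /* no fp16 copy of the weights */
    desc, gradIn,
    desc, expAvgIn,   desc, expAvgOut,
    desc, expAvgSqIn, desc, expAvgSqOut,
    stepDesc, stepIn, stepDesc, stepOut,  /* step tensor in/out */
    0,                                    /* state_step ignored: step tensor given */
    lr, beta1, beta2,                     /* hyperparameters as above */
    0.0f, 1e-6f,                          /* weight_decay, eps */
    step_size,
    true,                                 /* correct_bias */
    NULL, NULL,                           /* gradScale (AMP only) */
    NULL, NULL);                          /* foundInf (AMP only) */
```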