SGD

Functions

miopenStatus_t miopenFusedAdam (miopenHandle_t handle, const miopenTensorDescriptor_t paramDesc, void *param, const miopenTensorDescriptor_t gradDesc, const void *grad, const miopenTensorDescriptor_t expAvgDesc, void *expAvg, const miopenTensorDescriptor_t expAvgSqDesc, void *expAvgSq, const miopenTensorDescriptor_t maxExpAvgSqDesc, void *maxExpAvgSq, const miopenTensorDescriptor_t stateStepDesc, void *stateStep, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool amsgrad, const bool maximize, const bool adamw, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
 Perform Fused Adam optimization for a single tensor (Adaptive Moment Estimation). More...
 
miopenStatus_t miopenFusedAdamWithOutput (miopenHandle_t handle, const miopenTensorDescriptor_t paramInDesc, void *paramIn, const miopenTensorDescriptor_t paramOutDesc, void *paramOut, const miopenTensorDescriptor_t paramOutFloat16Desc, void *paramOutFloat16, const miopenTensorDescriptor_t gradInDesc, const void *gradIn, const miopenTensorDescriptor_t expAvgInDesc, void *expAvgIn, const miopenTensorDescriptor_t expAvgOutDesc, void *expAvgOut, const miopenTensorDescriptor_t expAvgSqInDesc, void *expAvgSqIn, const miopenTensorDescriptor_t expAvgSqOutDesc, void *expAvgSqOut, const miopenTensorDescriptor_t maxExpAvgSqInDesc, void *maxExpAvgSqIn, const miopenTensorDescriptor_t maxExpAvgSqOutDesc, void *maxExpAvgSqOut, const miopenTensorDescriptor_t stateStepInDesc, void *stateStepIn, const miopenTensorDescriptor_t stateStepOutDesc, void *stateStepOut, const unsigned int state_step, const float lr, const float beta1, const float beta2, const float weight_decay, const float eps, const bool amsgrad, const bool maximize, const bool adamw, const miopenTensorDescriptor_t gradScaleDesc, const void *gradScale, const miopenTensorDescriptor_t foundInfDesc, const void *foundInf)
 Execute single tensor Adam optimization and receive the result in a separate output tensor. More...
 

Function Documentation

◆ miopenFusedAdam()

miopenStatus_t miopenFusedAdam ( miopenHandle_t  handle,
const miopenTensorDescriptor_t  paramDesc,
void *  param,
const miopenTensorDescriptor_t  gradDesc,
const void *  grad,
const miopenTensorDescriptor_t  expAvgDesc,
void *  expAvg,
const miopenTensorDescriptor_t  expAvgSqDesc,
void *  expAvgSq,
const miopenTensorDescriptor_t  maxExpAvgSqDesc,
void *  maxExpAvgSq,
const miopenTensorDescriptor_t  stateStepDesc,
void *  stateStep,
const unsigned int  state_step,
const float  lr,
const float  beta1,
const float  beta2,
const float  weight_decay,
const float  eps,
const bool  amsgrad,
const bool  maximize,
const bool  adamw,
const miopenTensorDescriptor_t  gradScaleDesc,
const void *  gradScale,
const miopenTensorDescriptor_t  foundInfDesc,
const void *  foundInf 
)

Perform Fused Adam optimization for a single tensor (Adaptive Moment Estimation).

This function implements the Fused Adam optimization algorithm. Adam, short for Adaptive Moment Estimation, extends the RMSProp optimizer. It combines the advantages of AdaGrad and RMSProp by adaptively adjusting learning rates for each parameter using the first and second moments of gradients. Fused Adam optimization efficiently combines multiple operations into a single kernel, reducing memory access overhead and improving performance.
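
For reference, the per-element arithmetic behind each fused call is the standard Adam update. The sketch below is reference math only; the helper name and layout are illustrative and not part of the MIOpen API:

#include <math.h>

/* One standard Adam step for a single element, with L2-style weight decay
 * folded into the gradient (as in plain Adam). The maximize flag would negate
 * the gradient; adamw would instead decay the parameter directly. */
static void adam_step_scalar(float *param, float grad, float *exp_avg,
                             float *exp_avg_sq, unsigned int step, float lr,
                             float beta1, float beta2, float weight_decay,
                             float eps)
{
    float g = grad + weight_decay * (*param);
    *exp_avg    = beta1 * (*exp_avg)    + (1.0f - beta1) * g;      /* 1st moment */
    *exp_avg_sq = beta2 * (*exp_avg_sq) + (1.0f - beta2) * g * g;  /* 2nd moment */
    float m_hat = *exp_avg    / (1.0f - powf(beta1, (float)step)); /* bias correction */
    float v_hat = *exp_avg_sq / (1.0f - powf(beta2, (float)step));
    *param -= lr * m_hat / (sqrtf(v_hat) + eps);
}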

// Execute Adam
miopenFusedAdam(handle,
                paramDesc,
                param,
                gradDesc,
                grad,
                expAvgDesc,
                expAvg,
                expAvgSqDesc,
                expAvgSq,
                NULL, // Unused maxExpAvgSqDesc because amsgrad is false
                NULL,
                NULL, // Unused stateStep tensor because the step integer argument is used
                NULL,
                step,
                lr,
                beta1,
                beta2,
                weight_decay,
                eps,
                false, // amsgrad
                false, // maximize
                false, // adamw
                NULL, // Unused gradScale tensor because AMP is not used
                NULL,
                NULL, // Unused foundInf tensor because AMP is not used
                NULL);

// Execute AMP Adam
miopenFusedAdam(handle,
                paramDesc,
                param,
                gradDesc,
                grad,
                expAvgDesc,
                expAvg,
                expAvgSqDesc,
                expAvgSq,
                NULL, // Unused maxExpAvgSqDesc because amsgrad is false
                NULL,
                stateStepDesc,
                stateStep,
                -1, // Ignored because the stateStep tensor is used
                lr,
                beta1,
                beta2,
                weight_decay,
                eps,
                false, // amsgrad
                false, // maximize
                false, // adamw
                gradScaleDesc,
                gradScale,
                foundInfDesc,
                foundInf);
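
A fuller call sequence, including handle and descriptor setup, might look like the sketch below. It is a minimal sketch under stated assumptions: device buffers are already allocated and filled, all four state tensors share one shape so a single descriptor is reused for them, and the helper name is hypothetical.

#include <stdbool.h>
#include <miopen/miopen.h>

/* Run one non-AMP Adam step over n float32 elements. */
static miopenStatus_t run_adam_step(void *param, void *grad, void *expAvg,
                                    void *expAvgSq, int n, unsigned int step)
{
    miopenHandle_t handle;
    miopenTensorDescriptor_t desc;
    miopenCreate(&handle);
    miopenCreateTensorDescriptor(&desc);
    /* Describe an n-element tensor as 4-D with trailing singleton dimensions. */
    miopenSet4dTensorDescriptor(desc, miopenFloat, n, 1, 1, 1);

    miopenStatus_t status = miopenFusedAdam(
        handle, desc, param, desc, grad, desc, expAvg, desc, expAvgSq,
        NULL, NULL,              /* no maxExpAvgSq: amsgrad is false */
        NULL, NULL, step,        /* step passed as an integer, not a tensor */
        0.001f, 0.9f, 0.999f,    /* lr, beta1, beta2 */
        0.0f, 1e-8f,             /* weight_decay, eps */
        false, false, false,     /* amsgrad, maximize, adamw */
        NULL, NULL, NULL, NULL); /* no AMP grad-scale or found-inf tensors */

    miopenDestroyTensorDescriptor(desc);
    miopenDestroy(handle);
    return status;
}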
Parameters
handle: MIOpen handle (input)
paramDesc: Tensor descriptor for the input parameter tensor (input)
param: Input parameter tensor (input)
gradDesc: Tensor descriptor for the input gradient tensor (input)
grad: Input gradient tensor (input)
expAvgDesc: Tensor descriptor for the input exponential moving average tensor (input)
expAvg: Input exponential moving average tensor (input)
expAvgSqDesc: Tensor descriptor for the input exponential moving average squared tensor (input)
expAvgSq: Input exponential moving average squared tensor (input)
maxExpAvgSqDesc: Tensor descriptor for the input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
maxExpAvgSq: Input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
stateStepDesc: Tensor descriptor for the input state step tensor (input)
stateStep: Input state step tensor (input)
state_step: Input state step; used when the step tensor is null (input)
lr: Learning rate (input)
beta1: Coefficient used for computing the first moment running average of the gradient (input)
beta2: Coefficient used for computing the second moment running average of the gradient (input)
weight_decay: Weight decay (input)
eps: Term added to the denominator to improve numerical stability (input)
amsgrad: Flag indicating whether to use the AMSGrad variant of Adam (input)
maximize: Flag indicating whether to maximize the objective with respect to the parameters (input)
adamw: If true, the operation becomes AdamW (input) (not supported)
gradScaleDesc: Tensor descriptor for the input grad scale tensor (input, optional)
gradScale: Input grad scale tensor (input, optional)
foundInfDesc: Tensor descriptor for the input found inf tensor (input, optional)
foundInf: Tensor indicating the presence of inf or NaN in gradients; if true, the operation and step update are skipped (input, optional)
Returns
miopenStatus_t
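
Neither example above exercises the AMSGrad path. When amsgrad is true, the maxExpAvgSq descriptor and buffer must also be valid; a hedged sketch, reusing the names from the examples above:

// AMSGrad variant: maxExpAvgSq holds the running maximum of expAvgSq
// and is updated in place.
miopenFusedAdam(handle,
                paramDesc, param,
                gradDesc, grad,
                expAvgDesc, expAvg,
                expAvgSqDesc, expAvgSq,
                maxExpAvgSqDesc, maxExpAvgSq,
                NULL, NULL, step,        // integer step, no step tensor
                lr, beta1, beta2, weight_decay, eps,
                true,                    // amsgrad
                false,                   // maximize
                false,                   // adamw
                NULL, NULL, NULL, NULL); // no AMP tensors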

◆ miopenFusedAdamWithOutput()

miopenStatus_t miopenFusedAdamWithOutput ( miopenHandle_t  handle,
const miopenTensorDescriptor_t  paramInDesc,
void *  paramIn,
const miopenTensorDescriptor_t  paramOutDesc,
void *  paramOut,
const miopenTensorDescriptor_t  paramOutFloat16Desc,
void *  paramOutFloat16,
const miopenTensorDescriptor_t  gradInDesc,
const void *  gradIn,
const miopenTensorDescriptor_t  expAvgInDesc,
void *  expAvgIn,
const miopenTensorDescriptor_t  expAvgOutDesc,
void *  expAvgOut,
const miopenTensorDescriptor_t  expAvgSqInDesc,
void *  expAvgSqIn,
const miopenTensorDescriptor_t  expAvgSqOutDesc,
void *  expAvgSqOut,
const miopenTensorDescriptor_t  maxExpAvgSqInDesc,
void *  maxExpAvgSqIn,
const miopenTensorDescriptor_t  maxExpAvgSqOutDesc,
void *  maxExpAvgSqOut,
const miopenTensorDescriptor_t  stateStepInDesc,
void *  stateStepIn,
const miopenTensorDescriptor_t  stateStepOutDesc,
void *  stateStepOut,
const unsigned int  state_step,
const float  lr,
const float  beta1,
const float  beta2,
const float  weight_decay,
const float  eps,
const bool  amsgrad,
const bool  maximize,
const bool  adamw,
const miopenTensorDescriptor_t  gradScaleDesc,
const void *  gradScale,
const miopenTensorDescriptor_t  foundInfDesc,
const void *  foundInf 
)

Execute single tensor Adam optimization and receive the result in a separate output tensor.

This function is equivalent to miopenFusedAdam but receives the result in a separate output tensor.

See also
miopenFusedAdam
// Execute Adam
miopenFusedAdamWithOutput(handle,
                          paramInDesc,
                          paramIn,
                          paramOutDesc,
                          paramOut,
                          NULL, // Unused paramOutFloat16 tensor because AMP is not used
                          NULL,
                          gradInDesc,
                          gradIn,
                          expAvgInDesc,
                          expAvgIn,
                          expAvgOutDesc,
                          expAvgOut,
                          expAvgSqInDesc,
                          expAvgSqIn,
                          expAvgSqOutDesc,
                          expAvgSqOut,
                          NULL, // Unused maxExpAvgSqIn tensor because amsgrad is false
                          NULL,
                          NULL, // Unused maxExpAvgSqOut tensor because amsgrad is false
                          NULL,
                          NULL, // Unused stateStepIn tensor because the step integer argument is used
                          NULL,
                          NULL, // Unused stateStepOut tensor because the step integer argument is used
                          NULL,
                          step,
                          lr,
                          beta1,
                          beta2,
                          weight_decay,
                          eps,
                          false, // amsgrad
                          false, // maximize
                          false, // adamw
                          NULL, // Unused gradScale tensor because AMP is not used
                          NULL,
                          NULL, // Unused foundInf tensor because AMP is not used
                          NULL);

// Execute AMP Adam
miopenFusedAdamWithOutput(handle,
                          paramInDesc,
                          paramIn,
                          paramOutDesc,
                          paramOut,
                          paramOutFloat16Desc, // paramOutFloat16 tensor is optional in AMP
                          paramOutFloat16,
                          gradInDesc,
                          gradIn,
                          expAvgInDesc,
                          expAvgIn,
                          expAvgOutDesc,
                          expAvgOut,
                          expAvgSqInDesc,
                          expAvgSqIn,
                          expAvgSqOutDesc,
                          expAvgSqOut,
                          NULL, // Unused maxExpAvgSqIn tensor because amsgrad is false
                          NULL,
                          NULL, // Unused maxExpAvgSqOut tensor because amsgrad is false
                          NULL,
                          stateStepInDesc,
                          stateStepIn,
                          stateStepOutDesc,
                          stateStepOut,
                          -1, // Ignored because the stateStep tensor is used
                          lr,
                          beta1,
                          beta2,
                          weight_decay,
                          eps,
                          false, // amsgrad
                          false, // maximize
                          false, // adamw
                          gradScaleDesc,
                          gradScale,
                          foundInfDesc,
                          foundInf);
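
Because this variant leaves its inputs untouched, a natural way to use it across training iterations is to ping-pong two sets of state buffers, so each step reads the previous step's outputs without an extra copy. The loop below is an illustrative sketch only: the buffer pairs and desc are assumed to be pre-allocated with a common shape, and whether aliasing input and output buffers is permitted is not specified here.

int src = 0;
for (unsigned int step = 1; step <= num_steps; ++step) {
    int dst = 1 - src;
    /* produce gradBuf for the current iteration before this call */
    miopenFusedAdamWithOutput(handle,
                              desc, paramBuf[src], desc, paramBuf[dst],
                              NULL, NULL,              // no float16 output copy
                              desc, gradBuf,
                              desc, expAvgBuf[src], desc, expAvgBuf[dst],
                              desc, expAvgSqBuf[src], desc, expAvgSqBuf[dst],
                              NULL, NULL, NULL, NULL,  // amsgrad off
                              NULL, NULL, NULL, NULL,  // integer step, no step tensors
                              step,
                              lr, beta1, beta2, weight_decay, eps,
                              false, false, false,     // amsgrad, maximize, adamw
                              NULL, NULL, NULL, NULL); // no AMP tensors
    src = dst;
}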
Parameters
handle: MIOpen handle (input)
paramInDesc: Tensor descriptor for the input parameter tensor (input)
paramIn: Input parameter tensor (input)
paramOutDesc: Tensor descriptor for the output parameter tensor (input)
paramOut: Output parameter tensor (output)
paramOutFloat16Desc: Tensor descriptor for the float16 output parameter tensor (input, optional)
paramOutFloat16: Float16 output parameter tensor (output, optional)
gradInDesc: Tensor descriptor for the input gradient tensor (input)
gradIn: Input gradient tensor (input)
expAvgInDesc: Tensor descriptor for the input exponential moving average tensor (input)
expAvgIn: Input exponential moving average tensor (input)
expAvgOutDesc: Tensor descriptor for the output exponential moving average tensor (input)
expAvgOut: Output exponential moving average tensor (output)
expAvgSqInDesc: Tensor descriptor for the input exponential moving average squared tensor (input)
expAvgSqIn: Input exponential moving average squared tensor (input)
expAvgSqOutDesc: Tensor descriptor for the output exponential moving average squared tensor (input)
expAvgSqOut: Output exponential moving average squared tensor (output)
maxExpAvgSqInDesc: Tensor descriptor for the input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
maxExpAvgSqIn: Input maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
maxExpAvgSqOutDesc: Tensor descriptor for the output maximum exponential moving average squared tensor; used when amsgrad is true (input, optional)
maxExpAvgSqOut: Output maximum exponential moving average squared tensor; used when amsgrad is true (output, optional)
stateStepInDesc: Tensor descriptor for the input state step tensor (input, optional)
stateStepIn: Input state step tensor (input, optional)
stateStepOutDesc: Tensor descriptor for the output state step tensor (input, optional)
stateStepOut: Output state step tensor that stores the updated step value (output, optional)
state_step: Input state step; used when the step tensor is null (input)
lr: Learning rate (input)
beta1: Coefficient used for computing the first moment running average of the gradient (input)
beta2: Coefficient used for computing the second moment running average of the gradient (input)
weight_decay: Weight decay (input)
eps: Term added to the denominator to improve numerical stability (input)
amsgrad: Flag indicating whether to use the AMSGrad variant of Adam (input)
maximize: Flag indicating whether to maximize the objective with respect to the parameters (input)
adamw: If true, the operation becomes AdamW (input) (not supported)
gradScaleDesc: Tensor descriptor for the input grad scale tensor (input, optional)
gradScale: Input grad scale tensor (input, optional)
foundInfDesc: Tensor descriptor for the input found inf tensor (input, optional)
foundInf: Tensor indicating the presence of inf or NaN in gradients; if true, the operation and step update are skipped (input, optional)
Returns
miopenStatus_t