From e88cad9d31cc8fe7ba266a526b7577d844cd1eee Mon Sep 17 00:00:00 2001
From: Olatunji Ruwase
Date: Mon, 30 Sep 2024 14:45:37 -0400
Subject: [PATCH] Format fix

---
 docs/code-docs/source/zero3.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/code-docs/source/zero3.rst b/docs/code-docs/source/zero3.rst
index 951a9316437b..77caefb1c0fd 100644
--- a/docs/code-docs/source/zero3.rst
+++ b/docs/code-docs/source/zero3.rst
@@ -415,7 +415,7 @@ The routines for modifying parameters and optimizer states can be used at any po
 .. code-block:: python

     [...]
-    from deepspeed.runtime.zero.utils import is_zero_param 
+    from deepspeed.runtime.zero.utils import is_zero_param
     from deepspeed.utils import safe_set_full_fp32_param, safe_set_full_optimizer_state
     from deepspeed.utils import safe_set_local_fp32_param, safe_set_local_optimizer_state
     # Here is an example to zero all the fp32 parameters and optimizer states.
@@ -443,16 +443,16 @@ The routines for modifying gradients can be used after ``backward`` but before `
     backward(loss)

     [...]
-    from deepspeed.runtime.zero.utils import is_zero_param 
+    from deepspeed.runtime.zero.utils import is_zero_param
     from deepspeed.utils import safe_set_full_grad, safe_set_local_grad
-    # Here is an example of how to zero all the gradients. 
+    # Here is an example of how to zero all the gradients.
     for n, lp in model.named_parameters():
         # 1. For zero stage 1, 2, or 3 set the full gradient.
         zero_tensor = torch.zeros(lp.ds_shape) if is_zero_param(lp) else torch.zeros(lp.shape)

         safe_set_full_grad(lp, zero_tensor)

-        # 2. For zero stage 3, each process sets its local gradient partition. 
+        # 2. For zero stage 3, each process sets its local gradient partition.
         zero_tensor_local = torch.zeros_like(lp.ds_tensor.shape)

         safe_set_local_grad(lp, zero_tensor_local)
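
Reviewer note, not part of the patch: the hunks above change only trailing whitespace, so the surrounding ``safe_set_*`` examples appear solely as diff context. The sketch below consolidates that context into one piece for readers skimming the patch. It is a minimal sketch, assuming a model already wrapped by ``deepspeed.initialize()`` with a ZeRO config, and assuming Adam-style optimizer state keys (``"exp_avg"``, ``"exp_avg_sq"``), which the patched docs elide behind ``[...]``. It also swaps in ``torch.zeros(lp.ds_tensor.shape)`` for the context line's ``torch.zeros_like(lp.ds_tensor.shape)``, since ``torch.zeros_like`` expects a tensor rather than a shape; that looks like a pre-existing bug outside the scope of this format-only patch.

.. code-block:: python

    import torch
    from deepspeed.runtime.zero.utils import is_zero_param
    from deepspeed.utils import (
        safe_set_full_fp32_param,
        safe_set_full_optimizer_state,
        safe_set_full_grad,
        safe_set_local_grad,
    )

    def zero_all_params_and_grads(model):
        # Assumes `model` is a DeepSpeed engine; the "exp_avg"/"exp_avg_sq"
        # keys are assumed Adam state names, not something this patch shows.
        for n, lp in model.named_parameters():
            # ZeRO parameters expose their full (unpartitioned) shape as ds_shape.
            zero_tensor = torch.zeros(lp.ds_shape) if is_zero_param(lp) else torch.zeros(lp.shape)

            # Zero the fp32 master weights and optimizer state (valid at any point).
            safe_set_full_fp32_param(lp, zero_tensor)
            safe_set_full_optimizer_state(lp, zero_tensor, "exp_avg")
            safe_set_full_optimizer_state(lp, zero_tensor, "exp_avg_sq")

            # Zero the full gradient (only valid after backward(), before step()).
            safe_set_full_grad(lp, zero_tensor)

            # ZeRO stage 3 only: each rank writes its local gradient partition.
            # torch.zeros(...) corrects the docs' torch.zeros_like(shape) call.
            if is_zero_param(lp):
                safe_set_local_grad(lp, torch.zeros(lp.ds_tensor.shape))

The full/local split mirrors the patched docs: the ``safe_set_full_*`` routines take a tensor of the full parameter shape and work across ZeRO stages, while the ``safe_set_local_*`` routines take each rank's partition shape (``lp.ds_tensor.shape``) and apply only to stage 3.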