diff --git a/accelerator/real_accelerator.py b/accelerator/real_accelerator.py index ced9218d7aca..eb4e17850882 100644 --- a/accelerator/real_accelerator.py +++ b/accelerator/real_accelerator.py @@ -178,7 +178,7 @@ def get_accelerator(): if accelerator_name is None: # borrow this log from PR#5084 if accel_logger is not None: - accel_logger.warn( + accel_logger.warning( "Setting accelerator to CPU. If you have GPU or other accelerator, we were unable to detect it.") # cpu added as catch-all when accelerator detection fails accelerator_name = "cpu" diff --git a/deepspeed/runtime/base_optimizer.py b/deepspeed/runtime/base_optimizer.py index b8df7499450d..d2c54155da89 100644 --- a/deepspeed/runtime/base_optimizer.py +++ b/deepspeed/runtime/base_optimizer.py @@ -28,7 +28,7 @@ def load_hp_checkpoint_state_from_checkpoint_dir(self, lp_groups_name: str, chec tp_rank = bwc_tensor_model_parallel_rank(mpu=self.mpu) if self.mpu is None: - logger.warn("MPU is not provided, setting tp size to 1 in checkpoint loading.") + logger.warning("MPU is not provided, setting tp size to 1 in checkpoint loading.") tp_world_size = 1 else: tp_world_size = self.mpu.get_slice_parallel_world_size() if hasattr(self.mpu, "get_slice_parallel_world_size") \ diff --git a/deepspeed/runtime/engine.py b/deepspeed/runtime/engine.py index 5f023d87f375..9b9a2e509d61 100755 --- a/deepspeed/runtime/engine.py +++ b/deepspeed/runtime/engine.py @@ -3120,7 +3120,7 @@ def _get_all_zero_checkpoints(self, load_dir, tag): if bf16_mode is not self.bfloat16_enabled(): checkpoint_bit16 = BFLOAT16 if bf16_mode else FP16 engine_bit16 = BFLOAT16 if self.bfloat16_enabled() else FP16 - logger.warn(f'Loading {checkpoint_bit16} zero checkpoints into {engine_bit16} training engine') + logger.warning(f'Loading {checkpoint_bit16} zero checkpoints into {engine_bit16} training engine') return self._get_all_zero_checkpoint_state_dicts(zero_ckpt_names) return None @@ -3276,7 +3276,7 @@ def _save_moe_checkpoint(self, save_dir, tag, client_state={}, exclude_frozen_pa local_expert_id = None if not m: - logger.warn(f'No expert found in key {key}.') + logger.warning(f'No expert found in key {key}.') else: local_expert_id = m.group(1) diff --git a/deepspeed/runtime/lr_schedules.py b/deepspeed/runtime/lr_schedules.py index 899358e2c5ef..2ffd0bf9f036 100755 --- a/deepspeed/runtime/lr_schedules.py +++ b/deepspeed/runtime/lr_schedules.py @@ -508,7 +508,7 @@ def _initialize_lr(self, optimizer, cycle_min_lr, cycle_max_lr, decay_lr_rate, l def _initialize_momentum(self, optimizer, cycle_min_mom, cycle_max_mom, decay_mom_rate, last_batch_iteration): if 'betas' not in optimizer.defaults: optimizer_name = type(optimizer).__name__ - logger.warn( + logger.warning( f"cycle_momentum is disabled because optimizer {optimizer_name} does not support momentum, no betas attribute in defaults" ) self.cycle_momentum = False diff --git a/deepspeed/runtime/zero/stage_1_and_2.py b/deepspeed/runtime/zero/stage_1_and_2.py index ecb2a527f870..0508766f8896 100755 --- a/deepspeed/runtime/zero/stage_1_and_2.py +++ b/deepspeed/runtime/zero/stage_1_and_2.py @@ -614,7 +614,7 @@ def _configure_moe_settings(self): assert self.contiguous_gradients, "Contiguous Gradients in ZeRO Stage 2 must be set to True for MoE. Other code paths are not tested with MoE" # NOTE: To run ZeRO stage 1 with MoE, we need to set self.contiguous_gradients to True or ignore the assertion if not self.partition_gradients and not self.contiguous_gradients: - logger.warn( + logger.warning( "ZeRO Stage 1 has not been thoroughly tested with MoE. This configuration is still experimental.") assert self.reduce_scatter, "Reduce Scatter in ZeRO Stage 2 must be set to True for MoE. Other code paths are not tested with MoE"