Commit fcb657b: Mcore dist opt ckpt fix
Signed-off-by: Alexandros Koumparoulis <[email protected]>
akoumpa committed May 15, 2024
1 parent 6cb618a commit fcb657b
Showing 1 changed file with 2 additions and 2 deletions.
nemo/collections/nlp/parts/nlp_overrides.py: 2 additions & 2 deletions
@@ -78,7 +78,7 @@
     from apex.transformer.pipeline_parallel.utils import get_num_microbatches
 
     from nemo.core.optim.distributed_adam import MegatronDistributedFusedAdam
-
+    from nemo.core.optim.mcore_optim import McoreDistributedOptimizer
     HAVE_APEX = True
 
 except (ImportError, ModuleNotFoundError):
@@ -294,7 +294,7 @@ def optimizer_sharded_state_dict(self, unsharded_optim_state=None):
             key: value for key, value in model_sharded_state_dict.items() if not key.endswith('_extra_state')
         }
 
-        if isinstance(optimizer, MegatronDistributedFusedAdam):
+        if isinstance(optimizer, MegatronDistributedFusedAdam) or isinstance(optimizer, McoreDistributedOptimizer):
            return optimizer.sharded_state_dict(model_sharded_state_dict, unsharded_optim_state)
        elif not isinstance(optimizer, MainParamsOptimizerWrapper):
            # Regular optimizer, e.g. Adam or FusedAdam
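For context, below is a minimal, self-contained Python sketch (not NeMo code; the stub class names and simplified fallback are hypothetical) of the dispatch pattern this commit extends: distributed optimizers that can shard their own state are asked for a sharded state dict directly, and with this change McoreDistributedOptimizer takes the same branch as MegatronDistributedFusedAdam.

# Minimal standalone sketch of the dispatch pattern extended by this commit.
# The *Stub classes and the helper below are hypothetical stand-ins for the
# real NeMo optimizer wrappers; they exist only to make the example runnable.

class MegatronDistributedFusedAdamStub:
    def sharded_state_dict(self, model_sharded_state_dict, unsharded_optim_state=None):
        return {"state": "fused-adam sharded optimizer state (stub)"}


class McoreDistributedOptimizerStub:
    def sharded_state_dict(self, model_sharded_state_dict, unsharded_optim_state=None):
        return {"state": "mcore sharded optimizer state (stub)"}


def optimizer_sharded_state_dict(optimizer, model_sharded_state_dict, unsharded_optim_state=None):
    # After this commit, both distributed-optimizer flavors take the same branch
    # and build their own sharded state dict from the model's sharded state.
    if isinstance(optimizer, (MegatronDistributedFusedAdamStub, McoreDistributedOptimizerStub)):
        return optimizer.sharded_state_dict(model_sharded_state_dict, unsharded_optim_state)
    # Anything else is treated as a regular optimizer here (simplified fallback).
    return {"state": "regular optimizer state"}


if __name__ == "__main__":
    sharded_model_state = {"weights": "sharded model tensors (stub)"}
    print(optimizer_sharded_state_dict(McoreDistributedOptimizerStub(), sharded_model_state))

Note that the two chained checks in the diff are equivalent to a single call with a tuple of types, i.e. isinstance(optimizer, (MegatronDistributedFusedAdam, McoreDistributedOptimizer)).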
