Traceback (most recent call last):
File "pretrain_gpt2.py", line 829, in <module>
Traceback (most recent call last):
File "pretrain_gpt2.py", line 829, in <module>
main()
File "pretrain_gpt2.py", line 742, in main
main()
File "pretrain_gpt2.py", line 742, in main
initialize_distributed(args)
File "pretrain_gpt2.py", line 675, in initialize_distributed
initialize_distributed(args)
File "pretrain_gpt2.py", line 675, in initialize_distributed
set_deepspeed_activation_checkpointing(args)
File "pretrain_gpt2.py", line 637, in set_deepspeed_activation_checkpointing
set_deepspeed_activation_checkpointing(args)
File "pretrain_gpt2.py", line 637, in set_deepspeed_activation_checkpointing
deepspeed.checkpointing.configure(mpu, deepspeed_config=args.deepspeed_config, num_checkpoints=args.num_layers)
File "/home/atr/.conda/envs/stt/lib/python3.7/site-packages/deepspeed/runtime/activation_checkpointing/checkpointing.py", line 881, in configure
deepspeed.checkpointing.configure(mpu, deepspeed_config=args.deepspeed_config, num_checkpoints=args.num_layers)
File "/home/atr/.conda/envs/stt/lib/python3.7/site-packages/deepspeed/runtime/activation_checkpointing/checkpointing.py", line 881, in configure
_configure_using_config_file(deepspeed_config, mpu=mpu)
File "/home/atr/.conda/envs/stt/lib/python3.7/site-packages/deepspeed/runtime/activation_checkpointing/checkpointing.py", line 799, in _configure_using_config_file
_configure_using_config_file(deepspeed_config, mpu=mpu)
File "/home/atr/.conda/envs/stt/lib/python3.7/site-packages/deepspeed/runtime/activation_checkpointing/checkpointing.py", line 799, in _configure_using_config_file
if dist.get_rank() == 0:
File "/home/atr/.conda/envs/stt/lib/python3.7/site-packages/deepspeed/comm/comm.py", line 352, in get_rank
assert cdb is not None and cdb.is_initialized(), 'DeepSpeed backend not set, please initialize it using init_process_group()'
AssertionError: DeepSpeed backend not set, please initialize it using init_process_group()
if dist.get_rank() == 0:
File "/home/atr/.conda/envs/stt/lib/python3.7/site-packages/deepspeed/comm/comm.py", line 352, in get_rank
assert cdb is not None and cdb.is_initialized(), 'DeepSpeed backend not set, please initialize it using init_process_group()'
AssertionError: DeepSpeed backend not set, please initialize it using init_process_group()