You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Traceback (most recent call last):
File "train.py", line 111, in <module>
main(**args)
File "train.py", line 85, in main
agent = load_model(args)
File "/remote-home/zhubin/NExT-GPT/code/model/__init__.py", line 14, in load_model
agent = globals()[agent_name](model, args)
File "/remote-home/zhubin/NExT-GPT/code/model/agent.py", line 24, in __init__
self.ds_engine, self.optimizer, _, _ = deepspeed.initialize(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/__init__.py", line 165, in initialize
engine = DeepSpeedEngine(args=args,
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 309, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1174, in _configure_optimizer
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1230, in _configure_basic_optimizer
optimizer = DeepSpeedCPUAdam(model_parameters,
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/adam/cpu_adam.py", line 94, in __init__
self.ds_opt_adam = CPUAdamBuilder().load()
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 454, in load
return self.jit_load(verbose)
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 497, in jit_load
op_module = load(name=self.name,
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1284, in load
return _jit_compile(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1508, in _jit_compile
_write_ninja_file_and_build_library(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1623, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1916, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'cpu_adam'
Exception ignored in: <function DeepSpeedCPUAdam.__del__ at 0x7fa770ee4280>
Traceback (most recent call last):
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/adam/cpu_adam.py", line 102, in __del__
AttributeError: 'DeepSpeedCPUAdam' object has no attribute 'ds_opt_adam'
[2023-11-07 02:48:42,654] [INFO] [launch.py:314:sigkill_handler] Killing subprocess 72319
[2023-11-07 02:48:42,654] [ERROR] [launch.py:320:sigkill_handler] ['/root/anaconda3/envs/nextgpt/bin/python', '-u', 'train.py', '--local_rank=0', '--model', 'nextgpt', '--stage', '1', '--save_path', '../ckpt/delta_ckpt/nextgpt/7b_tiva_v0', '--log_path', '../ckpt/delta_ckpt/nextgpt/7b_tiva_v0/log'] exits with return code = 1
The text was updated successfully, but these errors were encountered:
Traceback (most recent call last):
File "train.py", line 111, in <module>
main(**args)
File "train.py", line 85, in main
agent = load_model(args)
File "/remote-home/zhubin/NExT-GPT/code/model/__init__.py", line 14, in load_model
agent = globals()[agent_name](model, args)
File "/remote-home/zhubin/NExT-GPT/code/model/agent.py", line 24, in __init__
self.ds_engine, self.optimizer, _, _ = deepspeed.initialize(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/__init__.py", line 165, in initialize
engine = DeepSpeedEngine(args=args,
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 309, in __init__
self._configure_optimizer(optimizer, model_parameters)
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1174, in _configure_optimizer
basic_optimizer = self._configure_basic_optimizer(model_parameters)
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/runtime/engine.py", line 1230, in _configure_basic_optimizer
optimizer = DeepSpeedCPUAdam(model_parameters,
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/adam/cpu_adam.py", line 94, in __init__
self.ds_opt_adam = CPUAdamBuilder().load()
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 454, in load
return self.jit_load(verbose)
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 497, in jit_load
op_module = load(name=self.name,
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1284, in load
return _jit_compile(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1508, in _jit_compile
_write_ninja_file_and_build_library(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1623, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1916, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'cpu_adam'
Exception ignored in: <function DeepSpeedCPUAdam.__del__ at 0x7fa770ee4280>
Traceback (most recent call last):
File "/root/anaconda3/envs/nextgpt/lib/python3.8/site-packages/deepspeed/ops/adam/cpu_adam.py", line 102, in __del__
AttributeError: 'DeepSpeedCPUAdam' object has no attribute 'ds_opt_adam'
[2023-11-07 02:48:42,654] [INFO] [launch.py:314:sigkill_handler] Killing subprocess 72319
[2023-11-07 02:48:42,654] [ERROR] [launch.py:320:sigkill_handler] ['/root/anaconda3/envs/nextgpt/bin/python', '-u', 'train.py', '--local_rank=0', '--model', 'nextgpt', '--stage', '1', '--save_path', '../ckpt/delta_ckpt/nextgpt/7b_tiva_v0', '--log_path', '../ckpt/delta_ckpt/nextgpt/7b_tiva_v0/log'] exits with return code = 1
The text was updated successfully, but these errors were encountered: