You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
详细报错内容如下:
/home/huskar/.local/lib/python3.10/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set no_deprecation_warning=True to disable this warning
warnings.warn(
0%| | 0/3000 [00:00<?, ?it/s]02/23/2024 11:00:45 - WARNING - transformers_modules.chatglm-6b-int4.modeling_chatglm - use_cache=True is incompatible with gradient checkpointing. Setting use_cache=False...
/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
warnings.warn(
Traceback (most recent call last):
File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 430, in <module>
main()
File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 369, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1635, in train
return inner_training_loop(
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1904, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2647, in training_step
loss = self.compute_loss(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2679, in compute_loss
outputs = model(**inputs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 985, in forward
layer_ret = torch.utils.checkpoint.checkpoint(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 482, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 553, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 261, in forward
outputs = run_function(*args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 624, in forward
attention_input = self.input_layernorm(hidden_states)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 201, in forward
return F.layer_norm(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 2546, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: "LayerNormKernelImpl" not implemented for 'Half'
0%| | 0/3000 [00:00<?, ?it/s]
Expected Behavior
No response
Steps To Reproduce
修改 train.sh line 4 为 CUDA_VISIBLE_DEVICES=1 python3 main.py
Is there an existing issue for this?
Current Behavior
在使用P-tuning微调时报错 LayerNormKernelImpl,系统为WSL-Ubuntu 22.04.3,硬件为nvidia 4060,8G显存,运行 chatglm-6b-int4量化版本
详细报错内容如下:
/home/huskar/.local/lib/python3.10/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set no_deprecation_warning=True to disable this warning
warnings.warn(
0%| | 0/3000 [00:00<?, ?it/s]02/23/2024 11:00:45 - WARNING - transformers_modules.chatglm-6b-int4.modeling_chatglm - use_cache=True is incompatible with gradient checkpointing. Setting use_cache=False...
/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py:460: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.
warnings.warn(
Traceback (most recent call last):
File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 430, in <module>
main()
File "/home/huskar/ChatGLM-6B/ptuning/main.py", line 369, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1635, in train
return inner_training_loop(
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 1904, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2647, in training_step
loss = self.compute_loss(model, inputs)
File "/home/huskar/ChatGLM-6B/ptuning/trainer.py", line 2679, in compute_loss
outputs = model(**inputs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 985, in forward
layer_ret = torch.utils.checkpoint.checkpoint(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 482, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/autograd/function.py", line 553, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/huskar/.local/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 261, in forward
outputs = run_function(*args)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.cache/huggingface/modules/transformers_modules/chatglm-6b-int4/modeling_chatglm.py", line 624, in forward
attention_input = self.input_layernorm(hidden_states)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 201, in forward
return F.layer_norm(
File "/home/huskar/.local/lib/python3.10/site-packages/torch/nn/functional.py", line 2546, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: "LayerNormKernelImpl" not implemented for 'Half'
0%| | 0/3000 [00:00<?, ?it/s]
Expected Behavior
No response
Steps To Reproduce
Environment
Anything else?
No response
The text was updated successfully, but these errors were encountered: