报错。同样的代码(run_clm_pt_with_peft.py)和配置参数,在windows能正常在chinese-llama-2-1.3b-hf继续预训练,搬到linux就报错 #579
Unanswered
yinggaohai
asked this question in
Q&A
Replies: 1 comment
-
把fp16改成false也继续报同样的错误
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
Traceback (most recent call last):
File "/home/yinggaohai/Mycodes/LLM/Chinese-LLaMA-Alpaca/Chinese-LLaMA-Alpaca-2/scripts/training/run_clm_pt_with_peft.py", line 726, in <module>
main()
File "/home/yinggaohai/Mycodes/LLM/Chinese-LLaMA-Alpaca/Chinese-LLaMA-Alpaca-2/scripts/training/run_clm_pt_with_peft.py", line 694, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/trainer.py", line 1555, in train
return inner_training_loop(
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/trainer.py", line 2725, in training_step
loss = self.compute_loss(model, inputs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/trainer.py", line 2748, in compute_loss
outputs = model(**inputs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 171, in forward
outputs = self.parallel_apply(replicas, inputs, kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 181, in parallel_apply
return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py", line 89, in parallel_apply
output.reraise()
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/_utils.py", line 644, in reraise
raise exception
RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
output = module(*input, **kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/Mycodes/LLM/Chinese-LLaMA-Alpaca/Chinese-LLaMA-Alpaca-2/scripts/training/peft/peft_model.py", line 529, in forward
return self.base_model(
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 1034, in forward
outputs = self.model(
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 912, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 249, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 107, in forward
outputs = run_function(*args)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 672, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/transformers/models/llama/modeling_llama.py", line 366, in forward
query_states = self.q_proj(hidden_states)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/Mycodes/LLM/Chinese-LLaMA-Alpaca/Chinese-LLaMA-Alpaca-2/scripts/training/peft/tuners/lora.py", line 375, in forward
result += self.lora_B(self.lora_A(self.lora_dropout(x))) * self.scaling
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/yinggaohai/anaconda3/envs/Chinese-LLaMA-Alpaca/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: expected scalar type Half but found Float
Beta Was this translation helpful? Give feedback.
All reactions