I trained the model with the following code:
from datasets import load_dataset
from trl import SFTTrainer
from transformers import AutoModel, DataCollatorForLanguageModeling, AutoTokenizer, TrainingArguments
from peft import LoraConfig
# Load the model and tokenizer
MODEL_PATH = "/home/qiji/chatglm2-6b"
MODEL_SAVE_PATH = "/home/qiji/qiji_project/jkd/SFT/results"
# model = AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True).half().cuda()
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
# Set the fine-tuning arguments
training_arguments = TrainingArguments(
    output_dir=MODEL_SAVE_PATH,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    save_steps=5000,
    logging_steps=1000,
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    max_steps=5000,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type='constant',
)
model.config.use_cache = False
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False,
)
dataset = load_dataset("/home/qiji/qiji_project/jkd/SFT/SFT_dataset/", split="train")
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    dataset_text_field="input",
    max_seq_length=512,
    peft_config=peft_config,
    args=training_arguments,
    data_collator=data_collator,
    packing=False,
)
trainer.train()
trainer.save_model(MODEL_SAVE_PATH)
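After training, I checked which files trainer.save_model actually wrote (since the model is wrapped with a LoRA peft_config, I am not sure whether it saves a full state dict or only adapter weights):

import os
# Inspect the checkpoint directory produced by trainer.save_model()
print(sorted(os.listdir(MODEL_SAVE_PATH)))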
As you can see, the model is saved to MODEL_SAVE_PATH. But when I tried to load it the usual way, by loading the pre-trained model and then the checkpoint, it failed.
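For context, the load_model method in GLM_with_checkpoint.py follows the standard ChatGLM P-tuning v2 checkpoint-loading recipe. A simplified sketch of the relevant part, reconstructed around the failing lines shown in the traceback below (the pre_seq_len value and the checkpoint file name are assumptions):

import os
import torch
from transformers import AutoConfig, AutoModel

def load_model(model_name_or_path, checkpoint_path):
    # Configure the base model with a prefix encoder (P-tuning v2)
    config = AutoConfig.from_pretrained(
        model_name_or_path, trust_remote_code=True, pre_seq_len=128
    )
    model = AutoModel.from_pretrained(
        model_name_or_path, config=config, trust_remote_code=True
    )
    # Keep only the prefix-encoder weights from the checkpoint
    prefix_state_dict = torch.load(os.path.join(checkpoint_path, "pytorch_model.bin"))
    new_prefix_state_dict = {}
    for k, v in prefix_state_dict.items():
        if k.startswith("transformer.prefix_encoder."):
            new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
    model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)  # fails here
    model = model.half()
    return model

This is the error message: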
RuntimeError Traceback (most recent call last)
Cell In[6], line 6
4 CHECKPOINT_PATH = "/home/qiji/qiji_project/jkd/SFT/results/"
5 llm = GLM()
----> 6 llm.load_model(model_name_or_path=MODEL_PATH, checkpoint_path=CHECKPOINT_PATH)
7 # llm.load_model(model_name_or_path=MODEL_PATH)
File ~/qiji_project/QiJiModel/GLM_with_checkpoint.py:34, in GLM.load_model(self, llm_device, model_name_or_path, checkpoint_path)
32 if k.startswith("transformer.prefix_encoder."):
33 new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
---> 34 model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
36 # Comment out the following line if you don't use quantization
37 model = model.half()
File ~/anaconda3/envs/tmp/lib/python3.11/site-packages/torch/nn/modules/module.py:2041, in Module.load_state_dict(self, state_dict, strict)
2036 error_msgs.insert(
2037 0, 'Missing key(s) in state_dict: {}. '.format(
2038 ', '.join('"{}"'.format(k) for k in missing_keys)))
2040 if len(error_msgs) > 0:
-> 2041 raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
2042 self.__class__.__name__, "\n\t".join(error_msgs)))
2043 return _IncompatibleKeys(missing_keys, unexpected_keys)
RuntimeError: Error(s) in loading state_dict for PrefixEncoder:
Missing key(s) in state_dict: "embedding.weight".
How can I get rid of this problem? I have tried many things, such as modifying the path, but I can't figure it out.
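One thing I suspect is that SFTTrainer saved a LoRA adapter rather than a prefix-encoder checkpoint, so it would have to be loaded through peft instead. A minimal sketch of what I would expect to work (assuming the adapter files adapter_config.json and adapter_model.bin are in the results directory):

import torch
from peft import PeftModel
from transformers import AutoModel, AutoTokenizer

MODEL_PATH = "/home/qiji/chatglm2-6b"
CHECKPOINT_PATH = "/home/qiji/qiji_project/jkd/SFT/results/"

# Load the frozen base model first
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True)
# Then attach the trained LoRA adapter weights on top of it
model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)
model = model.half().cuda().eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

Is that the right way to load this checkpoint, or is there something else wrong with my setup?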