0

I have trained the model with the following code.

from datasets import load_dataset
from trl import SFTTrainer
from transformers import AutoModel, DataCollatorForLanguageModeling, AutoTokenizer, TrainingArguments
from peft import LoraConfig

# Locations of the base model and of the directory that receives the
# fine-tuning output.
MODEL_PATH = "/home/qiji/chatglm2-6b"
MODEL_SAVE_PATH = "/home/qiji/qiji_project/jkd/SFT/results"

# Load the base model and its tokenizer from the same local directory.
# An earlier attempt, kept for reference:
#   AutoModelForCausalLM.from_pretrained(MODEL_PATH, trust_remote_code=True).half().cuda()
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True)
model.config.use_cache = False  # caching is switched off for training
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

# Fine-tuning hyperparameters, collected in one place before being handed
# to TrainingArguments.
# NOTE(review): save_steps equals max_steps, so no intermediate checkpoints
# are requested before the last step.
# NOTE(review): warmup_ratio is set together with a "constant" scheduler;
# the warmup presumably has no effect unless "constant_with_warmup" is
# used -- confirm which one is intended.
sft_hyperparameters = dict(
    output_dir=MODEL_SAVE_PATH,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    save_steps=5000,
    logging_steps=1000,
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    max_steps=5000,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
)
training_arguments = TrainingArguments(**sft_hyperparameters)

# LoRA adapter settings passed to the trainer.
# NOTE(review): because this is a LoraConfig, the saved output is presumably
# a LoRA adapter rather than prefix-encoder weights -- verify that the
# loading code matches.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Collator for plain language modeling (masked-LM objective disabled).
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training split of the local SFT dataset.
train_data = load_dataset(
    "/home/qiji/qiji_project/jkd/SFT/SFT_dataset/",
    split="train",
)

# Assemble the supervised fine-tuning trainer; the text is taken from the
# "input" field of the dataset.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    peft_config=lora_config,
    train_dataset=train_data,
    dataset_text_field="input",
    data_collator=collator,
    max_seq_length=512,
    packing=False,
)

# Run training, then write the result to MODEL_SAVE_PATH.
trainer.train()
trainer.save_model(MODEL_SAVE_PATH)

The error message I get is shown below.

As you can see, the model is saved to MODEL_SAVE_PATH. However, when I tried to load the pre-trained model together with the saved checkpoint, I got the following error.

RuntimeError                              Traceback (most recent call last)
Cell In[6], line 6
      4 CHECKPOINT_PATH = "/home/qiji/qiji_project/jkd/SFT/results/"
      5 llm = GLM()
----> 6 llm.load_model(model_name_or_path=MODEL_PATH, checkpoint_path=CHECKPOINT_PATH)
      7 # llm.load_model(model_name_or_path=MODEL_PATH)

File ~/qiji_project/QiJiModel/GLM_with_checkpoint.py:34, in GLM.load_model(self, llm_device, model_name_or_path, checkpoint_path)
     32     if k.startswith("transformer.prefix_encoder."):
     33         new_prefix_state_dict[k[len("transformer.prefix_encoder."):]] = v
---> 34 model.transformer.prefix_encoder.load_state_dict(new_prefix_state_dict)
     36 # Comment out the following line if you don't use quantization
     37 model = model.half()

File ~/anaconda3/envs/tmp/lib/python3.11/site-packages/torch/nn/modules/module.py:2041, in Module.load_state_dict(self, state_dict, strict)
   2036         error_msgs.insert(
   2037             0, 'Missing key(s) in state_dict: {}. '.format(
   2038                 ', '.join('"{}"'.format(k) for k in missing_keys)))
   2040 if len(error_msgs) > 0:
-> 2041     raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
   2042                        self.__class__.__name__, "\n\t".join(error_msgs)))
   2043 return _IncompatibleKeys(missing_keys, unexpected_keys)

RuntimeError: Error(s) in loading state_dict for PrefixEncoder:
    Missing key(s) in state_dict: "embedding.weight". 

How can I get rid of this problem?

I have tried several things, such as modifying the path, but I can't figure it out.

金坤东
  • 1
  • 1

0 Answers0