Code was based off on https://github.com/tloen/alpaca-lora/blob/main/finetune.py
My objective of this training was to made use of unsupervised training dataset to get the model to understand how words are written in my domain (basically masked language modelling). Reason i don't use the conventional instructional fine tuning is because there's no such dataset of sufficient quantity available to me.
The 2 main changes i've made are as follows
- instead of fine tuning from Llama's weights, i'll finetune from a existing alpaca-lora's weight. As such, i've edited the code as follows
from peft import (
# LoraConfig,
PeftModel,
get_peft_model,
get_peft_model_state_dict,
prepare_model_for_int8_training,
set_peft_model_state_dict,
)
as well as
# config = LoraConfig(
# r=lora_r,
# lora_alpha=lora_alpha,
# target_modules=lora_target_modules,
# lora_dropout=lora_dropout,
# bias="none",
# task_type="CAUSAL_LM",
# )
# model = get_peft_model(model, config)
# replace with this to load directly from alpaca
LORA_WEIGHTS = "tloen/alpaca-lora-7b"
model = PeftModel.from_pretrained(
model,
LORA_WEIGHTS,
torch_dtype=torch.float16,
)
- edited the dataset to my own. (I am not using the prompt template). My code for generating the dataset as follows:
def chunk_text(data):
concantenated_text = ''
all_result = []
for i in range(data['train'].num_rows):
concantenated_text += data['train']['combined'][i]
tokenized_concantenated_text = tokenizer.encode(concantenated_text)[1:]
tokenized_prompt = tokenizer.encode("### Text: ")[1:]
full_length = len(tokenized_concantenated_text)
for i in range(0, full_length, chunk_size):
text = tokenized_concantenated_text[i: i+chunk_size+overlap_size]
text = tokenized_prompt + text
text = tokenizer.decode(text)
result = tokenizer(text, padding=False)
if result["input_ids"][-1] != tokenizer.eos_token_id:
result["input_ids"].append(tokenizer.eos_token_id)
result["attention_mask"].append(1)
result["labels"] = result["input_ids"].copy()
all_result.append(result)
return all_result
However, i keep facing the following error no matter how i tweak the code. Really appreciate any help rendered!
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ in <cell line: 2>:2 │
│ │
│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:1662 in train │
│ │
│ 1659 │ │ inner_training_loop = find_executable_batch_size( │
│ 1660 │ │ │ self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size │
│ 1661 │ │ ) │
│ ❱ 1662 │ │ return inner_training_loop( │
│ 1663 │ │ │ args=args, │
│ 1664 │ │ │ resume_from_checkpoint=resume_from_checkpoint, │
│ 1665 │ │ │ trial=trial, │
│ │
│ /usr/local/lib/python3.9/dist-packages/transformers/trainer.py:1991 in _inner_training_loop │
│ │
│ 1988 │ │ │ │ │ │ │ xm.optimizer_step(self.optimizer) │
│ 1989 │ │ │ │ │ elif self.do_grad_scaling: │
│ 1990 │ │ │ │ │ │ scale_before = self.scaler.get_scale() │
│ ❱ 1991 │ │ │ │ │ │ self.scaler.step(self.optimizer) │
│ 1992 │ │ │ │ │ │ self.scaler.update() │
│ 1993 │ │ │ │ │ │ scale_after = self.scaler.get_scale() │
│ 1994 │ │ │ │ │ │ optimizer_was_run = scale_before <= scale_after │
│ │
│ /usr/local/lib/python3.9/dist-packages/torch/cuda/amp/grad_scaler.py:368 in step │
│ │
│ 365 │ │ if optimizer_state["stage"] is OptState.READY: │
│ 366 │ │ │ self.unscale_(optimizer) │
│ 367 │ │ │
│ ❱ 368 │ │ assert len(optimizer_state["found_inf_per_device"]) > 0, "No inf checks were rec │
│ 369 │ │ │
│ 370 │ │ retval = self._maybe_opt_step(optimizer, optimizer_state, *args, **kwargs) │
│ 371 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AssertionError: No inf checks were recorded for this optimizer.
Environment: Python: 3.9 cuda: 11.8