I am new to HuggingFace and the PyTorch ML ecosystem, and I am trying to use a GPU device instead of the default CPU.

Can someone tell me if the following script is correct? The only device-related call I make is lmhead_model.to(device). I am not sure whether or not I also need to move the tokenizer, train_dataset, data_collator, or anything else. Any insight would be appreciated; I am a beginner.
from transformers import GPT2Tokenizer
from transformers import GPT2LMHeadModel
from transformers import TextDataset
from transformers import DataCollatorForLanguageModeling
from transformers import Trainer
from transformers import TrainingArguments
# Load the GPT-2 tokenizer and LM head model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
lmhead_model = GPT2LMHeadModel.from_pretrained('gpt2')
# Load the training dataset, splitting it into blocks of 64 tokens
train_dataset = TextDataset(
    tokenizer=tokenizer,
    file_path='tinyshakespeare.txt',
    block_size=64
)
# Create a data collator for preprocessing batches
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)
# Optionally configure the model and PyTorch to use a GPU
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lmhead_model = lmhead_model.to(device)
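# (as far as I understand, .to(device) moves all of the model's parameters and buffers)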
if device.type == 'cuda':
    print('We are currently using a GPU')
else:
    print('We are currently using a CPU')
# Define the training arguments
training_args = TrainingArguments(
    output_dir='tinyshakespeare',      # output directory for checkpoints
    overwrite_output_dir=True,         # overwrite any existing content
    per_device_train_batch_size=32,    # training batch size per device (GPU/CPU)
    dataloader_num_workers=0,          # number of workers for the dataloader
    max_steps=1000,                    # maximum number of training steps
    save_steps=1000,                   # save a checkpoint every 1000 steps
    save_total_limit=1,                # maximum number of checkpoints to keep
    prediction_loss_only=True,         # only compute the loss during prediction
    learning_rate=3e-4,                # learning rate
    fp16=(device.type == 'cuda'),      # use 16-bit (mixed) precision on GPU
    optim='adamw_torch',               # optimizer used for training
    lr_scheduler_type='linear',        # learning rate scheduler
    logging_steps=10,                  # log every 10 steps
    report_to='none',                  # disable reporting to wandb, tensorboard, etc.
)
# Run the training loop
if __name__ == '__main__':
    trainer = Trainer(
        model=lmhead_model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
    )
    result = trainer.train()
    print(result)
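In case it helps show where my confusion is, here is a rough check I put together (reusing the same objects as the script above; the sample sentence is just something I made up). I am not sure this is the right way to verify device placement:

# Rough check of where things report their device
print(next(lmhead_model.parameters()).device)  # model weights, e.g. cuda:0 after .to(device)
print(training_args.device)                    # device the training arguments resolve to
encoded = tokenizer('To be, or not to be', return_tensors='pt')
print(encoded['input_ids'].device)             # tokenizer output tensors start on the CPU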