I'm trying to use this model from deepset/roberta-base-squad2 to essentially go through a column of work related activities and have it answer the question what are the necessary skills for this job ? However the model is simply handing me back my context or my question+context. I'm not quite sure why it's doing that.
Here's what I'm running,
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
# Your DataFrame loading code here
# df = pd.read_csv("your_data.csv")
def generate_skills(question, context):
tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")
model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")
inputs = tokenizer(question, context, return_tensors='pt')
outputs = model(**inputs)
start_scores = outputs.start_logits
end_scores = outputs.end_logits
start_index = torch.argmax(start_scores)
end_index = torch.argmax(end_scores) + 1
tokens = inputs['input_ids'][0][start_index:end_index]
answer = tokenizer.decode(tokens, skip_special_tokens=True)
return answer
def generate_skills_for_row(row):
context = row['top_words']
question = "What are the necessary skills a data scientist should have?"
skills = generate_skills(question, context)
return skills
# Create a new column 'skills' based on the 'top_words' column
df['skills'] = df.apply(generate_skills_for_row, axis=1)