When I run my FinBERT model in Google Colab, it always crashes the RAM at outputs = model(**inputs).
import glob

import numpy as np
import pandas as pd
import torch
import wandb
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Tokenizer and model are loaded in an earlier cell; shown here for completeness
# (assuming the ProsusAI/finbert checkpoint from the Hugging Face hub)
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
model.eval()

# Reads all the uploaded CSV files at once (they have to be re-uploaded each Colab session)
all_files = glob.glob("*.csv")
tickerList = []
textList = []
for filename in all_files:
    # Get the ticker symbol from the file name
    ticker = filename.split('_', 1)[0].replace('.', '').upper()

    # Read the file into a dataframe
    df = pd.read_csv(filename)
    headlines_array = np.array(df)

    # The dataframe now becomes a list of headline strings for the tokenizer to process
    text = list(headlines_array[:, 0])
    textList.append(text)

    # Check whether we have seen this ticker before
    if ticker not in tickerList:
        tickerList.append(ticker)

    # Get the data into an acceptable format for our model
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    outputs = model(**inputs)  # time consuming and crashes the RAM, so it can't be put in the for loop
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

    positive = predictions[:, 0].tolist()
    negative = predictions[:, 1].tolist()
    neutral = predictions[:, 2].tolist()

    table = {'Headline': text,
             'Ticker': ticker,
             'Positive': positive,
             'Negative': negative,
             'Neutral': neutral}
    df = pd.DataFrame(table, columns=["Headline", "Ticker", "Positive", "Negative", "Neutral"])

    final_table = wandb.Table(columns=["Sentence", "Ticker", "Positive", "Negative", "Neutral"])
    for headline, pos, neg, neutr in zip(text, positive, negative, neutral):
        final_table.add_data(headline, ticker, pos, neg, neutr)
Not quite sure what is going wrong, as outputs = model(**inputs) runs fine outside the for loop but does not seem to run even once when I bring it inside the loop.
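For reference, this is roughly the inference step in isolation that works for me on its own. It's only a minimal sketch of what I mean: the sample_text headline is made up, I'm assuming the ProsusAI/finbert tokenizer and sequence-classification checkpoint from the Hugging Face hub (my actual model may be loaded differently), and wrapping the call in torch.no_grad() is just my guess at keeping PyTorch from tracking gradients during inference.

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Hypothetical checkpoint -- my real model may come from somewhere else
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")
model.eval()

# Made-up headline just to show the shapes involved
sample_text = ["Shares jumped after the company beat earnings expectations."]

inputs = tokenizer(sample_text, padding=True, truncation=True, return_tensors='pt')
with torch.no_grad():  # assumption: skipping gradient tracking should reduce memory use
    outputs = model(**inputs)
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
print(predictions)  # one row of [positive, negative, neutral] scores per headline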