I am trying to run code to get stacked embedding from flair and bert and I am getting the following error. one of the suggestion was to reduce the batch size, but how to pass the data in batches? here is the code and error.
from tqdm import tqdm ## tracks progress of loop ##
import torch
from flair.data import Sentence
from flair.embeddings import TransformerDocumentEmbeddings
from flair.embeddings import DocumentPoolEmbeddings
bert_embeddings = TransformerDocumentEmbeddings('bert-base-uncased')
### initialize the document embeddings, mode = mean ###
document_embeddings = DocumentPoolEmbeddings([
flair_forward,
flair_backward,
bert_embeddings
])
# Storing Size of embedding #
z = sentence.embedding.size()[0]
print(z)
### Vectorising text ###
# creating a tensor for storing sentence embeddings
sen = torch.zeros(0,z)
print(sen)
# iterating Sentences #
for tweet in tqdm(txt):
sentence = Sentence(tweet)
document_embeddings.embed(sentence)# *****this line is giving error*****
# Adding Document embeddings to list #
if(torch.cuda.is_available()):
sen = sen.cuda()
sen = torch.cat((sen, sentence.embedding.view(-1,z)),0)
and this the error I am getting.
RuntimeError Traceback (most recent call last)
<ipython-input-24-1eee00445350> in <module>()
24 for tweet in tqdm(txt):
25 sentence = Sentence(tweet)
---> 26 document_embeddings.embed(sentence)
27 # Adding Document embeddings to list #
28 if(torch.cuda.is_available()):
7 frames
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
580 if batch_sizes is None:
581 result = _VF.lstm(input, hx, self._flat_weights, self.bias, self.num_layers,
--> 582 self.dropout, self.training, self.bidirectional, self.batch_first)
583 else:
584 result = _VF.lstm(input, batch_sizes, hx, self._flat_weights, self.bias,
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 7.43 GiB total capacity; 6.54 GiB already allocated; 10.94 MiB free; 6.70 GiB reserved in total by PyTorch)