I am getting a MemoryError
using Python 64-bits. Here is my function:
def entr_langue(path,nom_langue):
mots_ts=[]
table_tr=dict((ord(char),None) for char in string.punctuation)#table de translation/mapping
with codecs.open(path,"r","utf-8") as filep:
for i,line in enumerate(filep):
#extraction par ligne
line=" ".join(line.split()[1:])
line=line.lower()
line=re.sub(r"\d+"," ",line) #suppression des digits
if len(line) !=0:
line=line.translate(table_tr)#suppression des poncts
mots_ts += line
mots_ts.append(" ")#ajout des espaces
ts_str=''.join(mots_ts)
ts_str=re.sub(' +',' ',ts_str) #remp des series d'espaces par un seul espace
seq_ts=[i for i in ts_str]
#daba extraction des Bigram et les trier selon la frequ
fn=BigramCollocationFinder.from_words(seq_ts)
fn.apply_freq_filter(6) #"li 3ndhom frequ 9el m 6 ytfiltraw
bigram_model=fn.ngram_fd.viewitems()
bigram_model=sorted(fn.ngram_fd.viewitems(), key=lambda item: item[1],reverse=True)
print (bigram_model)
np.save(nom_langue+".npy",bigram_model)
The error:
File "C:/Users/msi/Documents/projIA/extraction_bigram.py", line 23, in entr_langue
mots_ts += line
MemoryError