i have"MemoryError" when im trying to read file with 45 millions files.
How to solve this problem?
NOTE: My code works for small files
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
from pandas.tools.plotting import scatter_matrix
import matplotlib.pyplot as plt
from pylab import rcParams
import seaborn as sb
address = 'file.txt' //File with 45 millions lines = 500 MB
test = pd.read_csv(address)
test.columns = ['Year','Data']
test.boxplot(column='Data', by = 'Year')
plt.show()
This is the error:
Traceback (most recent call last):
File "plot2.py", line 13, in <module>
test = pd.read_csv(address)
File "C:\Users\EYM\Desktop\web_scraping\venv\lib\site-packages\pandas\io\parsers.py", line 678, in parser_f
return _read(filepath_or_buffer, kwds)
File "C:\Users\EYM\Desktop\web_scraping\venv\lib\site-packages\pandas\io\parsers.py", line 446, in _read
data = parser.read(nrows)
File "C:\Users\EYM\Desktop\web_scraping\venv\lib\site-packages\pandas\io\parsers.py", line 1036, in read
ret = self._engine.read(nrows)
File "C:\Users\EYM\Desktop\web_scraping\venv\lib\site-packages\pandas\io\parsers.py", line 1848, in read
data = self._reader.read(nrows)
File "pandas\_libs\parsers.pyx", line 876, in pandas._libs.parsers.TextReader.read
File "pandas\_libs\parsers.pyx", line 919, in pandas._libs.parsers.TextReader._read_low_memory
File "pandas\_libs\parsers.pyx", line 2141, in pandas._libs.parsers._concatenate_chunks
MemoryError