I wanted to build a list of all the csv files in a folder and read them to a dataframe. So I followed this question and and was able to get all the csv files in a desired folder. Then when I attempt to read in each folder iteratively, I get an error that the specific folder doesn't exist.
import os
directory = os.path.join("/home/5G-production-dataset/5G-production-dataset/Download","Driving/")
print(directory) ## works
df_final = pd.DataFrame() ## each dataframe read will be concatenated to this
for root,dirs,files in os.walk(directory):
print(f"files = {files}") ## gives a list with all the filenames as elements
for file in files:
print(f"current file is {file} and the type is {type(file)}") ## prints the current file and dtype is string
if file.endswith(".csv"):
df = pd.read_csv(file) ## error that the files doess't exist
print(len(df))
df_final = pd.concat([df], axis = 0, ignore_index=True)
print(len(df_final))
I verified the filenames and it is from the list contains all the files that I want to read. The full error is
FileNotFoundError Traceback (most recent call last)
<ipython-input-17-65d96982fe32> in <module>
13 # f=open(file, 'r')
14 # perform calculation
---> 15 df = pd.read_csv(file)
16 print(len(df))
17 df_final = pd.concat([df], axis = 0, ignore_index=True)
~/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
683 )
684
--> 685 return _read(filepath_or_buffer, kwds)
686
687 parser_f.__name__ = name
~/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
455
456 # Create the parser.
--> 457 parser = TextFileReader(fp_or_buf, **kwds)
458
459 if chunksize or iterator:
~/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py in __init__(self, f, engine, **kwds)
893 self.options["has_index_names"] = kwds["has_index_names"]
894
--> 895 self._make_engine(self.engine)
896
897 def close(self):
~/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py in _make_engine(self, engine)
1133 def _make_engine(self, engine="c"):
1134 if engine == "c":
-> 1135 self._engine = CParserWrapper(self.f, **self.options)
1136 else:
1137 if engine == "python":
~/anaconda3/lib/python3.7/site-packages/pandas/io/parsers.py in __init__(self, src, **kwds)
1915 kwds["usecols"] = self.usecols
1916
-> 1917 self._reader = parsers.TextReader(src, **kwds)
1918 self.unnamed_cols = self._reader.unnamed_cols
1919
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.__cinit__()
pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._setup_parser_source()
FileNotFoundError: [Errno 2] File b'B_2019.12.16_14.23.32.csv' does not exist: b'B_2019.12.16_14.23.32.csv'
Could this be causing as a result of the filenames having multiple dot (.) symbols ? I have a lot of files and can't check them manually.