I have the following method that loads data from a CSV file stored in Google drive:
def load(self):
filename = 'temp_{}.csv'.format(uuid1())
self.driver.download_file(self.file_id, filename)
try:
data = LocalCsvHandler(
filename, self.delimiter, self.encoding,self.skipinitialspace).load()
os.remove(filename)
except UnicodeDecodeError as error:
os.remove(filename)
raise error
return data
class LocalCsvHandler:
def __init__(self, filename, delimiter, encoding, skipinitialspace=None):
self.filename = filename
self.delimiter = delimiter
self.encoding = encoding
self.skipinitialspace = skipinitialspace
def load(self):
data = []
with open(self.filename, encoding=self.encoding) as csvfile:
csvreader = DictReader(
csvfile, delimiter=self.delimiter,
skipinitialspace=self.skipinitialspace)
for row in csvreader:
data.append(row)
return data
After loaded, I generate a Pandas DataFramework with de ordered dict obtained from that previous method:
def _dataframe_from_ordered_dict(self, source_data: List[OrderedDict]) -> pd.DataFrame:
"""Genreates a Pandas DataFrame in case data loades as OrderdDict
Args:
source_data (List[OrderedDict]): Data loaded
Returns:
pd.DataFrame: Pandas DataFrame
"""
return pd.DataFrame([source_data[i] for i, j in enumerate(source_data)])
The problem is that when I list the names of the columns of the DataFrame, I get this list:
['\ufeffzipcode', 'primary_city','state']
As you can see, the name of the first column has some extra characters, so I can't reference it by its name.
How can I avoid loading the data with these characters?