NOTES fragment below causes an error from .\Anaconda3\pandas\core\indexes\base.py) " 'NoneType' object has no attribute 'keys'" (see listing below)
if method is None:
if tolerance is not None:
raise ValueError(
"tolerance argument only valid if using pad, "
"backfill or nearest lookups"
)
casted_key = self._maybe_cast_indexer(key)
try:
return self._engine.get_loc(casted_key)
except KeyError as err:
raise KeyError(key) from err
except TypeError:
# If we have a listlike key, _check_indexing_error will raise
# InvalidIndexError. Otherwise we fall through and re-raise
# the TypeError.
self._check_indexing_error(key)
raise
When the casted_key can't be read from a dictionary of Nonetype, the code fails. (from base.py)
PROBLEM: In a json file, there can be multiple levels. When parsing a dictionary inside of a list, the code errors out because it cannot pull the casted_key from the level required as it is null at that lower level. The dictionaries are cast inside of a list or a tuple. Would a lambda be written to get around this null value while pulling the df[columns - sql_type - data]?
What would that lambda look like?
.apply(lambda x: strorNan(k,v))
with open(r'D:\\meta.json', encoding='utf-16') as data_file:
data = json.loads(data_file.read())
df=pd.json_normalize(data)
result=pd.concat([pd.json_normalize(df['columns'][k]['sql_type'], 'data') for k in df['columns'].keys())
JSON STRUCTURE: SAMPLE FILE
{
"^o":"MetaDataTable",
"filename":"meta.csv",
"schema":"meta",
"tablename":"meta",
"headers":[
"fidped"
],
"num_rows":6,
"num_cols":3,
"columns":{
"fidped":{
"^o":"MetaDataColumn",
"name":"fidped",
"number":27,
"sql_type":{
"^o":"SQLDate",
"data":[
{
"^O":"Date",
"year":2019,
"month":12,
"day":31,
"start":2299161.0},
null,
{
"^O":"Date",
"year":2021,
"month":9,
"day":30,
"start":2299161.0},
{
"^O":"Date",
"year":2022,
"month":9,
"day":30,
"start":2299161.0},
{
"^O":"Date",
"year":2011,
"month":12,
"day":31,
"start":2299161.0},
{
"^O":"Date",
"year":2020,
"month":6,
"day":30,
"start":2299161.0},
{
"^O":"Date",
"year":2016,
"month":12,
"day":31,
"start":2299161.0}
],
"nullable":true,
"bytes":4,
"type":"date"
},
"rows":6,
"min":null,
"max":null,
"distribution":{
"^#2d70":[
{
"^O":"Date",
"year":2019,
"month":12,
"day":31,
"start":2299161.0},
12
],
"^#2d71":[
null,
360
],
"^#2d72":[
{
"^O":"Date",
"year":2021,
"month":9,
"day":30,
"start":2299161.0},
43
],
"^#2d73":[
{
"^O":"Date",
"year":2022,
"month":9,
"day":30,
"start":2299161.0},
59
],
"^#2d74":[
{
"^O":"Date",
"year":2011,
"month":12,
"day":31,
"start":2299161.0},
115
],
"^#2d75":[
{
"^O":"Date",
"year":2010,
"month":6,
"day":30,
"start":2299161.0},
2
]
},
"num_distinct":6,
"distinct":[ {
"^O":"Date",
"year":2001,
"month":2,
"day":28,
"start":2299161.0},
{
"^O":"Date",
"year":2003,
"month":8,
"day":31,
"start":2299161.0},
{
"^O":"Date",
"year":2004,
"month":6,
"day":30,
"start":2299161.0},
{
"^O":"Date",
"year":2004,
"month":11,
"day":30,
"start":2299161.0},
{
"^O":"Date",
"year":2005,
"month":2,
"day":28,
"start":2299161.0},
{
"^O":"Date",
"year":2005,
"month":4,
"day":30,
"start":2299161.0}
]
}
}
}
POSSIBLE SOLUTION - in need of a method to circumvent the base.py which expects the dictionary object being iterated to not be null (nonetype):
def transformMDY(x):
for k,v in x.items():
if isinstance(v, dict):
x['month'] =x['month'].astype(str)
x['month'] =x['month'].apply(lambda x: fullColumnDate(x))
x['day'] =x['day'].astype(str)
x['day'] =x['day'].apply(lambda x: fullColumnDate(x))
dtMerge=pd.DataFrame()
dfyr=[i for i in x['year']]
dfmo=[i for i in x['month']]
dfdy=[i for i in x['day']]
dfyr=pd.DataFrame(dfyr)
dfmo=pd.DataFrame(dfmo)
dfdy=pd.DataFrame(dfdy)
dtMerge['year']=dfyr
dtMerge['month']=dfmo
dtMerge['day']=dfdy
dtMerge['Full_Date'] = pd.to_datetime(dtMerge['day'].astype("str")+"/"+dtMerge['month'].astype("str")+"/"+dtMerge["year"].astype("str"), format = "%d/%m/%Y")
dtMerge.drop('year',axis=1,inplace=True)
dtMerge.drop('month',axis=1,inplace=True)
dtMerge.drop('day',axis=1,inplace=True)
return dtMerge
else:
v="1970-01-01"
return (x)
main()