First of all, let's solve this problem.
raw = {
"20211229": {
"00101": [
"비고101-1",
"비고101-2",
"비고101-3",
{0: [" UT1213K", "1", "11.00", "11", "2"]},
]
},
"20211230": {
"00102": [
"비고102-1",
"비고102-2",
"비고102-3",
{0: ["B 001", "2", "22.00", "44", "5"]},
]
},
"20211231": {
"00103": [
"비고103-1",
"비고103-2",
"비고103-3",
{
0: ["B 004", "10", "66.00", "660", "66"],
1: ["B 005", "20", "77.00", "1540", "154"],
2: ["B 006", "30", "88.00", "2640", "264"],
3: ["B 007", "40", "99.00", "3960", "396"],
},
],
"00104": [
"비고1",
"비고2",
"",
{
0: ["B 003", "3", "33.00", "99", "10"],
1: ["B 004", "4", "44.00", "176", "18"],
},
],
},
}
formatted_dict = []
for first_level_key, first_level_value in raw.items():
for second_level_key, second_level_value in first_level_value.items():
third_level_tmp_list = []
for third_level_value in second_level_value:
if isinstance(third_level_value, str):
third_level_tmp_list.append(third_level_value)
third_level_tmp_dict = dict(
zip(["remark1", "remark2", "remark3"], third_level_tmp_list)
)
for third_level_value in second_level_value:
if isinstance(third_level_value, dict):
for (
fourth_level_key,
fourth_level_value,
) in third_level_value.items():
new_record = {}
new_record.update(
{
"date": first_level_key,
"customer_code": second_level_key,
"item": fourth_level_key,
}
)
new_record.update(
dict(
zip(
[
"item_code",
"qty",
"unit_price",
"supply_price",
"tax_amount",
],
fourth_level_value,
)
)
)
new_record.update(third_level_tmp_dict)
formatted_dict.append(new_record)
print(formatted_dict)
result = pd.DataFrame(formatted_dict).set_index(
["date", "customer_code", "remark1", "remark2", "remark3"]
)[["item", "item_code", "qty", "unit_price", "supply_price", "tax_amount"]]
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
print(result)
Second, I suggest you do not construct raw data like that. I suggest you make your raw data like this.
[
{
"date": "20211229",
"customer_code": "00101",
"item": 0,
"item_code": " UT1213K",
"qty": "1",
"unit_price": "11.00",
"supply_price": "11",
"tax_amount": "2",
"remark1": "비고101-1",
"remark2": "비고101-2",
"remark3": "비고101-3",
},
{
"date": "20211230",
"customer_code": "00102",
"item": 0,
"item_code": "B 001",
"qty": "2",
"unit_price": "22.00",
"supply_price": "44",
"tax_amount": "5",
"remark1": "비고102-1",
"remark2": "비고102-2",
"remark3": "비고102-3",
},
{
"date": "20211231",
"customer_code": "00103",
"item": 0,
"item_code": "B 004",
"qty": "10",
"unit_price": "66.00",
"supply_price": "660",
"tax_amount": "66",
"remark1": "비고103-1",
"remark2": "비고103-2",
"remark3": "비고103-3",
},
{
"date": "20211231",
"customer_code": "00103",
"item": 1,
"item_code": "B 005",
"qty": "20",
"unit_price": "77.00",
"supply_price": "1540",
"tax_amount": "154",
"remark1": "비고103-1",
"remark2": "비고103-2",
"remark3": "비고103-3",
},
{
"date": "20211231",
"customer_code": "00103",
"item": 2,
"item_code": "B 006",
"qty": "30",
"unit_price": "88.00",
"supply_price": "2640",
"tax_amount": "264",
"remark1": "비고103-1",
"remark2": "비고103-2",
"remark3": "비고103-3",
},
{
"date": "20211231",
"customer_code": "00103",
"item": 3,
"item_code": "B 007",
"qty": "40",
"unit_price": "99.00",
"supply_price": "3960",
"tax_amount": "396",
"remark1": "비고103-1",
"remark2": "비고103-2",
"remark3": "비고103-3",
},
{
"date": "20211231",
"customer_code": "00104",
"item": 0,
"item_code": "B 003",
"qty": "3",
"unit_price": "33.00",
"supply_price": "99",
"tax_amount": "10",
"remark1": "비고1",
"remark2": "비고2",
"remark3": "",
},
{
"date": "20211231",
"customer_code": "00104",
"item": 1,
"item_code": "B 004",
"qty": "4",
"unit_price": "44.00",
"supply_price": "176",
"tax_amount": "18",
"remark1": "비고1",
"remark2": "비고2",
"remark3": "",
},
]
If your raw data like this, You could just get your result simply like this.
result = pd.DataFrame(raw).set_index(
["date", "customer_code", "remark1", "remark2", "remark3"]
)[["item", "item_code", "qty", "unit_price", "supply_price", "tax_amount"]]