I'm reading a JSON file in spark but the dataframe output keeps giving me a table with all null values when I use df.show()....................... I was just wondering if there's something with my structure schema? Or maybe I'm missing something? I've also attached the json data below. Any help would be appreciate it.....................................
discount_type = StructType([StructField("amount", IntegerType(), True),
StructField("description", StringType(), True)
])
child_item_type = StructType([StructField("InsuranceNumber", StringType(), True),
StructField("InsuranceLabel", StringType(), True),
StructField("Insurancequantity", DoubleType(), True),
StructField("Insuranceprice", IntegerType(), True),
StructField("discountsreceived",discount_type , True),
])
item_type = StructType([StructField("InsuranceNumber", StringType(), True),
StructField("InsuranceLabel", StringType(), True),
StructField("Insurancequantity", IntegerType(), True),
StructField("Insuranceprice", IntegerType(), True),
StructField("discountsreceived", discount_type, True),
StructField("childItems",child_item_type , True),
])
order_paid_type = StructType([StructField("Insuranceid", StringType(), True),
StructField("Insurancedesc", StringType(), True),
StructField("purchaseditems", item_type, True),
])
message_type = StructType([StructField("PaidIn", order_paid_type, True)])
data_type = StructType([StructField("Client", message_type, True)])
body_type = StructType([StructField("id", StringType(), True),
StructField("InsuranceProvider", StringType(), True),
StructField("Type", data_type, True),
StructField("eventTime", StringType(), True),
])
[
df = spark.read.schema(body_type).json(INPUT_FILE)
[
{
"id": "164651478631223455788978942317",
"InsuranceProvider": "Embroker",
"Type": {
"Client": {
"PaidIn": {
"Insuranceid": "97331549875122744335422",
"Insurancedesc": "Magic happens here",
"purchaseditems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "DNO",
"Insurancequantity": 1,
"Insuranceprice": 345,
"discountsreceived": [
{
"amount": 495,
"description": "Item 1, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 2495,
"description": "Insurance item 1, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "2",
"InsuranceLabel": "LPL",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": -295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 495,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "3",
"InsuranceLabel": "LPL",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 400,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "4",
"InsuranceLabel": "LPL",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 335,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "5",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 1295,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 195,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "6",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 805,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 501,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "7",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 521,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 533,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "8",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 422,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 333,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "9",
"InsuranceLabel": "Employment Practices Liability",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 444,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 666,
"description": "Insurance item 2, Discount 1"
}
]
}
]
},
{
"InsuranceNumber": "10",
"InsuranceLabel": "DNO",
"Insurancequantity": 2,
"Insuranceprice": 945,
"discountsreceived": [
{
"amount": 10,
"description": "Item 2, Discount 1"
}
],
"childItems": [
{
"InsuranceNumber": "1",
"InsuranceLabel": "Cyber",
"Insurancequantity": 1,
"Insuranceprice": 0,
"discountsreceived": [
{
"amount": 63,
"description": "Insurance item 2, Discount 1"
}
]
}
]
}
]
}
}
},
"eventTime": "2020-05-19T01:59:10.379Z"
}
]