0

I'm reading a JSON file in spark but the dataframe output keeps giving me a table with all null values when I use df.show()....................... I was just wondering if there's something with my structure schema? Or maybe I'm missing something? I've also attached the json data below. Any help would be appreciate it.....................................

      discount_type = StructType([StructField("amount", IntegerType(), True),
                                    StructField("description", StringType(), True)
                                    ])
    
        child_item_type = StructType([StructField("InsuranceNumber", StringType(), True),
                                      StructField("InsuranceLabel", StringType(), True),
                                      StructField("Insurancequantity", DoubleType(), True),
                                      StructField("Insuranceprice", IntegerType(), True),
                                      StructField("discountsreceived",discount_type , True),
                                      ])
    
        item_type = StructType([StructField("InsuranceNumber", StringType(), True),
                                StructField("InsuranceLabel", StringType(), True),
                                StructField("Insurancequantity", IntegerType(), True),
                                StructField("Insuranceprice", IntegerType(), True),
                                StructField("discountsreceived", discount_type, True),
                                StructField("childItems",child_item_type , True),
                                ])
    
        order_paid_type = StructType([StructField("Insuranceid", StringType(), True),
                                      StructField("Insurancedesc", StringType(), True),
                                      StructField("purchaseditems", item_type, True),
                                      ])
    
        message_type = StructType([StructField("PaidIn", order_paid_type, True)])
    
        data_type = StructType([StructField("Client", message_type, True)])
    
        body_type = StructType([StructField("id", StringType(), True),
                                StructField("InsuranceProvider", StringType(), True),
                                StructField("Type", data_type, True),
                                StructField("eventTime", StringType(), True),
                                ])
        [
    
    
df = spark.read.schema(body_type).json(INPUT_FILE)

[
    {
      "id": "164651478631223455788978942317",
      "InsuranceProvider": "Embroker",
      "Type": {
        "Client": {
          "PaidIn": {
            "Insuranceid": "97331549875122744335422",
            "Insurancedesc": "Magic happens here",
            "purchaseditems": [
              {
                "InsuranceNumber": "1",
                "InsuranceLabel": "DNO",
                "Insurancequantity": 1,
                "Insuranceprice": 345,
                "discountsreceived": [
                  {
                    "amount": 495,
                    "description": "Item 1, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 2495,
                        "description": "Insurance item 1, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "2",
                "InsuranceLabel": "LPL",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": -295,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 495,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "3",
                "InsuranceLabel": "LPL",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 295,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 400,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "4",
                "InsuranceLabel": "LPL",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 295,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 335,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "5",
                "InsuranceLabel": "Employment Practices Liability",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 1295,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 195,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "6",
                "InsuranceLabel": "Employment Practices Liability",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 805,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 501,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "7",
                "InsuranceLabel": "Employment Practices Liability",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 521,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 533,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "8",
                "InsuranceLabel": "Employment Practices Liability",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 422,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 333,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "9",
                "InsuranceLabel": "Employment Practices Liability",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 444,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 666,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              },
              {
                "InsuranceNumber": "10",
                "InsuranceLabel": "DNO",
                "Insurancequantity": 2,
                "Insuranceprice": 945,
                "discountsreceived": [
                  {
                    "amount": 10,
                    "description": "Item 2, Discount 1"
                  }
                ],
                "childItems": [
                  {
                    "InsuranceNumber": "1",
                    "InsuranceLabel": "Cyber",
                    "Insurancequantity": 1,
                    "Insuranceprice": 0,
                    "discountsreceived": [
                      {
                        "amount": 63,
                        "description": "Insurance item 2, Discount 1"
                      }
                    ]
                  }
                ]
              }
            ]
          }
        }
      },
      "eventTime": "2020-05-19T01:59:10.379Z"
    }
  ]
thebluephantom
  • 16,458
  • 8
  • 40
  • 83
to3
  • 29
  • 5
  • you need `multiLine` option. https://stackoverflow.com/q/38545850/2956135 – Emma Aug 11 '22 at 19:17
  • spark.read.option("multiLine", True).option("mode", "PERMISSIVE").schema(body_type).json(INPUT_FILE)?? I still get the same null value but with only one row of data – to3 Aug 11 '22 at 19:24
  • Ah sorry missed it. You need to wrap `item_type` and other object within array with `ArrayType(item_type)`. There are 4 of them if I counted correctly. – Emma Aug 11 '22 at 20:13
  • I'm still null values unfortunately every after I wrap ArrayType() with all my object :( – to3 Aug 11 '22 at 21:26
  • 1
    Could you update the question with your latest code? Also make sure to wrap only for array of objects not all objects. `item_type`, `child_item_type`, 2 `discount_type`s. – Emma Aug 11 '22 at 21:27
  • Oh ok gotcha ! could you explain why I should wrap for only those objects? – to3 Aug 11 '22 at 21:42
  • 1
    JSON object map to `StructType` and JSON array map to `ArrayType` so if you have an array of object in JSON, you need `ArrayType(StructType())`. If you have an object of object, then `StructType(StructType())`. – Emma Aug 11 '22 at 21:53

0 Answers0