0

one of my sample record for Json:

{
  "viewerId": "Ext-04835139",
  "sid5": "269410578:2995631181:2211755370:3307088398:33879957",
  "firstHbTimems": 1.506283958371E12,
  "ipAddress": "74.58.57.31",
  "streamUrl": "https://dc3-ll-livedazn-dznlivejp.hs.llnwd.net/live/channel/1007/all/stream.m3u8?event_id=61824040049&h=c912885e2a69ffa7ea84f45dc18c004d",
  "asset": "[nlq9biy7trxl1cjceg70rogvd] Saints @ Panthers",
  "os": "IOS",
  "osVersion": "10.3.3",
  "deviceModel": "iPhone",
  "geoInfo": {
    "city": 63666,
    "state": 3851,
    "isp": 120,
    "longitudeTimes1K": -73562,
    "country": 37,
    "dma": 0,
    "asn": 5769,
    "latitudeTimes1K": 45502,
    "publicIP": 1245329695
  },
  "totalPlayingTime": 4.097,
  "totalBufferingTime": 0.0,
  "VST": 1.411,
  "avgBitrate": 202.0,
  "playStateSwitch": [
    "{'seqNum': 0, 'eventNum': 0, 'sessionTimeMs': 7, 'startPlayState': 'eUnknown', 'endPlayState': 'eBuffering'}",
    "{'seqNum': 1, 'eventNum': 5, 'sessionTimeMs': 1411, 'startPlayState': 'eBuffering', 'endPlayState': 'ePlaying'}"
  ],
  "bitrateSwitch": [

  ],
  "errorEvent": [

  ],
  "tags": {
    "LSsportName": "Football",
    "c3.device.model": "iPhone+6+Plus",
    "LSvideoType": "LIVE",
    "c3.device.ua": "DAZN%2F5560+CFNetwork%2F811.5.4+Darwin%2F16.7.0",
    "LSfixtureId": "5trxst8tv7slixckvawmtf949",
    "genre": "Sport",
    "LScompetitionName": "NFL+Game+Pass",
    "show": "NFL+Game+Pass",
    "c3.cmp.0._type": "DEVATLAS",
    "c3.protocol.type": "cws",
    "LSsportId": "9ita1e50vxttzd1xll3iyaulu",
    "stageId": "8hm0ew6b8m7907ty8vy8tu4tl",
    "LSvenueId": "na",
    "syndicator": "None",
    "applicationVersion": "2.0.8",
    "deviceConnectionType": "wifi",
    "c3.client.marketingName": "iPhone+6+Plus",
    "playerVersion": "1.2.6.0",
    "c3.cmp.0._id": "da",
    "drmType": "AES128",
    "c3.sh": "dc3-ll-livedazn-dznlivejp.hs.llnwd.net",
    "c3.pt.ver": "10.3.3",
    "applicationType": "ios",
    "c3.viewer.id": "Ext-04835139",
    "LSinterfaceLanguage": "en",
    "c3.pt.os": "IOS",
    "playerVendor": "Open+Source",
    "c3.client.brand": "Apple",
    "c3.cws.sf": "7",
    "c3.cmp.0._ver": "1",
    "c3.client.hwType": "Mobile+Phone",
    "c3.pt.os.ver": "10.3.3",
    "isAd": "false",
    "c3.device.cver.bld": "2.124.0.33357",
    "stageName": "Regular+Season",
    "c3.client.osName": "iOS",
    "contentType": "Live",
    "c3.device.cver": "2.124.0",
    "LScompetitionId": "wy3kluvb4efae1of0d8146c1",
    "expireDate": "na",
    "c3.client.model": "iPhone+6+Plus",
    "c3.client.manufacturer": "Apple",
    "LSproductionValue": "na",
    "pubDate": "2017-09-23",
    "c3.cluster.name": "production",
    "accountType": "FreeTrial",
    "c3.adaptor.type": "eCws1_7",
    "c3.device.brand": "iPhone",
    "c3.pt.br": "Non-Browser+Apps",
    "contentId": "nlq9biy7trxl1cjceg70rogvd",
    "streamingProtocol": "FairPlay",
    "LSvenueName": "na",
    "c3.device.type": "Mobile",
    "c3.protocol.level": "2.4",
    "c3.player.name": "AVPlayer",
    "contentName": "Saints+%40+Panthers",
    "c3.device.manufacturer": "Apple",
    "c3.framework": "AVFoundation",
    "c3.pt": "iOS",
    "c3.device.ver": "6+Plus",
    "c3.video.isLive": "T",
    "c3.cmp.0._cfg_ver": "1504808821",
    "c3.cws.clv": "2.124.0.33357",
    "LScountryCode": "America%2FEl_Salvador"
  },
  "playername": "AVPlayer",
  "isLive": "T",
  "playerVersion": "1.2.6.0"
}

How to create jsonpath file to load it in redshift ?

Thanks

alexander.polomodov
  • 5,396
  • 14
  • 39
  • 46

2 Answers2

0

You have a nested array within your json - so a jsonpath will not expand that out for you.

You have a couple of choices on how to proceed:

  1. You can load your data at the higher level (e.g. playStateSwitch rather than seqNum within that) - and then try to use redshift to process that data. This can be tricky as you cannot explode json data from an array in redshift.
  2. You can preprocess the data using e.g. aws glue / python / pyspark or some other etl tool that can handle these nested arrays.
Jon Scott
  • 4,144
  • 17
  • 29
0

It all depends on the end goal, which is not clear form the above description. I will approach the solution in the following order

Define which fields and array values that are required to be loaded into the Redshift. If the need is to copy all the records then the next check is how to handle the multiple array records.

If array or key/value are missing as part of JSON source then JSONPath will not work as is - So, better to update the JSON to add the missing array prior to COPY the data set over to RS. The JSON update can be done using Linux commands or external tools like JP or refer additional reference

If all the values in the nested arrays are required then an alternative work around will be using external table an example

Otherwise, the JSONPATH file can be developed in this format

{
    "jsonpaths": [
       "$.viewerId", ///root level fields
        ...
       "$geoInfo.city", /// object hierarchy 
        ...
       "$playStateSwitch[0].seqNum" ///define the required array number
        ...
    ]
 }

Hope, this helps.

SunSmiles
  • 186
  • 9