3

JSON newbie here. Could you please help with parsing JSON files using R. I did try jsonlite & rjson, but keep getting errors.

Below is the data retrieved via the api.

data <- GET("http://svcs.ebay.com/services/search/FindingService/v1?OPERATION-NAME=findItemsByKeywords&SERVICE-VERSION=1.0.0&SECURITY-APPNAME=GLOBAL-ID=EBAY-US&RESPONSE-DATA-FORMAT=JSON&callback=_cb_findItemsByKeywords&REST-PAYLOAD&keywords=harry%20potter&paginationInput.entriesPerPage=10")

The JSON looks like this:

/**/_cb_findItemsByKeywords({
   "findItemsByKeywordsResponse":[
      {
         "ack":[
            "Success"
         ],
         "version":[
            "1.13.0"
         ],
         "timestamp":[
            "2016-01-29T16:36:25.984Z"
         ],
         "searchResult":[
            {
               "@count":"1",
               "item":[
                  {
                     "itemId":[
                        "371533364795"
                     ],
                     "title":[
                        "Harry Potter: Complete 8-Film Collection (DVD, 2011, 8-Disc Set)"
                     ],
                     "globalId":[
                        "EBAY-US"
                     ],
                     "primaryCategory":[
                        {
                           "categoryId":[
                              "617"
                           ],
                           "categoryName":[
                              "DVDs & Blu-ray Discs"
                           ]
                        }
                     ],
                     "galleryURL":[
                        "http:\/\/thumbs4.ebaystatic.com\/m\/mn5Agt0HFD89L7_-lqfrZZw\/140.jpg"
                     ],
                     "viewItemURL":[
                        "http:\/\/www.ebay.com\/itm\/Harry-Potter-Complete-8-Film-Collection-DVD-2011-8-Disc-Set-\/371533364795"
                     ],
                     "productId":[
                        {
                           "@type":"ReferenceID",
                           "__value__":"110258144"
                        }
                     ],
                     "paymentMethod":[
                        "PayPal"
                     ],
                     "autoPay":[
                        "false"
                     ],
                     "postalCode":[
                        "60131"
                     ],
                     "location":[
                        "Franklin Park,IL,USA"
                     ],
                     "country":[
                        "US"
                     ],
                     "shippingInfo":[
                        {
                           "shippingServiceCost":[
                              {
                                 "@currencyId":"USD",
                                 "__value__":"0.0"
                              }
                           ],
                           "shippingType":[
                              "FlatDomesticCalculatedInternational"
                           ],
                           "shipToLocations":[
                              "US",
                              "CA",
                              "GB",
                              "AU",
                              "AT",
                              "BE",
                              "FR",
                              "DE",
                              "IT",
                              "JP",
                              "ES",
                              "TW",
                              "NL",
                              "CN",
                              "HK",
                              "MX",
                              "DK",
                              "RO",
                              "SK",
                              "BG",
                              "CZ",
                              "FI",
                              "HU",
                              "LV",
                              "LT",
                              "MT",
                              "EE",
                              "GR",
                              "PT",
                              "CY",
                              "SI",
                              "SE",
                              "KR",
                              "ID",
                              "ZA",
                              "TH",
                              "IE",
                              "PL",
                              "RU",
                              "IL"
                           ],
                           "expeditedShipping":[
                              "false"
                           ],
                           "oneDayShippingAvailable":[
                              "false"
                           ],
                           "handlingTime":[
                              "1"
                           ]
                        }
                     ],
                     "sellingStatus":[
                        {
                           "currentPrice":[
                              {
                                 "@currencyId":"USD",
                                 "__value__":"26.95"
                              }
                           ],
                           "convertedCurrentPrice":[
                              {
                                 "@currencyId":"USD",
                                 "__value__":"26.95"
                              }
                           ],
                           "sellingState":[
                              "Active"
                           ],
                           "timeLeft":[
                              "P16DT3H12M6S"
                           ]
                        }
                     ],
                     "listingInfo":[
                        {
                           "bestOfferEnabled":[
                              "false"
                           ],
                           "buyItNowAvailable":[
                              "false"
                           ],
                           "startTime":[
                              "2016-01-15T19:43:31.000Z"
                           ],
                           "endTime":[
                              "2016-02-14T19:48:31.000Z"
                           ],
                           "listingType":[
                              "StoreInventory"
                           ],
                           "gift":[
                              "false"
                           ]
                        }
                     ],
                     "returnsAccepted":[
                        "true"
                     ],
                     "condition":[
                        {
                           "conditionId":[
                              "1000"
                           ],
                           "conditionDisplayName":[
                              "Brand New"
                           ]
                        }
                     ],
                     "isMultiVariationListing":[
                        "false"
                     ],
                     "topRatedListing":[
                        "true"
                     ]
                  }
               ]
            }
         ],
         "paginationOutput":[
            {
               "pageNumber":[
                  "1"
               ],
               "entriesPerPage":[
                  "1"
               ],
               "totalPages":[
                  "138112"
               ],
               "totalEntries":[
                  "138112"
               ]
            }
         ],
         "itemSearchURL":[
            "http:\/\/www.ebay.com\/sch\/i.html?_nkw=harry+potter&_ddo=1&_ipg=1&_pgn=1"
         ]
      }
   ]
})
Jeroen Ooms
  • 31,998
  • 35
  • 134
  • 207
J1975
  • 79
  • 5

2 Answers2

4

The problem is that your data is not json, but it is JavaScript, jsonp to be exactly. If you just want to parse the JSON data you have to strip off the padding callback function.

req <- httr::GET("http://svcs.ebay.com/services/search/FindingService/v1?OPERATION-NAME=findItemsByKeywords&SERVICE-VERSION=1.0.0&SECURITY-APPNAME=YOUR-APP-123456&GLOBAL-ID=EBAY-US&RESPONSE-DATA-FORMAT=JSON&callback=_cb_findItemsByKeywords&REST-PAYLOAD&keywords=harry%20potter&paginationInput.entriesPerPage=10")
txt <- content(req, "text")
json <- sub("/**/_cb_findItemsByKeywords(", "", txt, fixed = TRUE)
json <- sub(")$", "", json)
mydata <- jsonlite::fromJSON(json)

Extra credit: alternatively you could use an actual JavaScript engine to parse the JavaScript:

library(V8)
ctx <- V8::v8()
ctx$eval("var out;")
ctx$eval("function _cb_findItemsByKeywords(x){out = x;}")
ctx$source("http://svcs.ebay.com/services/search/FindingService/v1?OPERATION-NAME=findItemsByKeywords&SERVICE-VERSION=1.0.0&SECURITY-APPNAME=YOUR-APP-123456&GLOBAL-ID=EBAY-US&RESPONSE-DATA-FORMAT=JSON&callback=_cb_findItemsByKeywords&REST-PAYLOAD&keywords=harry%20potter&paginationInput.entriesPerPage=10")
mydata <- ctx$get("out")
Jeroen Ooms
  • 31,998
  • 35
  • 134
  • 207
  • That was so helpful. Thank you so very much. – J1975 Feb 02 '16 at 17:09
  • Found a link with additional info on JSONP. http://stackoverflow.com/questions/2067472/what-is-jsonp-all-about?rq=1 – J1975 Feb 02 '16 at 18:08
  • Can you please clarify what the "$" sign signifies in the second json statement. I understand you are trying to remove the ")" at the end, but wasn't sure of the usage of $. Please clarify. json <- sub(")$", "", json) – J1975 Feb 02 '16 at 18:31
  • 1
    Since the output of mydata is a list. I tried mydata2 <- jsonlite::stream_in(file("mydata")). But that didn't work. Please let me know if the next step is to flatten mydata, before stream_in, so I can get the data as a data.frame. Basically, I just need 2 the itemId & title fields that are nested within "item. – J1975 Feb 02 '16 at 19:33
-1

First, your json file seems to have a little issue. It should have started in the opening bracket "[".

I removed the text before it and I've tried this code, which worked perfectly:

library(rjson)
obj <- fromJSON(file = "v2.json")

That returned a list in obj with the contents of v2.json.

EDITED: Including a full functional soltion:

library(rjson)
library(stringr)
obj <- read.table("v2.json", sep = "\n", stringsAsFactors = FALSE, quote = "")

# Gets the first line with the string "[" ("\\" for scape)
firstline <- grep("\\[", obj[,1])[1]

# Gets the position of the string "[" in the line
fpos <- which(strsplit(obj[firstline, 1], "")[[1]] == "[")

# Gets the last line with the string "]"
lastline <- grep("\\]", obj[,1])
lastline <- lastline[length(lastline)]

# Gets the position of the string "]" in the line
lpos <- which(strsplit(obj[lastline, 1], "")[[1]] == "]")

# Changes the lines with the first "[" and the last "]" to keep the text
# between both (after "[" and before "]") if there is any.
obj[firstline, 1] <- str_sub(obj[firstline, 1], fpos)
obj[lastline, 1] <- str_sub(obj[lastline, 1], 1, lpos)

obj2 <- data.frame(obj[firstline:lastline, 1])
write.table(obj2, "v3.json", row.names = FALSE, col.names = FALSE, quote = FALSE)

obj3 <- fromJSON(file = "v3.json")
  • Hi Fernando - That is the data returned via the API. I may be able to use gsub and remove the initial characters up to the first square [, but then I will also need to delete the 2 braces at the end too '})'. – J1975 Jan 29 '16 at 19:08
  • Yeah, I think `gsub()` combined with a function that finds the positions of the characters "["and "]" will fit perfectly (maybe something like `which(strsplit(line, "")[[1]] == "[")` will work). – Fernando Macedo Jan 30 '16 at 19:55
  • Thanks Fernando. Could you please help explain the which statement that you provided. – J1975 Jan 30 '16 at 21:29
  • I made a mistake. It's not with `gsub()`, I was thinking in "stringr" function `str_sub()`. I edited the answer with a full solution to explain what I meant. – Fernando Macedo Jan 31 '16 at 16:03
  • @ Fernando. Thanks for the detailed explanation Fernando. However, I am still getting errors when I run the code. Note: I am using a .JSON file that has the unformatted text - that I have appended to my question. Please check. – J1975 Jan 31 '16 at 21:43