0

I want to access the values in the dict, but I cannot do response["establishments"] because the value that was returned has a strange b prefix in front of it. How can I get rid of the b prefix so that I can access the dict?

b'{"establishments":[{"FHRSID":775937,"ChangesByServerID":0,"LocalAuthorityBusinessID":"13/00068/COMM","BusinessName":"Lexington Catering - Fidessa Restaurant","BusinessType":"Restaurant/Cafe/Canteen","BusinessTypeID":1,"AddressLine1":"Block E First To Seventh Floors Dukes Court Duke Street Woking Surrey","AddressLine2":"","AddressLine3":"","AddressLine4":"","PostCode":"GU21 5BH","Phone":"","RatingValue":"5","RatingKey":"fhrs_5_cy-gb","RatingDate":"2020-01-10T00:00:00","LocalAuthorityCode":"315","LocalAuthorityName":"Woking","LocalAuthorityWebSite":"http://www.woking.gov.uk","LocalAuthorityEmailAddress":"emma.bourne@woking.gov.uk","scores":{"Hygiene":0,"Structural":0,"ConfidenceInManagement":0},"SchemeType":"FHRS","geocode":{"longitude":"-0.554158","latitude":"51.320771"},"RightToReply":"","Distance":null,"NewRatingPending":false,"meta":{"dataSource":null,"extractDate":"0001-01-01T00:00:00","itemCount":0,"returncode":null,"totalCount":0,"totalPages":0,"pageSize":0,"pageNumber":0},"links":[{"rel":"self","href":"https://api.ratings.food.gov.uk/establishments/775937"}]},{"FHRSID":1474143,"ChangesByServerID":0,"LocalAuthorityBusinessID":"22/00013/COMM","BusinessName":"Duke\'s Bar & Deli","BusinessType":"Other catering premises","BusinessTypeID":7841,"AddressLine1":"Dukes Bar And Deli 3 Duke Street Woking Surrey","AddressLine2":"","AddressLine3":"","AddressLine4":"","PostCode":"GU21 
5BH","Phone":"","RatingValue":"4","RatingKey":"fhrs_4_cy-gb","RatingDate":"2022-02-24T00:00:00","LocalAuthorityCode":"315","LocalAuthorityName":"Woking","LocalAuthorityWebSite":"http://www.woking.gov.uk","LocalAuthorityEmailAddress":"emma.bourne@woking.gov.uk","scores":{"Hygiene":10,"Structural":0,"ConfidenceInManagement":10},"SchemeType":"FHRS","geocode":{"longitude":null,"latitude":null},"RightToReply":"","Distance":null,"NewRatingPending":false,"meta":{"dataSource":null,"extractDate":"0001-01-01T00:00:00","itemCount":0,"returncode":null,"totalCount":0,"totalPages":0,"pageSize":0,"pageNumber":0},"links":[{"rel":"self","href":"https://api.ratings.food.gov.uk/establishments/1474143"}]}],"meta":{"dataSource":"Lucene","extractDate":"2022-09-03T00:50:19.6362148+01:00","itemCount":2,"returncode":"OK","totalCount":2,"totalPages":1,"pageSize":5000,"pageNumber":1},"links":[{"rel":"self","href":"https://api.ratings.food.gov.uk/establishments?address=gu21%205bh"}]}'

here is my code

# Request headers passed to requests.get() by get_web_page_content() below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    "Upgrade-Insecure-Requests": "1",
    "DNT": "1",
    # "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "cy-GB",
    # NOTE(review): "br" is advertised here; presumably the body can only be
    # decoded if optional Brotli support is installed -- confirm.
    "Accept-Encoding": "gzip, deflate, br",
    "x-api-version": "2",
    # 'Content-Type': 'application/json'
}

def get_web_page_content(post_code):
    """Fetch the establishments search result for ``post_code``.

    The value is interpolated into the ``address`` query parameter of the
    request URL, and the request is sent with the module-level ``headers``.

    Returns:
        The parsed JSON document, but only if the response body starts with
        one of the byte-order marks listed in ``BOMS``. Otherwise the raw
        ``response.content`` (a ``bytes`` object) is returned unchanged.
    """
    print('post_code', post_code)
    # Byte-order-mark prefixes that are stripped from the body before parsing.
    BOMS = [
        codecs.BOM,
        codecs.BOM_BE,
        codecs.BOM_LE,
        codecs.BOM_UTF8,
        codecs.BOM_UTF16,
        codecs.BOM_UTF16_BE,
        codecs.BOM_UTF16_LE,
        codecs.BOM_UTF32,
        codecs.BOM_UTF32_BE,
        codecs.BOM_UTF32_LE,
    ]

    url = rf'https://api.ratings.food.gov.uk/Establishments?address={post_code}'
    # Fixed 5-second pause before every request.
    time.sleep(5)
    response = requests.get(url, headers=headers)
    # .content is the undecoded body, i.e. bytes -- not text and not a dict.
    data = response.content

    # json.loads() is only reached when a BOM prefix matches; if none does,
    # the loop ends without touching ``data`` and the bytes are returned as-is.
    for BOM in BOMS:
        if data.startswith(BOM):
            data = json.loads(data[len(BOM):])
            break
    return data


# Example run: look up one postcode and print whatever the function returned.
if __name__ == '__main__':
    response = get_web_page_content('GU21 5BH')
    print('response', response)

Code Ninja
  • 157
  • 4
  • 13
  • The `data` does not start with any of the `BOM`s, because **it is not a string** (this is the first thing that should be learned about a) how the Internet works; b) Python 3.x's **correct** treatment of bytes vs text). You are **not supposed to** decode the JSON yourself - that's one of the most important selling points of the Requests library, and something [clearly shown as an example on the front page of the documentation](https://requests.readthedocs.io/en/latest/), in the first code block, right at the top of the page. – Karl Knechtel Sep 03 '22 at 00:13

3 Answers3

1

The "strange" b'...' prefix means you have a bytes object - that is what response.content returns. To parse the JSON body into a dict, use response.json():

import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    "Upgrade-Insecure-Requests": "1",
    "DNT": "1",
    "Accept-Language": "cy-GB",
    "Accept-Encoding": "gzip, deflate, br",
    "x-api-version": "2",
}


def get_web_page_content(post_code):
    """Search the ratings API for establishments matching ``post_code``.

    Args:
        post_code: Address text to search for, e.g. ``"GU21 5BH"``.

    Returns:
        The decoded JSON body as a ``dict`` (e.g. with an
        ``"establishments"`` key), not raw bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors, timeouts, or a body
            that is not valid JSON.
    """
    print("post_code", post_code)

    url = "https://api.ratings.food.gov.uk/Establishments"
    # Let Requests build the query string so spaces, "&", "#", etc. in the
    # search text are URL-encoded instead of corrupting the URL.
    response = requests.get(
        url,
        headers=headers,
        params={"address": post_code},
        timeout=30,  # without a timeout a stalled server blocks forever
    )
    # Fail loudly on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    return response.json()  # <-- use .json() here to parse JSON response


# Demo entry point: query a single postcode and dump the parsed result.
if __name__ == "__main__":
    payload = get_web_page_content("GU21 5BH")
    print("response", payload)

Prints:

post_code GU21 5BH
response {'establishments': [{'FHRSID': 775937, 'ChangesByServerID': 0, 'LocalAuthorityBusinessID': '13/00068/COMM', 'BusinessName': 'Lexington Catering - Fidessa Restaurant', 'BusinessType': 'Restaurant/Cafe/Canteen', 'BusinessTypeID': 1, 'AddressLine1': 'Block E First To Seventh Floors Dukes Court Duke Street Woking Surrey', 'AddressLine2': '', 'AddressLine3': '', 'AddressLine4': '', 'PostCode': 'GU21 5BH', 'Phone': '', 'RatingValue': '5', 'RatingKey': 'fhrs_5_cy-gb', 'RatingDate': '2020-01-10T00:00:00', 'LocalAuthorityCode': '315', 'LocalAuthorityName': 'Woking', 'LocalAuthorityWebSite': 'http://www.woking.gov.uk', 'LocalAuthorityEmailAddress': 'emma.bourne@woking.gov.uk', 'scores': {'Hygiene': 0, 'Structural': 0, 'ConfidenceInManagement': 0}, 'SchemeType': 'FHRS', 'geocode': {'longitude': '-0.554158', 'latitude': '51.320771'}, 'RightToReply': '', 'Distance': None, 'NewRatingPending': False, 'meta': {'dataSource': None, 'extractDate': '0001-01-01T00:00:00', 'itemCount': 0, 'returncode': None, 'totalCount': 0, 'totalPages': 0, 'pageSize': 0, 'pageNumber': 0}, 'links': [{'rel': 'self', 'href': 'https://api.ratings.food.gov.uk/establishments/775937'}]}, {'FHRSID': 1474143, 'ChangesByServerID': 0, 'LocalAuthorityBusinessID': '22/00013/COMM', 'BusinessName': "Duke's Bar & Deli", 'BusinessType': 'Other catering premises', 'BusinessTypeID': 7841, 'AddressLine1': 'Dukes Bar And Deli 3 Duke Street Woking Surrey', 'AddressLine2': '', 'AddressLine3': '', 'AddressLine4': '', 'PostCode': 'GU21 5BH', 'Phone': '', 'RatingValue': '4', 'RatingKey': 'fhrs_4_cy-gb', 'RatingDate': '2022-02-24T00:00:00', 'LocalAuthorityCode': '315', 'LocalAuthorityName': 'Woking', 'LocalAuthorityWebSite': 'http://www.woking.gov.uk', 'LocalAuthorityEmailAddress': 'emma.bourne@woking.gov.uk', 'scores': {'Hygiene': 10, 'Structural': 0, 'ConfidenceInManagement': 10}, 'SchemeType': 'FHRS', 'geocode': {'longitude': None, 'latitude': None}, 'RightToReply': '', 'Distance': None, 'NewRatingPending': False, 
'meta': {'dataSource': None, 'extractDate': '0001-01-01T00:00:00', 'itemCount': 0, 'returncode': None, 'totalCount': 0, 'totalPages': 0, 'pageSize': 0, 'pageNumber': 0}, 'links': [{'rel': 'self', 'href': 'https://api.ratings.food.gov.uk/establishments/1474143'}]}], 'meta': {'dataSource': 'Lucene', 'extractDate': '2022-09-03T00:59:03.9485729+01:00', 'itemCount': 2, 'returncode': 'OK', 'totalCount': 2, 'totalPages': 1, 'pageSize': 5000, 'pageNumber': 1}, 'links': [{'rel': 'self', 'href': 'https://api.ratings.food.gov.uk/establishments?address=gu21%205bh'}]}
Andrej Kesely
  • 168,389
  • 15
  • 48
  • 91
1

You can access the response in different formats, e.g. bytes, JSON or raw. In your case you are reading it as bytes (response.content), which is why the result is a bytes object that is printed with a b prefix. If you want to access it with dict-like syntax, i.e. response["establishments"], then you need to use response.json().

Like,

import requests
# Request headers sent with every API call.
# "br" is left out of Accept-Encoding on purpose: Requests can decode a
# Brotli-compressed body only if an optional Brotli library is installed,
# while gzip and deflate are always handled automatically.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36",
    "Upgrade-Insecure-Requests": "1",
    "DNT": "1",
    # "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "cy-GB",
    "Accept-Encoding": "gzip, deflate",
    "x-api-version": "2",
    #'Content-Type': 'application/json'
}

def get_web_page_content(post_code):
    """Return the parsed establishments search result for ``post_code``.

    Args:
        post_code: Address text to search for, e.g. ``'GU21 5BH'``.

    Returns:
        The JSON response decoded into a ``dict``, so values can be read
        with ``result["establishments"]``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors, timeouts, or a body
            that is not valid JSON.
    """
    print('post_code', post_code)

    url = 'https://api.ratings.food.gov.uk/Establishments'
    # Hand the search text to Requests as a query parameter so that it is
    # URL-encoded properly rather than pasted into the URL verbatim.
    response = requests.get(
        url,
        headers=headers,
        params={'address': post_code},
        timeout=30,  # never wait indefinitely for the server
    )
    # Surface HTTP errors directly instead of parsing an error body.
    response.raise_for_status()
    data = response.json()
    return data

# Demo entry point: fetch one postcode, then show the whole result and the
# "establishments" list taken from it.
if __name__ == '__main__':
    payload = get_web_page_content('GU21 5BH')
    print('response', payload)
    print('establishments', payload["establishments"])
A l w a y s S u n n y
  • 36,497
  • 8
  • 60
  • 103
0

You can get the parsed JSON from the response object's json method:

# `response` is the object returned by requests.get(). json is a method, so it
# has to be called; without the parentheses this prints the bound method
# instead of the decoded data.
print("response", response.json())

The strange b prefix indicates that the value is a bytes object.

bichanna
  • 954
  • 2
  • 21