-3

I am trying to extract some key-value pairs from the results of this code.

response.xpath('/html/body/script[contains(.,pageType)]').extract()

The result is of type "list" and I need to convert it to a dict. My initial approach was to convert it to a string, but then I found out I still cannot access the key-value pairs with something like a key["value"] approach.

It has been suggested that I find the pattern with a regex and then convert the match with json.loads to obtain a Python dict object. But I cannot seem to work out a regex that matches the pattern.

How should I isolate the key-value pairs with a regex (e.g. key "price" and value "34,95") and then store them in a dict? Thanks for your help!

<script type = "text/javascript" > dataLayer.push({
    "pageType": "productPage", "ecommerce": {
        "currencyCode": "EUR",
        "detail": {
            "actionField": {"list": "Detail", "action": "detail"},
            "products": [{
                "name": "Desodorante Spray Alien",
                "id": "10483558",
                "price": "34,95",
                "brand": "MUGLER",
                "category": "higiene\/desodorantes",
                "variant": "100 ML",
                "dimension5": "Mucho stock",
                "dimension6": "Unisex",
                "dimension7": "CLARINS SPAIN, S.A",
                "dimension8": "No",
                "dimension9": "",
                "metric2": 1
            }]
        },
        "impressions": [{
            "name": "Angel men recarga Eau de Toilette",
            "id": "10059432",
            "price": "47.95",
            "brand": "MUGLER",
            "category": "perfumes_hombre_edt",
            "variant": "100 ML ",
            "list": "you may want",
            "position": 1
        }, {
            "name": "Angel men rubber flask Eau de Toilette",
            "id": "10351154",
            "price": "42.95",
            "brand": "MUGLER",
            "category": "perfumes_hombre_edt",
            "variant": "50 ML ",
            "list": "you may want",
            "position": 2
        }, {
            "name": "Alien Shower Milk",
            "id": "10483565",
            "price": "26.00",
            "brand": "MUGLER",
            "category": "higiene_geles",
            "variant": "200 ML ",
            "list": "you may want",
            "position": 3
        }, {
            "name": "Amen Desodorante en Stick",
            "id": "10532706",
            "price": "21.95",
            "brand": "MUGLER",
            "category": "hombre_desodorantes",
            "variant": "75 ML ",
            "list": "you may want",
            "position": 4
        }]
    }
});
(window["rrApiOnReady"] = window["rrApiOnReady"] || []).push(function () {
    retailrocket.productsGroup.post({
        "groupId": 10483558,
        "name": "Desodorante Spray Alien",
        "price": 34.95,
        "pictureUrl": "https://ima.douglas.es/img/1467/desodorante_spray_alien-0-.png",
        "url": "https://douglas.es/p/mugler/desodorante_spray_alien",
        "isAvailable": true,
        "categoryPaths": ["Higiene/Corporal", "Corporal", "Corporal/Higiene", "Higiene", "Higiene/Desodorante", "Marca/Mugler"],
        "description": "El elixir de feminidad y de sensualidad del Eau de Parfum Alien en su versión desodorante en spray. Déjate envolver con los mismos acordes de la fragancia.",
        "vendor": "MUGLER",
        "products": {
            "10483558": {
                "isAvailable": true,
                "name": "Desodorante Spray Alien",
                "size": "100",
                "url": "https://douglas.es/p/mugler/desodorante_spray_alien",
                "pictureUrl": "https://ima.douglas.es/img/1467/desodorante_spray_alien-0-.png",
                "price": 34.95,
                "oldPrice": 34.95,
                "params": {}
            }
        },
        "params": {"medida": "ML", "subTitle": "Todo tipo de piel"},
        "model": "Desodorante Spray Alien",
        "typePrefix": "higiene_desodorantes",
        "oldPrice": 34.95
    });
    rrApi.groupView([10483558]);
});
App.page.webshop = "DOU";
App.page.warehouse = ["ALM"];
App.page.codPostal = "";
</script>
L_S_P
  • 5
  • 1

1 Answer

0

You can use this regex to capture the JSON string:

dataLayer\.push\((.+?)\);

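and then use Python to parse the contents of \1 (capture group 1), for example with json.loads. Because the JSON spans several lines, apply the regex with the DOTALL flag (re.DOTALL / re.S in Python) so that "." also matches newlines.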

https://regex101.com/r/cVXxRA/1/

MonkeyZeus
  • 20,375
  • 4
  • 36
  • 77