How to solve the KeyError: "None of [Index([....]) are in the [columns]"

Question

Here is my code, it may error and stop when running. Not always, maybe I get an error on the hundredth run. I do not understand what is the reason. I know that there is a problem when reading the csv file.

Desired result: I want to be able to at least use the try catch function and not stop the launch. In case of error, continue running.

import json
from datetime import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import os
import urllib.request
import urllib.parse
import smtplib
from requests.exceptions import ConnectionError
from requests.packages.urllib3.exceptions import MaxRetryError
from requests.packages.urllib3.exceptions import ProxyError as urllib3_ProxyError


def calc():
    json_url = "https://www.example.com/........"
    proxies = {
        'http': '......',
        'https': '.........',
    }
    headers = requests.utils.default_headers()
    headers.update(
        {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
            })

    try:
        s = requests.Session()
        response = s.get(json_url, proxies=proxies, headers=headers, timeout=15)
        soup = BeautifulSoup(response.content, 'html.parser')

        try:
            json.loads(soup.text)
            result = json.loads(soup.text)
            result = pd.json_normalize(result["listings"])
            result = result[['itemId', 'title', 'attributes', 'vipUrl',
                             'priceInfo.priceCents', 'priceInfo.priceType']]

            result = result.join(pd.json_normalize(result["attributes"]))

            result = result[['itemId', 'title', 'vipUrl', 'priceInfo.priceCents',
                             'priceInfo.priceType', 'constructionYear', 'mileage', 'fuel',
                             'transmission']]

            try:
                df = pd.read_csv(r'filecsv.csv')   #Maybe that's where the problem lies.
                df = df.rename(columns={'title': 'title_1', 'vipUrl': 'vipUrl_1', 'priceInfo.priceCents': 'pp',
                                        'priceInfo.priceType': 'dd',
                                        'constructionYear': 'Year_1', 'mileage': 'mileage_1', 'fuel': 'fuel_1',
                                        'transmission': 'transmission_1'})
                filter_df = result.merge(df, how='outer', on='itemId')
                filter_df = filter_df[filter_df['dd'].isnull()]
                filter_df = filter_df[['itemId', 'title', 'vipUrl', 'priceInfo.priceCents', 'priceInfo.priceType',
                                       'constructionYear', 'mileage', 'fuel', 'transmission']]

                Max_ID = df['itemId'].str.extract('(\d+)').astype('int').max()[0]
                Max_ID_Filter = filter_df['itemId'].str.extract('(\d+)').astype('int').max()[0]
                print(Max_ID_Filter, Max_ID)
                if (Max_ID_Filter > Max_ID):
                    print("Yes")
                    for index, row in filter_df.iterrows():
                        apiToken = '63691.....'
                        chatID =[630.......] 
                        for i in chatID:
                            apiURL = f'https://api.telegram.org/bot{apiToken}/sendMessage'

                            try:
                                msgg = 'Ok'
                                response = requests.post(apiURL, json={'chat_id':i, 'text':msgg})
                                
                            except Exception as e:
                                print(e)
                else:
                    print("No")

                df = df.rename(columns={'title_1': 'title', 'vipUrl_1': 'vipUrl', 'pp': 'priceInfo.priceCents',
                                        'dd': 'priceInfo.priceType',
                                        'Year_1': 'constructionYear', 'mileage_1': 'mileage', 'fuel_1': 'fuel',
                                        'transmission_1': 'transmission'})
                df = pd.concat([df, filter_df])
                df.to_csv("filecsv.csv", encoding='utf-8-sig', index=False)
            except KeyError:
                pass
            except:
                result.to_csv("filecsv.csv", encoding='utf-8-sig', index=False)

        except json.JSONDecodeError as e:
            print(e.msg, e)
            print(repr(soup))
            print(len(soup))
    except ConnectionError as ce:
        if (isinstance(ce.args[0], MaxRetryError) and
                isinstance(ce.args[0].reason, urllib3_ProxyError)):
            pass
    except requests.exceptions.Timeout:
        pass
    except requests.exceptions.ChunkedEncodingError:
        pass
    time.sleep(6)




stop = 1
while stop > 0:
    #stop = stop - 1
    calc()

i try usa try catch function, however i do not get done.

Can you post the full traceback? Anytime you construct a `response`, you probably want to check `response.ok` before using it in a more meaningful way. — JonSG, Jul 24 '23 at 13:39
Generally this error means that a specific column does not exists in the dataframe — gtomer, Jul 24 '23 at 13:47
@Tornikek I'm speculating that you are actually getting an API error response rather than your data one out of 100 times. — JonSG, Jul 24 '23 at 14:59

How to solve the KeyError: "None of [Index([....]) are in the [columns]"

0 Answers0