0

Essentially I am able to do what I want with my current code, which is upload a csv file, manipulate it with pandas and then update a MSQL Database. I would like to add error handling somehow. Essentially the upload function will only work for one particular file and throw different errors for all others.

Is there a way that I can catch multiple errors and return an error message to the user ?

Possibly something like a check on the input csv file column headers.

@app.route('/upload', methods =['GET', 'POST'])
def csv_input():
tempfile_path = tempfile.NamedTemporaryFile().name
#file.save(tempfile_path)
#sheet = pd.read_csv(tempfile_path)
if request.method == 'POST':
    file = request.files['file']
    if file: #and allowed_filename(file.filename):
        #filename = secure_filename(file.filename)
        file.save(tempfile_path)
        input_csv = pd.read_csv(tempfile_path,sep=",", engine='python')

        #### Data Cleansing From Uploded Data
        col_titles = ['id','title','vote_average','w_average','vote_count','year','runtime',
          'budget','revenue','profit']
        # Only Keep Data where the Original Language is English
        input_csv = input_csv[input_csv['original_language']=='en']
        # New Dataframe that only contains data with vote count > 10 
        input_csv = input_csv[input_csv['vote_count'] >= 10]
        # Fill all NA values to 0 - Needed to set datatypes
        input_csv = input_csv.fillna(0)
        # Remove all Rows with no Runtime
        input_csv = input_csv[input_csv['runtime']!=0]
        # Revmove all duplciate Rows
        input_csv = input_csv.drop_duplicates()

        input_csv['vote_average'] = input_csv.vote_average.astype(float).round(1)
        input_csv.vote_average.round(1)
        input_csv['runtime'] = input_csv.runtime.astype(int)
        input_csv['vote_count'] = input_csv.vote_count.astype(int)
        input_csv['revenue'] = input_csv.revenue.astype('int64')
        input_csv['budget'] = input_csv.budget.astype('int64')

        profit_cal(input_csv,'revenue','budget','profit')

        input_csv['profit']=input_csv.profit.astype('int64')
        input_csv['profit']=input_csv.profit.replace(0,'No Data')

        #reorder_data = pd.DataFrame(input_csv)
        # Year Cleaning
        input_csv['year'] = pd.to_datetime(input_csv['release_date'], errors='coerce').apply(lambda x: str(x).split('-')[0] if x != np.nan else np.nan)
        #C = reorder_data['vote_average'].mean()
        #m = reorder_data['vote_count'].quantile(0.10)
        #w_average = org_data.copy().loc[reorder_data['vote_count'] >= m]

        #### IMDB Data Calculation
        V = input_csv['vote_count']
        R = input_csv['vote_average']
        C = input_csv['vote_average'].mean()
        m = input_csv['vote_count'].quantile(0.10)
        input_csv['w_average'] = (V/(V+m) * R) + (m/(m+V) * C)


        #C = input_csv['vote_average'].mean()
        #m = input_csv['vote_count'].quantile(0.10)

        #input_csv['w_average'] = input_csv.apply(weighted_rating, axis = 1)
        input_csv['w_average'] = input_csv.w_average.astype(float).round(1)



        reorder_data = input_csv[col_titles]
        reorder_data.to_sql(name='title_data', con=engine, if_exists = 'replace', index=False)    
        # Reorder the data and output in the correct order

        ##### Genre Loads == DataFrame 2
        df = input_csv
        v = df.genres.apply(json.loads)

        df = pd.DataFrame(
        {
            'id' : df['id'].values.repeat(v.str.len(), axis=0),
            'genre' : np.concatenate(v.tolist())
        })

        df['genre'] = df['genre'].map(lambda x: x.get('name'))

        genre_data = df.genre.str.get_dummies().sum(level=0)

        genre_data = df.loc[(df!=0).any(1)]
        #genre_data = genre_data.set_index('id')

        genre_order = ['id','genre']

        ## Dataframw to SQL
        genre_data[genre_order].to_sql(name='genre_data', con=engine, if_exists = 'replace', index=False) 
        ####### Keyword Search ### Dataframe

        #genre_data.to_csv("genre_data.csv")

        #return genre_data[genre_order].to_html()

        flash('Database has been updated successfully','success')
        #return reorder_data[col_titles].to_html()
        #stream = io.StringIO(file.stream.read().decode("UTF8"), newline=None)
        #csv_input = csv.reader(stream)
        #return reorder_data.to_html(index=False)
        #flash('File Uploaded Successfully')
        #return redirect(url_for('index'))
return render_template('upload.html')

1 Answers1

0

There are several methods:

  1. The Python way, just add try: and except with the relevant exception classes.

    try:
        # parsing & processing logic here
        pass
    except EmptyDataError as ex:
        # tell user we don't except empty data
        pass
    except ParserError as ex:
        # tell user we failed to parse their input
        pass
    except Exception as ex:
        # tell user that something went wrong.
        pass
    
  2. The Flask way, register error handlers with flask for specific exceptions (this effects the whole flask application):

    @app.errorhandler(pandas.errors.EmptyDataError)
    def handle_empty_data():
        return 'Failed parsing Input', 200