5

The required task is deploy the data preprocessing web application on Streamlit in which user can upload the raw dataframe and download the processed dataframe. I am trying to download the file on which data preprocessing like missing value imputation has been done but i am getting an error as below :

RuntimeError: Invalid binary data format: <class 'pandas.core.frame.DataFrame'>

I do not have any clue as to how to solve the problem. Please help me as I am new to python and StreamLit. The code is

import streamlit as st
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
temp='\\temp.csv'
path=os.getcwd()
path=path+temp

def upload_csv(zxc):
    if zxc:
        df=pd.read_csv(zxc)
        st.dataframe(df)
        df.to_csv(path,index=False)
        return df
    

def upload_xlsx(zxc):
    if zxc:
        df=pd.read_excel(zxc)
        st.dataframe(df)
        df.to_csv(path,index=False)
        return df

def mvt_mean(df):
    new_df=df.fillna(df.mean())
    new_df=new_df.fillna(df.select_dtypes(include='object').mode().iloc[0])
    st.dataframe(new_df)
    return new_df


def mvt_median(df):
    new_df=df.fillna(df.median())
    new_df=new_df.fillna(df.select_dtypes(include='object').mode().iloc[0])
    st.dataframe(new_df)
    return new_df

def mvt_mode(df):
    new_df=df.fillna(df.select_dtypes(include='object').mode().iloc[0])
    st.dataframe(new_df)
    return new_df

def export_data():
    with open('temp.csv','r+') as f:
        #contents=csv.reader(f)
        df=pd.read_csv(path)
        result=mvt_mean(df)
        result.to_csv(index=False)
        st.sidebar.download_button(label='Download CSV',data=result,mime='text/csv',file_name='Download.csv')
        return



st.title('Hello Streamlit Front end for Python ')
option = st.sidebar.selectbox('DSA Application',('Data-PreProcessing', 'Data-Visualization', 'Machine-Learning'))
st.sidebar.write('You selected:', option)
if option=='Data-PreProcessing':
    format_options=['csv','xlsx']
    a=st.sidebar.radio('Choose the file type',format_options)
    if a=='csv':
        b=st.sidebar.file_uploader("Choose a file",type='csv')
        if b :
            if st.sidebar.checkbox('Upload File'):
                df=upload_csv(b)
                #if st.sidebar.checkbox('Proceed for missing value treatment'):
                opt=st.sidebar.radio('Impute Missing Value using',('Mean','Median','Mode'))
                st.sidebar.write('You selected',opt)
                if opt=='Mean':
                    mvt_mean(df)
                    
                    
                    export_data()
                elif opt=='Median':
                    mvt_median(df)
                    #st.sidebar.download_button(label='Download CSV',data=d,mime='text/csv',file_name='Download.csv')
                    export_data()
                elif opt=='Mode':
                    mvt_mode(df)
                    #st.sidebar.download_button(label='Download CSV',data=d,mime='text/csv',file_name='Download.csv')
                    export_data()
    #elif a=='xlsx':
        #a=st.sidebar.file_uploader("Choose a file",type='xlsx')
        #b=st.sidebar.button('Upload File')
        #if b:
            #df=upload_xlsx(a)
            #st.dataframe(df)
        


        
Ailurophile
  • 2,552
  • 7
  • 21
  • 46
Python Learner
  • 147
  • 2
  • 12
  • 1
    You can convert the dataframe into binary format before passing it to the download button and that should solve the problem. The example code [here](https://stackoverflow.com/a/70120061/6434970) might be helpful. – Coding Tumbleweed Jan 12 '22 at 12:34

1 Answers1

1

I had the same error as you. I solved it through :

data_as_csv= data_to_csv.to_csv(index=False).encode("utf-8")
st.download_button(
    "Download data as CSV", 
    data_as_csv, 
    "benchmark-tools.csv",
    "text/csv",
    key="download-tools-csv",
)
vvvvv
  • 25,404
  • 19
  • 49
  • 81
A. chahid
  • 184
  • 2
  • 5