The required task is deploy the data preprocessing web application on Streamlit in which user can upload the raw dataframe and download the processed dataframe. I am trying to download the file on which data preprocessing like missing value imputation has been done but i am getting an error as below :
RuntimeError: Invalid binary data format: <class 'pandas.core.frame.DataFrame'>
I do not have any clue as to how to solve the problem. Please help me as I am new to python and StreamLit. The code is
import streamlit as st
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
temp='\\temp.csv'
path=os.getcwd()
path=path+temp
def upload_csv(zxc):
if zxc:
df=pd.read_csv(zxc)
st.dataframe(df)
df.to_csv(path,index=False)
return df
def upload_xlsx(zxc):
if zxc:
df=pd.read_excel(zxc)
st.dataframe(df)
df.to_csv(path,index=False)
return df
def mvt_mean(df):
new_df=df.fillna(df.mean())
new_df=new_df.fillna(df.select_dtypes(include='object').mode().iloc[0])
st.dataframe(new_df)
return new_df
def mvt_median(df):
new_df=df.fillna(df.median())
new_df=new_df.fillna(df.select_dtypes(include='object').mode().iloc[0])
st.dataframe(new_df)
return new_df
def mvt_mode(df):
new_df=df.fillna(df.select_dtypes(include='object').mode().iloc[0])
st.dataframe(new_df)
return new_df
def export_data():
with open('temp.csv','r+') as f:
#contents=csv.reader(f)
df=pd.read_csv(path)
result=mvt_mean(df)
result.to_csv(index=False)
st.sidebar.download_button(label='Download CSV',data=result,mime='text/csv',file_name='Download.csv')
return
st.title('Hello Streamlit Front end for Python ')
option = st.sidebar.selectbox('DSA Application',('Data-PreProcessing', 'Data-Visualization', 'Machine-Learning'))
st.sidebar.write('You selected:', option)
if option=='Data-PreProcessing':
format_options=['csv','xlsx']
a=st.sidebar.radio('Choose the file type',format_options)
if a=='csv':
b=st.sidebar.file_uploader("Choose a file",type='csv')
if b :
if st.sidebar.checkbox('Upload File'):
df=upload_csv(b)
#if st.sidebar.checkbox('Proceed for missing value treatment'):
opt=st.sidebar.radio('Impute Missing Value using',('Mean','Median','Mode'))
st.sidebar.write('You selected',opt)
if opt=='Mean':
mvt_mean(df)
export_data()
elif opt=='Median':
mvt_median(df)
#st.sidebar.download_button(label='Download CSV',data=d,mime='text/csv',file_name='Download.csv')
export_data()
elif opt=='Mode':
mvt_mode(df)
#st.sidebar.download_button(label='Download CSV',data=d,mime='text/csv',file_name='Download.csv')
export_data()
#elif a=='xlsx':
#a=st.sidebar.file_uploader("Choose a file",type='xlsx')
#b=st.sidebar.button('Upload File')
#if b:
#df=upload_xlsx(a)
#st.dataframe(df)