Say I have a DataFrame looks like the following,
df = pd.DataFrame({
'Name': ['', 'Dave', 'Tom'],
'Age':[20, 30, None],
'Job':['singer', 'teacher', 'student'],
})
and I'd like to make a function that can help me filter the value I want for analysis
def filt(name=False, age=False, job=False) -> pd.DataFrame:
'''
this function return the dataframe with selected param
'''
if name:
df = df[df.Name.notna()]
if age:
df = df[df.Age.notna()]
if job:
df = df[df.Job.notna()]
return df
# Expected Usage: return the dataframe where "Name" and "Age" should not be empty.
filt(df, name=True, age=True)
In real world I would like to have at least 10 kwargs from case to case, and this code seems to be redundant. What is the convention to set param without using too many if-else statements?
EDIT: sorry for misunderstood with filtering na values only. There's also cases like " filter job=='students' ", and the default param would be all of them.
# i.e. How could the function possible looks like
def filt(df, name=False, job=False, age=False,
selected_name=None, selected_job=None):
# make the default selected_name value to all of them
selected_name = tuple(df.Name.tolist()) if not selected_name else tuple(selected_name) #tuple() cause dict keys cannot be list
selected_job = tuple(df.Job.tolist()) if not selected_job else tuple(selected_job) #tuple() cause dict keys cannot be list
# Try to make dict when keys=True, use the values as conditions
filtlist = {
name: df.Name!='',
age : df.Age!='',
job : df.Job!='',
selected_name: df.Name.isin(selected_name),
selected_job: df.Job.isin(selected_job)
}
x = [filtlist[i] for i in filtlist.keys() if i]
# since I pass in 3 kwargs, len(x)=3, i have 3 conditions.
# ^ I stuck here, How do I know concat those conditions together without knowing how many conditions I have?
# '&'.join(x) returns a string, I cannot do this
return df[x[0] & x[1] & x[2]] # I manually do this cause I know I have 3 kwargs. just to make sample run.
filt(df,name=True, age=True, selected_job=['teacher']) #yeah...for some reason name=True does not seems to work...`