Find out the start date and end date of week in pyspark dataframe. Monday being the first day of week.
def add_start_end_week(dataframe, timestamp_col, StartDate, EndDate):
""""
Function:
Get the start date and the end date of week
args
dataframe: spark dataframe
column_name: timestamp column based on which we have to calculate the start date and end date
StartDate: start date column name of week
EndDate: end date column name of week
"""
dataframe = dataframe.withColumn(
'day_of_week', dayofweek(col(timestamp_col)))
# start of the week (Monday as first day)
dataframe = dataframe.withColumn('StartDate',when(col("day_of_week")>1, \
expr("date_add(date_sub({},day_of_week-1),1)".format(timestamp_col))). \
otherwise(expr("date_sub({},6)".format(timestamp_col))))
#End of the Week
dataframe = dataframe.withColumn('EndDate',when(col("day_of_week")>1, \
expr("date_add(date_add({},7-day_of_week),1)".format(timestamp_col))). \
otherwise(col("{}".format(timestamp_col))))
return dataframe
Validate the above function:
df = spark.createDataFrame([('2021-09-26',),('2021-09-25',),('2021-09-24',),('2021-09-23',),('2021-09-22',),('2021-09-21',),('2021-09-20',)], ['dt'])
dataframe = df.withColumn('day_of_week', dayofweek(col('dt')))
# start of the week (Monday as first day)
dataframe = dataframe.withColumn('StartDate',when(col("day_of_week")>1,expr("date_add(date_sub(dt,day_of_week-1),1)")).otherwise(expr("date_sub(dt,6)")))
#End of the Week
dataframe = dataframe.withColumn('EndDate',when(col("day_of_week")>1,expr("date_add(date_add(dt,7-day_of_week),1)")).otherwise(col("dt")))