I am calculating age from birth date in pyspark :
def run(first):
out = spark.sql("""
SELECT
p.birth_date,
FROM table1 p
LEFT JOIN table2 a USING(id)
LEFT JOIN table2 m ON m.id = p.id
LEFT JOIN table4 i USING(id))"""
out = out.withColumn('month', F.lit(first))
out = out.withColumn('age',
F.when(F.col('birth_date').isNull(), None).otherwise(
F.floor(F.datediff(
F.col('month'), F.col('birth_date'))/365.25)))
I get the following error at this line:
F.col('month'), F.col('birth_date'))/365.25)))
TypeError: unsupported operand type(s) for -: 'DataFrame' and 'DataFrame'
Any ideas on how to resolve this ?