I am trying to create classes in a new column, based on existing words in another column. For that, I need to include multiple .contains() conditions. But none of the one I tried work.
def classes_creation(data):
df = data.withColumn("classes", when(data.where(F.col("MISP_RFW_Title").like('galleys') | F.col("MISP_RFW_Title").like('coffee')),"galleys") ).otherwise(lit(na))
return df
# RETURNS ERROR
def classes_creation(data):
df = data.withColumn("classes", when(col("MISP_RFW_Title").contains("galleys").contains("word"), 'galleys').otherwise(lit(na))
return df
# RETURNS COLUMN OF NA ONLY
def classes_creation(data):
df = data.withColumn("classes", when(col("MISP_RFW_Title").contains("galleys" | "word"), 'galleys').otherwise(lit(na))
return df
# RETURNS COLUMN OF NA ONLY