I'm trying to implement a UDF that takes an input DataFrame and a column name; every record in the input column has to be passed through all of the regular-expression patterns below. I'm new to PySpark.
import re
from pyspark.sql.functions import udf
# Ordered (compiled pattern, replacement) rules, translated from the original
# sed-style commands ``s/ X | Y / Z /``.  Each token must still have a space
# on both sides, as in the original patterns, but the spaces are matched with
# lookarounds instead of being consumed so that two adjacent tokens
# (e.g. " ASC ASSO ") are both rewritten.
_GROUP_NAME_RULES = (
    # " AND " -> " ": drop the word together with its leading space.
    (re.compile(r" AND(?= )"), ""),
    (re.compile(r"(?<= )(?:ADVANCED|ADVANCE)(?= )"), "ADV"),
    (
        re.compile(
            r"(?<= )(?:ASC|ASSOCI|ASSC|ASSOCIAT|ASSOCIA|ASSO|ASSOCS|AS"
            r"|ASSOCIATES)(?= )"
        ),
        "ASSOC",
    ),
)


def _normalize_group_name(value):
    """Run one string through every rule in ``_GROUP_NAME_RULES``, in order.

    Returns ``None`` unchanged so that null cells stay null.
    """
    if value is None:
        return None
    for pattern, replacement in _GROUP_NAME_RULES:
        value = pattern.sub(replacement, value)
    return value


def group_nm_transfrom(inDF, column_name):
    """Return ``inDF`` with the values of ``column_name`` normalised.

    Every value in the column is passed through all of the substitution
    rules in ``_GROUP_NAME_RULES``.  The previous version applied
    ``re.findall`` with sed syntax to the column *name* and referenced
    undefined variables, so it raised ``NameError`` on every call.

    Args:
        inDF: Input DataFrame; it is not modified, ``withColumn`` produces
            a new DataFrame.
        column_name: Name of the column in ``inDF`` to normalise.
            Assumed to hold strings (or nulls) -- TODO confirm with caller.

    Returns:
        The DataFrame produced by ``inDF.withColumn`` with ``column_name``
        replaced by its normalised values.
    """
    # No returnType is passed, so the UDF uses pyspark's default return type.
    normalize = udf(_normalize_group_name)
    return inDF.withColumn(column_name, normalize(inDF[column_name]))