I have recreated the data and expected output based on the provided link.
The program with the PySpark DataFrame API would look like the following:
# imports
from pyspark.sql import functions as F
from pyspark.sql.window import Window
# replicating the data
cols = ['ID', 'LIST_VALUES']
row_1 = [1, ['a', 'b', 'c']]
row_2 = [2, ['a', 'n', 't']]
row_3 = [3, ['x']]
row_4 = [4, ['h', 'h']]  # duplicate on purpose; collect_set dedupes it later
rows = [row_1, row_2, row_3, row_4]
df_1 = spark.createDataFrame(rows, cols)

cols = ['VALUE', 'MAPPING']
row_1 = ['a', 'alpha']
row_2 = ['b', 'bravo']
row_3 = ['c', 'charlie']
row_4 = ['n', 'november']
row_5 = ['h', 'hotel']
row_6 = ['t', 'tango']
row_7 = ['x', 'xray']
rows = [row_1, row_2, row_3, row_4, row_5, row_6, row_7]
df_a = spark.createDataFrame(rows, cols)
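For a quick sanity check, the inferred schemas can be inspected; LIST_VALUES should come back as an array of strings:
# sanity check on the inferred schemas
df_1.printSchema()   # ID: long, LIST_VALUES: array<string>
df_a.printSchema()   # VALUE: string, MAPPING: string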
# we need to explode the LIST_VALUES column first
df_2 = df_1.select('ID', F.explode(F.col('LIST_VALUES')).alias('EXP_LIST_VALUES'))
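Note that F.explode drops rows whose array is null or empty, so an ID with an empty LIST_VALUES would disappear here. If that matters for your real data, a variant with F.explode_outer (available since Spark 2.3) keeps such IDs with a null in the exploded column; the name df_2_outer below is just for illustration:
# variant: keep IDs whose LIST_VALUES is null or empty
df_2_outer = df_1.select('ID', F.explode_outer('LIST_VALUES').alias('EXP_LIST_VALUES'))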
# then we can left-join df_a with df_2
df_new = df_a.join(df_2, df_a.VALUE == df_2.EXP_LIST_VALUES, 'left')
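One caveat with this join direction: since df_a is the left side, a value that appears in LIST_VALUES but has no row in the mapping table is silently dropped. If that can happen in your data, flipping the join keeps the unmapped value with a null MAPPING; a sketch, assuming the same column names (df_new_alt is an illustrative name):
# variant: keep list values that have no entry in df_a (MAPPING comes back null)
df_new_alt = df_2.join(df_a, df_2.EXP_LIST_VALUES == df_a.VALUE, 'left')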
# applying a window function partitioned by ID
w = Window.partitionBy('ID')
df_output = (df_new
             .select(F.col('ID'),
                     F.collect_set(F.col('VALUE')).over(w).alias('LIST_VALUES'),
                     F.array_join(F.collect_set(F.col('MAPPING')).over(w), ',').alias('new_col'))
             .dropDuplicates())
display(df_output)
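Since every ID collapses to a single row anyway, the same result can also be produced without window functions and dropDuplicates, using a plain aggregation. This is just an equivalent sketch, not required for the approach above:
# equivalent aggregation-based version (sketch)
df_output_agg = (df_new
                 .groupBy('ID')
                 .agg(F.collect_set('VALUE').alias('LIST_VALUES'),
                      F.array_join(F.collect_set('MAPPING'), ',').alias('new_col')))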
The output looks like the following DataFrame:
# +---+-----------+--------------------+
# | ID|LIST_VALUES|             new_col|
# +---+-----------+--------------------+
# |  1|  [c, b, a]| bravo,charlie,alpha|
# |  2|  [t, n, a]|november,tango,alpha|
# |  3|        [x]|                xray|
# |  4|        [h]|               hotel|
# +---+-----------+--------------------+
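Keep in mind that collect_set gives no ordering guarantee, which is why LIST_VALUES comes back as [c, b, a] rather than [a, b, c], and the exact order may differ between runs. If a deterministic order matters, F.array_sort (Spark 2.4+) can be applied on top; a minimal sketch:
# optional: sort the collected values for a deterministic output
df_output = df_output.withColumn('LIST_VALUES', F.array_sort('LIST_VALUES'))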