Create a map from the `age` column with `create_map`, then use `map_concat` to merge it with the existing map in `col_1`.
# Starting DataFrame: `col_1` holds a map, `age` is a regular column.
data_sdf.show()
# +---+-----------------+---+
# | id| col_1|age|
# +---+-----------------+---+
# | 1| {name -> James}| 20|
# | 2|{name -> Charlie}| 29|
# +---+-----------------+---+
# Wrap `age` in a one-entry map keyed by the literal 'age', then append that
# map to `col_1` with map_concat; the result lands in `col_1_updated`.
(
    data_sdf
    .withColumn(
        'col_1_updated',
        func.map_concat(
            'col_1',
            func.create_map(func.lit('age'), func.col('age')),
        ),
    )
    .show(truncate=False)
)
# +---+-----------------+---+----------------------------+
# |id |col_1 |age|col_1_updated |
# +---+-----------------+---+----------------------------+
# |1 |{name -> James} |20 |{name -> James, age -> 20} |
# |2 |{name -> Charlie}|29 |{name -> Charlie, age -> 29}|
# +---+-----------------+---+----------------------------+
If there are multiple columns and you want to build the map dynamically, you can use `reduce` (from `functools`) to flatten the per-column key/value pairs into the argument list for `create_map`.
# Columns to fold into the map; each column name is also used as its map key.
cols_to_map = ['age', 'gender', 'email']
# create_map is called with alternating key, value arguments, so build one
# [lit(name), col(name)] pair per column and let reduce concatenate the pairs
# into a single flat list.
input_to_create_map = reduce(
    lambda flat, pair: flat + pair,
    [[func.lit(name), func.col(name)] for name in cols_to_map],
)
# Merge the dynamically built map into `col_1` and display the result.
(
    data_sdf
    .withColumn(
        'col_1_updated',
        func.map_concat('col_1', func.create_map(*input_to_create_map)),
    )
    .show(truncate=False)
)
# +---+-----------------+---+------+-----------+---------------------------------------------------------------+
# |id |col_1 |age|gender|email |col_1_updated |
# +---+-----------------+---+------+-----------+---------------------------------------------------------------+
# |1 |{name -> James} |20 |M |abc@xyz.com|{name -> James, age -> 20, gender -> M, email -> abc@xyz.com} |
# |2 |{name -> Charlie}|29 |M |foo@bar.com|{name -> Charlie, age -> 29, gender -> M, email -> foo@bar.com}|
# +---+-----------------+---+------+-----------+---------------------------------------------------------------+