You should combine the required files into a single DataFrame with Spark beforehand, and only collect the results at the very end.
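As a minimal sketch of that first step (assuming the inputs are pipe-delimited text files; the paths and schema below are hypothetical), Spark can read all the files into one DataFrame in a single call:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Passing a list of paths makes Spark union the files into one DataFrame.
combined = spark.read.csv(
    ["jobs_part1.txt", "jobs_part2.txt"],  # hypothetical paths
    sep="|",
    schema="Job_name string, start_time string, status string",
)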
Data Preparation
input_str = """
A|09:00:00 |Not started|
B|09:30:00 |Completed|
C|09:30:00 |Running
""".split("|")
input_values = list(map(lambda x: x.strip() if x.strip() != 'null' else None, input_str))
cols = list(map(lambda x: x.strip() if x.strip() != 'null' else None, "Job_name start_time status".split()))
n = len(input_values)
n_cols = 3
input_list = [tuple(input_values[i:i+n_cols]) for i in range(0,n,n_cols)]
sparkDF = sql.createDataFrame(input_list, cols)
sparkDF.show()
+--------+----------+-----------+
|Job_name|start_time| status|
+--------+----------+-----------+
| A| 09:00:00|Not started|
| B| 09:30:00| Completed|
| C| 09:30:00| Running|
+--------+----------+-----------+
Collect
# Build the sentence column-wise on the executors, then collect to the driver
[x['output'] for x in
    sparkDF.select(
        F.concat(
            F.lit("job "), F.col('Job_name'), F.lit(" "),
            F.col('status'), F.lit(" at "), F.col('start_time'),
        ).alias("output")
    ).collect()
]
['job A Not started at 09:00:00',
'job B Completed at 09:30:00',
'job C Running at 09:30:00']
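As an aside (an equivalent expression, not part of the original answer), F.format_string keeps the whole template in one string, which can be easier to read:

[x['output'] for x in
    sparkDF.select(
        F.format_string("job %s %s at %s", 'Job_name', 'status', 'start_time').alias('output')
    ).collect()
]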