Using PySpark, I would like to convert a DataFrame that has a single DenseVector column into a new DataFrame in which each element of the vector is in its own column. I do not want the output to be a vector.
# Example setup: pair up rows that share a name, then pack the two numeric
# columns of each pair into a single "features" vector column.
from pyspark.ml.feature import VectorAssembler

# Eight (value, name) rows; each name appears exactly twice.
rows = [
    (1, 'a'),
    (2, 'a'),
    (3, 'b'),
    (4, 'b'),
    (5, 'c'),
    (6, 'c'),
    (7, 'd'),
    (8, 'd'),
]
df = sqlContext.createDataFrame(rows, schema=['value', 'name'])

# Projection of df with both columns renamed, so the two sides of the
# self-join below expose distinct column names ("nam"/"val" vs "name"/"value").
xf = df.select(
    df["name"].alias("nam"),
    df["value"].alias("val"),
)

# Inner self-join on the name, keeping only the pairs whose "val" is strictly
# smaller than "value"; with the rows above that leaves one pair per name.
same_name = df["name"] == xf["nam"]
pf = (
    df.join(xf, same_name, "inner")
    .where(xf["val"] < df["value"])
    .select(df["value"], xf["val"], df["name"])
)

# Combine the two numeric columns into one vector-valued "features" column
# and keep only that column.
assembler = VectorAssembler(inputCols=['value', "val"], outputCol="features")
selected_features = assembler.transform(pf).select('features')

# Materialize the result (the returned rows are not stored here), then
# print it as a table.
selected_features.collect()
selected_features.show()