You cannot simply update that column in place. What you can do instead is:

1. create a new column with the StringIndexer
2. drop the original column
3. rename the new column to the original column's name

You can use the following code:
from pyspark.ml.feature import StringIndexer
import pyspark.sql.functions as F
df = spark.createDataFrame([['a', 1], ['b', 1], ['c', 2], ['b', 5]], ['WindGustDir', 'value'])
df.show()
# +-----------+-----+
# |WindGustDir|value|
# +-----------+-----+
# | a| 1|
# | b| 1|
# | c| 2|
# | b| 5|
# +-----------+-----+
# 1. create new column
label_stringIdx = StringIndexer(inputCol="WindGustDir", outputCol="WindGustDir_index")
label_stringIdx_model = label_stringIdx.fit(df)
df = label_stringIdx_model.transform(df)
# 2. delete original column
df = df.drop("WindGustDir")
# 3. rename new column
to_rename = ['WindGustDir_index', 'value']
replace_with = ['WindGustDir', 'value']
mapping = dict(zip(to_rename, replace_with))
df = df.select([F.col(c).alias(mapping.get(c, c)) for c in to_rename])
df.show()
# +-----------+-----+
# |WindGustDir|value|
# +-----------+-----+
# | 1.0| 1|
# | 0.0| 1|
# | 2.0| 2|
# | 0.0| 5|
# +-----------+-----+
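Note that StringIndexer assigns indices by descending label frequency by default, which is why b (the most frequent value) maps to 0.0.

As a side note, steps 2 and 3 can be collapsed into a single chained call with withColumnRenamed, which avoids building the alias mapping. This is only a sketch of that alternative; the df2 and indexed names are illustrative, and the original DataFrame is re-created so the snippet stands on its own:

# alternative to steps 2-3: drop the original column, then rename the index column
df2 = spark.createDataFrame([['a', 1], ['b', 1], ['c', 2], ['b', 5]], ['WindGustDir', 'value'])
indexed = label_stringIdx.fit(df2).transform(df2)
indexed = indexed.drop("WindGustDir").withColumnRenamed("WindGustDir_index", "WindGustDir")
indexed.show()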