from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext
conf = SparkConf().setAppName("myApp").setMaster("local")
sc = SparkContext(conf=conf)
a = sc.parallelize([[1, "a"], [2, "b"], [3, "c"], [4, "d"], [5, "e"]]).toDF(['ind', "state"])
a.show()
Results in:
Traceback (most recent call last):
File "/Users/ktemlyakov/messing_around/SparkStuff/mock_maersk_data.py", line 7, in <module>
a = sc.parallelize([[1, "a"], [2, "b"], [3, "c"], [4, "d"], [5, "e"]]).toDF(['ind', "state"])
AttributeError: 'RDD' object has no attribute 'toDF'
What am I missing?