I am trying to import a file into Spark. I have tried different methods to import it, but all of them produce the same result.
from pyspark.sql.functions import to_timestamp, col, lit

rc = (spark.read.csv('dbfs:/databricks/driver/reported-crimes.csv', header=True)
      .withColumn('Date', to_timestamp(col('Date'), 'MM/dd/yyyy hh:mm:ss a'))
      .filter(col('Date') <= lit('2018-11-11')))
rc.show(5)
df = spark.read \
    .option("header", "true") \
    .option("inferSchema", "true") \
    .csv("dbfs:/databricks/driver/reported-crimes.csv")
I get the following error:
AnalysisException: Path does not exist: dbfs:/databricks/driver/reported-crimes.csv
---------------------------------------------------------------------------
AnalysisException Traceback (most recent call last)
<command-4105155275559303> in <cell line: 1>()
----> 1 df = spark.read\
2 .option("header", "true")\
3 .option("inferSchema", "true")\
4 .csv("dbfs:/databricks/driver/reported-crimes.csv")
/databricks/spark/python/pyspark/instrumentation_utils.py in wrapper(*args, **kwargs)
46 start = time.perf_counter()
47 try:
---> 48 res = func(*args, **kwargs)
49 logger.log_success(
50 module_name, class_name, function_name, time.perf_counter() - start, signature
/databricks/spark/python/pyspark/sql/readwriter.py in csv(self, path, schema, sep, encoding, quote, escape, comment, header, inferSchema, ignoreLeadingWhiteSpace, ignoreTrailingWhiteSpace, nullValue, nanValue, positiveInf, negativeInf, dateFormat, timestampFormat, maxColumns, maxCharsPerColumn, maxMalformedLogPerPartition, mode, columnNameOfCorruptRecord, multiLine, charToEscapeQuoteEscaping, samplingRatio, enforceSchema, emptyValue, locale, lineSep, pathGlobFilter, recursiveFileLookup, modifiedBefore, modifiedAfter, unescapedQuoteHandling)
533 if type(path) == list:
534 assert self._spark._sc._jvm is not None
--> 535 return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path)))
I am expecting to read a file from the Databricks File System (DBFS), but instead I am seeing this error.
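For reference, the sanity check I was planning to run next, to confirm whether the file is actually at that DBFS path, is a minimal sketch using the standard Databricks notebook utilities (dbutils and the /dbfs FUSE mount, both assumed to be available in the notebook):

# List the target directory to see whether reported-crimes.csv is actually there.
display(dbutils.fs.ls('dbfs:/databricks/driver/'))

# The same location can also be checked through the local /dbfs mount.
import os
print(os.path.exists('/dbfs/databricks/driver/reported-crimes.csv'))

If the file does not show up in that listing, I assume I would need to re-upload it or point spark.read at the path where it was actually saved.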