
I am trying to import a file into Spark. I have tried several different methods to import it, but all of them produced the same result.

from pyspark.sql.functions import to_timestamp, col, lit

# Read the CSV, parse the Date column, and keep rows on or before 2018-11-11
rc = (spark.read.csv('dbfs:/databricks/driver/reported-crimes.csv', header=True)
      .withColumn('Date', to_timestamp(col('Date'), 'MM/dd/yyyy hh:mm:ss a'))
      .filter(col('Date') <= lit('2018-11-11')))
rc.show(5)

df = spark.read\
      .option("header", "true")\
      .option("inferSchema", "true")\
      .csv("dbfs:/databricks/driver/reported-crimes.csv")

I get the following error:

AnalysisException: Path does not exist: dbfs:/databricks/driver/reported-crimes.csv
---------------------------------------------------------------------------
AnalysisException                         Traceback (most recent call last)
<command-4105155275559303> in <cell line: 1>()
----> 1 df = spark.read\
      2       .option("header", "true")\
      3       .option("inferSchema", "true")\
      4       .csv("dbfs:/databricks/driver/reported-crimes.csv")

/databricks/spark/python/pyspark/instrumentation_utils.py in wrapper(*args, **kwargs)
     46             start = time.perf_counter()
     47             try:
---> 48                 res = func(*args, **kwargs)
     49                 logger.log_success(
     50                     module_name, class_name, function_name, time.perf_counter() - start, signature

/databricks/spark/python/pyspark/sql/readwriter.py in csv(self, path, schema, sep, encoding, quote, escape, comment, header, inferSchema, ignoreLeadingWhiteSpace, ignoreTrailingWhiteSpace, nullValue, nanValue, positiveInf, negativeInf, dateFormat, timestampFormat, maxColumns, maxCharsPerColumn, maxMalformedLogPerPartition, mode, columnNameOfCorruptRecord, multiLine, charToEscapeQuoteEscaping, samplingRatio, enforceSchema, emptyValue, locale, lineSep, pathGlobFilter, recursiveFileLookup, modifiedBefore, modifiedAfter, unescapedQuoteHandling)
    533         if type(path) == list:
    534             assert self._spark._sc._jvm is not None
--> 535             return self._df(self._jreader.csv(self._spark._sc._jvm.PythonUtils.toSeq(path)))

I am expecting to read a file from the Databricks File System (DBFS), but instead I am seeing this error.
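
A quick way to confirm whether the file actually exists at that path is to list the directory with dbutils (a minimal sketch; dbutils.fs.ls is the standard Databricks filesystem utility, and the directory is the one from the error):

# List the DBFS directory from the error to see whether the CSV is there;
# dbutils.fs.ls raises an exception if the directory itself does not exist
for f in dbutils.fs.ls('dbfs:/databricks/driver/'):
    print(f.path)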
