0

I'm trying to use the OCRmyPDF library (the `ocrmypdf` Python package), and here is my code:

ocrmypdf.ocr("input/mypdf.pdf", 
             "input/mypdf_ocr.pdf",
              skip_text=False,
              force_ocr=True,
              deskew=True,
              rotate_pages=True,
              remove_background=False,
              rotate_pages_threshold=3,
              pages="1,72",
              max_image_mpixels=1_000_000_000,
              keep_temporary_files=False,
              pdf_renderer="sandwich",
              unpaper_args="",
              clean=True,
              progress_bar=False,)

The call above throws `PermissionError: [Errno 13] Permission denied: 'unpaper'`. I'm not sure how to debug the cause of this error.

I'm using WSL2 (Ubuntu 20.04) on Windows 11.

Here is the full traceback from the Jupyter notebook:

---------------------------------------------------------------------------
PermissionError                           Traceback (most recent call last)
Input In [8], in <cell line: 1>()
----> 1 ocrmypdf.ocr("input/mypdf.pdf", 
      2              "input/mypdf_ocr.pdf",
      3               skip_text=False,
      4               force_ocr=True,
      5               deskew=True,
      6               rotate_pages=True,
      7               remove_background=False,
      8               rotate_pages_threshold=3,
      9               pages="1,72",
     10               max_image_mpixels=1_000_000_000,
     11               keep_temporary_files=False,
     12               pdf_renderer="sandwich",
     13               unpaper_args="",
     14               clean=True,
     15               progress_bar=False)

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/api.py:339, in ocr(input_file, output_file, language, image_dpi, output_type, sidecar, jobs, use_threads, title, author, subject, keywords, rotate_pages, remove_background, deskew, clean, clean_final, unpaper_args, oversample, remove_vectors, force_ocr, skip_text, redo_ocr, skip_big, optimize, jpg_quality, png_quality, jbig2_lossy, jbig2_page_group_size, pages, max_image_mpixels, tesseract_config, tesseract_pagesegmode, tesseract_oem, tesseract_thresholding, pdf_renderer, tesseract_timeout, rotate_pages_threshold, pdfa_image_compression, user_words, user_patterns, fast_web_view, plugins, plugin_manager, keep_temporary_files, progress_bar, **kwargs)
    336     warn("ocrmypdf.ocr(verbose=) is ignored. Use ocrmypdf.configure_logging().")
    338 options = create_options(**create_options_kwargs)
--> 339 check_options(options, plugin_manager)
    340 return run_pipeline(options=options, plugin_manager=plugin_manager, api=True)

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/_validation.py:245, in check_options(options, plugin_manager)
    244 def check_options(options: Namespace, plugin_manager: PluginManager) -> None:
--> 245     _check_plugin_invariant_options(options)
    246     _check_plugin_options(options, plugin_manager)

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/_validation.py:232, in _check_plugin_invariant_options(options)
    230 check_options_output(options)
    231 check_options_sidecar(options)
--> 232 check_options_preprocessing(options)
    233 check_options_ocr_behavior(options)
    234 check_options_advanced(options)

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/_validation.py:132, in check_options_preprocessing(options)
    130     raise BadArgsError("--clean is required for --unpaper-args")
    131 if options.clean:
--> 132     check_external_program(
    133         program='unpaper',
    134         package='unpaper',
    135         version_checker=unpaper.version,
    136         need_version='6.1',
    137         required_for=['--clean, --clean-final'],
    138     )
    139     try:
    140         if options.unpaper_args:

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/subprocess/__init__.py:331, in check_external_program(program, package, version_checker, need_version, required_for, recommended, version_parser)
    329 try:
    330     if callable(version_checker):
--> 331         found_version = version_checker()
    332     else:  # deprecated
    333         found_version = version_checker

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/_exec/unpaper.py:69, in version()
     68 def version() -> str:
---> 69     return get_version('unpaper')

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/subprocess/__init__.py:157, in get_version(program, version_arg, regex, env)
    155 args_prog = [program, version_arg]
    156 try:
--> 157     proc = run(
    158         args_prog,
    159         close_fds=True,
    160         text=True,
    161         stdout=PIPE,
    162         stderr=STDOUT,
    163         check=True,
    164         env=env,
    165     )
    166     output: str = proc.stdout
    167 except FileNotFoundError as e:

File ~/AI/nexus/aps-esg-data-scraper/venv/lib/python3.8/site-packages/ocrmypdf/subprocess/__init__.py:58, in run(args, env, logs_errors_to_stdout, check, **kwargs)
     56 stderr_name = 'stderr' if not logs_errors_to_stdout else 'stdout'
     57 try:
---> 58     proc = subprocess_run(args, env=env, check=check, **kwargs)
     59 except CalledProcessError as e:
     60     stderr = getattr(e, stderr_name, None)

File ~/.pyenv/versions/3.8.3/lib/python3.8/subprocess.py:489, in run(input, capture_output, timeout, check, *popenargs, **kwargs)
    486     kwargs['stdout'] = PIPE
    487     kwargs['stderr'] = PIPE
--> 489 with Popen(*popenargs, **kwargs) as process:
    490     try:
    491         stdout, stderr = process.communicate(input, timeout=timeout)

File ~/.pyenv/versions/3.8.3/lib/python3.8/subprocess.py:854, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
    850         if self.text_mode:
    851             self.stderr = io.TextIOWrapper(self.stderr,
    852                     encoding=encoding, errors=errors)
--> 854     self._execute_child(args, executable, preexec_fn, close_fds,
    855                         pass_fds, cwd, env,
    856                         startupinfo, creationflags, shell,
    857                         p2cread, p2cwrite,
    858                         c2pread, c2pwrite,
    859                         errread, errwrite,
    860                         restore_signals, start_new_session)
    861 except:
    862     # Cleanup if the child failed starting.
    863     for f in filter(None, (self.stdin, self.stdout, self.stderr)):

File ~/.pyenv/versions/3.8.3/lib/python3.8/subprocess.py:1702, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
   1700     if errno_num != 0:
   1701         err_msg = os.strerror(errno_num)
-> 1702     raise child_exception_type(errno_num, err_msg, err_filename)
   1703 raise child_exception_type(err_msg)

PermissionError: [Errno 13] Permission denied: 'unpaper'
Dalireeza
  • 107
  • 3
  • 13

0 Answers0