3

Let's say I have a file script.py located at path = "foo/bar/script.py". I'm looking for a way in Python to programmatically execute script.py from within my main Python program through a function execute_script(). However, I've got a few requirements that seem to prevent me from employing a naive approach involving importlib or exec():

  • script.py should get executed in a "fresh-looking" Python environment as if it were run through $ python script.py. That is, all relevant globals like __name__, __file__, sys.modules, sys.path and the working directory should be set accordingly and as little information as possible should leak from my main program into the file's execution. (It is okay, though, if script.py could find out through the inspect module that it wasn't executed through $ python script.py directly.)

  • I need access to the result of the execution, i.e. execute_script() should return the module given by script.py with all its variables, functions and classes. (This prevents starting a new Python interpreter in a subprocess.)

  • execute_script() must internally use open() to read in script.py. This is so that I can use the pyfakefs package to mock out the file system during unit tests. (This prevents a simple solution involving importlib.)

  • execute_script() must not (permanently) modify any global state in my main program like sys.path or sys.modules.

  • If possible, script.py should not be able to affect my main program's global state. (At the very least it should not be able to affect sys.path and sys.modules in my main program.)

  • I need to be able to modify the sys.path that script.py sees. execute_script() should therefore accept an optional list of system paths as argument.

  • Stack traces and handling of errors occurring during the execution of script.py should work as usual. (This makes a solution involving exec() difficult.)

  • The solution should be as future-proof as possible and not depend on implementation details of the Python interpreter.

I'd be very grateful for any ideas!

balu
  • 3,500
  • 4
  • 34
  • 35

1 Answer

2

I just came across the fact that exec() also accepts code objects (that can be obtained e.g. from compile()) and have come up with an approach that seems to fulfill nearly all requirements. I say "nearly" because, with the exception of sys.path and sys.modules, the script can still affect the global state of the main program. Moreover, it also gets to see all modules that were imported before execute_script() was called. For the time being I'm happy with this, though.

Here is the full code including tests:

import os
import sys
from typing import List


# The module type is not available as a builtin name, so it is taken
# from an already imported module object.
module = type(os)


def create_module(name: str, file: str) -> module:
    """
    Create an empty module object that resembles a main module.

    `name` becomes the module's `__name__` and `file` its `__file__`.
    The new module is returned; it is not registered in `sys.modules`.
    """
    new_module = module(name)

    # A fresh `module` instance already provides __doc__, __loader__,
    # __name__, __package__ and __spec__. The attributes below are the
    # additional ones a main module usually has. (__builtins__ is not
    # listed by dir() but exists nonetheless.)
    extra_attributes = {
        "__annotations__": {},
        "__builtins__": __builtins__,
        "__file__": file,
    }
    for attribute, value in extra_attributes.items():
        setattr(new_module, attribute, value)

    return new_module


def exec_script(path: str, working_dir: str, syspath: List[str] = None) -> module:
    """
    Execute a Python script as if it were executed using `$ python
    <path>` from inside the given working directory. `path` can either
    be an absolute path or a path relative to `working_dir`.

    If `syspath` is provided, a copy of it will be used as `sys.path`
    during execution. Otherwise, `sys.path` will be set to
    `sys.path[1:]` which – assuming that `sys.path` has not been
    modified so far – removes the working directory from the time when
    the current Python program was started. Either way, the directory
    containing the script at `path` (after resolving symlinks) will
    always be added at position 0 in `sys.path` afterwards, so as to
    simulate execution via `$ python <path>`.

    The script is read with `open()` in binary mode and handed to
    `compile()` as bytes, so that an encoding declaration or BOM in the
    file is honoured the same way the interpreter would honour it.

    Returns the `__main__` module object the script was executed in.

    Raises whatever `open()`, `compile()` or `os.chdir()` raise (e.g.
    `OSError`, `SyntaxError`), and propagates any exception raised by
    the script itself. In all cases `sys.modules`, `sys.path` and the
    working directory of the calling program are restored before this
    function returns or raises.
    """

    if os.path.isabs(path):
        abs_path = path
    else:
        abs_path = os.path.join(os.path.abspath(working_dir), path)

    # Read raw bytes: compile() then detects the source encoding itself
    # instead of relying on the locale's default text encoding.
    with open(abs_path, "rb") as f:
        source = f.read()

    if sys.version_info < (3, 9):
        # Prior to Python 3.9, the __file__ variable inside the main
        # module always contained the path exactly as it was given to `$
        # python`, no matter whether it is relative or absolute and/or a
        # symlink.
        the__file__ = path
    else:
        # Starting from Python 3.9, __file__ inside the main module is
        # always an absolute path.
        the__file__ = abs_path

    # The filename passed to compile() will be used in stack traces and
    # error messages. Normally it agrees with __file__.
    code = compile(source, filename=the__file__, mode="exec")

    # According to
    # https://docs.python.org/3/tutorial/modules.html#the-module-search-path
    # if the script is a symlink, the symlink is followed before the
    # directory containing the script is added to sys.path. realpath()
    # also copes with relative link targets and chains of symlinks.
    sys_path_dir = os.path.dirname(os.path.realpath(abs_path))

    if syspath is None:
        syspath = sys.path[1:]

    the_module = create_module(name="__main__", file=the__file__)

    # Remember the caller's global state *before* touching any of it so
    # that the `finally` clause below can always put it back.
    sysmodules_backup = sys.modules
    syspath_backup = sys.path
    cwd_backup = os.getcwd()

    try:
        # NOTE: The Python documentation warns that replacing the
        # sys.modules dictionary "will not necessarily work as expected";
        # the script sees the copy through `sys.modules`, but the
        # interpreter may keep using the original dictionary internally.
        sys.modules = sys.modules.copy()
        sys.modules["__main__"] = the_module

        # Building a new list guarantees the caller's list is not
        # mutated by the script.
        sys.path = [sys_path_dir, *syspath]

        os.chdir(working_dir)

        # For code inside a module, global and local variables are given
        # by the *same* dictionary, which is what exec() does when only
        # the globals are passed.
        exec(code, the_module.__dict__)
    finally:
        # Restore the pure-Python state first; os.chdir() is the only
        # step here that can fail.
        sys.modules = sysmodules_backup
        sys.path = syspath_backup
        os.chdir(cwd_backup)

    return the_module


#################
##### Tests #####
#################

# Make sure to install pyfakefs via pip!

import unittest

# Importing the submodule explicitly is required: a plain `import
# pyfakefs` does not guarantee that `pyfakefs.fake_filesystem_unittest`
# has been loaded.
import pyfakefs.fake_filesystem_unittest


class Test_exec_script(pyfakefs.fake_filesystem_unittest.TestCase):
    """
    Tests for `exec_script()` that run against a fake file system.

    `setUp()` creates a small script at `/folder/script.py` that records
    the working directory, `sys.modules` and `sys.path` it sees and then
    tries to modify the latter two, plus a symlink to that script at
    `/folder2/symlink.py`.

    The checks use `self.assert*` methods rather than bare `assert`
    statements so that they are not stripped when running with `-O`.
    """

    def setUp(self):
        self.setUpPyfakefs()
        self.fs.create_file(
            "/folder/script.py",
            contents="\n".join(
                [
                    "import os",
                    "import sys",
                    "",
                    "cwd = os.getcwd()",
                    "sysmodules = sys.modules",
                    "syspath = sys.path",
                    "",
                    "sys.modules['test_module'] = 'bar'",
                    "sys.path.append('/some/path')",
                ]
            ),
        )
        self.fs.create_symlink("/folder2/symlink.py", "/folder/script.py")

    #
    # __name__
    #
    def test__name__is_set_correctly(self):
        result = exec_script("script.py", "/folder")

        self.assertEqual(result.__name__, "__main__")

    #
    # __file__
    #
    def test_relative_path_works_and__file__shows_it(self):
        result = exec_script("script.py", "/folder")

        # exec_script() keeps the path as given before Python 3.9 and
        # makes it absolute from Python 3.9 on.
        if sys.version_info < (3, 9):
            expected = "script.py"
        else:
            expected = "/folder/script.py"
        self.assertEqual(result.__file__, expected)

    def test_absolute_path_works_and__file__shows_it(self):
        result = exec_script("/folder/script.py", "/folder")

        self.assertEqual(result.__file__, "/folder/script.py")

    def test__file__doesnt_follow_symlink(self):
        result = exec_script("symlink.py", "/folder2")

        # Either way the path of the symlink itself is expected, not the
        # path of the file it points to.
        if sys.version_info < (3, 9):
            expected = "symlink.py"
        else:
            expected = "/folder2/symlink.py"
        self.assertEqual(result.__file__, expected)

    #
    # working dir
    #
    def test_working_directory_is_set_and_reset_correctly(self):
        os.chdir("/")

        result = exec_script("/folder/script.py", "/folder")

        self.assertEqual(result.cwd, "/folder")
        self.assertEqual(os.getcwd(), "/")

    #
    # sys.modules
    #
    def test__main__module_is_set_correctly(self):
        result = exec_script("/folder/script.py", "/folder")

        self.assertIs(result.sysmodules["__main__"], result)

    def test_script_cannot_modify_our_sys_modules(self):
        sysmodules_backup = sys.modules.copy()

        exec_script("/folder/script.py", "/folder")

        self.assertEqual(sys.modules, sysmodules_backup)

    #
    # sys.path
    #
    def test_script_cannot_modify_our_sys_path(self):
        syspath_backup = sys.path.copy()

        exec_script("/folder/script.py", "/folder")

        self.assertEqual(sys.path, syspath_backup)

    def test_sys_path_is_set_up_correctly(self):
        syspath_backup = sys.path[:]
        result = exec_script("/folder/script.py", "/folder")

        self.assertEqual(result.syspath[0], "/folder")
        self.assertEqual(
            result.syspath[1:], syspath_backup[1:] + ["/some/path"]
        )

    def test_symlink_is_followed_before_adding_base_dir_to_sys_path(self):
        result = exec_script("symlink.py", "/folder2")

        self.assertEqual(result.syspath[0], "/folder")


# Run the test suite only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    unittest.main()

balu
  • 3,500
  • 4
  • 34
  • 35