1

I'm trying to dynamically load modules and packages from arbitrary folder locations in python 2.7. It works great with bare, single file modules. But trying to load in a package is a bit harder.

The best I could figure out was to load the __init__.py file inside the package (folder). But say for example I have this:

root:
  mod.py
  package:
    __init__.py
    sub.py

If mod.py contains:

from package import sub

Using my current loading code (below), it will fail stating that there is no package named "sub", unless I add the following to package/__init__.py

import sub

I have to imagine that this is because when you import a package it would normally also scan for all the other sub files in it. Do I also just need to do that manually, or is there a method similar to imp.load_source that will also handle package folders?

Loading code:

import md5
import sys
import os.path
import imp
import traceback
import glob

def load_package(path, base):
    """Load the package folder ``path/base`` by executing its ``__init__.py``.

    Returns a ``(base, module)`` tuple, or ``None`` when the folder contains
    no ``__init__.py``.  Any exception raised while loading is printed to
    ``sys.stderr`` and then re-raised.

    NOTE: the module is created with ``imp.load_source`` and, as the
    NameError shown further down demonstrates, ends up without a
    ``__path__`` attribute.
    """
    try:
        try:
            # The package folder itself is appended to sys.path, even when the
            # check below finds that it is not a package.
            sys.path.append(path + "/" + base)
            init = path + "/" + base + "/__init__.py"
            if not os.path.exists(init):
                return None

            fin = open(init, 'rb')

            # Execute __init__.py as a module named `base`.
            return  (base, imp.load_source(base, init, fin))
        finally:
            # `fin` is unbound when we returned None above or open() failed;
            # the bare except swallows the resulting NameError.
            try: fin.close()
            except: pass
    except ImportError, x:
        traceback.print_exc(file = sys.stderr)
        raise
    except:
        # Same handling as the ImportError branch: report, then propagate.
        traceback.print_exc(file = sys.stderr)
        raise

def load_module(path):
    """Load the single-file module at ``path`` with ``imp.load_source``.

    Returns a ``(base, module)`` tuple where ``base`` is the file name with
    ".py" removed.  Any exception raised while loading is printed to
    ``sys.stderr`` and then re-raised.
    """
    try:
        try:
            # NOTE: code_dir is computed but never used.
            code_dir = os.path.dirname(path)
            code_file = os.path.basename(path)
            # str.replace removes every ".py" occurrence in the file name,
            # not only the trailing extension.
            base = code_file.replace(".py", "")

            fin = open(path, 'rb')

            # NOTE: this path-derived name is computed but never used; the
            # module is loaded and returned under `base`.
            hash = md5.new(path).hexdigest() + "_" + code_file
            return  (base, imp.load_source(base, path, fin))
        finally:
            # `fin` is unbound if open() failed; the bare except swallows
            # the resulting NameError.
            try: fin.close()
            except: pass
    except ImportError, x:
        traceback.print_exc(file = sys.stderr)
        raise
    except:
        # Same handling as the ImportError branch: report, then propagate.
        traceback.print_exc(file = sys.stderr)
        raise

def load_folder(dir):
    """Load every package and ``.py`` module found directly inside ``dir``.

    ``dir`` is appended to ``sys.path``.  Returns a dict mapping each loaded
    name to its module object.
    """
    sys.path.append(dir)
    mods = {}

    # Packages first: every immediate sub-directory is a candidate.
    for p in glob.glob(dir + "/*/"):
        # Derive the folder name by deleting all path separators and then
        # removing the (separator-less) parent path from the result.
        base = p.replace("\\", "").replace("/", "")
        base = base.replace(dir.replace("\\", "").replace("/", ""), "")
        # load_package returns None for folders without __init__.py.
        package = load_package(dir, base) 
        if package:
            # Despite its name, `hash` holds the package name.
            hash, pack = package
            mods[hash] = pack

    # Then the plain modules sitting directly in `dir`.
    for m in glob.glob(dir + "/*.py"):
        hash, mod = load_module(m) 
        mods[hash] = mod

    return mods
Mr_and_Mrs_D
  • 32,208
  • 39
  • 178
  • 361
Adam Haile
  • 30,705
  • 58
  • 191
  • 286

1 Answer

1

The code below is functionally equivalent to your code modulo the traceback.print_exc (which you should let the client handle - if not handled the exception will end up printed anyway):

def _load_package(path, base):
    """Load the package folder ``path/base`` by executing its ``__init__.py``.

    Returns ``(base, module)``, or ``(None, None)`` when the folder has no
    ``__init__.py``.  The folder is appended to ``sys.path`` in either case.
    """
    package_dir = path + "/" + base
    sys.path.append(package_dir)
    init = package_dir + "/__init__.py"
    if os.path.exists(init):
        with open(init, 'rb') as fin:
            module = imp.load_source(base, init, fin)
        return base, module
    return None, None

def _load_module(path):
    """Load the single-file module at ``path`` with ``imp.load_source``.

    Returns ``(name, module)`` where ``name`` is the file name without its
    extension.  Exceptions raised while executing the file propagate to the
    caller.
    """
    code_file = os.path.basename(path)
    # splitext drops only the trailing extension; str.replace(".py", "")
    # would also eat a ".py" in the middle of the name ("my.pyx.py" -> "myx").
    base = os.path.splitext(code_file)[0]
    with open(path, 'rb') as fin:
        return base, imp.load_source(base, path, fin)

def load_folder(dir):
    """Load every package and ``.py`` module found directly inside ``dir``.

    ``dir`` is appended to ``sys.path``.  Packages are loaded before plain
    modules.  Returns a dict mapping each loaded name to its module object.
    """
    sys.path.append(dir)
    mods = {}
    for p in glob.glob(dir + "/*/"):
        # Use the last path component as the package name.  Deleting the
        # separators and str.replace()-ing the parent away mangles the name
        # whenever the parent's text recurs (dir "a" + "a/banana/" -> "bnn").
        base = os.path.basename(os.path.normpath(p))
        hash, pack = _load_package(dir, base)
        # _load_package returns (None, None) for folders without __init__.py.
        if hash: mods[hash] = pack
    for m in glob.glob(dir + "/*.py"): ##: /*/*.py
        hash, mod = _load_module(m)
        mods[hash] = mod
    return mods

## My added code
# Report which interpreter is running the experiment.
print('Python %s on %s' % (sys.version, sys.platform))

# Test folder holding mod.py and package/ (a path on the answerer's machine).
root_ = r'C:\Dropbox\eclipse_workspaces\python\sandbox\root'

def depyc(root, _indent=''): # deletes .pyc which will end up being imported
    if not _indent: print '\nListing', root
    for p in os.listdir(root):
        name = _indent + p
        abspath = os.path.join(root, p)
        if os.path.isdir(abspath):
            print name + ':'
            depyc(abspath, _indent=_indent + '  ')
        else:
            name_ = name[-4:]
            if name_ == '.pyc':
                os.remove(abspath)
                continue
            print name
    if not _indent: print

# Print the tree (removing .pyc files), then load everything found in root_.
depyc(root_)
load_folder(root_)

Prints:

Python 2.7.10 (default, May 23 2015, 09:40:32) [MSC v.1500 32 bit (Intel)] on win32

Listing C:\Dropbox\eclipse_workspaces\python\sandbox\root
mod.py
package:
  sub.py
  __init__.py

C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported!
C:\Dropbox\eclipse_workspaces\python\sandbox\root\mod.py imported!

mod.py, sub.py and __init__.py just contain

print(__file__ + u' imported!')

Now modifying mod.py to:

from package import sub
print(__file__ + u' imported!')

we get indeed:

Listing....

C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported! <### this may move around ###>
Traceback (most recent call last):
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 57, in <module>
    load_folder(root_)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 31, in load_folder
    hash, mod = _load_module(m)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 20, in _load_module
    return base, imp.load_source(base, path, fin)
  File "C:\Dropbox\eclipse_workspaces\python\sandbox\root\mod.py", line 1, in <module>
    from package import sub
ImportError: cannot import name sub

Note that the error is "cannot import name sub", not "there is no package named sub". So why can't it?

Modifying __init__.py:

# package/__init__.py    
print(__file__ + u' imported!')

print '__name__', '->', __name__
print '__package__', '->', __package__
print '__path__', '->', __path__

prints:

Listing...

C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py imported! <### not really ###>
__name__ -> package
__package__ -> None
__path__ ->
Traceback (most recent call last):
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 59, in <module>
    load_folder(root_)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 30, in load_folder
    hash, pack = _load_package(dir, base)
  File "C:/Users/MrD/.PyCharm40/config/scratches/load_folder.py", line 14, in _load_package
    init = imp.load_source(base, init, fin)
  File "C:\Dropbox\eclipse_workspaces\python\sandbox\root/package/__init__.py", line 5, in <module>
    print '__path__', '->', __path__
NameError: name '__path__' is not defined

While directly importing it would print:

>>> sys.path.extend([r'C:\Dropbox\eclipse_workspaces\python\sandbox\root'])
>>> import package
C:\Dropbox\eclipse_workspaces\python\sandbox\root\package\__init__.py imported!
__name__ -> package
__package__ -> None
__path__ -> ['C:\\Dropbox\\eclipse_workspaces\\python\\sandbox\\root\\package']

So modify _load_package to:

def _load_package(path, base):
    """Import the package folder ``path/base`` via ``imp.find_module`` and
    ``imp.load_module``.

    Returns ``(base, module)``, or ``(None, None)`` when the folder has no
    ``__init__.py``.  A module already present in ``sys.modules`` under
    ``base`` is returned as is instead of being loaded again.
    """
    pkgDir = os.path.abspath(os.path.join(path, base))
    init = os.path.join(pkgDir, "__init__.py")
    if not os.path.exists(init):
        return None, None
    # Search only the parent folder, not sys.path.
    file, pathname, description = imp.find_module(base, [path])
    # Debug output; the trailing comment shows what is printed for a package.
    print file, pathname, description # None, pkgDir, ('', '', 5)
    pack = sys.modules.get(base, None) # load_module will reload - yak!
    if pack is None:
        sys.modules[base] = pack = imp.load_module(base, file, pathname, description)
    return base, pack

Solves it as would:

...
    if pack is None:
        sys.modules[base] = pack = imp.load_module(base, None, '', description)
        pack.__path__ = [pkgDir]

or in your original code:

with open(init, 'rb') as fin:
    source = imp.load_source(base, init, fin)
    source.__path__ = path + "/" + base
    return base, source

So what's going on is that a package relies on its __path__ attribute to function correctly.


Kept hacking on that and came up with:

import sys
import os.path
import imp

def _load_(root, name):
    file_object, pathname, description = imp.find_module(name, [root])
    pack = sys.modules.get(name, None)
    try:
        if pack is None:
            pack = imp.load_module(name, file_object, pathname, description)
        else:
            print 'In cache', pack
    finally:
        if file_object is not None: file_object.close()
    return name, pack

def load_folder(root):
    # sys.path.append(root)
    mods = {}
    paths = [(item, os.path.join(root, item)) for item in os.listdir(root)]
    packages = filter(lambda path_tuple: os.path.exists(
        os.path.join((path_tuple[1]), "__init__.py")), paths)
    py_files = filter(lambda path_tuple: path_tuple[0][-3:] == '.py', paths)
    del paths
    # first import packages as in original - modules may import from them
    for path, _abspath in packages:
        print 'Importing', _abspath
        key, mod = _load_(root, name=path) # will use pyc if available!
        mods[key] = mod
    # then modules
    for path, _abspath in py_files:
        print 'Importing', _abspath
        key, mod = _load_(root, name=path[:-3])
        mods[key] = mod
    return mods

I merged the package and module loading code, dropping imp.load_source (one less tricky function) and relying on imp.load_module instead. I do not mess with sys.path directly, and since imp.load_module will reload [!] an already imported module, I check the sys.modules cache first. The mods dict returned is completely untested - you have to somehow implement a hash (the _abspath should suffice).

Run as:

def depyc(root, rmpyc, _indent=''):
    if not _indent: print '\nListing', root
    for p in os.listdir(root):
        name = _indent + p
        abspath = os.path.join(root, p)
        if os.path.isdir(abspath):
            print name + ':'
            depyc(abspath, rmpyc, _indent=_indent + '  ')
        else:
            if rmpyc and name[-4:] == '.pyc':
                os.remove(abspath)
                continue
            print name
    if not _indent: print

## Run ##
print('Python %s on %s' % (sys.version, sys.platform))
# Expects the example "root" folder inside the current working directory.
root_ = os.path.join(os.getcwdu(), u'root')
# Passing False only lists the tree and keeps the .pyc files.
depyc(root_, False) # False will end up importing the pyc files !
load_folder(root_)

to test various scenarios -

The code with an example root/ dir is here

Mr_and_Mrs_D
  • 32,208
  • 39
  • 178
  • 361
  • Related: http://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path, http://stackoverflow.com/questions/10533679/python-import-a-module-from-a-directory-thats-not-a-package – Mr_and_Mrs_D Oct 07 '15 at 19:11