Source code for logilab.common.modutils

# copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of logilab-common.
#
# logilab-common is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option) any
# later version.
#
# logilab-common is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with logilab-common.  If not, see <http://www.gnu.org/licenses/>.
"""Python modules manipulation utility functions.

:type PY_SOURCE_EXTS: tuple(str)
:var PY_SOURCE_EXTS: tuple of possible Python source file extensions

:type STD_LIB_DIR: str
:var STD_LIB_DIR: directory where standard modules are located

:type BUILTIN_MODULES: dict
:var BUILTIN_MODULES: dictionary with builtin module names as key
"""

__docformat__ = "restructuredtext en"

import sys
import os
from os.path import (
    splitext,
    join,
    abspath,
    isdir,
    dirname,
    exists,
    expanduser,
    normcase,
    realpath,
)
from imp import find_module, load_module, C_BUILTIN, PY_COMPILED, PKG_DIRECTORY
from distutils.sysconfig import get_python_lib
from distutils.errors import DistutilsPlatformError
from typing import Dict, List, Optional, Any, Tuple, Union, Sequence
from types import ModuleType
from _frozen_importlib_external import FileFinder

from logilab.common import STD_BLACKLIST, _handle_blacklist
from logilab.common.deprecation import callable_deprecated

# zipimport is treated as optional: when it cannot be imported the name is
# bound to None instead.
try:
    import zipimport
except ImportError:
    # mypy: Incompatible types in assignment (expression has type "None", variable has type Module)
    # conditional import
    zipimport = None  # type: ignore

# Unique sentinel object; `_search_zip` returns it as the "module type" of a
# module located through a zip importer.
ZIPFILE = object()

# Notes about STD_LIB_DIR
# Consider arch-specific installation for STD_LIB_DIR definition
# :mod:`distutils.sysconfig` contains too many hardcoded values to rely on
#
# :see: `Problems with /usr/lib64 builds <http://bugs.python.org/issue1294959>`_
# :see: `FHS <http://www.pathname.com/fhs/pub/fhs-2.3.html#LIBLTQUALGTALTERNATEFORMATESSENTIAL>`_
# File extensions (without the leading dot) of Python source files and of
# compiled extension modules; the set depends on the running platform.
if sys.platform.startswith("win"):
    PY_SOURCE_EXTS = ("py", "pyw")
    PY_COMPILED_EXTS = ("dll", "pyd")
else:
    PY_SOURCE_EXTS = ("py",)
    PY_COMPILED_EXTS = ("so",)

# Directory of the standard library as reported by distutils.
try:
    STD_LIB_DIR = get_python_lib(standard_lib=True)
# get_python_lib(standard_lib=1) is not available on pypy, set STD_LIB_DIR to
# non-valid path, see https://bugs.pypy.org/issue1164
except DistutilsPlatformError:
    STD_LIB_DIR = "//"

# Directory returned by get_python_lib() with its default arguments; files
# located under it are never reported as standard by `is_standard_module`.
EXT_LIB_DIR = get_python_lib()

# Maps every name listed in sys.builtin_module_names to True (used as a set).
BUILTIN_MODULES = dict.fromkeys(sys.builtin_module_names, True)


class NoSourceFile(Exception):
    """Raised when no Python source file can be found for a precompiled file."""
class LazyObject:
    """Lazily reference an object (in practice a callable) living in a module.

    The module is not imported when the proxy is created. The import is
    triggered the first time an attribute that the proxy itself does not
    have is accessed, or the first time the proxy is called.

    Setting or deleting attributes affects the proxy, not the wrapped
    object, so it will not work as one might expect.
    """

    def __init__(self, module, obj):
        # dotted name of the module holding the target
        self.module = module
        # name of the target attribute inside that module
        self.obj = obj
        # cache for the resolved target, filled on first use
        self._imported = None

    def _getobj(self):
        """Return the wrapped object, importing its module on first call."""
        target = self._imported
        if target is None:
            holder = load_module_from_name(self.module)
            target = self._imported = getattr(holder, self.obj)
        return target

    def __getattribute__(self, attr):
        # Attributes of the proxy itself win; anything else is delegated to
        # the (lazily imported) wrapped object.
        try:
            return super().__getattribute__(attr)
        except AttributeError:
            return getattr(self._getobj(), attr)

    def __call__(self, *args, **kwargs):
        target = self._getobj()
        return target(*args, **kwargs)
def load_module_from_name(
    dotted_name: str, path: Optional[Any] = None, use_sys: int = True
) -> ModuleType:
    """Load a Python module given its dotted name.

    :type dotted_name: str
    :param dotted_name: python name of a module or package

    :type path: list or None
    :param path:
      optional list of path where the module or package should be
      searched (use sys.path if nothing or None is given)

    :type use_sys: bool
    :param use_sys:
      boolean indicating whether the sys.modules dictionary should be
      used or not

    :raise ImportError: if the module or package is not found

    :rtype: module
    :return: the loaded module
    """
    loaded = load_module_from_modpath(dotted_name.split("."), path, use_sys)
    if loaded is not None:
        return loaded
    raise ImportError("module %s doesn't exist" % dotted_name)
def load_module_from_modpath(
    parts: List[str], path: Optional[Any] = None, use_sys: int = True
) -> Optional[ModuleType]:
    """Load a python module given its name already split on '.'.

    :type parts: list(str) or tuple(str)
    :param parts:
      python name of a module or package split on '.'

    :type path: list or None
    :param path:
      optional list of path where the module or package should be
      searched (use sys.path if nothing or None is given)

    :type use_sys: bool
    :param use_sys:
      boolean indicating whether the sys.modules dictionary should be used or not

    :raise ImportError: if the module or package is not found

    :rtype: module
    :return: the loaded module
    """
    if use_sys:
        # fast path: the fully qualified module has already been imported
        full_name = ".".join(parts)
        if full_name in sys.modules:
            return sys.modules[full_name]
    seen = []
    parent = None
    for part in parts:
        seen.append(part)
        curname = ".".join(seen)
        is_last = len(seen) == len(parts)
        # Outer packages are always looked up in sys.modules, even with
        # use_sys=False; the final module only when use_sys is set (it may
        # have been loaded indirectly while importing a parent).
        module = sys.modules.get(curname) if (not is_last or use_sys) else None
        if module is None:
            handle, filename, description = find_module(part, path)
            try:
                # mypy: Argument 2 to "load_module" has incompatible type "IO[Any]";
                # mypy: expected "Optional[_FileLike]"
                module = load_module(curname, handle, filename, description)  # type: ignore
            finally:
                if handle is not None:
                    handle.close()
        if parent:
            # expose the freshly loaded module as an attribute of its parent
            setattr(parent, part, module)
        parent = module
        module_file = getattr(module, "__file__", "")
        if not module_file:
            if _is_namespace(curname):
                # namespace packages have no file: keep the current search path
                continue
            if not is_last:
                raise ImportError("no module in %s" % ".".join(parts[len(seen) :]))
        # sub-modules are searched next to the module just loaded
        path = [dirname(module_file)]
    return module
def load_module_from_file(filepath, path=None, use_sys=True, extrapath=None):
    """Load a Python module given the path of its file.

    :type filepath: str
    :param filepath: path to the python module or package

    :type path: list or None
    :param path:
      optional list of path where the module or package should be
      searched (use sys.path if nothing or None is given)

    :type use_sys: bool
    :param use_sys:
      boolean indicating whether the sys.modules dictionary should be
      used or not

    :type extrapath: dict or None
    :param extrapath: forwarded unchanged to `modpath_from_file`

    :raise ImportError: if the module or package is not found

    :rtype: module
    :return: the loaded module
    """
    # first turn the file path into a split module name, then load that name
    return load_module_from_modpath(modpath_from_file(filepath, extrapath), path, use_sys)
def _check_init(path: str, mod_path: List[str]) -> bool:
    """Check that every package along `mod_path` (below `path`) has an __init__ file.

    Namespace packages (as reported by `_is_namespace`) are exempted.
    """
    modpath = []
    for part in mod_path:
        modpath.append(part)
        path = join(path, part)
        if not _is_namespace(".".join(modpath)) and not _has_init(path):
            return False
    return True


def _canonicalize_path(path: str) -> str:
    """Expand a leading ``~`` and resolve symbolic links in `path`."""
    return realpath(expanduser(path))


@callable_deprecated("you should avoid using modpath_from_file()")
def modpath_from_file(filename: str, extrapath: Optional[Dict[str, str]] = None) -> List[str]:
    """DEPRECATED: doesn't play well with symlinks and sys.meta_path

    Given a file path return the corresponding split module's name
    (i.e name of a module or package split on '.')

    :type filename: str
    :param filename: file's path for which we want the module's name

    :type extrapath: dict
    :param extrapath:
      optional extra search path, with path as key and package name for the path
      as value. This is usually useful to handle package split in multiple
      directories using __path__ trick.

    :raise ImportError:
      if the corresponding module's name has not been found

    :rtype: list(str)
    :return: the corresponding split module's name
    """
    filename = _canonicalize_path(filename)
    base = os.path.splitext(filename)[0]
    if extrapath is not None:
        # Iterate over the original (key, value) pairs: the canonicalized
        # form of a key (``~`` expanded, symlinks resolved) is generally not
        # itself a key of `extrapath`, so it must not be used for the lookup.
        for raw_path, package in extrapath.items():
            path = abspath(_canonicalize_path(raw_path))
            if path and normcase(base[: len(path)]) == normcase(path):
                submodpath = [pkg for pkg in base[len(path) :].split(os.sep) if pkg]
                if _check_init(path, submodpath[:-1]):
                    return package.split(".") + submodpath
    for path in map(_canonicalize_path, sys.path):
        # normalize case on both sides, as done for `extrapath` above
        if path and normcase(base).startswith(normcase(path)):
            modpath = [pkg for pkg in base[len(path) :].split(os.sep) if pkg]
            if _check_init(path, modpath[:-1]):
                return modpath
    raise ImportError("Unable to find module for %s in %s" % (filename, ", \n".join(sys.path)))
def file_from_modpath(
    modpath: Sequence[str], path: Optional[Any] = None, context_file: Optional[str] = None
) -> Optional[str]:
    """given a mod path (i.e. split module / package name), return the
    corresponding file, giving priority to source file over precompiled
    file if it exists

    :type modpath: list or tuple
    :param modpath:
      split module's name (i.e name of a module or package split
      on '.')
      (this means explicit relative imports that start with dots have
      empty strings in this list!)

    :type path: list or None
    :param path:
      optional list of path where the module or package should be
      searched (use sys.path if nothing or None is given)

    :type context_file: str or None
    :param context_file:
      context file to consider, necessary if the identifier has been
      introduced using a relative import unresolvable in the actual
      context (i.e. modutils)

    :raise ImportError: if there is no such module in the directory

    :rtype: str or None
    :return:
      the path to the module's file or None if it's an integrated
      builtin module such as 'sys'
    """
    # Work on a private list: tuples are documented as valid input (and
    # would otherwise break the list concatenation / comparison below), and
    # the caller's sequence must not be altered by the lookup helpers.
    modpath = list(modpath)
    context: Optional[str] = dirname(context_file) if context_file is not None else None
    if modpath[0] == "xml":
        # handle _xmlplus
        try:
            return _file_from_modpath(["_xmlplus"] + modpath[1:], path, context)
        except ImportError:
            return _file_from_modpath(modpath, path, context)
    if modpath == ["os", "path"]:
        # FIXME: currently ignoring search_path...
        return os.path.__file__
    return _file_from_modpath(modpath, path, context)
def get_module_part(dotted_name: str, context_file: Optional[str] = None) -> str:
    """given a dotted name return the module part of the name :

    >>> get_module_part('logilab.common.modutils.get_module_part')
    'logilab.common.modutils'

    :type dotted_name: str
    :param dotted_name: full name of the identifier we are interested in

    :type context_file: str or None
    :param context_file:
      context file to consider, necessary if the identifier has been
      introduced using a relative import unresolvable in the actual
      context (i.e. modutils)

    :raise ImportError: if there is no such module in the directory

    :rtype: str
    :return: the module part of the name

    XXX: deprecated, since it doesn't handle package precedence over module
    (see #10066)
    """
    # os.path trick
    if dotted_name.startswith("os.path"):
        return "os.path"
    parts = dotted_name.split(".")
    head = parts[0]
    # Builtin modules are checked first when a context file is given, since
    # they would not be considered by the lookup below in that case.
    if context_file is not None and head in BUILTIN_MODULES:
        if len(parts) > 2:
            raise ImportError(dotted_name)
        return head
    path: Optional[List] = None
    starti = 0
    if head == "":
        assert context_file is not None, "explicit relative import, but no context_file?"
        path = []  # prevent resolving the import non-relatively
        starti = 1
    # every additional leading dot moves the context one directory up
    while parts[starti] == "":
        starti += 1
        assert context_file is not None
        context_file = dirname(context_file)
    count = len(parts)
    # a failure is only tolerated once we are within the last two components
    threshold = max(1, count - 2)
    for i in range(starti, count):
        try:
            file_from_modpath(parts[starti : i + 1], path=path, context_file=context_file)
        except ImportError:
            if i < threshold:
                raise
            return ".".join(parts[:i])
    return dotted_name
def get_modules(
    package: str, src_directory: str, blacklist: Sequence[str] = STD_BLACKLIST
) -> List[str]:
    """given a package directory return a list of all available python
    modules in the package and its subpackages

    :type package: str
    :param package: the python name for the package

    :type src_directory: str
    :param src_directory:
      path of the directory corresponding to the package

    :type blacklist: list or tuple
    :param blacklist:
      optional list of files or directory to ignore, default to
      the value of `logilab.common.STD_BLACKLIST`

    :rtype: list
    :return:
      the list of all available python modules in the package and its
      subpackages
    """
    modules = []
    for directory, dirnames, filenames in os.walk(src_directory):
        _handle_blacklist(blacklist, dirnames, filenames)
        # check for __init__.py
        if "__init__.py" not in filenames:
            # not a package: do not descend any further
            dirnames[:] = ()
            continue
        if directory != src_directory:
            # Use a real relative path rather than slicing off a prefix so
            # that a trailing separator on `src_directory` does not glue the
            # package and sub-package names together.
            rel_dir = os.path.relpath(directory, src_directory)
            modules.append(".".join([package] + rel_dir.split(os.sep)))
        for filename in filenames:
            if _is_python_file(filename) and filename != "__init__.py":
                rel_file = os.path.relpath(join(directory, filename), src_directory)
                # splitext() strips the extension whatever its length
                # (".py", ".so", but also ".pyd" and ".pyw")
                rel_module = splitext(rel_file)[0]
                modules.append(".".join([package] + rel_module.split(os.sep)))
    return modules
def get_module_files(src_directory: str, blacklist: Sequence[str] = STD_BLACKLIST) -> List[str]:
    """given a package directory return a list of all available python
    module's files in the package and its subpackages

    :type src_directory: str
    :param src_directory:
      path of the directory corresponding to the package

    :type blacklist: list or tuple
    :param blacklist:
      optional list of files or directory to ignore, default to the value of
      `logilab.common.STD_BLACKLIST`

    :rtype: list
    :return:
      the list of all available python module's files in the package and
      its subpackages
    """
    collected = []
    for directory, dirnames, filenames in os.walk(src_directory):
        _handle_blacklist(blacklist, dirnames, filenames)
        if "__init__.py" in filenames:
            collected.extend(
                join(directory, name) for name in filenames if _is_python_file(name)
            )
        else:
            # no __init__.py: not a package, prune the walk below this directory
            dirnames[:] = ()
    return collected
def get_source_file(filename: str, include_no_ext: bool = False) -> str:
    """given a python module's file name return the matching source file
    name (the filename will be returned identically if it's already an
    absolute path to a python source file...)

    :type filename: str
    :param filename: python module's file name

    :type include_no_ext: bool
    :param include_no_ext:
      when true and `filename` has no extension, the extension-less path
      itself is accepted if it exists

    :raise NoSourceFile: if no source file exists on the file system

    :rtype: str
    :return: the absolute path of the source file if it exists
    """
    stem, original_ext = splitext(abspath(filename))
    candidates = ("%s.%s" % (stem, ext) for ext in PY_SOURCE_EXTS)
    found = next((candidate for candidate in candidates if exists(candidate)), None)
    if found is not None:
        return found
    if not include_no_ext or original_ext or not exists(stem):
        raise NoSourceFile(filename)
    return stem
def cleanup_sys_modules(directories):
    """Remove from `sys.modules` the modules whose file path starts with one
    of `directories`.

    :param directories: path prefixes to match against each module's `__file__`

    :rtype: list
    :return: the names of the modules that were removed
    """
    removed = []
    # iterate over a snapshot since entries are deleted along the way
    for name, mod in list(sys.modules.items()):
        location = getattr(mod, "__file__", None)
        if location and any(location.startswith(prefix) for prefix in directories):
            removed.append(name)
            del sys.modules[name]
    return removed
def clean_sys_modules(names):
    """Remove from `sys.modules` every module whose name starts with one of
    `names`.

    :param names: module name prefixes

    :rtype: set
    :return: the names of the modules that were removed
    """
    removed = set()
    # iterate over a snapshot since entries are deleted along the way
    for modname in list(sys.modules):
        if any(modname.startswith(prefix) for prefix in names):
            del sys.modules[modname]
            removed.add(modname)
    return removed
def is_python_source(filename):
    """Tell whether `filename` has a python source file extension.

    :rtype: bool
    :return: True if the filename is a python source file
    """
    _, extension = splitext(filename)
    # drop the leading dot before comparing with the known extensions
    return extension[1:] in PY_SOURCE_EXTS
def is_standard_module(
    modname: str, std_path: Union[List[str], Tuple[str]] = (STD_LIB_DIR,)
) -> bool:
    """try to guess if a module is a standard python module (by default,
    see `std_path` parameter's description)

    :type modname: str
    :param modname: name of the module we are interested in

    :type std_path: list(str) or tuple(str)
    :param std_path: list of path considered as standard

    :rtype: bool
    :return:
      true if the module:
      - is located on the path listed in one of the directory in `std_path`
      - is a built-in module

    Note: this function is known to return wrong values when inside virtualenv.
    See https://www.logilab.org/ticket/294756.
    """
    top_level = modname.partition(".")[0]
    try:
        filename = file_from_modpath([top_level])
    except ImportError:
        # the import failed: most likely not a standard module
        return False
    if filename is None:
        # modules which are not living in a file are considered standard
        # (sys and __builtin__ for instance); we assume there are no
        # namespaces in stdlib
        return not _is_namespace(top_level)
    filename = abspath(filename)
    if filename.startswith(EXT_LIB_DIR):
        return False
    return any(filename.startswith(abspath(candidate)) for candidate in std_path)
def is_relative(modname: str, from_file: str) -> bool:
    """return true if the given module name is relative to the given
    file name

    :type modname: str
    :param modname: name of the module we are interested in

    :type from_file: str
    :param from_file:
      path of the module from which modname has been imported

    :rtype: bool
    :return:
      true if the module has been imported relatively to `from_file`
    """
    if not isdir(from_file):
        from_file = dirname(from_file)
    if from_file in sys.path:
        # found through the regular search path: not a relative import
        return False
    try:
        mp_file, _, _ = find_module(modname.split(".")[0], [from_file])
    except ImportError:
        return False
    # find_module() hands back an open file for plain modules and leaves it
    # to the caller to close it; only the existence matters here.
    if mp_file is not None:
        mp_file.close()
    return True
# internal only functions #####################################################


def _file_from_modpath(
    modpath: List[str], path: Optional[Any] = None, context: Optional[str] = None
) -> Optional[str]:
    """given a mod path (i.e. split module / package name), return the
    corresponding file

    this function is used internally, see `file_from_modpath`'s
    documentation for more information
    """
    assert len(modpath) > 0
    if context is not None:
        # try relatively to the context directory first, then fall back on
        # the regular search path
        try:
            mtype, mp_filename = _module_file(modpath, [context])
        except ImportError:
            mtype, mp_filename = _module_file(modpath, path)
    else:
        mtype, mp_filename = _module_file(modpath, path)
    if mtype == PY_COMPILED:
        # prefer the source file over the compiled one when it exists
        try:
            assert mp_filename is not None
            return get_source_file(mp_filename)
        except NoSourceFile:
            return mp_filename
    elif mtype == C_BUILTIN:
        # integrated builtin module
        return None
    elif mtype == PKG_DIRECTORY:
        assert mp_filename is not None
        mp_filename = _has_init(mp_filename)
    return mp_filename


def _search_zip(
    modpath: List[str], pic: Dict[str, Optional[FileFinder]]
) -> Tuple[object, str, str]:
    """Look for `modpath` through the importers cached in `pic`.

    :return: a (ZIPFILE, path of the module inside the archive, archive path) tuple
    :raise ImportError: if no cached importer knows about the module
    """
    for filepath, importer in pic.items():
        if importer is not None:
            if importer.find_module(modpath[0]):
                if not importer.find_module("/".join(modpath)):
                    raise ImportError(
                        "No module named %s in %s/%s" % (".".join(modpath[1:]), filepath, modpath)
                    )
                return ZIPFILE, abspath(filepath) + "/" + "/".join(modpath), filepath
    raise ImportError("No module named %s" % ".".join(modpath))


try:
    import pkg_resources
except ImportError:
    # mypy: Incompatible types in assignment (expression has type "None", variable has type Module)
    # conditional import
    pkg_resources = None  # type: ignore


def _is_namespace(modname: str) -> bool:
    """Tell whether `modname` is registered as a pkg_resources namespace package."""
    # mypy: Module has no attribute "_namespace_packages"; maybe "fixup_namespace_packages"?"
    # but is still has? or is it a failure from python3 port?
    return (
        pkg_resources is not None
        and modname in pkg_resources._namespace_packages  # type: ignore
    )


def _module_file(
    modpath: List[str], path: Optional[List[str]] = None
) -> Tuple[Union[int, object], Optional[str]]:
    """get a module type / file path

    :type modpath: list or tuple
    :param modpath:
      split module's name (i.e name of a module or package split
      on '.'), with leading empty strings for explicit relative import

    :type path: list or None
    :param path:
      optional list of path where the module or package should be
      searched (use sys.path if nothing or None is given)

    :rtype: tuple(int, str)
    :return: the module type flag and the file path for a module
    """
    # The lookup below consumes `modpath` with pop(): work on a private copy
    # so that the caller's list is left intact. `_file_from_modpath` passes
    # the same list again when it retries after an ImportError.
    modpath = list(modpath)
    # egg support compat
    try:
        pic = sys.path_importer_cache
        _path = path if path is not None else sys.path
        for __path in _path:
            if __path not in pic:
                try:
                    pic[__path] = zipimport.zipimporter(__path)
                except zipimport.ZipImportError:
                    pic[__path] = None
        checkeggs = True
    except AttributeError:
        checkeggs = False
    # pkg_resources support (aka setuptools namespace packages)
    if _is_namespace(modpath[0]) and modpath[0] in sys.modules:
        # setuptools has added into sys.modules a module object with proper
        # __path__, get back information from there
        module = sys.modules[modpath.pop(0)]
        # use list() to protect against _NamespacePath instance we get with python 3, which
        # find_module later doesn't like
        # mypy: Module has no attribute "__path__"
        path = list(module.__path__)  # type: ignore
        if not modpath:
            return C_BUILTIN, None
    imported = []
    while modpath:
        modname = modpath[0]
        # take care to changes in find_module implementation wrt builtin modules
        #
        # Python 2.6.6 (r266:84292, Sep 11 2012, 08:34:23)
        # >>> imp.find_module('posix')
        # (None, 'posix', ('', '', 6))
        #
        # Python 3.3.1 (default, Apr 26 2013, 12:08:46)
        # >>> imp.find_module('posix')
        # (None, None, ('', '', 6))
        try:
            mp_file, mp_filename, mp_desc = find_module(modname, path)
        except ImportError:
            if checkeggs:
                return _search_zip(modpath, pic)[:2]
            raise
        else:
            # only the location is needed: release the file find_module()
            # may have opened
            if mp_file is not None:
                mp_file.close()
            if checkeggs and mp_filename:
                fullabspath = [abspath(x) for x in _path]
                try:
                    pathindex = fullabspath.index(dirname(abspath(mp_filename)))
                    emtype, emp_filename, zippath = _search_zip(modpath, pic)
                    if pathindex > _path.index(zippath):
                        # an egg takes priority
                        return emtype, emp_filename
                except ValueError:
                    # XXX not in _path
                    pass
                except ImportError:
                    pass
            checkeggs = False
        imported.append(modpath.pop(0))
        mtype = mp_desc[2]
        if modpath:
            if mtype != PKG_DIRECTORY:
                raise ImportError("No module %s in %s" % (".".join(modpath), ".".join(imported)))
            # XXX guess if package is using pkgutil.extend_path by looking for
            # those keywords in the first four Kbytes. The file is read as
            # bytes so that its encoding can never make the read fail.
            try:
                with open(join(mp_filename, "__init__.py"), "rb") as stream:
                    data = stream.read(4096)
            except IOError:
                path = [mp_filename]
            else:
                if b"pkgutil" in data and b"extend_path" in data:
                    # extend_path is called, search sys.path for module/packages
                    # of this name see pkgutil.extend_path documentation
                    path = [join(p, *imported) for p in sys.path if isdir(join(p, *imported))]
                else:
                    path = [mp_filename]
    return mtype, mp_filename


def _is_python_file(filename: str) -> bool:
    """return true if the given filename should be considered as a python file

    .pyc and .pyo are ignored
    """
    return filename.endswith((".py", ".so", ".pyd", ".pyw"))


def _has_init(directory: str) -> Optional[str]:
    """if the given directory has a valid __init__ file, return its path,
    else return None
    """
    mod_or_pack = join(directory, "__init__")
    for ext in PY_SOURCE_EXTS + ("pyc", "pyo"):
        if exists(mod_or_pack + "." + ext):
            return mod_or_pack + "." + ext
    return None