PyXR

c:\python24\lib \ modulefinder.py



0001 """Find modules used by a script, using introspection."""
0002 
0003 # This module should be kept compatible with Python 2.2, see PEP 291.
0004 
0005 import dis
0006 import imp
0007 import marshal
0008 import os
0009 import sys
0010 import new
0011 
# Mode used whenever a source file is opened for reading.  File objects
# that expose a "newlines" attribute support universal line endings ("U");
# otherwise stay compatible with Python < 2.3 and use plain "r".
READ_MODE = "r"
if hasattr(sys.__stdout__, "newlines"):
    READ_MODE = "U"

# Numeric values of the opcodes that scan_code() looks for.
LOAD_CONST, IMPORT_NAME, STORE_NAME, STORE_GLOBAL = map(
    dis.opname.index,
    ('LOAD_CONST', 'IMPORT_NAME', 'STORE_NAME', 'STORE_GLOBAL'))
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
0023 
# ModuleFinder does a good job of simulating Python's import mechanism, but
# it cannot handle __path__ modifications that packages make at runtime.
# Therefore there is a mechanism whereby you can register extra paths in
# this map for a package, and they will be honored.

# Note that this maps package names to lists of paths.
packagePathMap = {}

# A public interface
def AddPackagePath(packagename, path):
    """Register 'path' as an extra search path for package 'packagename'."""
    packagePathMap.setdefault(packagename, []).append(path)
0037 
# The ReplacePackage mechanism allows modulefinder to work around the
# way the _xmlplus package injects itself under the name "xml" into
# sys.modules at runtime: call ReplacePackage("_xmlplus", "xml")
# before running ModuleFinder.

# Maps the name a package is found under to the name it is recorded as.
replacePackageMap = {}

def ReplacePackage(oldname, newname):
    """Record that package 'oldname' should be registered as 'newname'."""
    replacePackageMap.update({oldname: newname})
0047 
0048 
class Module:
    """Bookkeeping record for one module or package known to ModuleFinder."""

    def __init__(self, name, file=None, path=None):
        self.__name__ = name
        self.__file__ = file
        self.__path__ = path
        self.__code__ = None
        # Global names that are assigned to in the module (a dict used as
        # a set).  This includes names imported through starimports of
        # Python modules.
        self.globalnames = {}
        # Starimports this module did that could not be resolved, i.e. a
        # starimport from a non-Python module (a dict used as a set).
        self.starimports = {}

    def __repr__(self):
        # The name is always shown; file and path only when they are set.
        shown = [repr(self.__name__)]
        for optional in (self.__file__, self.__path__):
            if optional is not None:
                shown.append(repr(optional))
        return "Module(%s)" % ", ".join(shown)
0072 
0073 class ModuleFinder:
0074 
0075     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
0076         if path is None:
0077             path = sys.path
0078         self.path = path
0079         self.modules = {}
0080         self.badmodules = {}
0081         self.debug = debug
0082         self.indent = 0
0083         self.excludes = excludes
0084         self.replace_paths = replace_paths
0085         self.processed_paths = []   # Used in debugging only
0086 
0087     def msg(self, level, str, *args):
0088         if level <= self.debug:
0089             for i in range(self.indent):
0090                 print "   ",
0091             print str,
0092             for arg in args:
0093                 print repr(arg),
0094             print
0095 
0096     def msgin(self, *args):
0097         level = args[0]
0098         if level <= self.debug:
0099             self.indent = self.indent + 1
0100             self.msg(*args)
0101 
0102     def msgout(self, *args):
0103         level = args[0]
0104         if level <= self.debug:
0105             self.indent = self.indent - 1
0106             self.msg(*args)
0107 
0108     def run_script(self, pathname):
0109         self.msg(2, "run_script", pathname)
0110         fp = open(pathname, READ_MODE)
0111         stuff = ("", "r", imp.PY_SOURCE)
0112         self.load_module('__main__', fp, pathname, stuff)
0113 
0114     def load_file(self, pathname):
0115         dir, name = os.path.split(pathname)
0116         name, ext = os.path.splitext(name)
0117         fp = open(pathname, READ_MODE)
0118         stuff = (ext, "r", imp.PY_SOURCE)
0119         self.load_module(name, fp, pathname, stuff)
0120 
0121     def import_hook(self, name, caller=None, fromlist=None):
0122         self.msg(3, "import_hook", name, caller, fromlist)
0123         parent = self.determine_parent(caller)
0124         q, tail = self.find_head_package(parent, name)
0125         m = self.load_tail(q, tail)
0126         if not fromlist:
0127             return q
0128         if m.__path__:
0129             self.ensure_fromlist(m, fromlist)
0130         return None
0131 
0132     def determine_parent(self, caller):
0133         self.msgin(4, "determine_parent", caller)
0134         if not caller:
0135             self.msgout(4, "determine_parent -> None")
0136             return None
0137         pname = caller.__name__
0138         if caller.__path__:
0139             parent = self.modules[pname]
0140             assert caller is parent
0141             self.msgout(4, "determine_parent ->", parent)
0142             return parent
0143         if '.' in pname:
0144             i = pname.rfind('.')
0145             pname = pname[:i]
0146             parent = self.modules[pname]
0147             assert parent.__name__ == pname
0148             self.msgout(4, "determine_parent ->", parent)
0149             return parent
0150         self.msgout(4, "determine_parent -> None")
0151         return None
0152 
0153     def find_head_package(self, parent, name):
0154         self.msgin(4, "find_head_package", parent, name)
0155         if '.' in name:
0156             i = name.find('.')
0157             head = name[:i]
0158             tail = name[i+1:]
0159         else:
0160             head = name
0161             tail = ""
0162         if parent:
0163             qname = "%s.%s" % (parent.__name__, head)
0164         else:
0165             qname = head
0166         q = self.import_module(head, qname, parent)
0167         if q:
0168             self.msgout(4, "find_head_package ->", (q, tail))
0169             return q, tail
0170         if parent:
0171             qname = head
0172             parent = None
0173             q = self.import_module(head, qname, parent)
0174             if q:
0175                 self.msgout(4, "find_head_package ->", (q, tail))
0176                 return q, tail
0177         self.msgout(4, "raise ImportError: No module named", qname)
0178         raise ImportError, "No module named " + qname
0179 
0180     def load_tail(self, q, tail):
0181         self.msgin(4, "load_tail", q, tail)
0182         m = q
0183         while tail:
0184             i = tail.find('.')
0185             if i < 0: i = len(tail)
0186             head, tail = tail[:i], tail[i+1:]
0187             mname = "%s.%s" % (m.__name__, head)
0188             m = self.import_module(head, mname, m)
0189             if not m:
0190                 self.msgout(4, "raise ImportError: No module named", mname)
0191                 raise ImportError, "No module named " + mname
0192         self.msgout(4, "load_tail ->", m)
0193         return m
0194 
0195     def ensure_fromlist(self, m, fromlist, recursive=0):
0196         self.msg(4, "ensure_fromlist", m, fromlist, recursive)
0197         for sub in fromlist:
0198             if sub == "*":
0199                 if not recursive:
0200                     all = self.find_all_submodules(m)
0201                     if all:
0202                         self.ensure_fromlist(m, all, 1)
0203             elif not hasattr(m, sub):
0204                 subname = "%s.%s" % (m.__name__, sub)
0205                 submod = self.import_module(sub, subname, m)
0206                 if not submod:
0207                     raise ImportError, "No module named " + subname
0208 
0209     def find_all_submodules(self, m):
0210         if not m.__path__:
0211             return
0212         modules = {}
0213         # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
0214         # But we must also collect Python extension modules - although
0215         # we cannot separate normal dlls from Python extensions.
0216         suffixes = []
0217         for triple in imp.get_suffixes():
0218             suffixes.append(triple[0])
0219         for dir in m.__path__:
0220             try:
0221                 names = os.listdir(dir)
0222             except os.error:
0223                 self.msg(2, "can't list directory", dir)
0224                 continue
0225             for name in names:
0226                 mod = None
0227                 for suff in suffixes:
0228                     n = len(suff)
0229                     if name[-n:] == suff:
0230                         mod = name[:-n]
0231                         break
0232                 if mod and mod != "__init__":
0233                     modules[mod] = mod
0234         return modules.keys()
0235 
0236     def import_module(self, partname, fqname, parent):
0237         self.msgin(3, "import_module", partname, fqname, parent)
0238         try:
0239             m = self.modules[fqname]
0240         except KeyError:
0241             pass
0242         else:
0243             self.msgout(3, "import_module ->", m)
0244             return m
0245         if self.badmodules.has_key(fqname):
0246             self.msgout(3, "import_module -> None")
0247             return None
0248         if parent and parent.__path__ is None:
0249             self.msgout(3, "import_module -> None")
0250             return None
0251         try:
0252             fp, pathname, stuff = self.find_module(partname,
0253                                                    parent and parent.__path__, parent)
0254         except ImportError:
0255             self.msgout(3, "import_module ->", None)
0256             return None
0257         try:
0258             m = self.load_module(fqname, fp, pathname, stuff)
0259         finally:
0260             if fp: fp.close()
0261         if parent:
0262             setattr(parent, partname, m)
0263         self.msgout(3, "import_module ->", m)
0264         return m
0265 
0266     def load_module(self, fqname, fp, pathname, (suffix, mode, type)):
0267         self.msgin(2, "load_module", fqname, fp and "fp", pathname)
0268         if type == imp.PKG_DIRECTORY:
0269             m = self.load_package(fqname, pathname)
0270             self.msgout(2, "load_module ->", m)
0271             return m
0272         if type == imp.PY_SOURCE:
0273             co = compile(fp.read()+'\n', pathname, 'exec')
0274         elif type == imp.PY_COMPILED:
0275             if fp.read(4) != imp.get_magic():
0276                 self.msgout(2, "raise ImportError: Bad magic number", pathname)
0277                 raise ImportError, "Bad magic number in %s" % pathname
0278             fp.read(4)
0279             co = marshal.load(fp)
0280         else:
0281             co = None
0282         m = self.add_module(fqname)
0283         m.__file__ = pathname
0284         if co:
0285             if self.replace_paths:
0286                 co = self.replace_paths_in_code(co)
0287             m.__code__ = co
0288             self.scan_code(co, m)
0289         self.msgout(2, "load_module ->", m)
0290         return m
0291 
0292     def _add_badmodule(self, name, caller):
0293         if name not in self.badmodules:
0294             self.badmodules[name] = {}
0295         self.badmodules[name][caller.__name__] = 1
0296 
0297     def _safe_import_hook(self, name, caller, fromlist):
0298         # wrapper for self.import_hook() that won't raise ImportError
0299         if name in self.badmodules:
0300             self._add_badmodule(name, caller)
0301             return
0302         try:
0303             self.import_hook(name, caller)
0304         except ImportError, msg:
0305             self.msg(2, "ImportError:", str(msg))
0306             self._add_badmodule(name, caller)
0307         else:
0308             if fromlist:
0309                 for sub in fromlist:
0310                     if sub in self.badmodules:
0311                         self._add_badmodule(sub, caller)
0312                         continue
0313                     try:
0314                         self.import_hook(name, caller, [sub])
0315                     except ImportError, msg:
0316                         self.msg(2, "ImportError:", str(msg))
0317                         fullname = name + "." + sub
0318                         self._add_badmodule(fullname, caller)
0319 
    def scan_code(self, co, m):
        """Walk the bytecode of 'co' and record what it means for module 'm'.

        Every IMPORT_NAME triggers _safe_import_hook(); names stored with
        STORE_NAME/STORE_GLOBAL are added to m.globalnames; star imports
        that cannot be resolved to scanned Python code are noted in
        m.starimports.  Code objects found among the constants are scanned
        recursively for the same module.
        """
        code = co.co_code
        n = len(code)
        i = 0
        # Most recent LOAD_CONST value; read when an IMPORT_NAME follows.
        fromlist = None
        while i < n:
            c = code[i]
            i = i+1
            op = ord(c)
            if op >= dis.HAVE_ARGUMENT:
                # The argument is the next two bytes, low byte first.
                oparg = ord(code[i]) + ord(code[i+1])*256
                i = i+2
            if op == LOAD_CONST:
                # An IMPORT_NAME is always preceded by a LOAD_CONST, it's
                # a tuple of "from" names, or None for a regular import.
                # The tuple may contain "*" for "from <mod> import *"
                fromlist = co.co_consts[oparg]
            elif op == IMPORT_NAME:
                assert fromlist is None or type(fromlist) is tuple
                name = co.co_names[oparg]
                have_star = 0
                if fromlist is not None:
                    if "*" in fromlist:
                        have_star = 1
                    # "*" is handled separately below, not as a submodule.
                    fromlist = [f for f in fromlist if f != "*"]
                self._safe_import_hook(name, m, fromlist)
                if have_star:
                    # We've encountered an "import *". If it is a Python module,
                    # the code has already been parsed and we can suck out the
                    # global names.
                    mm = None
                    if m.__path__:
                        # At this point we don't know whether 'name' is a
                        # submodule of 'm' or a global module. Let's just try
                        # the full name first.
                        mm = self.modules.get(m.__name__ + "." + name)
                    if mm is None:
                        mm = self.modules.get(name)
                    if mm is not None:
                        m.globalnames.update(mm.globalnames)
                        m.starimports.update(mm.starimports)
                        if mm.__code__ is None:
                            # No code was scanned for it, so its names are
                            # unknown.
                            m.starimports[name] = 1
                    else:
                        # The module was not found at all.
                        m.starimports[name] = 1
            elif op in STORE_OPS:
                # keep track of all global names that are assigned to
                name = co.co_names[oparg]
                m.globalnames[name] = 1
        # Nested code objects are stored as constants; scan those too.
        for c in co.co_consts:
            if isinstance(c, type(co)):
                self.scan_code(c, m)
0372 
0373     def load_package(self, fqname, pathname):
0374         self.msgin(2, "load_package", fqname, pathname)
0375         newname = replacePackageMap.get(fqname)
0376         if newname:
0377             fqname = newname
0378         m = self.add_module(fqname)
0379         m.__file__ = pathname
0380         m.__path__ = [pathname]
0381 
0382         # As per comment at top of file, simulate runtime __path__ additions.
0383         m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
0384 
0385         fp, buf, stuff = self.find_module("__init__", m.__path__)
0386         self.load_module(fqname, fp, buf, stuff)
0387         self.msgout(2, "load_package ->", m)
0388         return m
0389 
0390     def add_module(self, fqname):
0391         if self.modules.has_key(fqname):
0392             return self.modules[fqname]
0393         self.modules[fqname] = m = Module(fqname)
0394         return m
0395 
0396     def find_module(self, name, path, parent=None):
0397         if parent is not None:
0398             # assert path is not None
0399             fullname = parent.__name__+'.'+name
0400         else:
0401             fullname = name
0402         if fullname in self.excludes:
0403             self.msgout(3, "find_module -> Excluded", fullname)
0404             raise ImportError, name
0405 
0406         if path is None:
0407             if name in sys.builtin_module_names:
0408                 return (None, None, ("", "", imp.C_BUILTIN))
0409 
0410             path = self.path
0411         return imp.find_module(name, path)
0412 
0413     def report(self):
0414         """Print a report to stdout, listing the found modules with their
0415         paths, as well as modules that are missing, or seem to be missing.
0416         """
0417         print
0418         print "  %-25s %s" % ("Name", "File")
0419         print "  %-25s %s" % ("----", "----")
0420         # Print modules found
0421         keys = self.modules.keys()
0422         keys.sort()
0423         for key in keys:
0424             m = self.modules[key]
0425             if m.__path__:
0426                 print "P",
0427             else:
0428                 print "m",
0429             print "%-25s" % key, m.__file__ or ""
0430 
0431         # Print missing modules
0432         missing, maybe = self.any_missing_maybe()
0433         if missing:
0434             print
0435             print "Missing modules:"
0436             for name in missing:
0437                 mods = self.badmodules[name].keys()
0438                 mods.sort()
0439                 print "?", name, "imported from", ', '.join(mods)
0440         # Print modules that may be missing, but then again, maybe not...
0441         if maybe:
0442             print
0443             print "Submodules thay appear to be missing, but could also be",
0444             print "global names in the parent package:"
0445             for name in maybe:
0446                 mods = self.badmodules[name].keys()
0447                 mods.sort()
0448                 print "?", name, "imported from", ', '.join(mods)
0449 
0450     def any_missing(self):
0451         """Return a list of modules that appear to be missing. Use
0452         any_missing_maybe() if you want to know which modules are
0453         certain to be missing, and which *may* be missing.
0454         """
0455         missing, maybe = self.any_missing_maybe()
0456         return missing + maybe
0457 
0458     def any_missing_maybe(self):
0459         """Return two lists, one with modules that are certainly missing
0460         and one with modules that *may* be missing. The latter names could
0461         either be submodules *or* just global names in the package.
0462 
0463         The reason it can't always be determined is that it's impossible to
0464         tell which names are imported when "from module import *" is done
0465         with an extension module, short of actually importing it.
0466         """
0467         missing = []
0468         maybe = []
0469         for name in self.badmodules:
0470             if name in self.excludes:
0471                 continue
0472             i = name.rfind(".")
0473             if i < 0:
0474                 missing.append(name)
0475                 continue
0476             subname = name[i+1:]
0477             pkgname = name[:i]
0478             pkg = self.modules.get(pkgname)
0479             if pkg is not None:
0480                 if pkgname in self.badmodules[name]:
0481                     # The package tried to import this module itself and
0482                     # failed. It's definitely missing.
0483                     missing.append(name)
0484                 elif subname in pkg.globalnames:
0485                     # It's a global in the package: definitely not missing.
0486                     pass
0487                 elif pkg.starimports:
0488                     # It could be missing, but the package did an "import *"
0489                     # from a non-Python module, so we simply can't be sure.
0490                     maybe.append(name)
0491                 else:
0492                     # It's not a global in the package, the package didn't
0493                     # do funny star imports, it's very likely to be missing.
0494                     # The symbol could be inserted into the package from the
0495                     # outside, but since that's not good style we simply list
0496                     # it missing.
0497                     missing.append(name)
0498             else:
0499                 missing.append(name)
0500         missing.sort()
0501         maybe.sort()
0502         return missing, maybe
0503 
0504     def replace_paths_in_code(self, co):
0505         new_filename = original_filename = os.path.normpath(co.co_filename)
0506         for f, r in self.replace_paths:
0507             if original_filename.startswith(f):
0508                 new_filename = r + original_filename[len(f):]
0509                 break
0510 
0511         if self.debug and original_filename not in self.processed_paths:
0512             if new_filename != original_filename:
0513                 self.msgout(2, "co_filename %r changed to %r" \
0514                                     % (original_filename,new_filename,))
0515             else:
0516                 self.msgout(2, "co_filename %r remains unchanged" \
0517                                     % (original_filename,))
0518             self.processed_paths.append(original_filename)
0519 
0520         consts = list(co.co_consts)
0521         for i in range(len(consts)):
0522             if isinstance(consts[i], type(co)):
0523                 consts[i] = self.replace_paths_in_code(consts[i])
0524 
0525         return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize,
0526                          co.co_flags, co.co_code, tuple(consts), co.co_names,
0527                          co.co_varnames, new_filename, co.co_name,
0528                          co.co_firstlineno, co.co_lnotab,
0529                          co.co_freevars, co.co_cellvars)
0530 
0531 
0532 def test():
0533     # Parse command line
0534     import getopt
0535     try:
0536         opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
0537     except getopt.error, msg:
0538         print msg
0539         return
0540 
0541     # Process options
0542     debug = 1
0543     domods = 0
0544     addpath = []
0545     exclude = []
0546     for o, a in opts:
0547         if o == '-d':
0548             debug = debug + 1
0549         if o == '-m':
0550             domods = 1
0551         if o == '-p':
0552             addpath = addpath + a.split(os.pathsep)
0553         if o == '-q':
0554             debug = 0
0555         if o == '-x':
0556             exclude.append(a)
0557 
0558     # Provide default arguments
0559     if not args:
0560         script = "hello.py"
0561     else:
0562         script = args[0]
0563 
0564     # Set the path based on sys.path and the script directory
0565     path = sys.path[:]
0566     path[0] = os.path.dirname(script)
0567     path = addpath + path
0568     if debug > 1:
0569         print "path:"
0570         for item in path:
0571             print "   ", repr(item)
0572 
0573     # Create the module finder and turn its crank
0574     mf = ModuleFinder(path, debug, exclude)
0575     for arg in args[1:]:
0576         if arg == '-m':
0577             domods = 1
0578             continue
0579         if domods:
0580             if arg[-2:] == '.*':
0581                 mf.import_hook(arg[:-2], None, ["*"])
0582             else:
0583                 mf.import_hook(arg)
0584         else:
0585             mf.load_file(arg)
0586     mf.run_script(script)
0587     mf.report()
0588     return mf  # for -i debugging
0589 
0590 
# When executed as a script, run the command-line driver.  Its result is
# bound to the module-level name 'mf' so that the finder stays available
# for inspection afterwards.
if __name__ == '__main__':
    try:
        mf = test()
    except KeyboardInterrupt:
        # Ctrl-C: print a short notice.
        print "\n[interrupt]"
0596 

Generated by PyXR 0.9.4
SourceForge.net Logo