0001 """Find modules used by a script, using introspection.""" 0002 0003 # This module should be kept compatible with Python 2.2, see PEP 291. 0004 0005 import dis 0006 import imp 0007 import marshal 0008 import os 0009 import sys 0010 import new 0011 0012 if hasattr(sys.__stdout__, "newlines"): 0013 READ_MODE = "U" # universal line endings 0014 else: 0015 # remain compatible with Python < 2.3 0016 READ_MODE = "r" 0017 0018 LOAD_CONST = dis.opname.index('LOAD_CONST') 0019 IMPORT_NAME = dis.opname.index('IMPORT_NAME') 0020 STORE_NAME = dis.opname.index('STORE_NAME') 0021 STORE_GLOBAL = dis.opname.index('STORE_GLOBAL') 0022 STORE_OPS = [STORE_NAME, STORE_GLOBAL] 0023 0024 # Modulefinder does a good job at simulating Python's, but it can not 0025 # handle __path__ modifications packages make at runtime. Therefore there 0026 # is a mechanism whereby you can register extra paths in this map for a 0027 # package, and it will be honored. 0028 0029 # Note this is a mapping is lists of paths. 0030 packagePathMap = {} 0031 0032 # A Public interface 0033 def AddPackagePath(packagename, path): 0034 paths = packagePathMap.get(packagename, []) 0035 paths.append(path) 0036 packagePathMap[packagename] = paths 0037 0038 replacePackageMap = {} 0039 0040 # This ReplacePackage mechanism allows modulefinder to work around the 0041 # way the _xmlplus package injects itself under the name "xml" into 0042 # sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml") 0043 # before running ModuleFinder. 0044 0045 def ReplacePackage(oldname, newname): 0046 replacePackageMap[oldname] = newname 0047 0048 0049 class Module: 0050 0051 def __init__(self, name, file=None, path=None): 0052 self.__name__ = name 0053 self.__file__ = file 0054 self.__path__ = path 0055 self.__code__ = None 0056 # The set of global names that are assigned to in the module. 0057 # This includes those names imported through starimports of 0058 # Python modules. 0059 self.globalnames = {} 0060 # The set of starimports this module did that could not be 0061 # resolved, ie. a starimport from a non-Python module. 0062 self.starimports = {} 0063 0064 def __repr__(self): 0065 s = "Module(%r" % (self.__name__,) 0066 if self.__file__ is not None: 0067 s = s + ", %r" % (self.__file__,) 0068 if self.__path__ is not None: 0069 s = s + ", %r" % (self.__path__,) 0070 s = s + ")" 0071 return s 0072 0073 class ModuleFinder: 0074 0075 def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]): 0076 if path is None: 0077 path = sys.path 0078 self.path = path 0079 self.modules = {} 0080 self.badmodules = {} 0081 self.debug = debug 0082 self.indent = 0 0083 self.excludes = excludes 0084 self.replace_paths = replace_paths 0085 self.processed_paths = [] # Used in debugging only 0086 0087 def msg(self, level, str, *args): 0088 if level <= self.debug: 0089 for i in range(self.indent): 0090 print " ", 0091 print str, 0092 for arg in args: 0093 print repr(arg), 0094 print 0095 0096 def msgin(self, *args): 0097 level = args[0] 0098 if level <= self.debug: 0099 self.indent = self.indent + 1 0100 self.msg(*args) 0101 0102 def msgout(self, *args): 0103 level = args[0] 0104 if level <= self.debug: 0105 self.indent = self.indent - 1 0106 self.msg(*args) 0107 0108 def run_script(self, pathname): 0109 self.msg(2, "run_script", pathname) 0110 fp = open(pathname, READ_MODE) 0111 stuff = ("", "r", imp.PY_SOURCE) 0112 self.load_module('__main__', fp, pathname, stuff) 0113 0114 def load_file(self, pathname): 0115 dir, name = os.path.split(pathname) 0116 name, ext = os.path.splitext(name) 0117 fp = open(pathname, READ_MODE) 0118 stuff = (ext, "r", imp.PY_SOURCE) 0119 self.load_module(name, fp, pathname, stuff) 0120 0121 def import_hook(self, name, caller=None, fromlist=None): 0122 self.msg(3, "import_hook", name, caller, fromlist) 0123 parent = self.determine_parent(caller) 0124 q, tail = self.find_head_package(parent, name) 0125 m = self.load_tail(q, tail) 0126 if not fromlist: 0127 return q 0128 if m.__path__: 0129 self.ensure_fromlist(m, fromlist) 0130 return None 0131 0132 def determine_parent(self, caller): 0133 self.msgin(4, "determine_parent", caller) 0134 if not caller: 0135 self.msgout(4, "determine_parent -> None") 0136 return None 0137 pname = caller.__name__ 0138 if caller.__path__: 0139 parent = self.modules[pname] 0140 assert caller is parent 0141 self.msgout(4, "determine_parent ->", parent) 0142 return parent 0143 if '.' in pname: 0144 i = pname.rfind('.') 0145 pname = pname[:i] 0146 parent = self.modules[pname] 0147 assert parent.__name__ == pname 0148 self.msgout(4, "determine_parent ->", parent) 0149 return parent 0150 self.msgout(4, "determine_parent -> None") 0151 return None 0152 0153 def find_head_package(self, parent, name): 0154 self.msgin(4, "find_head_package", parent, name) 0155 if '.' in name: 0156 i = name.find('.') 0157 head = name[:i] 0158 tail = name[i+1:] 0159 else: 0160 head = name 0161 tail = "" 0162 if parent: 0163 qname = "%s.%s" % (parent.__name__, head) 0164 else: 0165 qname = head 0166 q = self.import_module(head, qname, parent) 0167 if q: 0168 self.msgout(4, "find_head_package ->", (q, tail)) 0169 return q, tail 0170 if parent: 0171 qname = head 0172 parent = None 0173 q = self.import_module(head, qname, parent) 0174 if q: 0175 self.msgout(4, "find_head_package ->", (q, tail)) 0176 return q, tail 0177 self.msgout(4, "raise ImportError: No module named", qname) 0178 raise ImportError, "No module named " + qname 0179 0180 def load_tail(self, q, tail): 0181 self.msgin(4, "load_tail", q, tail) 0182 m = q 0183 while tail: 0184 i = tail.find('.') 0185 if i < 0: i = len(tail) 0186 head, tail = tail[:i], tail[i+1:] 0187 mname = "%s.%s" % (m.__name__, head) 0188 m = self.import_module(head, mname, m) 0189 if not m: 0190 self.msgout(4, "raise ImportError: No module named", mname) 0191 raise ImportError, "No module named " + mname 0192 self.msgout(4, "load_tail ->", m) 0193 return m 0194 0195 def ensure_fromlist(self, m, fromlist, recursive=0): 0196 self.msg(4, "ensure_fromlist", m, fromlist, recursive) 0197 for sub in fromlist: 0198 if sub == "*": 0199 if not recursive: 0200 all = self.find_all_submodules(m) 0201 if all: 0202 self.ensure_fromlist(m, all, 1) 0203 elif not hasattr(m, sub): 0204 subname = "%s.%s" % (m.__name__, sub) 0205 submod = self.import_module(sub, subname, m) 0206 if not submod: 0207 raise ImportError, "No module named " + subname 0208 0209 def find_all_submodules(self, m): 0210 if not m.__path__: 0211 return 0212 modules = {} 0213 # 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"]. 0214 # But we must also collect Python extension modules - although 0215 # we cannot separate normal dlls from Python extensions. 0216 suffixes = [] 0217 for triple in imp.get_suffixes(): 0218 suffixes.append(triple[0]) 0219 for dir in m.__path__: 0220 try: 0221 names = os.listdir(dir) 0222 except os.error: 0223 self.msg(2, "can't list directory", dir) 0224 continue 0225 for name in names: 0226 mod = None 0227 for suff in suffixes: 0228 n = len(suff) 0229 if name[-n:] == suff: 0230 mod = name[:-n] 0231 break 0232 if mod and mod != "__init__": 0233 modules[mod] = mod 0234 return modules.keys() 0235 0236 def import_module(self, partname, fqname, parent): 0237 self.msgin(3, "import_module", partname, fqname, parent) 0238 try: 0239 m = self.modules[fqname] 0240 except KeyError: 0241 pass 0242 else: 0243 self.msgout(3, "import_module ->", m) 0244 return m 0245 if self.badmodules.has_key(fqname): 0246 self.msgout(3, "import_module -> None") 0247 return None 0248 if parent and parent.__path__ is None: 0249 self.msgout(3, "import_module -> None") 0250 return None 0251 try: 0252 fp, pathname, stuff = self.find_module(partname, 0253 parent and parent.__path__, parent) 0254 except ImportError: 0255 self.msgout(3, "import_module ->", None) 0256 return None 0257 try: 0258 m = self.load_module(fqname, fp, pathname, stuff) 0259 finally: 0260 if fp: fp.close() 0261 if parent: 0262 setattr(parent, partname, m) 0263 self.msgout(3, "import_module ->", m) 0264 return m 0265 0266 def load_module(self, fqname, fp, pathname, (suffix, mode, type)): 0267 self.msgin(2, "load_module", fqname, fp and "fp", pathname) 0268 if type == imp.PKG_DIRECTORY: 0269 m = self.load_package(fqname, pathname) 0270 self.msgout(2, "load_module ->", m) 0271 return m 0272 if type == imp.PY_SOURCE: 0273 co = compile(fp.read()+'\n', pathname, 'exec') 0274 elif type == imp.PY_COMPILED: 0275 if fp.read(4) != imp.get_magic(): 0276 self.msgout(2, "raise ImportError: Bad magic number", pathname) 0277 raise ImportError, "Bad magic number in %s" % pathname 0278 fp.read(4) 0279 co = marshal.load(fp) 0280 else: 0281 co = None 0282 m = self.add_module(fqname) 0283 m.__file__ = pathname 0284 if co: 0285 if self.replace_paths: 0286 co = self.replace_paths_in_code(co) 0287 m.__code__ = co 0288 self.scan_code(co, m) 0289 self.msgout(2, "load_module ->", m) 0290 return m 0291 0292 def _add_badmodule(self, name, caller): 0293 if name not in self.badmodules: 0294 self.badmodules[name] = {} 0295 self.badmodules[name][caller.__name__] = 1 0296 0297 def _safe_import_hook(self, name, caller, fromlist): 0298 # wrapper for self.import_hook() that won't raise ImportError 0299 if name in self.badmodules: 0300 self._add_badmodule(name, caller) 0301 return 0302 try: 0303 self.import_hook(name, caller) 0304 except ImportError, msg: 0305 self.msg(2, "ImportError:", str(msg)) 0306 self._add_badmodule(name, caller) 0307 else: 0308 if fromlist: 0309 for sub in fromlist: 0310 if sub in self.badmodules: 0311 self._add_badmodule(sub, caller) 0312 continue 0313 try: 0314 self.import_hook(name, caller, [sub]) 0315 except ImportError, msg: 0316 self.msg(2, "ImportError:", str(msg)) 0317 fullname = name + "." + sub 0318 self._add_badmodule(fullname, caller) 0319 0320 def scan_code(self, co, m): 0321 code = co.co_code 0322 n = len(code) 0323 i = 0 0324 fromlist = None 0325 while i < n: 0326 c = code[i] 0327 i = i+1 0328 op = ord(c) 0329 if op >= dis.HAVE_ARGUMENT: 0330 oparg = ord(code[i]) + ord(code[i+1])*256 0331 i = i+2 0332 if op == LOAD_CONST: 0333 # An IMPORT_NAME is always preceded by a LOAD_CONST, it's 0334 # a tuple of "from" names, or None for a regular import. 0335 # The tuple may contain "*" for "from <mod> import *" 0336 fromlist = co.co_consts[oparg] 0337 elif op == IMPORT_NAME: 0338 assert fromlist is None or type(fromlist) is tuple 0339 name = co.co_names[oparg] 0340 have_star = 0 0341 if fromlist is not None: 0342 if "*" in fromlist: 0343 have_star = 1 0344 fromlist = [f for f in fromlist if f != "*"] 0345 self._safe_import_hook(name, m, fromlist) 0346 if have_star: 0347 # We've encountered an "import *". If it is a Python module, 0348 # the code has already been parsed and we can suck out the 0349 # global names. 0350 mm = None 0351 if m.__path__: 0352 # At this point we don't know whether 'name' is a 0353 # submodule of 'm' or a global module. Let's just try 0354 # the full name first. 0355 mm = self.modules.get(m.__name__ + "." + name) 0356 if mm is None: 0357 mm = self.modules.get(name) 0358 if mm is not None: 0359 m.globalnames.update(mm.globalnames) 0360 m.starimports.update(mm.starimports) 0361 if mm.__code__ is None: 0362 m.starimports[name] = 1 0363 else: 0364 m.starimports[name] = 1 0365 elif op in STORE_OPS: 0366 # keep track of all global names that are assigned to 0367 name = co.co_names[oparg] 0368 m.globalnames[name] = 1 0369 for c in co.co_consts: 0370 if isinstance(c, type(co)): 0371 self.scan_code(c, m) 0372 0373 def load_package(self, fqname, pathname): 0374 self.msgin(2, "load_package", fqname, pathname) 0375 newname = replacePackageMap.get(fqname) 0376 if newname: 0377 fqname = newname 0378 m = self.add_module(fqname) 0379 m.__file__ = pathname 0380 m.__path__ = [pathname] 0381 0382 # As per comment at top of file, simulate runtime __path__ additions. 0383 m.__path__ = m.__path__ + packagePathMap.get(fqname, []) 0384 0385 fp, buf, stuff = self.find_module("__init__", m.__path__) 0386 self.load_module(fqname, fp, buf, stuff) 0387 self.msgout(2, "load_package ->", m) 0388 return m 0389 0390 def add_module(self, fqname): 0391 if self.modules.has_key(fqname): 0392 return self.modules[fqname] 0393 self.modules[fqname] = m = Module(fqname) 0394 return m 0395 0396 def find_module(self, name, path, parent=None): 0397 if parent is not None: 0398 # assert path is not None 0399 fullname = parent.__name__+'.'+name 0400 else: 0401 fullname = name 0402 if fullname in self.excludes: 0403 self.msgout(3, "find_module -> Excluded", fullname) 0404 raise ImportError, name 0405 0406 if path is None: 0407 if name in sys.builtin_module_names: 0408 return (None, None, ("", "", imp.C_BUILTIN)) 0409 0410 path = self.path 0411 return imp.find_module(name, path) 0412 0413 def report(self): 0414 """Print a report to stdout, listing the found modules with their 0415 paths, as well as modules that are missing, or seem to be missing. 0416 """ 0417 print 0418 print " %-25s %s" % ("Name", "File") 0419 print " %-25s %s" % ("----", "----") 0420 # Print modules found 0421 keys = self.modules.keys() 0422 keys.sort() 0423 for key in keys: 0424 m = self.modules[key] 0425 if m.__path__: 0426 print "P", 0427 else: 0428 print "m", 0429 print "%-25s" % key, m.__file__ or "" 0430 0431 # Print missing modules 0432 missing, maybe = self.any_missing_maybe() 0433 if missing: 0434 print 0435 print "Missing modules:" 0436 for name in missing: 0437 mods = self.badmodules[name].keys() 0438 mods.sort() 0439 print "?", name, "imported from", ', '.join(mods) 0440 # Print modules that may be missing, but then again, maybe not... 0441 if maybe: 0442 print 0443 print "Submodules thay appear to be missing, but could also be", 0444 print "global names in the parent package:" 0445 for name in maybe: 0446 mods = self.badmodules[name].keys() 0447 mods.sort() 0448 print "?", name, "imported from", ', '.join(mods) 0449 0450 def any_missing(self): 0451 """Return a list of modules that appear to be missing. Use 0452 any_missing_maybe() if you want to know which modules are 0453 certain to be missing, and which *may* be missing. 0454 """ 0455 missing, maybe = self.any_missing_maybe() 0456 return missing + maybe 0457 0458 def any_missing_maybe(self): 0459 """Return two lists, one with modules that are certainly missing 0460 and one with modules that *may* be missing. The latter names could 0461 either be submodules *or* just global names in the package. 0462 0463 The reason it can't always be determined is that it's impossible to 0464 tell which names are imported when "from module import *" is done 0465 with an extension module, short of actually importing it. 0466 """ 0467 missing = [] 0468 maybe = [] 0469 for name in self.badmodules: 0470 if name in self.excludes: 0471 continue 0472 i = name.rfind(".") 0473 if i < 0: 0474 missing.append(name) 0475 continue 0476 subname = name[i+1:] 0477 pkgname = name[:i] 0478 pkg = self.modules.get(pkgname) 0479 if pkg is not None: 0480 if pkgname in self.badmodules[name]: 0481 # The package tried to import this module itself and 0482 # failed. It's definitely missing. 0483 missing.append(name) 0484 elif subname in pkg.globalnames: 0485 # It's a global in the package: definitely not missing. 0486 pass 0487 elif pkg.starimports: 0488 # It could be missing, but the package did an "import *" 0489 # from a non-Python module, so we simply can't be sure. 0490 maybe.append(name) 0491 else: 0492 # It's not a global in the package, the package didn't 0493 # do funny star imports, it's very likely to be missing. 0494 # The symbol could be inserted into the package from the 0495 # outside, but since that's not good style we simply list 0496 # it missing. 0497 missing.append(name) 0498 else: 0499 missing.append(name) 0500 missing.sort() 0501 maybe.sort() 0502 return missing, maybe 0503 0504 def replace_paths_in_code(self, co): 0505 new_filename = original_filename = os.path.normpath(co.co_filename) 0506 for f, r in self.replace_paths: 0507 if original_filename.startswith(f): 0508 new_filename = r + original_filename[len(f):] 0509 break 0510 0511 if self.debug and original_filename not in self.processed_paths: 0512 if new_filename != original_filename: 0513 self.msgout(2, "co_filename %r changed to %r" \ 0514 % (original_filename,new_filename,)) 0515 else: 0516 self.msgout(2, "co_filename %r remains unchanged" \ 0517 % (original_filename,)) 0518 self.processed_paths.append(original_filename) 0519 0520 consts = list(co.co_consts) 0521 for i in range(len(consts)): 0522 if isinstance(consts[i], type(co)): 0523 consts[i] = self.replace_paths_in_code(consts[i]) 0524 0525 return new.code(co.co_argcount, co.co_nlocals, co.co_stacksize, 0526 co.co_flags, co.co_code, tuple(consts), co.co_names, 0527 co.co_varnames, new_filename, co.co_name, 0528 co.co_firstlineno, co.co_lnotab, 0529 co.co_freevars, co.co_cellvars) 0530 0531 0532 def test(): 0533 # Parse command line 0534 import getopt 0535 try: 0536 opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:") 0537 except getopt.error, msg: 0538 print msg 0539 return 0540 0541 # Process options 0542 debug = 1 0543 domods = 0 0544 addpath = [] 0545 exclude = [] 0546 for o, a in opts: 0547 if o == '-d': 0548 debug = debug + 1 0549 if o == '-m': 0550 domods = 1 0551 if o == '-p': 0552 addpath = addpath + a.split(os.pathsep) 0553 if o == '-q': 0554 debug = 0 0555 if o == '-x': 0556 exclude.append(a) 0557 0558 # Provide default arguments 0559 if not args: 0560 script = "hello.py" 0561 else: 0562 script = args[0] 0563 0564 # Set the path based on sys.path and the script directory 0565 path = sys.path[:] 0566 path[0] = os.path.dirname(script) 0567 path = addpath + path 0568 if debug > 1: 0569 print "path:" 0570 for item in path: 0571 print " ", repr(item) 0572 0573 # Create the module finder and turn its crank 0574 mf = ModuleFinder(path, debug, exclude) 0575 for arg in args[1:]: 0576 if arg == '-m': 0577 domods = 1 0578 continue 0579 if domods: 0580 if arg[-2:] == '.*': 0581 mf.import_hook(arg[:-2], None, ["*"]) 0582 else: 0583 mf.import_hook(arg) 0584 else: 0585 mf.load_file(arg) 0586 mf.run_script(script) 0587 mf.report() 0588 return mf # for -i debugging 0589 0590 0591 if __name__ == '__main__': 0592 try: 0593 mf = test() 0594 except KeyboardInterrupt: 0595 print "\n[interrupt]" 0596
Generated by PyXR 0.9.4