"""Parse a Python module and describe its classes and methods.

Parse enough of a Python file to recognize imports and class and
method definitions, and to find out the superclasses of a class.

The interface consists of two functions:
        readmodule_ex(module [, path])
        readmodule(module [, path])
where module is the name of a Python module, and path is an optional
list of directories where the module is to be searched.  If present,
path is prepended to the system search path sys.path.  The return
value is a dictionary.  The keys of the dictionary are the names of
the classes and top-level functions defined in the module (including
names that are brought in via the from XXX import YYY construct).
The values are instances of the classes Class and Function defined
here.  readmodule() returns the same dictionary restricted to the
Class entries.  One special key/value pair is present for packages:
the key '__path__' has a list as its value which contains the package
search path.

A class is described by the class Class in this module.  Instances
of this class have the following instance variables:
        module -- the module name
        name -- the name of the class
        super -- a list of super classes (Class instances)
        methods -- a dictionary of methods
        file -- the file in which the class was defined
        lineno -- the line in the file on which the class statement occurred
The dictionary of methods uses the method names as keys and the line
numbers on which the method was defined as values.
If the name of a super class is not recognized, the corresponding
entry in the list of super classes is not a class instance but a
string giving the name of the super class.  Since import statements
are recognized and imported modules are scanned as well, this
shouldn't happen often.

A function is described by the class Function in this module.
Instances of this class have the following instance variables:
        module -- the module name
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line in the file on which the def statement occurred
"""

import sys
try:
    import imp
except ImportError:
    # The imp module no longer exists on Python 3.12+; _find_module()
    # falls back to importlib when it is unavailable.
    imp = None
import tokenize # Python tokenizer
from token import NAME, DEDENT, NEWLINE, OP
from operator import itemgetter

__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

_modules = {}                           # cache of modules we've seen

# Kinds of module reported by _find_module()
_SOURCE = "source"                      # a readable Python source file
_PACKAGE = "package"                    # a package directory
_OTHER = "other"                        # anything we cannot tokenize


# each Python class is represented by an instance of this class
class Class:
    '''Class to represent a Python class.'''
    def __init__(self, module, name, super, file, lineno):
        self.module = module
        self.name = name
        if super is None:
            super = []
        self.super = super
        self.methods = {}
        self.file = file
        self.lineno = lineno

    def _addmethod(self, name, lineno):
        # Record (or overwrite) the line number of method NAME.
        self.methods[name] = lineno


class Function:
    '''Class to represent a top-level Python function'''
    def __init__(self, module, name, file, lineno):
        self.module = module
        self.name = name
        self.file = file
        self.lineno = lineno


def readmodule(module, path=None):
    '''Backwards compatible interface.

    Do the same work as readmodule_ex() and then only keep Class
    objects from the resulting dictionary.'''

    res = {}
    for key, value in _readmodule(module, list(path or [])).items():
        if isinstance(value, Class):
            res[key] = value
    return res


def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of its contents.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and top-level function found in the module.  PATH defaults to
    an empty list.
    '''
    return _readmodule(module, list(path or []))


def _find_module(name, search_path):
    '''Locate NAME in the directories listed in SEARCH_PATH.

    Return a triple (fileobj, filename, kind) where kind is one of
    _SOURCE, _PACKAGE or _OTHER.  fileobj is an open text file for
    _SOURCE, and may be None for the other kinds.  Raise ImportError
    if the module cannot be found.
    '''
    if imp is not None:
        f, fname, (suffix, mode, kind) = imp.find_module(name, search_path)
        if kind == imp.PKG_DIRECTORY:
            return f, fname, _PACKAGE
        if kind == imp.PY_SOURCE:
            return f, fname, _SOURCE
        return f, fname, _OTHER

    # No imp module: ask importlib's path-based finder instead.
    from importlib.machinery import PathFinder, SOURCE_SUFFIXES
    spec = PathFinder.find_spec(name, search_path)
    if spec is None:
        raise ImportError("No module named %s" % name)
    if spec.submodule_search_locations is not None:
        locations = list(spec.submodule_search_locations)
        if not locations:
            raise ImportError("No module named %s" % name)
        return None, locations[0], _PACKAGE
    origin = spec.origin
    if origin and origin.endswith(tuple(SOURCE_SUFFIXES)):
        try:
            # tokenize.open() honours the source encoding declaration
            return tokenize.open(origin), origin, _SOURCE
        except (IOError, OSError):
            # e.g. source that lives inside an archive
            return None, origin, _OTHER
    return None, origin, _OTHER


def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    If INPACKAGE is true, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and PATH is combined with sys.path.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and not inpackage:
        _modules[module] = tree
        return tree

    # Check for a dotted module name
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage:
            package = "%s.%s" % (inpackage, package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage:
        search_path = path
    else:
        search_path = path + sys.path
    f, fname, kind = _find_module(module, search_path)
    if kind == _PACKAGE:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, kind = _find_module('__init__', [fname])
    # Cache before parsing so that circular imports terminate.
    _modules[fullmodule] = tree
    if kind != _SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return tree

    stack = [] # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        for tokentype, token, start, end, line in g:
            if tokentype == DEDENT:
                lineno, thisindent = start
                # close nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
            elif token == 'def':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, meth_name, start, end, line = next(g)
                if tokentype != NAME:
                    continue # Syntax error
                if stack:
                    cur_class = stack[-1][0]
                    if isinstance(cur_class, Class):
                        # it's a method
                        cur_class._addmethod(meth_name, lineno)
                    # else it's a nested def
                else:
                    # it's a function; record it under the full dotted
                    # module name, exactly as is done for classes
                    tree[meth_name] = Function(fullmodule, meth_name,
                                               fname, lineno)
                stack.append((None, thisindent)) # Marker for nested fns
            elif token == 'class':
                lineno, thisindent = start
                # close previous nested classes and defs
                while stack and stack[-1][1] >= thisindent:
                    del stack[-1]
                tokentype, class_name, start, end, line = next(g)
                if tokentype != NAME:
                    continue # Syntax error
                # parse what follows the class name
                tokentype, token, start, end, line = next(g)
                inherit = None
                if token == '(':
                    names = [] # List of superclasses
                    # there's a list of superclasses
                    level = 1
                    parts = [] # Tokens making up current superclass
                    while True:
                        tokentype, token, start, end, line = next(g)
                        if token in (')', ',') and level == 1:
                            n = "".join(parts)
                            if n in tree:
                                # we know this super class
                                n = tree[n]
                            else:
                                c = n.split('.')
                                if len(c) > 1:
                                    # super class is of the form
                                    # module.class: look in module for
                                    # class
                                    m = c[-2]
                                    c = c[-1]
                                    if m in _modules:
                                        d = _modules[m]
                                        if c in d:
                                            n = d[c]
                            # "class X():" and a trailing comma produce
                            # an empty name; that is not a superclass
                            if n:
                                names.append(n)
                            parts = []
                        if token == '(':
                            level += 1
                        elif token == ')':
                            level -= 1
                            if level == 0:
                                break
                        elif token == ',' and level == 1:
                            pass
                        # only use NAME and OP (== dot) tokens for the
                        # name, so that line breaks and comments inside
                        # the parentheses do not end up in it
                        elif tokentype in (NAME, OP) and level == 1:
                            parts.append(token)
                        # expressions in the base list are not supported
                    inherit = names
                cur_class = Class(fullmodule, class_name, inherit,
                                  fname, lineno)
                if not stack:
                    tree[class_name] = cur_class
                stack.append((cur_class, thisindent))
            elif token == 'import' and start[1] == 0:
                modules = _getnamelist(g)
                for mod, mod2 in modules:
                    try:
                        # Recursively read the imported module
                        if not inpackage:
                            _readmodule(mod, path)
                        else:
                            try:
                                _readmodule(mod, path, inpackage)
                            except ImportError:
                                _readmodule(mod, [])
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        pass
            elif token == 'from' and start[1] == 0:
                mod, token = _getname(g)
                if not mod or token != "import":
                    continue
                names = _getnamelist(g)
                try:
                    # Recursively read the imported module
                    d = _readmodule(mod, path, inpackage)
                except Exception:
                    # If we can't find or parse the imported module,
                    # too bad -- don't die here.
                    continue
                # add any classes that were defined in the imported module
                # to our name space if they were mentioned in the list
                for n, n2 in names:
                    if n in d:
                        tree[n2 or n] = d[n]
                    elif n == '*':
                        # don't add names that start with _
                        for n in d:
                            if n[0] != '_':
                                tree[n] = d[n]
    except StopIteration:
        # The helpers pull tokens with next(); running off the end of
        # the file in the middle of a statement simply ends the scan.
        pass
    finally:
        # Release the file even if the tokenizer raised an error.
        f.close()

    return tree


def _getnamelist(g):
    # Helper to get a comma-separated list of dotted names plus 'as'
    # clauses.  Return a list of pairs (name, name2) where name2 is
    # the 'as' name, or None if there is no 'as' clause.
    names = []
    while True:
        name, token = _getname(g)
        if not name:
            break
        if token == 'as':
            name2, token = _getname(g)
        else:
            name2 = None
        names.append((name, name2))
        # skip ahead to the next comma or to the end of the line
        while token != "," and "\n" not in token:
            tokentype, token, start, end, line = next(g)
        if token != ",":
            break
    return names


def _getname(g):
    # Helper to get a dotted name, return a pair (name, token) where
    # name is the dotted name, or None if there was no dotted name,
    # and token is the next input token.
    parts = []
    tokentype, token, start, end, line = next(g)
    if tokentype != NAME and token != '*':
        return (None, token)
    parts.append(token)
    while True:
        tokentype, token, start, end, line = next(g)
        if token != '.':
            break
        tokentype, token, start, end, line = next(g)
        if tokentype != NAME:
            break
        parts.append(token)
    return (".".join(parts), token)


def _main():
    # Main program for testing.
    import os
    mod = sys.argv[1]
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)
    # The '__path__' entry is a plain list without a lineno; sort it first.
    objs = sorted(tree.values(), key=lambda obj: getattr(obj, 'lineno', 0))
    for obj in objs:
        if isinstance(obj, Class):
            print("class %s %s %s" % (obj.name, obj.super, obj.lineno))
            methods = sorted(obj.methods.items(), key=itemgetter(1))
            for name, lineno in methods:
                if name != "__path__":
                    print("  def %s %s" % (name, lineno))
        elif isinstance(obj, Function):
            print("def %s %s" % (obj.name, obj.lineno))


if __name__ == "__main__":
    _main()

# Generated by PyXR 0.9.4