PyXR

c:\python24\lib \ pyclbr.py



0001 """Parse a Python module and describe its classes and methods.
0002 
0003 Parse enough of a Python file to recognize imports and class and
0004 method definitions, and to find out the superclasses of a class.
0005 
0006 The interface consists of a single function:
0007         readmodule_ex(module [, path])
0008 where module is the name of a Python module, and path is an optional
0009 list of directories where the module is to be searched.  If present,
0010 path is prepended to the system search path sys.path.  The return
0011 value is a dictionary.  The keys of the dictionary are the names of
0012 the classes and top-level functions defined in the module (including
0013 names brought in via the from XXX import YYY construct).  The values
0014 are instances of the classes Class and Function defined here.  One special key/value pair
0015 is present for packages: the key '__path__' has a list as its value
0016 which contains the package search path.
0017 
0018 A class is described by the class Class in this module.  Instances
0019 of this class have the following instance variables:
0020         module -- the module name
0021         name -- the name of the class
0022         super -- a list of super classes (Class instances)
0023         methods -- a dictionary of methods
0024         file -- the file in which the class was defined
0025         lineno -- the line in the file on which the class statement occurred
0026 The dictionary of methods uses the method names as keys and the line
0027 numbers on which the method was defined as values.
0028 If the name of a super class is not recognized, the corresponding
0029 entry in the list of super classes is not a class instance but a
0030 string giving the name of the super class.  Since import statements
0031 are recognized and imported modules are scanned as well, this
0032 shouldn't happen often.
0033 
0034 A function is described by the class Function in this module.
0035 Instances of this class have the following instance variables:
0036         module -- the module name
0037         name -- the name of the function
0038         file -- the file in which the function was defined
0039         lineno -- the line in the file on which the def statement occurred
0040 """
0041 
0042 import sys
0043 import imp
0044 import tokenize # Python tokenizer
0045 from token import NAME, DEDENT, NEWLINE
0046 from operator import itemgetter
0047 
# Public names of this module (what "from pyclbr import *" exports).
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]

# Cache of modules we've seen: maps a module name (the full dotted name
# for submodules) to the dictionary built for it by _readmodule().
_modules = {}
0051 
0052 # each Python class is represented by an instance of this class
class Class:
    '''Description of one Python class found while scanning a module.

    Instance variables:
        module -- the module name given to the constructor
        name -- the name of the class
        super -- the list of super classes (an empty list if None was given)
        methods -- a dictionary mapping method names to line numbers
        file -- the file in which the class was defined
        lineno -- the line on which the class statement occurred
    '''
    def __init__(self, module, name, super, file, lineno):
        # A missing list of super classes is stored as a fresh empty list;
        # a list supplied by the caller is kept as-is (not copied).
        if super is None:
            super = []
        self.module, self.name = module, name
        self.super = super
        self.file, self.lineno = file, lineno
        # Filled in later through _addmethod().
        self.methods = {}

    def _addmethod(self, name, lineno):
        '''Record that method NAME is defined on line LINENO.

        A later definition with the same name replaces the earlier one.'''
        self.methods[name] = lineno
0067 
class Function:
    '''Description of one top-level Python function.

    Instance variables:
        module -- the module name given to the constructor
        name -- the name of the function
        file -- the file in which the function was defined
        lineno -- the line on which the def statement occurred
    '''
    def __init__(self, module, name, file, lineno):
        self.module, self.name = module, name
        self.file, self.lineno = file, lineno
0075 
def readmodule(module, path=None):
    '''Backwards compatible interface.

    Read MODULE the same way readmodule_ex() does and then only keep
    the Class objects from the resulting dictionary (Function objects
    and the special '__path__' entry are dropped).

    PATH is an optional list of directories; when omitted (or None)
    an empty list is used.
    '''
    # Avoid a mutable default argument: build a fresh list per call.
    if path is None:
        path = []
    res = {}
    for key, value in _readmodule(module, path).items():
        if isinstance(value, Class):
            res[key] = value
    return res
0088 
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary of classes and functions.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    and top-level function found in the module.  For a package the
    dictionary also has a '__path__' entry holding the package
    search path.

    PATH is an optional list of directories that is searched before
    sys.path; when omitted (or None) an empty list is used.
    '''
    # Avoid a mutable default argument: build a fresh list per call.
    if path is None:
        path = []
    return _readmodule(module, path)
0102 
def _close_scopes(stack, indent):
    # Helper: pop every (object, indent) entry from STACK whose indentation
    # is at or to the right of INDENT, i.e. whose body has ended.
    while stack and stack[-1][1] >= indent:
        del stack[-1]

def _read_superclasses(g, known):
    # Helper: consume tokens from G after the '(' of a class statement up
    # to the matching ')' and return the list of super classes.  Each entry
    # is the object found in KNOWN (this module's names) or in an already
    # scanned module, or else the super class expression as a string.
    names = []      # List of superclasses
    level = 1       # Parenthesis nesting depth
    parts = []      # Tokens making up current superclass
    while True:
        tokentype, token, start, end, line = g.next()
        if token in (')', ',') and level == 1:
            n = "".join(parts)
            if n in known:
                # we know this super class
                n = known[n]
            else:
                c = n.split('.')
                if len(c) > 1:
                    # super class is of the form module.class:
                    # look in module for class
                    m = c[-2]
                    c = c[-1]
                    if m in _modules:
                        d = _modules[m]
                        if c in d:
                            n = d[c]
            names.append(n)
            parts = []
        if token == '(':
            level += 1
        elif token == ')':
            level -= 1
            if level == 0:
                break
        elif token == ',' and level == 1:
            pass
        else:
            parts.append(token)
    return names

def _readmodule(module, path, inpackage=None):
    '''Do the hard work for readmodule[_ex].

    MODULE is a (possibly dotted) module name and PATH a list of
    directories.  If INPACKAGE is true, it must be the dotted name of
    the package in which we are searching for a submodule, and then
    PATH must be the package search path; otherwise, we are searching
    for a top-level module, and PATH is combined with sys.path.

    Return a dictionary mapping names to Class and Function instances,
    plus a '__path__' entry for packages.  The dictionary is cached in
    _modules under the full module name.  Errors raised while locating
    the module itself (e.g. ImportError from imp.find_module) propagate
    to the caller; failures while reading modules that the scanned
    source imports are ignored.
    '''
    # Compute the full module name (prepending inpackage if set)
    if inpackage:
        fullmodule = "%s.%s" % (inpackage, module)
    else:
        fullmodule = module

    # Check in the cache
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dictionary for this module's contents
    tree = {}

    # Check if it is a built-in module; we don't do much for these
    if module in sys.builtin_module_names and not inpackage:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the parent package first and
    # then look for the last component on the parent's package path
    i = module.rfind('.')
    if i >= 0:
        package = module[:i]
        submodule = module[i+1:]
        parent = _readmodule(package, path, inpackage)
        if inpackage:
            package = "%s.%s" % (inpackage, package)
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module
    if inpackage:
        search_path = path
    else:
        search_path = path + sys.path
    f, fname, (suff, mode, kind) = imp.find_module(module, search_path)
    if kind == imp.PKG_DIRECTORY:
        tree['__path__'] = [fname]
        path = [fname] + path
        f, fname, (suff, mode, kind) = imp.find_module('__init__', [fname])
    # Register the (still empty) dictionary before parsing so that a
    # recursive request for this module finds it in the cache
    _modules[fullmodule] = tree
    if kind != imp.PY_SOURCE:
        # not Python source, can't do anything with this module
        if f is not None:
            f.close()
        return tree

    stack = [] # stack of (class, indent) pairs

    g = tokenize.generate_tokens(f.readline)
    try:
        try:
            for tokentype, token, start, end, line in g:
                if tokentype == DEDENT:
                    lineno, thisindent = start
                    # close nested classes and defs
                    _close_scopes(stack, thisindent)
                elif token == 'def':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    _close_scopes(stack, thisindent)
                    tokentype, meth_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue # Syntax error
                    if stack:
                        cur_class = stack[-1][0]
                        if isinstance(cur_class, Class):
                            # it's a method
                            cur_class._addmethod(meth_name, lineno)
                        # else it's a nested def
                    else:
                        # it's a function; use the full module name, the
                        # same as for classes below
                        tree[meth_name] = Function(fullmodule, meth_name,
                                                   fname, lineno)
                    stack.append((None, thisindent)) # Marker for nested fns
                elif token == 'class':
                    lineno, thisindent = start
                    # close previous nested classes and defs
                    _close_scopes(stack, thisindent)
                    tokentype, class_name, start, end, line = g.next()
                    if tokentype != NAME:
                        continue # Syntax error
                    # parse what follows the class name
                    tokentype, token, start, end, line = g.next()
                    inherit = None
                    if token == '(':
                        # there's a list of superclasses
                        inherit = _read_superclasses(g, tree)
                    cur_class = Class(fullmodule, class_name, inherit,
                                      fname, lineno)
                    if not stack:
                        # only top-level classes go into the result
                        tree[class_name] = cur_class
                    stack.append((cur_class, thisindent))
                elif token == 'import' and start[1] == 0:
                    for mod, mod2 in _getnamelist(g):
                        try:
                            # Recursively read the imported module
                            if not inpackage:
                                _readmodule(mod, path)
                            else:
                                try:
                                    _readmodule(mod, path, inpackage)
                                except ImportError:
                                    _readmodule(mod, [])
                        except Exception:
                            # If we can't find or parse the imported module,
                            # too bad -- don't die here.
                            pass
                elif token == 'from' and start[1] == 0:
                    mod, token = _getname(g)
                    if not mod or token != "import":
                        continue
                    names = _getnamelist(g)
                    try:
                        # Recursively read the imported module
                        d = _readmodule(mod, path, inpackage)
                    except Exception:
                        # If we can't find or parse the imported module,
                        # too bad -- don't die here.
                        continue
                    # add any classes that were defined in the imported
                    # module to our name space if they were mentioned in
                    # the list
                    for n, n2 in names:
                        if n in d:
                            tree[n2 or n] = d[n]
                        elif n == '*':
                            # don't add names that start with _
                            for key in d:
                                if key[0] != '_':
                                    tree[key] = d[key]
        except StopIteration:
            # the token stream ended in the middle of a statement
            pass
    finally:
        # close the source file even if tokenizing fails
        f.close()
    return tree
0272 
def _getnamelist(g):
    # Helper: read a comma-separated list of dotted names, each optionally
    # followed by an 'as' clause, from the token generator g.  The result
    # is a list of (name, asname) pairs; asname is None when the entry
    # has no 'as' clause.
    pairs = []
    while True:
        dotted, tok = _getname(g)
        if not dotted:
            return pairs
        alias = None
        if tok == 'as':
            alias, tok = _getname(g)
        pairs.append((dotted, alias))
        # Skip ahead to the next comma or to the end of the line.
        while not (tok == "," or "\n" in tok):
            tokentype, tok, start, end, line = g.next()
        if tok != ",":
            return pairs
0292 
0293 def _getname(g):
0294     # Helper to get a dotted name, return a pair (name, token) where
0295     # name is the dotted name, or None if there was no dotted name,
0296     # and token is the next input token.
0297     parts = []
0298     tokentype, token, start, end, line = g.next()
0299     if tokentype != NAME and token != '*':
0300         return (None, token)
0301     parts.append(token)
0302     while True:
0303         tokentype, token, start, end, line = g.next()
0304         if token != '.':
0305             break
0306         tokentype, token, start, end, line = g.next()
0307         if tokentype != NAME:
0308             break
0309         parts.append(token)
0310     return (".".join(parts), token)
0311 
0312 def _main():
0313     # Main program for testing.
0314     import os
0315     mod = sys.argv[1]
0316     if os.path.exists(mod):
0317         path = [os.path.dirname(mod)]
0318         mod = os.path.basename(mod)
0319         if mod.lower().endswith(".py"):
0320             mod = mod[:-3]
0321     else:
0322         path = []
0323     dict = readmodule_ex(mod, path)
0324     objs = dict.values()
0325     objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
0326                                getattr(b, 'lineno', 0)))
0327     for obj in objs:
0328         if isinstance(obj, Class):
0329             print "class", obj.name, obj.super, obj.lineno
0330             methods = sorted(obj.methods.iteritems(), key=itemgetter(1))
0331             for name, lineno in methods:
0332                 if name != "__path__":
0333                     print "  def", name, lineno
0334         elif isinstance(obj, Function):
0335             print "def", obj.name, obj.lineno
0336 
# Run the test driver only when this file is executed as a script.
if __name__ == "__main__":
    _main()
0339 

Generated by PyXR 0.9.4
SourceForge.net Logo