PyXR

c:\python24\lib\tabnanny.py


0001 #! /usr/bin/env python
0002 
0003 """The Tab Nanny despises ambiguous indentation.  She knows no mercy.
0004 
0005 tabnanny -- Detection of ambiguous indentation
0006 
0007 For the time being this module is intended to be called as a script.
0008 However it is possible to import it into an IDE and use the function
0009 check() described below.
0010 
0011 Warning: The API provided by this module is likely to change in future
0012 releases; such changes may not be backward compatible.
0013 """
0014 
0015 # Released to the public domain, by Tim Peters, 15 April 1998.
0016 
0017 # XXX Note: this is now a standard library module.
0018 # XXX The API needs to undergo changes however; the current code is too
0019 # XXX script-like.  This will be addressed later.
0020 
0021 __version__ = "6"
0022 
0023 import os
0024 import sys
0025 import getopt
0026 import tokenize
0027 if not hasattr(tokenize, 'NL'):
0028     raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")
0029 
0030 __all__ = ["check", "NannyNag", "process_tokens"]
0031 
0032 verbose = 0
0033 filename_only = 0
0034 
def errprint(*args):
    """Write the arguments to standard error on a single line.

    Every argument is converted with str(); the pieces are separated by
    one space and the line is terminated with a newline.
    """
    pieces = [str(arg) for arg in args]
    sys.stderr.write(" ".join(pieces))
    sys.stderr.write("\n")
0041 
def main():
    """Command-line entry point: parse the options, then check each argument.

    Every -q on the command line increments the module-level filename_only
    counter and every -v increments verbose.  An unrecognised option, or
    the absence of any file/directory argument, is reported through
    errprint() and nothing is checked.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error:
        # Fetch the exception via sys.exc_info() so no version-specific
        # "except ..., name" syntax is needed.
        errprint(sys.exc_info()[1])
        return
    for flag, _ in opts:
        if flag == '-q':
            filename_only += 1
        if flag == '-v':
            verbose += 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for target in args:
        check(target)
0059 
class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().

    Carries the line number of the offending line, a message describing
    the problem, and the text of the line itself.
    """

    def __init__(self, lineno, msg, line):
        self.lineno = lineno
        self.msg = msg
        self.line = line

    def get_lineno(self):
        """Return the line number stored at construction."""
        return self.lineno

    def get_msg(self):
        """Return the diagnostic message stored at construction."""
        return self.msg

    def get_line(self):
        """Return the source line text stored at construction."""
        return self.line
0073 
def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems.

    Diagnostics about ambiguous indentation (and the progress chatter
    enabled by the module-level ``verbose`` setting) are written to
    standard output; I/O and tokenizer errors are reported on standard
    error through errprint().  The function always returns None.
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            # Recurse into real (non-symlink) subdirectories and into
            # anything whose name ends in ".py".
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError:
        # sys.exc_info() avoids version-specific "except ..., name" syntax.
        msg = sys.exc_info()[1]
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % (file,))

    # The outer try/finally guarantees the file is closed on every path:
    # clean run, tokenizer error, or indentation complaint.  The nested
    # form is used because a combined try/except/finally is not available
    # on older interpreters.
    try:
        try:
            process_tokens(tokenize.generate_tokens(f.readline))

        except tokenize.TokenError:
            msg = sys.exc_info()[1]
            errprint("%r: Token Error: %s" % (file, msg))
            return

        except NannyNag:
            nag = sys.exc_info()[1]
            badline = nag.get_lineno()
            line = nag.get_line()
            if verbose:
                print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
                print("offending line: %r" % (line,))
                print(nag.get_msg())
            else:
                # Quote names containing spaces so the terse output stays
                # unambiguous.
                if ' ' in file:
                    file = '"' + file + '"'
                if filename_only:
                    print(file)
                else:
                    print("%s %s %s" % (file, badline, repr(line)))
            return
    finally:
        f.close()

    if verbose:
        print("%r: Clean bill of health." % (file,))
0127 
class Whitespace:
    """Analysis of the leading whitespace of one line.

    Instances are built from a string; only the leading run of spaces and
    tabs is examined.  The methods answer questions about how the
    indentation compares with another Whitespace under every tab size.
    """

    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        """Record the whitespace statistics of the leading blanks in ws."""
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: spaces seen since the last tab; n: leading whitespace chars;
        # nt: tabs among them.
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n += 1
                b += 1
            elif ch == T:
                n += 1
                nt += 1
                # Grow count so that index b exists, then tally one more
                # tab preceded by exactly b spaces.
                if b >= len(count):
                    count.extend([0] * (b - len(count) + 1))
                count[b] += 1
                b = 0
            else:
                # first non-blank character ends the indentation
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count) - 1, trailing)

    def indent_level(self, tabsize):
        """Return the column reached by the indentation for this tabsize.

        tabsize must be an integer >= 1.  The result is always an integer.
        """
        # Straightforward form:
        #   il = trailing + sum (i//ts + 1)*ts*count[i]
        # which simplifies to
        #   trailing + ts * [(sum i//ts*count[i]) + num_tabs]
        # and i//ts*count[i] is 0 when i < ts, so the sum can start at ts.
        #
        # Floor division (//) is spelled explicitly: plain "/" would give
        # fractional, wrong answers wherever true division is in effect.
        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            il += i // tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine != theirs:
                a.append((ts, mine, theirs))
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n + 1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n + 1):
            mine = self.indent_level(ts)
            theirs = other.indent_level(ts)
            if mine >= theirs:
                a.append((ts, mine, theirs))
        return a
0261 
def format_witnesses(w):
    """Return a phrase listing the tab sizes found in the witness tuples.

    w is a sequence of tuples whose first item is a tab size.  The result
    starts with "at tab size" (pluralised when there is more than one
    witness) followed by the comma-separated tab sizes.
    """
    sizes = [str(witness[0]) for witness in w]
    if len(w) > 1:
        label = "at tab sizes"
    else:
        label = "at tab size"
    return label + " " + ', '.join(sizes)
0268 
def process_tokens(tokens):
    """Walk a token stream and raise NannyNag on ambiguous indentation.

    tokens is an iterable of 5-tuples (type, string, start, end, line),
    such as the stream check() obtains from tokenize.generate_tokens().
    A stack of Whitespace objects mirrors the open indentation levels:
    every INDENT must be strictly greater than the enclosing level for
    all tab sizes, and the first real token after a NEWLINE or a run of
    DEDENTs must match the level on top of the stack for all tab sizes.
    Returns None if the stream ends without complaint.
    """
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    junk = (tokenize.COMMENT, tokenize.NL)
    stack = [Whitespace("")]
    expect_equal = False

    for (kind, text, start, end, line) in tokens:
        if kind == NEWLINE:
            # A statement (or ENDMARKER) will eventually follow, after a
            # possibly empty run of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # Should an INDENT turn up, this flag is wrong and the INDENT
            # branch resets it.
            expect_equal = True

        elif kind == INDENT:
            expect_equal = False
            current = Whitespace(text)
            enclosing = stack[-1]
            if not enclosing.less(current):
                witness = enclosing.not_less_witness(current)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            stack.append(current)

        elif kind == DEDENT:
            # Nothing to verify yet: what matters is that once the run of
            # DEDENTs ends, the statement (or ENDMARKER) that caused it
            # lines up with whatever is left on top of the stack.
            #
            # No assertion that the flag is already set: if the last line
            # of the source is indented and lacks a newline, DEDENTs show
            # up without a preceding NEWLINE.
            expect_equal = True
            stack.pop()

        elif expect_equal and kind not in junk:
            # First "real" token after a NEWLINE, hence the start of the
            # next statement or the ENDMARKER.  "line" exposes that
            # statement's leading whitespace; for ENDMARKER it is the
            # empty string, matching the seed of the stack.
            expect_equal = False
            current = Whitespace(line)
            enclosing = stack[-1]
            if not enclosing.equal(current):
                witness = enclosing.not_equal_witness(current)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
0322 
0323 
0324 if __name__ == '__main__':
0325     main()
0326
Generated by PyXR 0.9.4