#! /usr/bin/env python

"""The Tab Nanny despises ambiguous indentation.  She knows no mercy.

tabnanny -- Detection of ambiguous indentation

For the time being this module is intended to be called as a script.
However it is possible to import it into an IDE and use the function
check() described below.

Warning: The API provided by this module is likely to change in future
releases; such changes may not be backward compatible.
"""

# Released to the public domain, by Tim Peters, 15 April 1998.

# XXX Note: this is now a standard library module.
# XXX The API needs to undergo changes however; the current code is too
# XXX script-like.  This will be addressed later.

# NOTE: the code below deliberately sticks to constructs that mean the same
# thing on Python 2.6+ and Python 3: "except X as e", print(...) called with
# exactly one pre-formatted string, and "//" for floor division.

__version__ = "6"

import os
import sys
import getopt
import tokenize
if not hasattr(tokenize, 'NL'):
    raise ValueError("tokenize.NL doesn't exist -- tokenize module too old")

__all__ = ["check", "NannyNag", "process_tokens"]

# Module-level switches, incremented by main() for each -v / -q flag.
verbose = 0
filename_only = 0


def errprint(*args):
    """Write str() of each argument to stderr, space-separated, then a newline."""
    sys.stderr.write(" ".join([str(arg) for arg in args]) + "\n")


def main():
    """Script entry point: parse the -q / -v flags, then check() each argument.

    Problems with the command line are reported through errprint() and make
    the function return without checking anything.
    """
    global verbose, filename_only
    try:
        opts, args = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as msg:
        errprint(msg)
        return
    for o, a in opts:
        if o == '-q':
            filename_only = filename_only + 1
        if o == '-v':
            verbose = verbose + 1
    if not args:
        errprint("Usage:", sys.argv[0], "[-v] file_or_directory ...")
        return
    for arg in args:
        check(arg)


class NannyNag(Exception):
    """
    Raised by process_tokens() if detecting an ambiguous indent.
    Captured and handled in check().
    """
    def __init__(self, lineno, msg, line):
        self.lineno, self.msg, self.line = lineno, msg, line

    def get_lineno(self):
        return self.lineno

    def get_msg(self):
        return self.msg

    def get_line(self):
        return self.line


def check(file):
    """check(file_or_dir)

    If file_or_dir is a directory and not a symbolic link, then recursively
    descend the directory tree named by file_or_dir, checking all .py files
    along the way. If file_or_dir is an ordinary Python source file, it is
    checked for whitespace related problems. The diagnostic messages are
    written to standard output using print; I/O and tokenizer errors are
    written to standard error via errprint().
    """

    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("%r: listing directory" % (file,))
        names = os.listdir(file)
        for name in names:
            fullname = os.path.join(file, name)
            # Recurse into real sub-directories and into anything whose
            # name ends in ".py" (case-normalized for the platform).
            if (os.path.isdir(fullname) and
                not os.path.islink(fullname) or
                os.path.normcase(name[-3:]) == ".py"):
                check(fullname)
        return

    try:
        f = open(file)
    except IOError as msg:
        errprint("%r: I/O Error: %s" % (file, msg))
        return

    if verbose > 1:
        print("checking %r ..." % (file,))

    try:
        try:
            process_tokens(tokenize.generate_tokens(f.readline))
        finally:
            # Always release the file handle, whatever the outcome.
            f.close()

    except tokenize.TokenError as msg:
        errprint("%r: Token Error: %s" % (file, msg))
        return

    except IndentationError as msg:
        # The tokenizer itself may reject the indentation (e.g. a dedent
        # that matches no outer level); report it instead of crashing.
        errprint("%r: Indentation Error: %s" % (file, msg))
        return

    except NannyNag as nag:
        badline = nag.get_lineno()
        line = nag.get_line()
        if verbose:
            print("%r: *** Line %d: trouble in tab city! ***" % (file, badline))
            print("offending line: %r" % (line,))
            print(nag.get_msg())
        else:
            if ' ' in file:
                file = '"' + file + '"'
            if filename_only:
                print(file)
            else:
                print("%s %s %r" % (file, badline, line))
        return

    if verbose:
        print("%r: Clean bill of health." % (file,))


class Whitespace:
    # the characters used for space and tab
    S, T = ' \t'

    # members:
    #   raw
    #       the original string
    #   n
    #       the number of leading whitespace characters in raw
    #   nt
    #       the number of tabs in raw[:n]
    #   norm
    #       the normal form as a pair (count, trailing), where:
    #       count
    #           a tuple such that raw[:n] contains count[i]
    #           instances of S * i + T
    #       trailing
    #           the number of trailing spaces in raw[:n]
    #       It's A Theorem that m.indent_level(t) ==
    #       n.indent_level(t) for all t >= 1 iff m.norm == n.norm.
    #   is_simple
    #       true iff raw[:n] is of the form (T*)(S*)

    def __init__(self, ws):
        self.raw = ws
        S, T = Whitespace.S, Whitespace.T
        count = []
        # b: length of the current run of spaces; n: leading whitespace
        # characters seen so far; nt: tabs seen so far.
        b = n = nt = 0
        for ch in self.raw:
            if ch == S:
                n = n + 1
                b = b + 1
            elif ch == T:
                n = n + 1
                nt = nt + 1
                if b >= len(count):
                    count = count + [0] * (b - len(count) + 1)
                count[b] = count[b] + 1
                b = 0
            else:
                break
        self.n = n
        self.nt = nt
        self.norm = tuple(count), b
        self.is_simple = len(count) <= 1

    # return length of longest contiguous run of spaces (whether or not
    # preceding a tab)
    def longest_run_of_spaces(self):
        count, trailing = self.norm
        return max(len(count)-1, trailing)

    def indent_level(self, tabsize):
        # (in the derivation below "/" means floor division)
        #
        # count, il = self.norm
        # for i in range(len(count)):
        #    if count[i]:
        #        il = il + (i/tabsize + 1)*tabsize * count[i]
        # return il

        # quicker:
        # il = trailing + sum (i/ts + 1)*ts*count[i] =
        # trailing + ts * sum (i/ts + 1)*count[i] =
        # trailing + ts * sum i/ts*count[i] + count[i] =
        # trailing + ts * [(sum i/ts*count[i]) + (sum count[i])] =
        # trailing + ts * [(sum i/ts*count[i]) + num_tabs]
        # and note that i/ts*count[i] is 0 when i < ts

        count, trailing = self.norm
        il = 0
        for i in range(tabsize, len(count)):
            # "//" keeps this an integer even under true division.
            il = il + i // tabsize * count[i]
        return trailing + tabsize * (il + self.nt)

    # return true iff self.indent_level(t) == other.indent_level(t)
    # for all t >= 1
    def equal(self, other):
        return self.norm == other.norm

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) != other.indent_level(ts) == i2.
    # Intended to be used after not self.equal(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_equal_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            if self.indent_level(ts) != other.indent_level(ts):
                a.append( (ts,
                           self.indent_level(ts),
                           other.indent_level(ts)) )
        return a

    # Return True iff self.indent_level(t) < other.indent_level(t)
    # for all t >= 1.
    # The algorithm is due to Vincent Broman.
    # Easy to prove it's correct.
    # XXXpost that.
    # Trivial to prove n is sharp (consider T vs ST).
    # Unknown whether there's a faster general way.  I suspected so at
    # first, but no longer.
    # For the special (but common!) case where M and N are both of the
    # form (T*)(S*), M.less(N) iff M.len() < N.len() and
    # M.num_tabs() <= N.num_tabs(). Proof is easy but kinda long-winded.
    # XXXwrite that up.
    # Note that M is of the form (T*)(S*) iff len(M.norm[0]) <= 1.
    def less(self, other):
        if self.n >= other.n:
            return False
        if self.is_simple and other.is_simple:
            return self.nt <= other.nt
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        # the self.n >= other.n test already did it for ts=1
        for ts in range(2, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                return False
        return True

    # return a list of tuples (ts, i1, i2) such that
    # i1 == self.indent_level(ts) >= other.indent_level(ts) == i2.
    # Intended to be used after not self.less(other) is known, in which
    # case it will return at least one witnessing tab size.
    def not_less_witness(self, other):
        n = max(self.longest_run_of_spaces(),
                other.longest_run_of_spaces()) + 1
        a = []
        for ts in range(1, n+1):
            if self.indent_level(ts) >= other.indent_level(ts):
                a.append( (ts,
                           self.indent_level(ts),
                           other.indent_level(ts)) )
        return a


def format_witnesses(w):
    """Format the tab sizes of a witness list as "at tab size(s) a, b, ...".

    w is a list of (tabsize, level1, level2) tuples as returned by
    Whitespace.not_equal_witness() / Whitespace.not_less_witness(); only
    the first element of each tuple is used.
    """
    firsts = [str(tup[0]) for tup in w]
    prefix = "at tab size"
    if len(w) > 1:
        prefix = prefix + "s"
    return prefix + " " + ', '.join(firsts)


def process_tokens(tokens):
    """Scan a token stream and raise NannyNag at the first ambiguous indent.

    tokens is an iterable of 5-tuples (type, string, start, end, line) in
    the shape produced by tokenize.generate_tokens().  Returns None when the
    whole stream has been consumed without finding a problem.
    """
    INDENT = tokenize.INDENT
    DEDENT = tokenize.DEDENT
    NEWLINE = tokenize.NEWLINE
    JUNK = tokenize.COMMENT, tokenize.NL
    # Stack of the Whitespace objects of the enclosing indentation levels,
    # seeded with the empty indentation of the top level.
    indents = [Whitespace("")]
    check_equal = 0

    for (tok_type, token, start, end, line) in tokens:
        if tok_type == NEWLINE:
            # a program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            # If an INDENT appears, setting check_equal is wrong, and will
            # be undone when we see the INDENT.
            check_equal = 1

        elif tok_type == INDENT:
            check_equal = 0
            thisguy = Whitespace(token)
            if not indents[-1].less(thisguy):
                witness = indents[-1].not_less_witness(thisguy)
                msg = "indent not greater e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)
            indents.append(thisguy)

        elif tok_type == DEDENT:
            # there's nothing we need to check here!  what's important is
            # that when the run of DEDENTs ends, the indentation of the
            # program statement (or ENDMARKER) that triggered the run is
            # equal to what's left at the top of the indents stack

            # Ouch!  This assert triggers if the last line of the source
            # is indented *and* lacks a newline -- then DEDENTs pop out
            # of thin air.
            # assert check_equal  # else no earlier NEWLINE, or an earlier INDENT
            check_equal = 1

            del indents[-1]

        elif check_equal and tok_type not in JUNK:
            # this is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER; the "line" argument exposes the leading whitespace
            # for this statement; in the case of ENDMARKER, line is an empty
            # string, so will properly match the empty string with which the
            # "indents" stack was seeded
            check_equal = 0
            thisguy = Whitespace(line)
            if not indents[-1].equal(thisguy):
                witness = indents[-1].not_equal_witness(thisguy)
                msg = "indent not equal e.g. " + format_witnesses(witness)
                raise NannyNag(start[0], msg, line)


if __name__ == '__main__':
    main()

# Generated by PyXR 0.9.4