"""text_file

provides the TextFile class, which gives an interface to text files
that (optionally) takes care of stripping comments, ignoring blank
lines, and joining lines with backslashes."""

__revision__ = "$Id: text_file.py,v 1.15 2002/11/14 02:25:41 akuchling Exp $"

# NOTE: 'types' and 'string' are no longer referenced by this module; the
# imports are retained in case other code relies on them being importable
# from here.
from types import *
import sys, os, string


class TextFile:

    """Provides a file-like object that takes care of all the things you
       commonly want to do when processing a text file that has some
       line-by-line syntax: strip comments (as long as "#" is your
       comment character), skip blank lines, join adjacent lines by
       escaping the newline (ie. backslash at end of line), strip
       leading and/or trailing whitespace.  All of these are optional
       and independently controllable.

       Provides a 'warn()' method so you can generate warning messages that
       report physical line number, even if the logical line in question
       spans multiple physical lines.  Also provides 'unreadline()' for
       implementing line-at-a-time lookahead.

       Constructor is called as:

           TextFile (filename=None, file=None, **options)

       It bombs (RuntimeError) if both 'filename' and 'file' are None;
       'filename' should be a string, and 'file' a file object (or
       something that provides 'readline()' and 'close()' methods).  It is
       recommended that you supply at least 'filename', so that TextFile
       can include it in warning messages.  If 'file' is not supplied,
       TextFile creates its own using the 'open()' builtin.

       The options are all boolean, and affect the value returned by
       'readline()':
         strip_comments [default: true]
           strip from "#" to end-of-line, as well as any whitespace
           leading up to the "#" -- unless it is escaped by a backslash
         lstrip_ws [default: false]
           strip leading whitespace from each line before returning it
         rstrip_ws [default: true]
           strip trailing whitespace (including line terminator!) from
           each line before returning it
         skip_blanks [default: true]
           skip lines that are empty *after* stripping comments and
           whitespace.  (If both lstrip_ws and rstrip_ws are false,
           then some lines may consist of solely whitespace: these will
           *not* be skipped, even if 'skip_blanks' is true.)
         join_lines [default: false]
           if a backslash is the last non-newline character on a line
           after stripping comments and whitespace, join the following line
           to it to form one "logical line"; if N consecutive lines end
           with a backslash, then N+1 physical lines will be joined to
           form one logical line.
         collapse_join [default: false]
           strip leading whitespace from lines that are joined to their
           predecessor; only matters if (join_lines and not lstrip_ws)

       Note that since 'rstrip_ws' can strip the trailing newline, the
       semantics of 'readline()' must differ from those of the builtin file
       object's 'readline()' method!  In particular, 'readline()' returns
       None for end-of-file: an empty string might just be a blank line (or
       an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
       not."""

    default_options = { 'strip_comments': 1,
                        'skip_blanks':    1,
                        'lstrip_ws':      0,
                        'rstrip_ws':      1,
                        'join_lines':     0,
                        'collapse_join':  0,
                      }

    def __init__ (self, filename=None, file=None, **options):
        """Construct a new TextFile object.  At least one of 'filename'
           (a string) and 'file' (a file-like object) must be supplied.
           The keyword argument options are described above and affect
           the values returned by 'readline()'.

           Raises RuntimeError if neither 'filename' nor 'file' is given,
           and KeyError if an unknown option name is passed."""

        if filename is None and file is None:
            raise RuntimeError(
                "you must supply either or both of 'filename' and 'file'")

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt in self.default_options.keys():
            if opt in options:
                setattr (self, opt, options[opt])
            else:
                setattr (self, opt, self.default_options[opt])

        # sanity check client option hash
        for opt in options.keys():
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        if file is None:
            self.open (filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0       # assuming that file is at BOF!

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []


    def open (self, filename):
        """Open a new file named 'filename'.  This overrides both the
           'filename' and 'file' arguments to the constructor."""

        self.filename = filename
        # 'open' here is the builtin, not this method: method names are
        # not in scope inside the method body
        self.file = open (self.filename, 'r')
        self.current_line = 0


    def close (self):
        """Close the current file and forget everything we know about it
           (filename, current line number).  Calling 'close()' again on
           an already-closed TextFile is a no-op."""

        if self.file is not None:
            self.file.close ()
        self.file = None
        self.filename = None
        self.current_line = None


    def gen_error (self, msg, line=None):
        """Return 'msg' prefixed with the filename (when one is known)
           and a line number or line range.  'line' defaults to the
           current line; it may be an integer, or a list/tuple of two
           integers denoting a range of physical lines."""

        outmsg = []
        if line is None:
            line = self.current_line
        # 'filename' is optional when a file object was supplied, so only
        # mention it if we actually have one
        if self.filename is not None:
            outmsg.append(self.filename + ", ")
        if isinstance (line, (list, tuple)):
            outmsg.append("lines %d-%d: " % tuple (line))
        else:
            outmsg.append("line %d: " % line)
        outmsg.append(str(msg))
        return "".join(outmsg)


    def error (self, msg, line=None):
        """Raise ValueError with 'msg' tied to 'line' (or to the current
           line if 'line' is not supplied); see 'gen_error()'."""
        raise ValueError("error: " + self.gen_error(msg, line))

    def warn (self, msg, line=None):
        """Print (to stderr) a warning message tied to the current logical
           line in the current file.  If the current logical line in the
           file spans multiple physical lines, the warning refers to the
           whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
           the current line number; it may be a list or tuple to indicate a
           range of physical lines, or an integer for a single physical
           line."""
        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")


    def _advance_line (self, joining):
        """Account for one more physical line in 'current_line'.

           'current_line' is an integer while the logical line is a single
           physical line, and a two-element list [first, last] once
           several physical lines have been joined.  If 'joining' is true
           the physical line belongs to the logical line being built up,
           so the range is extended; otherwise a new logical line starts
           at the next physical line."""

        current = self.current_line
        if joining:
            if isinstance (current, list):
                current[1] = current[1] + 1
            else:
                self.current_line = [current, current + 1]
        else:
            if isinstance (current, list):
                self.current_line = current[1] + 1
            else:
                self.current_line = current + 1


    def readline (self):
        """Read and return a single logical line from the current file (or
           from an internal buffer if lines have previously been "unread"
           with 'unreadline()').  If the 'join_lines' option is true, this
           may involve reading multiple physical lines concatenated into a
           single string.  Updates the current line number, so calling
           'warn()' after 'readline()' emits a warning about the physical
           line(s) just read.  Returns None on end-of-file, since the empty
           string can occur if 'rstrip_ws' is true but 'skip_blanks' is
           not."""

        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            line = self.linebuf[-1]
            del self.linebuf[-1]
            return line

        buildup_line = ''

        while True:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            if self.strip_comments and line:

                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment, strip whitespace before it, and
                # carry on.  Otherwise, it's just an escaped "#", so
                # unescape it (and any other escaped "#"'s that might be
                # lurking in there) and otherwise leave the line alone.
                # (Only the first "#" is examined: an unescaped "#" that
                # follows an escaped one is not treated as a comment.)

                pos = line.find ("#")
                if pos == -1:           # no "#" -- no comments
                    pass

                # It's definitely a comment -- either "#" is the first
                # character, or it's elsewhere and unescaped.
                elif pos == 0 or line[pos-1] != "\\":
                    # Have to preserve the trailing newline, because it's
                    # the job of a later step (rstrip_ws) to remove it --
                    # and if rstrip_ws is false, we'd better preserve it!
                    # (NB. this means that if the final line is all comment
                    # and has no trailing newline, we will think that it's
                    # EOF; I think that's OK.)
                    if line[-1] == '\n':
                        eol = '\n'
                    else:
                        eol = ''
                    line = line[0:pos] + eol

                    # If all that's left is whitespace, then skip line
                    # *now*, before we try to join it to 'buildup_line' --
                    # that way constructs like
                    #   hello \\
                    #   # comment that should be ignored
                    #   there
                    # result in "hello there".
                    if line.strip() == "":
                        # the skipped line is still a physical line:
                        # count it, or every later line number reported
                        # by 'warn()' would be too small
                        self._advance_line (self.join_lines and buildup_line)
                        continue

                else:                   # it's an escaped "#"
                    line = line.replace ("\\#", "#")


            # did previous line end with a backslash? then accumulate
            if self.join_lines and buildup_line:
                # oops: end of file
                if line is None:
                    self.warn ("continuation line immediately precedes "
                               "end-of-file")
                    return buildup_line

                if self.collapse_join:
                    line = line.lstrip ()
                line = buildup_line + line

                # extend the range of physical lines covered
                self._advance_line (1)

            # just an ordinary line, read it as usual
            else:
                if line is None:        # eof
                    return None

                # start a new logical line at the next physical line
                self._advance_line (0)


            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip ()
            elif self.lstrip_ws:
                line = line.lstrip ()
            elif self.rstrip_ws:
                line = line.rstrip ()

            # blank line (whether we rstrip'ed or not)? skip to next line
            # if appropriate
            if (line == '' or line == '\n') and self.skip_blanks:
                continue

            if self.join_lines:
                # slices rather than indexing: 'line' may be empty here
                # when 'skip_blanks' is false
                if line[-1:] == '\\':
                    buildup_line = line[:-1]
                    continue

                if line[-2:] == '\\\n':
                    buildup_line = line[0:-2] + '\n'
                    continue

            # well, I guess there's some actual content there: return it
            return line

    # readline ()


    def readlines (self):
        """Read and return the list of all logical lines remaining in the
           current file."""

        lines = []
        while True:
            line = self.readline()
            if line is None:
                return lines
            lines.append (line)


    def unreadline (self, line):
        """Push 'line' (a string) onto an internal buffer that will be
           checked by future 'readline()' calls.  Handy for implementing
           a parser with line-at-a-time lookahead."""

        self.linebuf.append (line)


if __name__ == "__main__":
    # NOTE(review): the leading whitespace of the last data line and the
    # run of spaces in 'result5' were reconstructed so that the expected
    # results are mutually consistent -- confirm against the upstream file.
    test_data = """# test file

line 3 \\
# intervening comment
  continues on next line
"""
    # result 1: no fancy options
    result1 = [x + "\n" for x in test_data.split ("\n")[0:-1]]

    # result 2: just strip comments
    result2 = ["\n",
               "line 3 \\\n",
               "  continues on next line\n"]

    # result 3: just strip blank lines
    result3 = ["# test file\n",
               "line 3 \\\n",
               "# intervening comment\n",
               "  continues on next line\n"]

    # result 4: default, strip comments, blank lines, and trailing whitespace
    result4 = ["line 3 \\",
               "  continues on next line"]

    # result 5: strip comments and blanks, plus join lines (but don't
    # "collapse" joined lines)
    result5 = ["line 3   continues on next line"]

    # result 6: strip comments and blanks, plus join lines (and
    # "collapse" joined lines)
    result6 = ["line 3 continues on next line"]

    def test_input (count, description, file, expected_result):
        """Read every logical line from 'file' (a TextFile), close it,
           and report whether the lines equal 'expected_result'."""
        result = file.readlines ()
        # close before reporting so the temp file can be removed later
        file.close ()
        if result == expected_result:
            print("ok %d (%s)" % (count, description))
        else:
            print("not ok %d (%s):" % (count, description))
            print("** expected:")
            print(expected_result)
            print("** received:")
            print(result)


    filename = "test.txt"
    out_file = open (filename, "w")
    out_file.write (test_data)
    out_file.close ()

    in_file = TextFile (filename, strip_comments=0, skip_blanks=0,
                        lstrip_ws=0, rstrip_ws=0)
    test_input (1, "no processing", in_file, result1)

    in_file = TextFile (filename, strip_comments=1, skip_blanks=0,
                        lstrip_ws=0, rstrip_ws=0)
    test_input (2, "strip comments", in_file, result2)

    in_file = TextFile (filename, strip_comments=0, skip_blanks=1,
                        lstrip_ws=0, rstrip_ws=0)
    test_input (3, "strip blanks", in_file, result3)

    in_file = TextFile (filename)
    test_input (4, "default processing", in_file, result4)

    in_file = TextFile (filename, strip_comments=1, skip_blanks=1,
                        join_lines=1, rstrip_ws=1)
    test_input (5, "join lines without collapsing", in_file, result5)

    in_file = TextFile (filename, strip_comments=1, skip_blanks=1,
                        join_lines=1, rstrip_ws=1, collapse_join=1)
    test_input (6, "join lines with collapsing", in_file, result6)

    os.remove (filename)

# Generated by PyXR 0.9.4