PyXR

c:\python24\lib \ distutils \ text_file.py



0001 """text_file
0002 
0003 provides the TextFile class, which gives an interface to text files
0004 that (optionally) takes care of stripping comments, ignoring blank
0005 lines, and joining lines with backslashes."""
0006 
0007 __revision__ = "$Id: text_file.py,v 1.15 2002/11/14 02:25:41 akuchling Exp $"
0008 
0009 from types import *
0010 import sys, os, string
0011 
0012 
class TextFile:

    """Provides a file-like object that takes care of all the things you
       commonly want to do when processing a text file that has some
       line-by-line syntax: strip comments (as long as "#" is your
       comment character), skip blank lines, join adjacent lines by
       escaping the newline (ie. backslash at end of line), strip
       leading and/or trailing whitespace.  All of these are optional
       and independently controllable.

       Provides a 'warn()' method so you can generate warning messages that
       report physical line number, even if the logical line in question
       spans multiple physical lines.  Also provides 'unreadline()' for
       implementing line-at-a-time lookahead.

       Constructor is called as:

           TextFile (filename=None, file=None, **options)

       It bombs (RuntimeError) if both 'filename' and 'file' are None;
       'filename' should be a string, and 'file' a file object (or
       something that provides 'readline()' and 'close()' methods).  It is
       recommended that you supply at least 'filename', so that TextFile
       can include it in warning messages.  If 'file' is not supplied,
       TextFile creates its own using the 'open()' builtin.

       The options are all boolean, and affect the value returned by
       'readline()':
         strip_comments [default: true]
           strip from "#" to end-of-line, as well as any whitespace
           leading up to the "#" -- unless it is escaped by a backslash
         lstrip_ws [default: false]
           strip leading whitespace from each line before returning it
         rstrip_ws [default: true]
           strip trailing whitespace (including line terminator!) from
           each line before returning it
         skip_blanks [default: true]
           skip lines that are empty *after* stripping comments and
           whitespace.  (If both lstrip_ws and rstrip_ws are false,
           then some lines may consist of solely whitespace: these will
           *not* be skipped, even if 'skip_blanks' is true.)
         join_lines [default: false]
           if a backslash is the last non-newline character on a line
           after stripping comments and whitespace, join the following line
           to it to form one "logical line"; if N consecutive lines end
           with a backslash, then N+1 physical lines will be joined to
           form one logical line.
         collapse_join [default: false]
           strip leading whitespace from lines that are joined to their
           predecessor; only matters if (join_lines and not lstrip_ws)

       Note that since 'rstrip_ws' can strip the trailing newline, the
       semantics of 'readline()' must differ from those of the builtin file
       object's 'readline()' method!  In particular, 'readline()' returns
       None for end-of-file: an empty string might just be a blank line (or
       an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
       not."""

    default_options = { 'strip_comments': 1,
                        'skip_blanks':    1,
                        'lstrip_ws':      0,
                        'rstrip_ws':      1,
                        'join_lines':     0,
                        'collapse_join':  0,
                      }

    def __init__(self, filename=None, file=None, **options):
        """Construct a new TextFile object.  At least one of 'filename'
           (a string) and 'file' (a file-like object) must be supplied.
           The keyword argument options are described above and affect
           the values returned by 'readline()'.

           Raises RuntimeError if neither 'filename' nor 'file' is given,
           and KeyError if an option name is not in 'default_options'."""

        if filename is None and file is None:
            raise RuntimeError(
                "you must supply either or both of 'filename' and 'file'")

        # sanity check client option hash
        for opt in options:
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt in self.default_options:
            setattr(self, opt, options.get(opt, self.default_options[opt]))

        if file is None:
            self.open(filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0       # assuming that file is at BOF!

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []


    def open(self, filename):
        """Open a new file named 'filename'.  This overrides both the
           'filename' and 'file' arguments to the constructor, and resets
           the current line number to 0."""

        self.filename = filename
        # inside a method body, 'open' still names the builtin
        self.file = open(self.filename, 'r')
        self.current_line = 0


    def close(self):
        """Close the current file and forget everything we know about it
           (filename, current line number).  Calling 'close()' again on an
           already-closed TextFile is a no-op."""

        if self.file is not None:
            self.file.close()
        self.file = None
        self.filename = None
        self.current_line = None


    def gen_error(self, msg, line=None):
        """Return 'msg' prefixed with the file name (when one is known)
           and a line reference.  'line' defaults to the current line; it
           may be an integer ("line N: ") or a two-element list or tuple
           ("lines M-N: ")."""

        outmsg = []
        if line is None:
            line = self.current_line
        # 'filename' is optional in the constructor, so it may be None here
        if self.filename is not None:
            outmsg.append(self.filename + ", ")
        if isinstance(line, (list, tuple)):
            outmsg.append("lines %d-%d: " % tuple(line))
        else:
            outmsg.append("line %d: " % line)
        outmsg.append(str(msg))
        return "".join(outmsg)


    def error(self, msg, line=None):
        """Raise ValueError carrying 'msg' tagged with the file name and
           line number(s); 'line' is interpreted as in 'gen_error()'."""
        raise ValueError("error: " + self.gen_error(msg, line))

    def warn(self, msg, line=None):
        """Print (to stderr) a warning message tied to the current logical
           line in the current file.  If the current logical line in the
           file spans multiple physical lines, the warning refers to the
           whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
           the current line number; it may be a list or tuple to indicate a
           range of physical lines, or an integer for a single physical
           line."""
        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")


    def _count_physical_line(self, joining):
        """Record in 'current_line' that one more physical line was read.
           If 'joining' is true the line belongs to the logical line being
           built, so the [first, last] range is extended (or created);
           otherwise 'current_line' becomes the plain number of the new
           line."""

        current = self.current_line
        if joining:
            if isinstance(current, list):
                current[1] = current[1] + 1
            else:
                self.current_line = [current, current + 1]
        else:
            if isinstance(current, list):
                self.current_line = current[1] + 1
            else:
                self.current_line = current + 1


    def readline(self):
        """Read and return a single logical line from the current file (or
           from an internal buffer if lines have previously been "unread"
           with 'unreadline()').  If the 'join_lines' option is true, this
           may involve reading multiple physical lines concatenated into a
           single string.  Updates the current line number, so calling
           'warn()' after 'readline()' emits a warning about the physical
           line(s) just read.  Returns None on end-of-file, since the empty
           string can occur if 'rstrip_ws' is true but 'skip_blanks' is
           not."""

        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            return self.linebuf.pop()

        buildup_line = ''

        while 1:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            if self.strip_comments and line:

                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment, strip whitespace before it, and
                # carry on.  Otherwise, it's just an escaped "#", so
                # unescape it (and any other escaped "#"'s that might be
                # lurking in there) and otherwise leave the line alone.

                pos = line.find("#")
                if pos == -1:           # no "#" -- no comments
                    pass

                # It's definitely a comment -- either "#" is the first
                # character, or it's elsewhere and unescaped.
                elif pos == 0 or line[pos-1] != "\\":
                    # Have to preserve the trailing newline, because it's
                    # the job of a later step (rstrip_ws) to remove it --
                    # and if rstrip_ws is false, we'd better preserve it!
                    # (NB. this means that if the final line is all comment
                    # and has no trailing newline, we will think that it's
                    # EOF; I think that's OK.)
                    if line[-1] == '\n':
                        eol = '\n'
                    else:
                        eol = ''
                    line = line[0:pos] + eol

                    # If all that's left is whitespace, then skip line
                    # *now*, before we try to join it to 'buildup_line' --
                    # that way constructs like
                    #   hello \\
                    #   # comment that should be ignored
                    #   there
                    # result in "hello there".
                    if line.strip() == "":
                        # the skipped line is still a physical line: count
                        # it so reported line numbers stay accurate
                        self._count_physical_line(
                            self.join_lines and buildup_line)
                        continue

                else:                   # it's an escaped "#"
                    line = line.replace("\\#", "#")


            # did previous line end with a backslash? then accumulate
            if self.join_lines and buildup_line:
                # oops: end of file
                if line is None:
                    self.warn("continuation line immediately precedes "
                              "end-of-file")
                    return buildup_line

                if self.collapse_join:
                    line = line.lstrip()
                line = buildup_line + line

                # the logical line now spans one more physical line
                self._count_physical_line(1)

            # just an ordinary line, read it as usual
            else:
                if line is None:        # eof
                    return None

                # starts a new logical line: back to a plain line number
                self._count_physical_line(0)


            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip()
            elif self.lstrip_ws:
                line = line.lstrip()
            elif self.rstrip_ws:
                line = line.rstrip()

            # blank line (whether we rstrip'ed or not)? skip to next line
            # if appropriate
            if (line == '' or line == '\n') and self.skip_blanks:
                continue

            if self.join_lines:
                # 'endswith' rather than 'line[-1]': the line may be empty
                # here when 'skip_blanks' is false
                if line.endswith('\\'):
                    buildup_line = line[:-1]
                    continue

                if line[-2:] == '\\\n':
                    buildup_line = line[0:-2] + '\n'
                    continue

            # well, I guess there's some actual content there: return it
            return line

    # readline ()


    def readlines(self):
        """Read and return the list of all logical lines remaining in the
           current file."""

        lines = []
        while 1:
            line = self.readline()
            if line is None:
                return lines
            lines.append(line)


    def unreadline(self, line):
        """Push 'line' (a string) onto an internal buffer that will be
           checked by future 'readline()' calls.  Handy for implementing
           a parser with line-at-a-time lookahead."""

        self.linebuf.append(line)
0306 
0307 
if __name__ == "__main__":
    # Self-test: write a small sample file, read it back through TextFile
    # under six option combinations, and print "ok"/"not ok" for each.
    test_data = """# test file

line 3 \\
# intervening comment
  continues on next line
"""
    # result 1: no fancy options
    result1 = [text + "\n" for text in test_data.split("\n")[0:-1]]

    # result 2: just strip comments
    result2 = ["\n",
               "line 3 \\\n",
               "  continues on next line\n"]

    # result 3: just strip blank lines
    result3 = ["# test file\n",
               "line 3 \\\n",
               "# intervening comment\n",
               "  continues on next line\n"]

    # result 4: default, strip comments, blank lines, and trailing whitespace
    result4 = ["line 3 \\",
               "  continues on next line"]

    # result 5: strip comments and blanks, plus join lines (but don't
    # "collapse" joined lines)
    result5 = ["line 3   continues on next line"]

    # result 6: strip comments and blanks, plus join lines (and
    # "collapse" joined lines)
    result6 = ["line 3 continues on next line"]

    def test_input(count, description, file, expected_result):
        """Read every logical line from 'file' and report whether the
           list equals 'expected_result'."""
        result = file.readlines()
        if result == expected_result:
            print("ok %d (%s)" % (count, description))
        else:
            print("not ok %d (%s):" % (count, description))
            print("** expected:")
            print(expected_result)
            print("** received:")
            print(result)


    filename = "test.txt"
    out_file = open(filename, "w")
    try:
        out_file.write(test_data)
    finally:
        out_file.close()

    # (test number, description, TextFile options, expected lines)
    cases = [
        (1, "no processing",
         {'strip_comments': 0, 'skip_blanks': 0,
          'lstrip_ws': 0, 'rstrip_ws': 0},
         result1),
        (2, "strip comments",
         {'strip_comments': 1, 'skip_blanks': 0,
          'lstrip_ws': 0, 'rstrip_ws': 0},
         result2),
        (3, "strip blanks",
         {'strip_comments': 0, 'skip_blanks': 1,
          'lstrip_ws': 0, 'rstrip_ws': 0},
         result3),
        (4, "default processing",
         {},
         result4),
        (5, "join lines without collapsing",
         {'strip_comments': 1, 'skip_blanks': 1,
          'join_lines': 1, 'rstrip_ws': 1},
         result5),
        (6, "join lines with collapsing",
         {'strip_comments': 1, 'skip_blanks': 1,
          'join_lines': 1, 'rstrip_ws': 1, 'collapse_join': 1},
         result6),
    ]

    try:
        for count, description, options, expected in cases:
            in_file = TextFile(filename, **options)
            try:
                test_input(count, description, in_file, expected)
            finally:
                # close before the file is removed: an open handle makes
                # os.remove() fail on Windows
                in_file.close()
    finally:
        # remove the scratch file even if one of the tests blew up
        os.remove(filename)
0383 

Generated by PyXR 0.9.4
SourceForge.net Logo