PyXR

c:\python24\lib \ fileinput.py



0001 """Helper class to quickly write a loop over all standard input files.
0002 
0003 Typical use is:
0004 
0005     import fileinput
0006     for line in fileinput.input():
0007         process(line)
0008 
0009 This iterates over the lines of all files listed in sys.argv[1:],
0010 defaulting to sys.stdin if the list is empty.  If a filename is '-' it
0011 is also replaced by sys.stdin.  To specify an alternative list of
0012 filenames, pass it as the argument to input().  A single file name is
0013 also allowed.
0014 
0015 Functions filename(), lineno() return the filename and cumulative line
0016 number of the line that has just been read; filelineno() returns its
0017 line number in the current file; isfirstline() returns true iff the
0018 line just read is the first line of its file; isstdin() returns true
0019 iff the line was read from sys.stdin.  Function nextfile() closes the
0020 current file so that the next iteration will read the first line from
0021 the next file (if any); lines not read from the file will not count
0022 towards the cumulative line count; the filename is not changed until
0023 after the first line of the next file has been read.  Function close()
0024 closes the sequence.
0025 
0026 Before any lines have been read, filename() returns None and both line
0027 numbers are zero; nextfile() has no effect.  After all lines have been
0028 read, filename() and the line number functions return the values
0029 pertaining to the last line read; nextfile() has no effect.
0030 
0031 All files are opened in text mode.  If an I/O error occurs during
0032 opening or reading a file, the IOError exception is raised.
0033 
0034 If sys.stdin is used more than once, the second and further use will
0035 return no lines, except perhaps for interactive use, or if it has been
0036 explicitly reset (e.g. using sys.stdin.seek(0)).
0037 
0038 Empty files are opened and immediately closed; the only time their
0039 presence in the list of filenames is noticeable at all is when the
0040 last file opened is empty.
0041 
0042 It is possible that the last line of a file doesn't end in a newline
0043 character; otherwise lines are returned including the trailing
0044 newline.
0045 
0046 Class FileInput is the implementation; its methods filename(),
0047 lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
0048 correspond to the functions in the module.  In addition it has a
0049 readline() method which returns the next input line, and a
0050 __getitem__() method which implements the sequence behavior.  The
0051 sequence must be accessed in strictly sequential order; sequence
0052 access and readline() cannot be mixed.
0053 
0054 Optional in-place filtering: if the keyword argument inplace=1 is
0055 passed to input() or to the FileInput constructor, the file is moved
0056 to a backup file and standard output is directed to the input file.
0057 This makes it possible to write a filter that rewrites its input file
0058 in place.  If the keyword argument backup=".<some extension>" is also
0059 given, it specifies the extension for the backup file, and the backup
0060 file remains around; by default, the extension is ".bak" and it is
0061 deleted when the output file is closed.  In-place filtering is
0062 disabled when standard input is read.  XXX The current implementation
0063 does not work for MS-DOS 8+3 filesystems.
0064 
0065 Performance: this module is unfortunately one of the slower ways of
0066 processing large numbers of input lines.  Nevertheless, a significant
0067 speed-up has been obtained by using readlines(bufsize) instead of
0068 readline().  A new keyword argument, bufsize=N, is present on the
0069 input() function and the FileInput() class to override the default
0070 buffer size.
0071 
0072 XXX Possible additions:
0073 
0074 - optional getopt argument processing
0075 - specify open mode ('r' or 'rb')
0076 - fileno()
0077 - isatty()
0078 - read(), read(size), even readlines()
0079 
0080 """
0081 
0082 import sys, os
0083 
# Names exported by ``from fileinput import *``.
__all__ = [
    "input", "close", "nextfile", "filename", "lineno", "filelineno",
    "isfirstline", "isstdin", "FileInput",
]

# The FileInput instance shared by the module-level functions.  It is
# assigned by input() and reset to None by close().
_state = None

# Fallback handed to readlines() when the caller passes a false bufsize.
DEFAULT_BUFSIZE = 8 * 1024
0090 
def input(files=None, inplace=0, backup="", bufsize=0):
    """input([files[, inplace[, backup[, bufsize]]]])

    Create an instance of the FileInput class. The instance will be used
    as global state for the functions of this module, and is also returned
    to use during iteration. The parameters to this function will be passed
    along to the constructor of the FileInput class.

    Raises RuntimeError if the previous global instance still has a file
    open (i.e. an earlier input() sequence is still being read).
    """
    global _state
    # A leftover instance whose current file has already been closed does
    # not count as active and is simply replaced.
    if _state and _state._file:
        # Call-form raise: accepted by Python 2 and Python 3 alike.
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state
0104 
def close():
    """Close the sequence."""
    global _state
    # Detach the shared instance first so the module is already reset
    # by the time the instance's own close() runs.
    active, _state = _state, None
    if active:
        active.close()
0112 
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input() sequence.
    """
    if not _state:
        # Call-form raise: accepted by Python 2 and Python 3 alike.
        raise RuntimeError("no active input()")
    return _state.nextfile()
0126 
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input() sequence.
    """
    if not _state:
        # Call-form raise: accepted by Python 2 and Python 3 alike.
        raise RuntimeError("no active input()")
    return _state.filename()
0135 
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input() sequence.
    """
    if not _state:
        # Call-form raise: accepted by Python 2 and Python 3 alike.
        raise RuntimeError("no active input()")
    return _state.lineno()
0145 
def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input() sequence.
    """
    if not _state:
        # Call-form raise: accepted by Python 2 and Python 3 alike.
        raise RuntimeError("no active input()")
    return _state.filelineno()
0155 
def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.

    Raises RuntimeError if there is no active input() sequence.
    """
    if not _state:
        # Call-form raise: accepted by Python 2 and Python 3 alike.
        raise RuntimeError("no active input()")
    return _state.isfirstline()
0164 
def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.

    Raises RuntimeError if there is no active input() sequence.
    """
    if not _state:
        # Call-form raise: accepted by Python 2 and Python 3 alike.
        raise RuntimeError("no active input()")
    return _state.isstdin()
0173 
class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), nextfile()
    and close() correspond to the functions of the same name in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    # Types that mean "one file name" rather than "a sequence of names".
    # On Python 2 this covers both str and unicode; Python 3 has no
    # basestring, so plain str is used there.
    try:
        _string_types = basestring
    except NameError:
        _string_types = str

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        """Set up the sequence of input files; nothing is opened yet.

        files   -- a single file name, a sequence of names, or None to use
                   sys.argv[1:]; an empty list means standard input ('-').
        inplace -- if true, each file is moved to a backup and sys.stdout
                   is redirected to a fresh file of the original name
                   while that file is being read.
        backup  -- extension of the backup file; when false the extension
                   is os.extsep + "bak" and the backup is removed once the
                   file has been processed.
        bufsize -- value passed to readlines(); a false value selects
                   DEFAULT_BUFSIZE.
        """
        # isinstance() rather than an exact type comparison, so that
        # unicode names and str subclasses are treated as one file name
        # instead of being split into single characters by tuple().
        if isinstance(files, self._string_types):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        self.nextfile()
        self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve the line straight from the buffer.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Python 3 spelling of the iterator protocol method.
    __next__ = next

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far.

        Raises RuntimeError for out-of-order access and IndexError once
        the input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Finish with the current file and release everything tied to it.

        Restores sys.stdout, closes the in-place output file and the input
        file (unless it is sys.stdin), removes the backup file when no
        backup extension was requested, and empties the line buffer.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        # The nested try/finally blocks make sure a failure while closing
        # one resource still releases the remaining ones.
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: the backup may already be gone.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next input line, or "" when all files are exhausted."""
        # A loop rather than a recursive call: each empty file costs one
        # more pass, so a long run of empty files cannot hit the
        # interpreter's recursion limit.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted; close it and try the next.
                self.nextfile()

    def _open_next(self):
        """Pop the first pending file name and open it as the current file."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            # '-' stands for standard input; no in-place handling here.
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
            return
        if not self._inplace:
            # This may raise IOError
            self._file = open(self._filename, "r")
            return
        self._backupfilename = (
            self._filename + (self._backup or os.extsep + "bak"))
        # Remove a stale backup so the rename below can succeed.
        try:
            os.unlink(self._backupfilename)
        except OSError:
            pass
        # The next few lines may raise IOError
        os.rename(self._filename, self._backupfilename)
        self._file = open(self._backupfilename, "r")
        try:
            perm = os.fstat(self._file.fileno()).st_mode
        except OSError:
            self._output = open(self._filename, "w")
        else:
            # Recreate the file with the mode bits of the original.
            fd = os.open(self._filename,
                         os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                         perm)
            self._output = os.fdopen(fd, "w")
            try:
                if hasattr(os, 'chmod'):
                    os.chmod(self._filename, perm)
            except OSError:
                pass
        self._savestdout = sys.stdout
        sys.stdout = self._output

    def filename(self):
        """Return the name of the file the last line was read from."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
0342 
def _test():
    """Command-line self-test: echo every input line with its numbers.

    Options: -i turns on in-place filtering, -b EXT sets the backup
    extension.  The remaining arguments are the files to read.
    """
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i': inplace = 1
        if o == '-b': backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline and one trailing carriage return.
        if line[-1:] == '\n': line = line[:-1]
        if line[-1:] == '\r': line = line[:-1]
        # print with a single parenthesised argument behaves the same as a
        # Python 2 print statement and as the Python 3 print function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   isfirstline() and "*" or "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()
0360 

Generated by PyXR 0.9.4
SourceForge.net Logo