"""Helper class to quickly write a loop over all standard input files.

Typical use is:

    import fileinput
    for line in fileinput.input():
        process(line)

This iterates over the lines of all files listed in sys.argv[1:],
defaulting to sys.stdin if the list is empty.  If a filename is '-' it
is also replaced by sys.stdin.  To specify an alternative list of
filenames, pass it as the argument to input().  A single file name is
also allowed.

Functions filename(), lineno() return the filename and cumulative line
number of the line that has just been read; filelineno() returns its
line number in the current file; isfirstline() returns true iff the
line just read is the first line of its file; isstdin() returns true
iff the line was read from sys.stdin.  Function nextfile() closes the
current file so that the next iteration will read the first line from
the next file (if any); lines not read from the file will not count
towards the cumulative line count; the filename is not changed until
after the first line of the next file has been read.  Function close()
closes the sequence.

Before any lines have been read, filename() returns None and both line
numbers are zero; nextfile() has no effect.  After all lines have been
read, filename() and the line number functions return the values
pertaining to the last line read; nextfile() has no effect.

All files are opened in text mode.  If an I/O error occurs during
opening or reading a file, the IOError exception is raised.

If sys.stdin is used more than once, the second and further use will
return no lines, except perhaps for interactive use, or if it has been
explicitly reset (e.g. using sys.stdin.seek(0)).

Empty files are opened and immediately closed; the only time their
presence in the list of filenames is noticeable at all is when the
last file opened is empty.

It is possible that the last line of a file doesn't end in a newline
character; otherwise lines are returned including the trailing
newline.

Class FileInput is the implementation; its methods filename(),
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
correspond to the functions in the module.  In addition it has a
readline() method which returns the next input line, and a
__getitem__() method which implements the sequence behavior.  The
sequence must be accessed in strictly sequential order; sequence
access and readline() cannot be mixed.

Optional in-place filtering: if the keyword argument inplace=1 is
passed to input() or to the FileInput constructor, the file is moved
to a backup file and standard output is directed to the input file.
This makes it possible to write a filter that rewrites its input file
in place.  If the keyword argument backup=".<some extension>" is also
given, it specifies the extension for the backup file, and the backup
file remains around; by default, the extension is ".bak" and it is
deleted when the output file is closed.  In-place filtering is
disabled when standard input is read.  XXX The current implementation
does not work for MS-DOS 8+3 filesystems.

Performance: this module is unfortunately one of the slower ways of
processing large numbers of input lines.  Nevertheless, a significant
speed-up has been obtained by using readlines(bufsize) instead of
readline().  A new keyword argument, bufsize=N, is present on the
input() function and the FileInput() class to override the default
buffer size.

XXX Possible additions:

- optional getopt argument processing
- specify open mode ('r' or 'rb')
- fileno()
- isatty()
- read(), read(size), even readlines()

"""

import os
import sys

__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "isfirstline", "isstdin", "FileInput"]

# The FileInput instance shared by the module-level convenience functions.
_state = None

DEFAULT_BUFSIZE = 8*1024

# Types treated as "a single file name" by FileInput.  basestring covers
# both str and unicode where it exists; otherwise fall back to str.
try:
    _string_types = (basestring,)
except NameError:
    _string_types = (str,)


def _active_state():
    """Return the module-level FileInput, or raise RuntimeError if none."""
    if not _state:
        raise RuntimeError("no active input()")
    return _state


def input(files=None, inplace=0, backup="", bufsize=0):
    """input([files[, inplace[, backup[, bufsize]]]])

    Create an instance of the FileInput class. The instance will be used
    as global state for the functions of this module, and is also returned
    to use during iteration. The parameters to this function will be passed
    along to the constructor of the FileInput class.

    Raises RuntimeError if a previous input() still has a file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize)
    return _state


def close():
    """Close the sequence."""
    global _state
    state = _state
    # Drop the global reference first so that the module is reusable even
    # if closing the underlying files raises.
    _state = None
    if state:
        state.close()


def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.
    """
    return _active_state().nextfile()


def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.
    """
    return _active_state().filename()


def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.
    """
    return _active_state().lineno()


def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.
    """
    return _active_state().filelineno()


def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.
    """
    return _active_state().isfirstline()


def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.
    """
    return _active_state().isstdin()


class FileInput:
    """class FileInput([files[, inplace[, backup[, bufsize]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), nextfile()
    and close() correspond to the functions of the same name in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior. The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0):
        # A single name is wrapped in a tuple; without the isinstance check
        # tuple() below would split the name into single characters.
        if isinstance(files, _string_types):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and discard the remaining file names."""
        self.nextfile()
        self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve the line straight from the current buffer.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    # Newer iterator protocols look the method up under this name.
    __next__ = next

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far.

        Raises RuntimeError on out-of-order access and IndexError once the
        input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file (and any in-place output) and reset state.

        Each step runs in a finally clause so that a failure while closing
        one resource does not leak the others or leave stale buffer state.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            file = self._file
            self._file = None
            try:
                # sys.stdin is borrowed, never closed here.
                if file and not self._isstdin:
                    file.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                # The backup is only kept when the caller asked for an
                # explicit backup extension.
                if backupfilename and not self._backup:
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next line of input, or "" when all files are read."""
        # Loop (rather than recurse) so that a long run of empty files
        # cannot exhaust the interpreter's recursion limit.
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next_file()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted (or empty); move on.
                self.nextfile()

    def _open_next_file(self):
        """Pop the next name off the file list and open it for reading."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep+"bak"))
            try:
                os.unlink(self._backupfilename)
            except OSError:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, "r")
            try:
                perm = os.fstat(self._file.fileno()).st_mode
            except OSError:
                self._output = open(self._filename, "w")
            else:
                # Recreate the output file with the original's mode bits.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    if hasattr(os, 'chmod'):
                        os.chmod(self._filename, perm)
                except OSError:
                    pass
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            self._file = open(self._filename, "r")

    def filename(self):
        return self._filename

    def lineno(self):
        return self._lineno

    def filelineno(self):
        return self._filelineno

    def isfirstline(self):
        return self._filelineno == 1

    def isstdin(self):
        return self._isstdin


def _test():
    import getopt
    inplace = 0
    backup = ""
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i': inplace = 1
        if o == '-b': backup = a
    for line in input(args, inplace=inplace, backup=backup):
        if line[-1:] == '\n': line = line[:-1]
        if line[-1:] == '\r': line = line[:-1]
        # Single parenthesized argument: prints the same text whether
        # print is a statement or a function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   isfirstline() and "*" or "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))

if __name__ == '__main__':
    _test()
Generated by PyXR 0.9.4