#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
#-------------------------------------------------------------------
# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
"""Read from and write to tar format archives.
"""

# Revision metadata maintained by the version control system.
__version__ = "$Revision: 1.21 $"
# $Source: /cvsroot/python/python/dist/src/Lib/tarfile.py,v $

version     = "0.6.4"
__author__  = "Lars Gustäbel (lars@gustaebel.de)"
__date__    = "$Date: 2004/10/20 11:48:42 $"
__cvsid__   = "$Id: tarfile.py,v 1.21 2004/10/20 11:48:42 akuchling Exp $"
__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
0041 0042 #--------- 0043 # Imports 0044 #--------- 0045 import sys 0046 import os 0047 import shutil 0048 import stat 0049 import errno 0050 import time 0051 import struct 0052 0053 if sys.platform == 'mac': 0054 # This module needs work for MacOS9, especially in the area of pathname 0055 # handling. In many places it is assumed a simple substitution of / by the 0056 # local os.path.sep is good enough to convert pathnames, but this does not 0057 # work with the mac rooted:path:name versus :nonrooted:path:name syntax 0058 raise ImportError, "tarfile does not work for platform==mac" 0059 0060 try: 0061 import grp, pwd 0062 except ImportError: 0063 grp = pwd = None 0064 0065 # from tarfile import * 0066 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] 0067 0068 #--------------------------------------------------------- 0069 # tar constants 0070 #--------------------------------------------------------- 0071 NUL = "\0" # the null character 0072 BLOCKSIZE = 512 # length of processing blocks 0073 RECORDSIZE = BLOCKSIZE * 20 # length of records 0074 MAGIC = "ustar" # magic tar string 0075 VERSION = "00" # version number 0076 0077 LENGTH_NAME = 100 # maximum length of a filename 0078 LENGTH_LINK = 100 # maximum length of a linkname 0079 LENGTH_PREFIX = 155 # maximum length of the prefix field 0080 MAXSIZE_MEMBER = 077777777777L # maximum size of a file (11 octal digits) 0081 0082 REGTYPE = "0" # regular file 0083 AREGTYPE = "\0" # regular file 0084 LNKTYPE = "1" # link (inside tarfile) 0085 SYMTYPE = "2" # symbolic link 0086 CHRTYPE = "3" # character special device 0087 BLKTYPE = "4" # block special device 0088 DIRTYPE = "5" # directory 0089 FIFOTYPE = "6" # fifo special device 0090 CONTTYPE = "7" # contiguous file 0091 0092 GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames 0093 GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink 0094 GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file 0095 0096 
#--------------------------------------------------------- 0097 # tarfile constants 0098 #--------------------------------------------------------- 0099 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile 0100 SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with. 0101 CONTTYPE, CHRTYPE, BLKTYPE, 0102 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, 0103 GNUTYPE_SPARSE) 0104 0105 REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow 0106 CONTTYPE, GNUTYPE_SPARSE) # represent regular files 0107 0108 #--------------------------------------------------------- 0109 # Bits used in the mode field, values in octal. 0110 #--------------------------------------------------------- 0111 S_IFLNK = 0120000 # symbolic link 0112 S_IFREG = 0100000 # regular file 0113 S_IFBLK = 0060000 # block device 0114 S_IFDIR = 0040000 # directory 0115 S_IFCHR = 0020000 # character device 0116 S_IFIFO = 0010000 # fifo 0117 0118 TSUID = 04000 # set UID on execution 0119 TSGID = 02000 # set GID on execution 0120 TSVTX = 01000 # reserved 0121 0122 TUREAD = 0400 # read by owner 0123 TUWRITE = 0200 # write by owner 0124 TUEXEC = 0100 # execute/search by owner 0125 TGREAD = 0040 # read by group 0126 TGWRITE = 0020 # write by group 0127 TGEXEC = 0010 # execute/search by group 0128 TOREAD = 0004 # read by other 0129 TOWRITE = 0002 # write by other 0130 TOEXEC = 0001 # execute/search by other 0131 0132 #--------------------------------------------------------- 0133 # Some useful functions 0134 #--------------------------------------------------------- 0135 def nts(s): 0136 """Convert a null-terminated string buffer to a python string. 0137 """ 0138 return s.rstrip(NUL) 0139 0140 def calc_chksum(buf): 0141 """Calculate the checksum for a member's header. It's a simple addition 0142 of all bytes, treating the chksum field as if filled with spaces. 0143 buf is a 512 byte long string buffer which holds the header. 
0144 """ 0145 chk = 256 # chksum field is treated as blanks, 0146 # so the initial value is 8 * ord(" ") 0147 for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum 0148 for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum 0149 return chk 0150 0151 def copyfileobj(src, dst, length=None): 0152 """Copy length bytes from fileobj src to fileobj dst. 0153 If length is None, copy the entire content. 0154 """ 0155 if length == 0: 0156 return 0157 if length is None: 0158 shutil.copyfileobj(src, dst) 0159 return 0160 0161 BUFSIZE = 16 * 1024 0162 blocks, remainder = divmod(length, BUFSIZE) 0163 for b in xrange(blocks): 0164 buf = src.read(BUFSIZE) 0165 if len(buf) < BUFSIZE: 0166 raise IOError, "end of file reached" 0167 dst.write(buf) 0168 0169 if remainder != 0: 0170 buf = src.read(remainder) 0171 if len(buf) < remainder: 0172 raise IOError, "end of file reached" 0173 dst.write(buf) 0174 return 0175 0176 filemode_table = ( 0177 ((S_IFLNK, "l"), 0178 (S_IFREG, "-"), 0179 (S_IFBLK, "b"), 0180 (S_IFDIR, "d"), 0181 (S_IFCHR, "c"), 0182 (S_IFIFO, "p")), 0183 0184 ((TUREAD, "r"),), 0185 ((TUWRITE, "w"),), 0186 ((TUEXEC|TSUID, "s"), 0187 (TSUID, "S"), 0188 (TUEXEC, "x")), 0189 0190 ((TGREAD, "r"),), 0191 ((TGWRITE, "w"),), 0192 ((TGEXEC|TSGID, "s"), 0193 (TSGID, "S"), 0194 (TGEXEC, "x")), 0195 0196 ((TOREAD, "r"),), 0197 ((TOWRITE, "w"),), 0198 ((TOEXEC|TSVTX, "t"), 0199 (TSVTX, "T"), 0200 (TOEXEC, "x")) 0201 ) 0202 0203 def filemode(mode): 0204 """Convert a file's mode to a string of the form 0205 -rwxrwxrwx. 
0206 Used by TarFile.list() 0207 """ 0208 perm = [] 0209 for table in filemode_table: 0210 for bit, char in table: 0211 if mode & bit == bit: 0212 perm.append(char) 0213 break 0214 else: 0215 perm.append("-") 0216 return "".join(perm) 0217 0218 if os.sep != "/": 0219 normpath = lambda path: os.path.normpath(path).replace(os.sep, "/") 0220 else: 0221 normpath = os.path.normpath 0222 0223 class TarError(Exception): 0224 """Base exception.""" 0225 pass 0226 class ExtractError(TarError): 0227 """General exception for extract errors.""" 0228 pass 0229 class ReadError(TarError): 0230 """Exception for unreadble tar archives.""" 0231 pass 0232 class CompressionError(TarError): 0233 """Exception for unavailable compression methods.""" 0234 pass 0235 class StreamError(TarError): 0236 """Exception for unsupported operations on stream-like TarFiles.""" 0237 pass 0238 0239 #--------------------------- 0240 # internal stream interface 0241 #--------------------------- 0242 class _LowLevelFile: 0243 """Low-level file object. Supports reading and writing. 0244 It is used instead of a regular file object for streaming 0245 access. 0246 """ 0247 0248 def __init__(self, name, mode): 0249 mode = { 0250 "r": os.O_RDONLY, 0251 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0252 }[mode] 0253 if hasattr(os, "O_BINARY"): 0254 mode |= os.O_BINARY 0255 self.fd = os.open(name, mode) 0256 0257 def close(self): 0258 os.close(self.fd) 0259 0260 def read(self, size): 0261 return os.read(self.fd, size) 0262 0263 def write(self, s): 0264 os.write(self.fd, s) 0265 0266 class _Stream: 0267 """Class that serves as an adapter between TarFile and 0268 a stream-like object. The stream-like object only 0269 needs to have a read() or write() method and is accessed 0270 blockwise. Use of gzip or bzip2 compression is possible. 0271 A stream-like object could be for example: sys.stdin, 0272 sys.stdout, a socket, a tape device etc. 0273 0274 _Stream is intended to be used only internally. 
0275 """ 0276 0277 def __init__(self, name, mode, type, fileobj, bufsize): 0278 """Construct a _Stream object. 0279 """ 0280 self._extfileobj = True 0281 if fileobj is None: 0282 fileobj = _LowLevelFile(name, mode) 0283 self._extfileobj = False 0284 0285 self.name = name or "" 0286 self.mode = mode 0287 self.type = type 0288 self.fileobj = fileobj 0289 self.bufsize = bufsize 0290 self.buf = "" 0291 self.pos = 0L 0292 self.closed = False 0293 0294 if type == "gz": 0295 try: 0296 import zlib 0297 except ImportError: 0298 raise CompressionError, "zlib module is not available" 0299 self.zlib = zlib 0300 self.crc = zlib.crc32("") 0301 if mode == "r": 0302 self._init_read_gz() 0303 else: 0304 self._init_write_gz() 0305 0306 if type == "bz2": 0307 try: 0308 import bz2 0309 except ImportError: 0310 raise CompressionError, "bz2 module is not available" 0311 if mode == "r": 0312 self.dbuf = "" 0313 self.cmp = bz2.BZ2Decompressor() 0314 else: 0315 self.cmp = bz2.BZ2Compressor() 0316 0317 def __del__(self): 0318 if not self.closed: 0319 self.close() 0320 0321 def _init_write_gz(self): 0322 """Initialize for writing with gzip compression. 0323 """ 0324 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, 0325 -self.zlib.MAX_WBITS, 0326 self.zlib.DEF_MEM_LEVEL, 0327 0) 0328 timestamp = struct.pack("<L", long(time.time())) 0329 self.__write("\037\213\010\010%s\002\377" % timestamp) 0330 if self.name.endswith(".gz"): 0331 self.name = self.name[:-3] 0332 self.__write(self.name + NUL) 0333 0334 def write(self, s): 0335 """Write string s to the stream. 0336 """ 0337 if self.type == "gz": 0338 self.crc = self.zlib.crc32(s, self.crc) 0339 self.pos += len(s) 0340 if self.type != "tar": 0341 s = self.cmp.compress(s) 0342 self.__write(s) 0343 0344 def __write(self, s): 0345 """Write string s to the stream if a whole new block 0346 is ready to be written. 
0347 """ 0348 self.buf += s 0349 while len(self.buf) > self.bufsize: 0350 self.fileobj.write(self.buf[:self.bufsize]) 0351 self.buf = self.buf[self.bufsize:] 0352 0353 def close(self): 0354 """Close the _Stream object. No operation should be 0355 done on it afterwards. 0356 """ 0357 if self.closed: 0358 return 0359 0360 if self.mode == "w" and self.type != "tar": 0361 self.buf += self.cmp.flush() 0362 if self.mode == "w" and self.buf: 0363 self.fileobj.write(self.buf) 0364 self.buf = "" 0365 if self.type == "gz": 0366 self.fileobj.write(struct.pack("<l", self.crc)) 0367 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL)) 0368 0369 if not self._extfileobj: 0370 self.fileobj.close() 0371 0372 self.closed = True 0373 0374 def _init_read_gz(self): 0375 """Initialize for reading a gzip compressed fileobj. 0376 """ 0377 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) 0378 self.dbuf = "" 0379 0380 # taken from gzip.GzipFile with some alterations 0381 if self.__read(2) != "\037\213": 0382 raise ReadError, "not a gzip file" 0383 if self.__read(1) != "\010": 0384 raise CompressionError, "unsupported compression method" 0385 0386 flag = ord(self.__read(1)) 0387 self.__read(6) 0388 0389 if flag & 4: 0390 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) 0391 self.read(xlen) 0392 if flag & 8: 0393 while True: 0394 s = self.__read(1) 0395 if not s or s == NUL: 0396 break 0397 if flag & 16: 0398 while True: 0399 s = self.__read(1) 0400 if not s or s == NUL: 0401 break 0402 if flag & 2: 0403 self.__read(2) 0404 0405 def tell(self): 0406 """Return the stream's file pointer position. 0407 """ 0408 return self.pos 0409 0410 def seek(self, pos=0): 0411 """Set the stream's file pointer to pos. Negative seeking 0412 is forbidden. 
0413 """ 0414 if pos - self.pos >= 0: 0415 blocks, remainder = divmod(pos - self.pos, self.bufsize) 0416 for i in xrange(blocks): 0417 self.read(self.bufsize) 0418 self.read(remainder) 0419 else: 0420 raise StreamError, "seeking backwards is not allowed" 0421 return self.pos 0422 0423 def read(self, size=None): 0424 """Return the next size number of bytes from the stream. 0425 If size is not defined, return all bytes of the stream 0426 up to EOF. 0427 """ 0428 if size is None: 0429 t = [] 0430 while True: 0431 buf = self._read(self.bufsize) 0432 if not buf: 0433 break 0434 t.append(buf) 0435 buf = "".join(t) 0436 else: 0437 buf = self._read(size) 0438 self.pos += len(buf) 0439 return buf 0440 0441 def _read(self, size): 0442 """Return size bytes from the stream. 0443 """ 0444 if self.type == "tar": 0445 return self.__read(size) 0446 0447 c = len(self.dbuf) 0448 t = [self.dbuf] 0449 while c < size: 0450 buf = self.__read(self.bufsize) 0451 if not buf: 0452 break 0453 buf = self.cmp.decompress(buf) 0454 t.append(buf) 0455 c += len(buf) 0456 t = "".join(t) 0457 self.dbuf = t[size:] 0458 return t[:size] 0459 0460 def __read(self, size): 0461 """Return size bytes from stream. If internal buffer is empty, 0462 read another block from the stream. 0463 """ 0464 c = len(self.buf) 0465 t = [self.buf] 0466 while c < size: 0467 buf = self.fileobj.read(self.bufsize) 0468 if not buf: 0469 break 0470 t.append(buf) 0471 c += len(buf) 0472 t = "".join(t) 0473 self.buf = t[size:] 0474 return t[:size] 0475 # class _Stream 0476 0477 #------------------------ 0478 # Extraction file object 0479 #------------------------ 0480 class ExFileObject(object): 0481 """File-like object for reading an archive member. 0482 Is returned by TarFile.extractfile(). Support for 0483 sparse files included. 
0484 """ 0485 0486 def __init__(self, tarfile, tarinfo): 0487 self.fileobj = tarfile.fileobj 0488 self.name = tarinfo.name 0489 self.mode = "r" 0490 self.closed = False 0491 self.offset = tarinfo.offset_data 0492 self.size = tarinfo.size 0493 self.pos = 0L 0494 self.linebuffer = "" 0495 if tarinfo.issparse(): 0496 self.sparse = tarinfo.sparse 0497 self.read = self._readsparse 0498 else: 0499 self.read = self._readnormal 0500 0501 def __read(self, size): 0502 """Overloadable read method. 0503 """ 0504 return self.fileobj.read(size) 0505 0506 def readline(self, size=-1): 0507 """Read a line with approx. size. If size is negative, 0508 read a whole line. readline() and read() must not 0509 be mixed up (!). 0510 """ 0511 if size < 0: 0512 size = sys.maxint 0513 0514 nl = self.linebuffer.find("\n") 0515 if nl >= 0: 0516 nl = min(nl, size) 0517 else: 0518 size -= len(self.linebuffer) 0519 while (nl < 0 and size > 0): 0520 buf = self.read(min(size, 100)) 0521 if not buf: 0522 break 0523 self.linebuffer += buf 0524 size -= len(buf) 0525 nl = self.linebuffer.find("\n") 0526 if nl == -1: 0527 s = self.linebuffer 0528 self.linebuffer = "" 0529 return s 0530 buf = self.linebuffer[:nl] 0531 self.linebuffer = self.linebuffer[nl + 1:] 0532 while buf[-1:] == "\r": 0533 buf = buf[:-1] 0534 return buf + "\n" 0535 0536 def readlines(self): 0537 """Return a list with all (following) lines. 0538 """ 0539 result = [] 0540 while True: 0541 line = self.readline() 0542 if not line: break 0543 result.append(line) 0544 return result 0545 0546 def _readnormal(self, size=None): 0547 """Read operation for regular files. 
0548 """ 0549 if self.closed: 0550 raise ValueError, "file is closed" 0551 self.fileobj.seek(self.offset + self.pos) 0552 bytesleft = self.size - self.pos 0553 if size is None: 0554 bytestoread = bytesleft 0555 else: 0556 bytestoread = min(size, bytesleft) 0557 self.pos += bytestoread 0558 return self.__read(bytestoread) 0559 0560 def _readsparse(self, size=None): 0561 """Read operation for sparse files. 0562 """ 0563 if self.closed: 0564 raise ValueError, "file is closed" 0565 0566 if size is None: 0567 size = self.size - self.pos 0568 0569 data = [] 0570 while size > 0: 0571 buf = self._readsparsesection(size) 0572 if not buf: 0573 break 0574 size -= len(buf) 0575 data.append(buf) 0576 return "".join(data) 0577 0578 def _readsparsesection(self, size): 0579 """Read a single section of a sparse file. 0580 """ 0581 section = self.sparse.find(self.pos) 0582 0583 if section is None: 0584 return "" 0585 0586 toread = min(size, section.offset + section.size - self.pos) 0587 if isinstance(section, _data): 0588 realpos = section.realpos + self.pos - section.offset 0589 self.pos += toread 0590 self.fileobj.seek(self.offset + realpos) 0591 return self.__read(toread) 0592 else: 0593 self.pos += toread 0594 return NUL * toread 0595 0596 def tell(self): 0597 """Return the current file position. 0598 """ 0599 return self.pos 0600 0601 def seek(self, pos, whence=0): 0602 """Seek to a position in the file. 0603 """ 0604 self.linebuffer = "" 0605 if whence == 0: 0606 self.pos = min(max(pos, 0), self.size) 0607 if whence == 1: 0608 if pos < 0: 0609 self.pos = max(self.pos + pos, 0) 0610 else: 0611 self.pos = min(self.pos + pos, self.size) 0612 if whence == 2: 0613 self.pos = max(min(self.size + pos, self.size), 0) 0614 0615 def close(self): 0616 """Close the file object. 
0617 """ 0618 self.closed = True 0619 #class ExFileObject 0620 0621 #------------------ 0622 # Exported Classes 0623 #------------------ 0624 class TarInfo(object): 0625 """Informational class which holds the details about an 0626 archive member given by a tar header block. 0627 TarInfo objects are returned by TarFile.getmember(), 0628 TarFile.getmembers() and TarFile.gettarinfo() and are 0629 usually created internally. 0630 """ 0631 0632 def __init__(self, name=""): 0633 """Construct a TarInfo object. name is the optional name 0634 of the member. 0635 """ 0636 0637 self.name = name # member name (dirnames must end with '/') 0638 self.mode = 0666 # file permissions 0639 self.uid = 0 # user id 0640 self.gid = 0 # group id 0641 self.size = 0 # file size 0642 self.mtime = 0 # modification time 0643 self.chksum = 0 # header checksum 0644 self.type = REGTYPE # member type 0645 self.linkname = "" # link name 0646 self.uname = "user" # user name 0647 self.gname = "group" # group name 0648 self.devmajor = 0 #- 0649 self.devminor = 0 #-for use with CHRTYPE and BLKTYPE 0650 self.prefix = "" # prefix to filename or holding information 0651 # about sparse files 0652 0653 self.offset = 0 # the tar header starts here 0654 self.offset_data = 0 # the file's data starts here 0655 0656 def __repr__(self): 0657 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) 0658 0659 def frombuf(cls, buf): 0660 """Construct a TarInfo object from a 512 byte string buffer. 0661 """ 0662 tarinfo = cls() 0663 tarinfo.name = nts(buf[0:100]) 0664 tarinfo.mode = int(buf[100:108], 8) 0665 tarinfo.uid = int(buf[108:116],8) 0666 tarinfo.gid = int(buf[116:124],8) 0667 0668 # There are two possible codings for the size field we 0669 # have to discriminate, see comment in tobuf() below. 
0670 if buf[124] != chr(0200): 0671 tarinfo.size = long(buf[124:136], 8) 0672 else: 0673 tarinfo.size = 0L 0674 for i in range(11): 0675 tarinfo.size <<= 8 0676 tarinfo.size += ord(buf[125 + i]) 0677 0678 tarinfo.mtime = long(buf[136:148], 8) 0679 tarinfo.chksum = int(buf[148:156], 8) 0680 tarinfo.type = buf[156:157] 0681 tarinfo.linkname = nts(buf[157:257]) 0682 tarinfo.uname = nts(buf[265:297]) 0683 tarinfo.gname = nts(buf[297:329]) 0684 try: 0685 tarinfo.devmajor = int(buf[329:337], 8) 0686 tarinfo.devminor = int(buf[337:345], 8) 0687 except ValueError: 0688 tarinfo.devmajor = tarinfo.devmajor = 0 0689 tarinfo.prefix = buf[345:500] 0690 0691 # The prefix field is used for filenames > 100 in 0692 # the POSIX standard. 0693 # name = prefix + '/' + name 0694 if tarinfo.type != GNUTYPE_SPARSE: 0695 tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name)) 0696 0697 # Directory names should have a '/' at the end. 0698 if tarinfo.isdir() and tarinfo.name[-1:] != "/": 0699 tarinfo.name += "/" 0700 return tarinfo 0701 0702 frombuf = classmethod(frombuf) 0703 0704 def tobuf(self): 0705 """Return a tar header block as a 512 byte string. 0706 """ 0707 # Prefer the size to be encoded as 11 octal ascii digits 0708 # which is the most portable. If the size exceeds this 0709 # limit (>= 8 GB), encode it as an 88-bit value which is 0710 # a GNU tar feature. 0711 if self.size <= MAXSIZE_MEMBER: 0712 size = "%011o" % self.size 0713 else: 0714 s = self.size 0715 size = "" 0716 for i in range(11): 0717 size = chr(s & 0377) + size 0718 s >>= 8 0719 size = chr(0200) + size 0720 0721 # The following code was contributed by Detlef Lannert. 
0722 parts = [] 0723 for value, fieldsize in ( 0724 (self.name, 100), 0725 ("%07o" % (self.mode & 07777), 8), 0726 ("%07o" % self.uid, 8), 0727 ("%07o" % self.gid, 8), 0728 (size, 12), 0729 ("%011o" % self.mtime, 12), 0730 (" ", 8), 0731 (self.type, 1), 0732 (self.linkname, 100), 0733 (MAGIC, 6), 0734 (VERSION, 2), 0735 (self.uname, 32), 0736 (self.gname, 32), 0737 ("%07o" % self.devmajor, 8), 0738 ("%07o" % self.devminor, 8), 0739 (self.prefix, 155) 0740 ): 0741 l = len(value) 0742 parts.append(value[:fieldsize] + (fieldsize - l) * NUL) 0743 0744 buf = "".join(parts) 0745 chksum = calc_chksum(buf) 0746 buf = buf[:148] + "%06o\0" % chksum + buf[155:] 0747 buf += (BLOCKSIZE - len(buf)) * NUL 0748 self.buf = buf 0749 return buf 0750 0751 def isreg(self): 0752 return self.type in REGULAR_TYPES 0753 def isfile(self): 0754 return self.isreg() 0755 def isdir(self): 0756 return self.type == DIRTYPE 0757 def issym(self): 0758 return self.type == SYMTYPE 0759 def islnk(self): 0760 return self.type == LNKTYPE 0761 def ischr(self): 0762 return self.type == CHRTYPE 0763 def isblk(self): 0764 return self.type == BLKTYPE 0765 def isfifo(self): 0766 return self.type == FIFOTYPE 0767 def issparse(self): 0768 return self.type == GNUTYPE_SPARSE 0769 def isdev(self): 0770 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) 0771 # class TarInfo 0772 0773 class TarFile(object): 0774 """The TarFile Class provides an interface to tar archives. 0775 """ 0776 0777 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) 0778 0779 dereference = False # If true, add content of linked file to the 0780 # tar file, else the link. 0781 0782 ignore_zeros = False # If true, skips empty or invalid blocks and 0783 # continues processing. 0784 0785 errorlevel = 0 # If 0, fatal errors only appear in debug 0786 # messages (if debug >= 0). If > 0, errors 0787 # are passed to the caller as exceptions. 0788 0789 posix = False # If True, generates POSIX.1-1990-compliant 0790 # archives (no GNU extensions!) 
0791 0792 fileobject = ExFileObject 0793 0794 def __init__(self, name=None, mode="r", fileobj=None): 0795 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to 0796 read from an existing archive, 'a' to append data to an existing 0797 file or 'w' to create a new file overwriting an existing one. `mode' 0798 defaults to 'r'. 0799 If `fileobj' is given, it is used for reading or writing data. If it 0800 can be determined, `mode' is overridden by `fileobj's mode. 0801 `fileobj' is not closed, when TarFile is closed. 0802 """ 0803 self.name = name 0804 0805 if len(mode) > 1 or mode not in "raw": 0806 raise ValueError, "mode must be 'r', 'a' or 'w'" 0807 self._mode = mode 0808 self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] 0809 0810 if not fileobj: 0811 fileobj = file(self.name, self.mode) 0812 self._extfileobj = False 0813 else: 0814 if self.name is None and hasattr(fileobj, "name"): 0815 self.name = fileobj.name 0816 if hasattr(fileobj, "mode"): 0817 self.mode = fileobj.mode 0818 self._extfileobj = True 0819 self.fileobj = fileobj 0820 0821 # Init datastructures 0822 self.closed = False 0823 self.members = [] # list of members as TarInfo objects 0824 self._loaded = False # flag if all members have been read 0825 self.offset = 0L # current position in the archive file 0826 self.inodes = {} # dictionary caching the inodes of 0827 # archive members already added 0828 0829 if self._mode == "r": 0830 self.firstmember = None 0831 self.firstmember = self.next() 0832 0833 if self._mode == "a": 0834 # Move to the end of the archive, 0835 # before the first empty block. 
0836 self.firstmember = None 0837 while True: 0838 try: 0839 tarinfo = self.next() 0840 except ReadError: 0841 self.fileobj.seek(0) 0842 break 0843 if tarinfo is None: 0844 self.fileobj.seek(- BLOCKSIZE, 1) 0845 break 0846 0847 if self._mode in "aw": 0848 self._loaded = True 0849 0850 #-------------------------------------------------------------------------- 0851 # Below are the classmethods which act as alternate constructors to the 0852 # TarFile class. The open() method is the only one that is needed for 0853 # public use; it is the "super"-constructor and is able to select an 0854 # adequate "sub"-constructor for a particular compression using the mapping 0855 # from OPEN_METH. 0856 # 0857 # This concept allows one to subclass TarFile without losing the comfort of 0858 # the super-constructor. A sub-constructor is registered and made available 0859 # by adding it to the mapping in OPEN_METH. 0860 0861 def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512): 0862 """Open a tar archive for reading, writing or appending. Return 0863 an appropriate TarFile class. 
0864 0865 mode: 0866 'r' open for reading with transparent compression 0867 'r:' open for reading exclusively uncompressed 0868 'r:gz' open for reading with gzip compression 0869 'r:bz2' open for reading with bzip2 compression 0870 'a' or 'a:' open for appending 0871 'w' or 'w:' open for writing without compression 0872 'w:gz' open for writing with gzip compression 0873 'w:bz2' open for writing with bzip2 compression 0874 'r|' open an uncompressed stream of tar blocks for reading 0875 'r|gz' open a gzip compressed stream of tar blocks 0876 'r|bz2' open a bzip2 compressed stream of tar blocks 0877 'w|' open an uncompressed stream for writing 0878 'w|gz' open a gzip compressed stream for writing 0879 'w|bz2' open a bzip2 compressed stream for writing 0880 """ 0881 0882 if not name and not fileobj: 0883 raise ValueError, "nothing to open" 0884 0885 if ":" in mode: 0886 filemode, comptype = mode.split(":", 1) 0887 filemode = filemode or "r" 0888 comptype = comptype or "tar" 0889 0890 # Select the *open() function according to 0891 # given compression. 0892 if comptype in cls.OPEN_METH: 0893 func = getattr(cls, cls.OPEN_METH[comptype]) 0894 else: 0895 raise CompressionError, "unknown compression type %r" % comptype 0896 return func(name, filemode, fileobj) 0897 0898 elif "|" in mode: 0899 filemode, comptype = mode.split("|", 1) 0900 filemode = filemode or "r" 0901 comptype = comptype or "tar" 0902 0903 if filemode not in "rw": 0904 raise ValueError, "mode must be 'r' or 'w'" 0905 0906 t = cls(name, filemode, 0907 _Stream(name, filemode, comptype, fileobj, bufsize)) 0908 t._extfileobj = False 0909 return t 0910 0911 elif mode == "r": 0912 # Find out which *open() is appropriate for opening the file. 
0913 for comptype in cls.OPEN_METH: 0914 func = getattr(cls, cls.OPEN_METH[comptype]) 0915 try: 0916 return func(name, "r", fileobj) 0917 except (ReadError, CompressionError): 0918 continue 0919 raise ReadError, "file could not be opened successfully" 0920 0921 elif mode in "aw": 0922 return cls.taropen(name, mode, fileobj) 0923 0924 raise ValueError, "undiscernible mode" 0925 0926 open = classmethod(open) 0927 0928 def taropen(cls, name, mode="r", fileobj=None): 0929 """Open uncompressed tar archive name for reading or writing. 0930 """ 0931 if len(mode) > 1 or mode not in "raw": 0932 raise ValueError, "mode must be 'r', 'a' or 'w'" 0933 return cls(name, mode, fileobj) 0934 0935 taropen = classmethod(taropen) 0936 0937 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9): 0938 """Open gzip compressed tar archive name for reading or writing. 0939 Appending is not allowed. 0940 """ 0941 if len(mode) > 1 or mode not in "rw": 0942 raise ValueError, "mode must be 'r' or 'w'" 0943 0944 try: 0945 import gzip 0946 gzip.GzipFile 0947 except (ImportError, AttributeError): 0948 raise CompressionError, "gzip module is not available" 0949 0950 pre, ext = os.path.splitext(name) 0951 pre = os.path.basename(pre) 0952 if ext == ".tgz": 0953 ext = ".tar" 0954 if ext == ".gz": 0955 ext = "" 0956 tarname = pre + ext 0957 0958 if fileobj is None: 0959 fileobj = file(name, mode + "b") 0960 0961 if mode != "r": 0962 name = tarname 0963 0964 try: 0965 t = cls.taropen(tarname, mode, 0966 gzip.GzipFile(name, mode, compresslevel, fileobj) 0967 ) 0968 except IOError: 0969 raise ReadError, "not a gzip file" 0970 t._extfileobj = False 0971 return t 0972 0973 gzopen = classmethod(gzopen) 0974 0975 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9): 0976 """Open bzip2 compressed tar archive name for reading or writing. 0977 Appending is not allowed. 0978 """ 0979 if len(mode) > 1 or mode not in "rw": 0980 raise ValueError, "mode must be 'r' or 'w'." 
0981 0982 try: 0983 import bz2 0984 except ImportError: 0985 raise CompressionError, "bz2 module is not available" 0986 0987 pre, ext = os.path.splitext(name) 0988 pre = os.path.basename(pre) 0989 if ext == ".tbz2": 0990 ext = ".tar" 0991 if ext == ".bz2": 0992 ext = "" 0993 tarname = pre + ext 0994 0995 if fileobj is not None: 0996 raise ValueError, "no support for external file objects" 0997 0998 try: 0999 t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel)) 1000 except IOError: 1001 raise ReadError, "not a bzip2 file" 1002 t._extfileobj = False 1003 return t 1004 1005 bz2open = classmethod(bz2open) 1006 1007 # All *open() methods are registered here. 1008 OPEN_METH = { 1009 "tar": "taropen", # uncompressed tar 1010 "gz": "gzopen", # gzip compressed tar 1011 "bz2": "bz2open" # bzip2 compressed tar 1012 } 1013 1014 #-------------------------------------------------------------------------- 1015 # The public methods which TarFile provides: 1016 1017 def close(self): 1018 """Close the TarFile. In write-mode, two finishing zero blocks are 1019 appended to the archive. 1020 """ 1021 if self.closed: 1022 return 1023 1024 if self._mode in "aw": 1025 self.fileobj.write(NUL * (BLOCKSIZE * 2)) 1026 self.offset += (BLOCKSIZE * 2) 1027 # fill up the end with zero-blocks 1028 # (like option -b20 for tar does) 1029 blocks, remainder = divmod(self.offset, RECORDSIZE) 1030 if remainder > 0: 1031 self.fileobj.write(NUL * (RECORDSIZE - remainder)) 1032 1033 if not self._extfileobj: 1034 self.fileobj.close() 1035 self.closed = True 1036 1037 def getmember(self, name): 1038 """Return a TarInfo object for member `name'. If `name' can not be 1039 found in the archive, KeyError is raised. If a member occurs more 1040 than once in the archive, its last occurence is assumed to be the 1041 most up-to-date version. 
1042 """ 1043 tarinfo = self._getmember(name) 1044 if tarinfo is None: 1045 raise KeyError, "filename %r not found" % name 1046 return tarinfo 1047 1048 def getmembers(self): 1049 """Return the members of the archive as a list of TarInfo objects. The 1050 list has the same order as the members in the archive. 1051 """ 1052 self._check() 1053 if not self._loaded: # if we want to obtain a list of 1054 self._load() # all members, we first have to 1055 # scan the whole archive. 1056 return self.members 1057 1058 def getnames(self): 1059 """Return the members of the archive as a list of their names. It has 1060 the same order as the list returned by getmembers(). 1061 """ 1062 return [tarinfo.name for tarinfo in self.getmembers()] 1063 1064 def gettarinfo(self, name=None, arcname=None, fileobj=None): 1065 """Create a TarInfo object for either the file `name' or the file 1066 object `fileobj' (using os.fstat on its file descriptor). You can 1067 modify some of the TarInfo's attributes before you add it using 1068 addfile(). If given, `arcname' specifies an alternative name for the 1069 file in the archive. 1070 """ 1071 self._check("aw") 1072 1073 # When fileobj is given, replace name by 1074 # fileobj's real name. 1075 if fileobj is not None: 1076 name = fileobj.name 1077 1078 # Building the name of the member in the archive. 1079 # Backward slashes are converted to forward slashes, 1080 # Absolute paths are turned to relative paths. 1081 if arcname is None: 1082 arcname = name 1083 arcname = normpath(arcname) 1084 drv, arcname = os.path.splitdrive(arcname) 1085 while arcname[0:1] == "/": 1086 arcname = arcname[1:] 1087 1088 # Now, fill the TarInfo object with 1089 # information specific for the file. 1090 tarinfo = TarInfo() 1091 1092 # Use os.stat or os.lstat, depending on platform 1093 # and if symlinks shall be resolved. 
1094 if fileobj is None: 1095 if hasattr(os, "lstat") and not self.dereference: 1096 statres = os.lstat(name) 1097 else: 1098 statres = os.stat(name) 1099 else: 1100 statres = os.fstat(fileobj.fileno()) 1101 linkname = "" 1102 1103 stmd = statres.st_mode 1104 if stat.S_ISREG(stmd): 1105 inode = (statres.st_ino, statres.st_dev) 1106 if inode in self.inodes and not self.dereference: 1107 # Is it a hardlink to an already 1108 # archived file? 1109 type = LNKTYPE 1110 linkname = self.inodes[inode] 1111 else: 1112 # The inode is added only if its valid. 1113 # For win32 it is always 0. 1114 type = REGTYPE 1115 if inode[0]: 1116 self.inodes[inode] = arcname 1117 elif stat.S_ISDIR(stmd): 1118 type = DIRTYPE 1119 if arcname[-1:] != "/": 1120 arcname += "/" 1121 elif stat.S_ISFIFO(stmd): 1122 type = FIFOTYPE 1123 elif stat.S_ISLNK(stmd): 1124 type = SYMTYPE 1125 linkname = os.readlink(name) 1126 elif stat.S_ISCHR(stmd): 1127 type = CHRTYPE 1128 elif stat.S_ISBLK(stmd): 1129 type = BLKTYPE 1130 else: 1131 return None 1132 1133 # Fill the TarInfo object with all 1134 # information we can get. 1135 tarinfo.name = arcname 1136 tarinfo.mode = stmd 1137 tarinfo.uid = statres.st_uid 1138 tarinfo.gid = statres.st_gid 1139 if stat.S_ISDIR(stmd): 1140 # For a directory, the size must be 0 1141 tarinfo.size = 0 1142 else: 1143 tarinfo.size = statres.st_size 1144 tarinfo.mtime = statres.st_mtime 1145 tarinfo.type = type 1146 tarinfo.linkname = linkname 1147 if pwd: 1148 try: 1149 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] 1150 except KeyError: 1151 pass 1152 if grp: 1153 try: 1154 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] 1155 except KeyError: 1156 pass 1157 1158 if type in (CHRTYPE, BLKTYPE): 1159 if hasattr(os, "major") and hasattr(os, "minor"): 1160 tarinfo.devmajor = os.major(statres.st_rdev) 1161 tarinfo.devminor = os.minor(statres.st_rdev) 1162 return tarinfo 1163 1164 def list(self, verbose=True): 1165 """Print a table of contents to sys.stdout. 
If `verbose' is False, only 1166 the names of the members are printed. If it is True, an `ls -l'-like 1167 output is produced. 1168 """ 1169 self._check() 1170 1171 for tarinfo in self: 1172 if verbose: 1173 print filemode(tarinfo.mode), 1174 print "%s/%s" % (tarinfo.uname or tarinfo.uid, 1175 tarinfo.gname or tarinfo.gid), 1176 if tarinfo.ischr() or tarinfo.isblk(): 1177 print "%10s" % ("%d,%d" \ 1178 % (tarinfo.devmajor, tarinfo.devminor)), 1179 else: 1180 print "%10d" % tarinfo.size, 1181 print "%d-%02d-%02d %02d:%02d:%02d" \ 1182 % time.localtime(tarinfo.mtime)[:6], 1183 1184 print tarinfo.name, 1185 1186 if verbose: 1187 if tarinfo.issym(): 1188 print "->", tarinfo.linkname, 1189 if tarinfo.islnk(): 1190 print "link to", tarinfo.linkname, 1191 print 1192 1193 def add(self, name, arcname=None, recursive=True): 1194 """Add the file `name' to the archive. `name' may be any type of file 1195 (directory, fifo, symbolic link, etc.). If given, `arcname' 1196 specifies an alternative name for the file in the archive. 1197 Directories are added recursively by default. This can be avoided by 1198 setting `recursive' to False. 1199 """ 1200 self._check("aw") 1201 1202 if arcname is None: 1203 arcname = name 1204 1205 # Skip if somebody tries to archive the archive... 1206 if self.name is not None \ 1207 and os.path.abspath(name) == os.path.abspath(self.name): 1208 self._dbg(2, "tarfile: Skipped %r" % name) 1209 return 1210 1211 # Special case: The user wants to add the current 1212 # working directory. 1213 if name == ".": 1214 if recursive: 1215 if arcname == ".": 1216 arcname = "" 1217 for f in os.listdir("."): 1218 self.add(f, os.path.join(arcname, f)) 1219 return 1220 1221 self._dbg(1, name) 1222 1223 # Create a TarInfo object from the file. 1224 tarinfo = self.gettarinfo(name, arcname) 1225 1226 if tarinfo is None: 1227 self._dbg(1, "tarfile: Unsupported type %r" % name) 1228 return 1229 1230 # Append the tar header and data to the archive. 
1231 if tarinfo.isreg(): 1232 f = file(name, "rb") 1233 self.addfile(tarinfo, f) 1234 f.close() 1235 1236 if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE): 1237 tarinfo.size = 0L 1238 self.addfile(tarinfo) 1239 1240 if tarinfo.isdir(): 1241 self.addfile(tarinfo) 1242 if recursive: 1243 for f in os.listdir(name): 1244 self.add(os.path.join(name, f), os.path.join(arcname, f)) 1245 1246 def addfile(self, tarinfo, fileobj=None): 1247 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is 1248 given, tarinfo.size bytes are read from it and added to the archive. 1249 You can create TarInfo objects using gettarinfo(). 1250 On Windows platforms, `fileobj' should always be opened with mode 1251 'rb' to avoid irritation about the file size. 1252 """ 1253 self._check("aw") 1254 1255 tarinfo.name = normpath(tarinfo.name) 1256 if tarinfo.isdir(): 1257 # directories should end with '/' 1258 tarinfo.name += "/" 1259 1260 if tarinfo.linkname: 1261 tarinfo.linkname = normpath(tarinfo.linkname) 1262 1263 if tarinfo.size > MAXSIZE_MEMBER: 1264 if self.posix: 1265 raise ValueError, "file is too large (>= 8 GB)" 1266 else: 1267 self._dbg(2, "tarfile: Created GNU tar largefile header") 1268 1269 1270 if len(tarinfo.linkname) > LENGTH_LINK: 1271 if self.posix: 1272 raise ValueError, "linkname is too long (>%d)" \ 1273 % (LENGTH_LINK) 1274 else: 1275 self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK) 1276 tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1] 1277 self._dbg(2, "tarfile: Created GNU tar extension LONGLINK") 1278 1279 if len(tarinfo.name) > LENGTH_NAME: 1280 if self.posix: 1281 prefix = tarinfo.name[:LENGTH_PREFIX + 1] 1282 while prefix and prefix[-1] != "/": 1283 prefix = prefix[:-1] 1284 1285 name = tarinfo.name[len(prefix):] 1286 prefix = prefix[:-1] 1287 1288 if not prefix or len(name) > LENGTH_NAME: 1289 raise ValueError, "name is too long (>%d)" \ 1290 % (LENGTH_NAME) 1291 1292 tarinfo.name = name 1293 tarinfo.prefix = prefix 1294 
else: 1295 self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME) 1296 tarinfo.name = tarinfo.name[:LENGTH_NAME - 1] 1297 self._dbg(2, "tarfile: Created GNU tar extension LONGNAME") 1298 1299 self.fileobj.write(tarinfo.tobuf()) 1300 self.offset += BLOCKSIZE 1301 1302 # If there's data to follow, append it. 1303 if fileobj is not None: 1304 copyfileobj(fileobj, self.fileobj, tarinfo.size) 1305 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 1306 if remainder > 0: 1307 self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 1308 blocks += 1 1309 self.offset += blocks * BLOCKSIZE 1310 1311 self.members.append(tarinfo) 1312 1313 def extract(self, member, path=""): 1314 """Extract a member from the archive to the current working directory, 1315 using its full name. Its file information is extracted as accurately 1316 as possible. `member' may be a filename or a TarInfo object. You can 1317 specify a different directory using `path'. 1318 """ 1319 self._check("r") 1320 1321 if isinstance(member, TarInfo): 1322 tarinfo = member 1323 else: 1324 tarinfo = self.getmember(member) 1325 1326 # Prepare the link target for makelink(). 1327 if tarinfo.islnk(): 1328 tarinfo._link_target = os.path.join(path, tarinfo.linkname) 1329 1330 try: 1331 self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) 1332 except EnvironmentError, e: 1333 if self.errorlevel > 0: 1334 raise 1335 else: 1336 if e.filename is None: 1337 self._dbg(1, "tarfile: %s" % e.strerror) 1338 else: 1339 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) 1340 except ExtractError, e: 1341 if self.errorlevel > 1: 1342 raise 1343 else: 1344 self._dbg(1, "tarfile: %s" % e) 1345 1346 def extractfile(self, member): 1347 """Extract a member from the archive as a file object. `member' may be 1348 a filename or a TarInfo object. If `member' is a regular file, a 1349 file-like object is returned. If `member' is a link, a file-like 1350 object is constructed from the link's target. 
If `member' is none of 1351 the above, None is returned. 1352 The file-like object is read-only and provides the following 1353 methods: read(), readline(), readlines(), seek() and tell() 1354 """ 1355 self._check("r") 1356 1357 if isinstance(member, TarInfo): 1358 tarinfo = member 1359 else: 1360 tarinfo = self.getmember(member) 1361 1362 if tarinfo.isreg(): 1363 return self.fileobject(self, tarinfo) 1364 1365 elif tarinfo.type not in SUPPORTED_TYPES: 1366 # If a member's type is unknown, it is treated as a 1367 # regular file. 1368 return self.fileobject(self, tarinfo) 1369 1370 elif tarinfo.islnk() or tarinfo.issym(): 1371 if isinstance(self.fileobj, _Stream): 1372 # A small but ugly workaround for the case that someone tries 1373 # to extract a (sym)link as a file-object from a non-seekable 1374 # stream of tar blocks. 1375 raise StreamError, "cannot extract (sym)link as file object" 1376 else: 1377 # A (sym)link's file object is it's target's file object. 1378 return self.extractfile(self._getmember(tarinfo.linkname, 1379 tarinfo)) 1380 else: 1381 # If there's no data associated with the member (directory, chrdev, 1382 # blkdev, etc.), return None instead of a file object. 1383 return None 1384 1385 def _extract_member(self, tarinfo, targetpath): 1386 """Extract the TarInfo object tarinfo to a physical 1387 file called targetpath. 1388 """ 1389 # Fetch the TarInfo object for the given name 1390 # and build the destination pathname, replacing 1391 # forward slashes to platform specific separators. 1392 if targetpath[-1:] == "/": 1393 targetpath = targetpath[:-1] 1394 targetpath = os.path.normpath(targetpath) 1395 1396 # Create all upper directories. 
1397 upperdirs = os.path.dirname(targetpath) 1398 if upperdirs and not os.path.exists(upperdirs): 1399 ti = TarInfo() 1400 ti.name = upperdirs 1401 ti.type = DIRTYPE 1402 ti.mode = 0777 1403 ti.mtime = tarinfo.mtime 1404 ti.uid = tarinfo.uid 1405 ti.gid = tarinfo.gid 1406 ti.uname = tarinfo.uname 1407 ti.gname = tarinfo.gname 1408 try: 1409 self._extract_member(ti, ti.name) 1410 except: 1411 pass 1412 1413 if tarinfo.islnk() or tarinfo.issym(): 1414 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) 1415 else: 1416 self._dbg(1, tarinfo.name) 1417 1418 if tarinfo.isreg(): 1419 self.makefile(tarinfo, targetpath) 1420 elif tarinfo.isdir(): 1421 self.makedir(tarinfo, targetpath) 1422 elif tarinfo.isfifo(): 1423 self.makefifo(tarinfo, targetpath) 1424 elif tarinfo.ischr() or tarinfo.isblk(): 1425 self.makedev(tarinfo, targetpath) 1426 elif tarinfo.islnk() or tarinfo.issym(): 1427 self.makelink(tarinfo, targetpath) 1428 elif tarinfo.type not in SUPPORTED_TYPES: 1429 self.makeunknown(tarinfo, targetpath) 1430 else: 1431 self.makefile(tarinfo, targetpath) 1432 1433 self.chown(tarinfo, targetpath) 1434 if not tarinfo.issym(): 1435 self.chmod(tarinfo, targetpath) 1436 self.utime(tarinfo, targetpath) 1437 1438 #-------------------------------------------------------------------------- 1439 # Below are the different file methods. They are called via 1440 # _extract_member() when extract() is called. They can be replaced in a 1441 # subclass to implement other functionality. 1442 1443 def makedir(self, tarinfo, targetpath): 1444 """Make a directory called targetpath. 1445 """ 1446 try: 1447 os.mkdir(targetpath) 1448 except EnvironmentError, e: 1449 if e.errno != errno.EEXIST: 1450 raise 1451 1452 def makefile(self, tarinfo, targetpath): 1453 """Make a file called targetpath. 
1454 """ 1455 source = self.extractfile(tarinfo) 1456 target = file(targetpath, "wb") 1457 copyfileobj(source, target) 1458 source.close() 1459 target.close() 1460 1461 def makeunknown(self, tarinfo, targetpath): 1462 """Make a file from a TarInfo object with an unknown type 1463 at targetpath. 1464 """ 1465 self.makefile(tarinfo, targetpath) 1466 self._dbg(1, "tarfile: Unknown file type %r, " \ 1467 "extracted as regular file." % tarinfo.type) 1468 1469 def makefifo(self, tarinfo, targetpath): 1470 """Make a fifo called targetpath. 1471 """ 1472 if hasattr(os, "mkfifo"): 1473 os.mkfifo(targetpath) 1474 else: 1475 raise ExtractError, "fifo not supported by system" 1476 1477 def makedev(self, tarinfo, targetpath): 1478 """Make a character or block device called targetpath. 1479 """ 1480 if not hasattr(os, "mknod") or not hasattr(os, "makedev"): 1481 raise ExtractError, "special devices not supported by system" 1482 1483 mode = tarinfo.mode 1484 if tarinfo.isblk(): 1485 mode |= stat.S_IFBLK 1486 else: 1487 mode |= stat.S_IFCHR 1488 1489 os.mknod(targetpath, mode, 1490 os.makedev(tarinfo.devmajor, tarinfo.devminor)) 1491 1492 def makelink(self, tarinfo, targetpath): 1493 """Make a (symbolic) link called targetpath. If it cannot be created 1494 (platform limitation), we try to make a copy of the referenced file 1495 instead of a link. 1496 """ 1497 linkpath = tarinfo.linkname 1498 try: 1499 if tarinfo.issym(): 1500 os.symlink(linkpath, targetpath) 1501 else: 1502 # See extract(). 
1503 os.link(tarinfo._link_target, targetpath) 1504 except AttributeError: 1505 if tarinfo.issym(): 1506 linkpath = os.path.join(os.path.dirname(tarinfo.name), 1507 linkpath) 1508 linkpath = normpath(linkpath) 1509 1510 try: 1511 self._extract_member(self.getmember(linkpath), targetpath) 1512 except (EnvironmentError, KeyError), e: 1513 linkpath = os.path.normpath(linkpath) 1514 try: 1515 shutil.copy2(linkpath, targetpath) 1516 except EnvironmentError, e: 1517 raise IOError, "link could not be created" 1518 1519 def chown(self, tarinfo, targetpath): 1520 """Set owner of targetpath according to tarinfo. 1521 """ 1522 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: 1523 # We have to be root to do so. 1524 try: 1525 g = grp.getgrnam(tarinfo.gname)[2] 1526 except KeyError: 1527 try: 1528 g = grp.getgrgid(tarinfo.gid)[2] 1529 except KeyError: 1530 g = os.getgid() 1531 try: 1532 u = pwd.getpwnam(tarinfo.uname)[2] 1533 except KeyError: 1534 try: 1535 u = pwd.getpwuid(tarinfo.uid)[2] 1536 except KeyError: 1537 u = os.getuid() 1538 try: 1539 if tarinfo.issym() and hasattr(os, "lchown"): 1540 os.lchown(targetpath, u, g) 1541 else: 1542 if sys.platform != "os2emx": 1543 os.chown(targetpath, u, g) 1544 except EnvironmentError, e: 1545 raise ExtractError, "could not change owner" 1546 1547 def chmod(self, tarinfo, targetpath): 1548 """Set file permissions of targetpath according to tarinfo. 1549 """ 1550 if hasattr(os, 'chmod'): 1551 try: 1552 os.chmod(targetpath, tarinfo.mode) 1553 except EnvironmentError, e: 1554 raise ExtractError, "could not change mode" 1555 1556 def utime(self, tarinfo, targetpath): 1557 """Set modification time of targetpath according to tarinfo. 1558 """ 1559 if not hasattr(os, 'utime'): 1560 return 1561 if sys.platform == "win32" and tarinfo.isdir(): 1562 # According to msdn.microsoft.com, it is an error (EACCES) 1563 # to use utime() on directories. 
1564 return 1565 try: 1566 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) 1567 except EnvironmentError, e: 1568 raise ExtractError, "could not change modification time" 1569 1570 #-------------------------------------------------------------------------- 1571 1572 def next(self): 1573 """Return the next member of the archive as a TarInfo object, when 1574 TarFile is opened for reading. Return None if there is no more 1575 available. 1576 """ 1577 self._check("ra") 1578 if self.firstmember is not None: 1579 m = self.firstmember 1580 self.firstmember = None 1581 return m 1582 1583 # Read the next block. 1584 self.fileobj.seek(self.offset) 1585 while True: 1586 buf = self.fileobj.read(BLOCKSIZE) 1587 if not buf: 1588 return None 1589 try: 1590 tarinfo = TarInfo.frombuf(buf) 1591 except ValueError: 1592 if self.ignore_zeros: 1593 if buf.count(NUL) == BLOCKSIZE: 1594 adj = "empty" 1595 else: 1596 adj = "invalid" 1597 self._dbg(2, "0x%X: %s block" % (self.offset, adj)) 1598 self.offset += BLOCKSIZE 1599 continue 1600 else: 1601 # Block is empty or unreadable. 1602 if self.offset == 0: 1603 # If the first block is invalid. That does not 1604 # look like a tar archive we can handle. 1605 raise ReadError,"empty, unreadable or compressed file" 1606 return None 1607 break 1608 1609 # We shouldn't rely on this checksum, because some tar programs 1610 # calculate it differently and it is merely validating the 1611 # header block. We could just as well skip this part, which would 1612 # have a slight effect on performance... 1613 if tarinfo.chksum != calc_chksum(buf): 1614 self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name) 1615 1616 # Set the TarInfo object's offset to the current position of the 1617 # TarFile and set self.offset to the position where the data blocks 1618 # should begin. 
1619 tarinfo.offset = self.offset 1620 self.offset += BLOCKSIZE 1621 1622 # Check if the TarInfo object has a typeflag for which a callback 1623 # method is registered in the TYPE_METH. If so, then call it. 1624 if tarinfo.type in self.TYPE_METH: 1625 return self.TYPE_METH[tarinfo.type](self, tarinfo) 1626 1627 tarinfo.offset_data = self.offset 1628 if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES: 1629 # Skip the following data blocks. 1630 self.offset += self._block(tarinfo.size) 1631 1632 if tarinfo.isreg() and tarinfo.name[:-1] == "/": 1633 # some old tar programs don't know DIRTYPE 1634 tarinfo.type = DIRTYPE 1635 1636 self.members.append(tarinfo) 1637 return tarinfo 1638 1639 #-------------------------------------------------------------------------- 1640 # Below are some methods which are called for special typeflags in the 1641 # next() method, e.g. for unwrapping GNU longname/longlink blocks. They 1642 # are registered in TYPE_METH below. You can register your own methods 1643 # with this mapping. 1644 # A registered method is called with a TarInfo object as only argument. 1645 # 1646 # During its execution the method MUST perform the following tasks: 1647 # 1. set tarinfo.offset_data to the position where the data blocks begin, 1648 # if there is data to follow. 1649 # 2. set self.offset to the position where the next member's header will 1650 # begin. 1651 # 3. append the tarinfo object to self.members, if it is supposed to appear 1652 # as a member of the TarFile object. 1653 # 4. return tarinfo or another valid TarInfo object. 1654 1655 def proc_gnulong(self, tarinfo): 1656 """Evaluate the blocks that hold a GNU longname 1657 or longlink member. 
1658 """ 1659 buf = "" 1660 count = tarinfo.size 1661 while count > 0: 1662 block = self.fileobj.read(BLOCKSIZE) 1663 buf += block 1664 self.offset += BLOCKSIZE 1665 count -= BLOCKSIZE 1666 1667 # Fetch the next header 1668 next = self.next() 1669 1670 next.offset = tarinfo.offset 1671 if tarinfo.type == GNUTYPE_LONGNAME: 1672 next.name = nts(buf) 1673 elif tarinfo.type == GNUTYPE_LONGLINK: 1674 next.linkname = nts(buf) 1675 1676 return next 1677 1678 def proc_sparse(self, tarinfo): 1679 """Analyze a GNU sparse header plus extra headers. 1680 """ 1681 buf = tarinfo.tobuf() 1682 sp = _ringbuffer() 1683 pos = 386 1684 lastpos = 0L 1685 realpos = 0L 1686 # There are 4 possible sparse structs in the 1687 # first header. 1688 for i in xrange(4): 1689 try: 1690 offset = int(buf[pos:pos + 12], 8) 1691 numbytes = int(buf[pos + 12:pos + 24], 8) 1692 except ValueError: 1693 break 1694 if offset > lastpos: 1695 sp.append(_hole(lastpos, offset - lastpos)) 1696 sp.append(_data(offset, numbytes, realpos)) 1697 realpos += numbytes 1698 lastpos = offset + numbytes 1699 pos += 24 1700 1701 isextended = ord(buf[482]) 1702 origsize = int(buf[483:495], 8) 1703 1704 # If the isextended flag is given, 1705 # there are extra headers to process. 
1706 while isextended == 1: 1707 buf = self.fileobj.read(BLOCKSIZE) 1708 self.offset += BLOCKSIZE 1709 pos = 0 1710 for i in xrange(21): 1711 try: 1712 offset = int(buf[pos:pos + 12], 8) 1713 numbytes = int(buf[pos + 12:pos + 24], 8) 1714 except ValueError: 1715 break 1716 if offset > lastpos: 1717 sp.append(_hole(lastpos, offset - lastpos)) 1718 sp.append(_data(offset, numbytes, realpos)) 1719 realpos += numbytes 1720 lastpos = offset + numbytes 1721 pos += 24 1722 isextended = ord(buf[504]) 1723 1724 if lastpos < origsize: 1725 sp.append(_hole(lastpos, origsize - lastpos)) 1726 1727 tarinfo.sparse = sp 1728 1729 tarinfo.offset_data = self.offset 1730 self.offset += self._block(tarinfo.size) 1731 tarinfo.size = origsize 1732 1733 self.members.append(tarinfo) 1734 return tarinfo 1735 1736 # The type mapping for the next() method. The keys are single character 1737 # strings, the typeflag. The values are methods which are called when 1738 # next() encounters such a typeflag. 1739 TYPE_METH = { 1740 GNUTYPE_LONGNAME: proc_gnulong, 1741 GNUTYPE_LONGLINK: proc_gnulong, 1742 GNUTYPE_SPARSE: proc_sparse 1743 } 1744 1745 #-------------------------------------------------------------------------- 1746 # Little helper methods: 1747 1748 def _block(self, count): 1749 """Round up a byte count by BLOCKSIZE and return it, 1750 e.g. _block(834) => 1024. 1751 """ 1752 blocks, remainder = divmod(count, BLOCKSIZE) 1753 if remainder: 1754 blocks += 1 1755 return blocks * BLOCKSIZE 1756 1757 def _getmember(self, name, tarinfo=None): 1758 """Find an archive member by name from bottom to top. 1759 If tarinfo is given, it is used as the starting point. 1760 """ 1761 # Ensure that all members have been loaded. 
1762 members = self.getmembers() 1763 1764 if tarinfo is None: 1765 end = len(members) 1766 else: 1767 end = members.index(tarinfo) 1768 1769 for i in xrange(end - 1, -1, -1): 1770 if name == members[i].name: 1771 return members[i] 1772 1773 def _load(self): 1774 """Read through the entire archive file and look for readable 1775 members. 1776 """ 1777 while True: 1778 tarinfo = self.next() 1779 if tarinfo is None: 1780 break 1781 self._loaded = True 1782 1783 def _check(self, mode=None): 1784 """Check if TarFile is still open, and if the operation's mode 1785 corresponds to TarFile's mode. 1786 """ 1787 if self.closed: 1788 raise IOError, "%s is closed" % self.__class__.__name__ 1789 if mode is not None and self._mode not in mode: 1790 raise IOError, "bad operation for mode %r" % self._mode 1791 1792 def __iter__(self): 1793 """Provide an iterator object. 1794 """ 1795 if self._loaded: 1796 return iter(self.members) 1797 else: 1798 return TarIter(self) 1799 1800 def _create_gnulong(self, name, type): 1801 """Write a GNU longname/longlink member to the TarFile. 1802 It consists of an extended tar header, with the length 1803 of the longname as size, followed by data blocks, 1804 which contain the longname as a null terminated string. 1805 """ 1806 name += NUL 1807 1808 tarinfo = TarInfo() 1809 tarinfo.name = "././@LongLink" 1810 tarinfo.type = type 1811 tarinfo.mode = 0 1812 tarinfo.size = len(name) 1813 1814 # write extended header 1815 self.fileobj.write(tarinfo.tobuf()) 1816 self.offset += BLOCKSIZE 1817 # write name blocks 1818 self.fileobj.write(name) 1819 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) 1820 if remainder > 0: 1821 self.fileobj.write(NUL * (BLOCKSIZE - remainder)) 1822 blocks += 1 1823 self.offset += blocks * BLOCKSIZE 1824 1825 def _dbg(self, level, msg): 1826 """Write debugging output to sys.stderr. 1827 """ 1828 if level <= self.debug: 1829 print >> sys.stderr, msg 1830 # class TarFile 1831 1832 class TarIter: 1833 """Iterator Class. 
1834 1835 for tarinfo in TarFile(...): 1836 suite... 1837 """ 1838 1839 def __init__(self, tarfile): 1840 """Construct a TarIter object. 1841 """ 1842 self.tarfile = tarfile 1843 def __iter__(self): 1844 """Return iterator object. 1845 """ 1846 return self 1847 def next(self): 1848 """Return the next item using TarFile's next() method. 1849 When all members have been read, set TarFile as _loaded. 1850 """ 1851 tarinfo = self.tarfile.next() 1852 if not tarinfo: 1853 self.tarfile._loaded = True 1854 raise StopIteration 1855 return tarinfo 1856 1857 # Helper classes for sparse file support 1858 class _section: 1859 """Base class for _data and _hole. 1860 """ 1861 def __init__(self, offset, size): 1862 self.offset = offset 1863 self.size = size 1864 def __contains__(self, offset): 1865 return self.offset <= offset < self.offset + self.size 1866 1867 class _data(_section): 1868 """Represent a data section in a sparse file. 1869 """ 1870 def __init__(self, offset, size, realpos): 1871 _section.__init__(self, offset, size) 1872 self.realpos = realpos 1873 1874 class _hole(_section): 1875 """Represent a hole section in a sparse file. 1876 """ 1877 pass 1878 1879 class _ringbuffer(list): 1880 """Ringbuffer class which increases performance 1881 over a regular list. 1882 """ 1883 def __init__(self): 1884 self.idx = 0 1885 def find(self, offset): 1886 idx = self.idx 1887 while True: 1888 item = self[idx] 1889 if offset in item: 1890 break 1891 idx += 1 1892 if idx == len(self): 1893 idx = 0 1894 if idx == self.idx: 1895 # End of File 1896 return None 1897 self.idx = idx 1898 return item 1899 1900 #--------------------------------------------- 1901 # zipfile compatible TarFile class 1902 #--------------------------------------------- 1903 TAR_PLAIN = 0 # zipfile.ZIP_STORED 1904 TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED 1905 class TarFileCompat: 1906 """TarFile class compatible with standard module zipfile's 1907 ZipFile class. 
1908 """ 1909 def __init__(self, file, mode="r", compression=TAR_PLAIN): 1910 if compression == TAR_PLAIN: 1911 self.tarfile = TarFile.taropen(file, mode) 1912 elif compression == TAR_GZIPPED: 1913 self.tarfile = TarFile.gzopen(file, mode) 1914 else: 1915 raise ValueError, "unknown compression constant" 1916 if mode[0:1] == "r": 1917 members = self.tarfile.getmembers() 1918 for i in xrange(len(members)): 1919 m = members[i] 1920 m.filename = m.name 1921 m.file_size = m.size 1922 m.date_time = time.gmtime(m.mtime)[:6] 1923 def namelist(self): 1924 return map(lambda m: m.name, self.infolist()) 1925 def infolist(self): 1926 return filter(lambda m: m.type in REGULAR_TYPES, 1927 self.tarfile.getmembers()) 1928 def printdir(self): 1929 self.tarfile.list() 1930 def testzip(self): 1931 return 1932 def getinfo(self, name): 1933 return self.tarfile.getmember(name) 1934 def read(self, name): 1935 return self.tarfile.extractfile(self.tarfile.getmember(name)).read() 1936 def write(self, filename, arcname=None, compress_type=None): 1937 self.tarfile.add(filename, arcname) 1938 def writestr(self, zinfo, bytes): 1939 import StringIO 1940 import calendar 1941 zinfo.name = zinfo.filename 1942 zinfo.size = zinfo.file_size 1943 zinfo.mtime = calendar.timegm(zinfo.date_time) 1944 self.tarfile.addfile(zinfo, StringIO.StringIO(bytes)) 1945 def close(self): 1946 self.tarfile.close() 1947 #class TarFileCompat 1948 1949 #-------------------- 1950 # exported functions 1951 #-------------------- 1952 def is_tarfile(name): 1953 """Return True if name points to a tar archive that we 1954 are able to handle, else return False. 1955 """ 1956 try: 1957 t = open(name) 1958 t.close() 1959 return True 1960 except TarError: 1961 return False 1962 1963 open = TarFile.open 1964
Generated by PyXR 0.9.4