PyXR

c:\python24\lib \ tarfile.py



0001 #!/usr/bin/env python
0002 # -*- coding: iso-8859-1 -*-
0003 #-------------------------------------------------------------------
0004 # tarfile.py
0005 #-------------------------------------------------------------------
0006 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
0007 # All rights reserved.
0008 #
0009 # Permission  is  hereby granted,  free  of charge,  to  any person
0010 # obtaining a  copy of  this software  and associated documentation
0011 # files  (the  "Software"),  to   deal  in  the  Software   without
0012 # restriction,  including  without limitation  the  rights to  use,
0013 # copy, modify, merge, publish, distribute, sublicense, and/or sell
0014 # copies  of  the  Software,  and to  permit  persons  to  whom the
0015 # Software  is  furnished  to  do  so,  subject  to  the  following
0016 # conditions:
0017 #
0018 # The above copyright  notice and this  permission notice shall  be
0019 # included in all copies or substantial portions of the Software.
0020 #
0021 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
0022 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
0023 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
0024 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
0025 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
0026 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
0027 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
0028 # OTHER DEALINGS IN THE SOFTWARE.
0029 #
0030 """Read from and write to tar format archives.
0031 """
0032 
0033 __version__ = "$Revision: 1.21 $"
0034 # $Source: /cvsroot/python/python/dist/src/Lib/tarfile.py,v $
0035 
0036 version     = "0.6.4"
0037 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
0038 __date__    = "$Date: 2004/10/20 11:48:42 $"
0039 __cvsid__   = "$Id: tarfile.py,v 1.21 2004/10/20 11:48:42 akuchling Exp $"
0040 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
0041 
0042 #---------
0043 # Imports
0044 #---------
0045 import sys
0046 import os
0047 import shutil
0048 import stat
0049 import errno
0050 import time
0051 import struct
0052 
0053 if sys.platform == 'mac':
0054     # This module needs work for MacOS9, especially in the area of pathname
0055     # handling. In many places it is assumed a simple substitution of / by the
0056     # local os.path.sep is good enough to convert pathnames, but this does not
0057     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
0058     raise ImportError, "tarfile does not work for platform==mac"
0059 
0060 try:
0061     import grp, pwd
0062 except ImportError:
0063     grp = pwd = None
0064 
0065 # from tarfile import *
0066 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
0067 
0068 #---------------------------------------------------------
0069 # tar constants
0070 #---------------------------------------------------------
0071 NUL        = "\0"               # the null character (padding byte of header fields)
0072 BLOCKSIZE  = 512                # length of processing blocks
0073 RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
0074 MAGIC      = "ustar"            # magic tar string
0075 VERSION    = "00"               # version number
0076 
0077 LENGTH_NAME    = 100            # maximum length of a filename
0078 LENGTH_LINK    = 100            # maximum length of a linkname
0079 LENGTH_PREFIX  = 155            # maximum length of the prefix field
0080 MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits); TarInfo.tobuf() uses a binary size field above this
0081 
0082 REGTYPE  = "0"                  # regular file
0083 AREGTYPE = "\0"                 # regular file (type field holds a NUL byte)
0084 LNKTYPE  = "1"                  # link (inside tarfile)
0085 SYMTYPE  = "2"                  # symbolic link
0086 CHRTYPE  = "3"                  # character special device
0087 BLKTYPE  = "4"                  # block special device
0088 DIRTYPE  = "5"                  # directory
0089 FIFOTYPE = "6"                  # fifo special device
0090 CONTTYPE = "7"                  # contiguous file
0091 
0092 GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
0093 GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
0094 GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
0095 
0096 #---------------------------------------------------------
0097 # tarfile constants
0098 #---------------------------------------------------------
0099 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
0100                    SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
0101                    CONTTYPE, CHRTYPE, BLKTYPE,
0102                    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
0103                    GNUTYPE_SPARSE)
0104 
0105 REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
0106                  CONTTYPE, GNUTYPE_SPARSE)      # represent regular files (tested by TarInfo.isreg())
0107 
0108 #---------------------------------------------------------
0109 # Bits used in the mode field, values in octal.
0110 #---------------------------------------------------------
0111 S_IFLNK = 0120000        # symbolic link
0112 S_IFREG = 0100000        # regular file
0113 S_IFBLK = 0060000        # block device
0114 S_IFDIR = 0040000        # directory
0115 S_IFCHR = 0020000        # character device
0116 S_IFIFO = 0010000        # fifo
0117 
0118 TSUID   = 04000          # set UID on execution (shown as 's'/'S' by filemode())
0119 TSGID   = 02000          # set GID on execution (shown as 's'/'S' by filemode())
0120 TSVTX   = 01000          # reserved (shown as 't'/'T' by filemode())
0121 
0122 TUREAD  = 0400           # read by owner
0123 TUWRITE = 0200           # write by owner
0124 TUEXEC  = 0100           # execute/search by owner
0125 TGREAD  = 0040           # read by group
0126 TGWRITE = 0020           # write by group
0127 TGEXEC  = 0010           # execute/search by group
0128 TOREAD  = 0004           # read by other
0129 TOWRITE = 0002           # write by other
0130 TOEXEC  = 0001           # execute/search by other
0131 
0132 #---------------------------------------------------------
0133 # Some useful functions
0134 #---------------------------------------------------------
def nts(s):
    """Return string buffer s with its trailing NUL padding removed.
    """
    trimmed = s.rstrip(NUL)
    return trimmed
0139 
def calc_chksum(buf):
    """Return the checksum of a member's header block.

       buf is the 512 byte header string. All of its bytes are added
       up, except that the 8 byte chksum field (offsets 148 to 155) is
       counted as if it were filled with spaces.
    """
    blanks = 8 * ord(" ")                       # stand-in for the chksum field
    before = sum([ord(c) for c in buf[:148]])   # bytes in front of the field
    after = sum([ord(c) for c in buf[156:]])    # bytes behind the field
    return blanks + before + after
0150 
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

       With length None everything up to EOF is copied; with length 0
       nothing is read at all. IOError is raised when src runs out of
       data before length bytes have been copied.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    chunksize = 16 * 1024
    fullchunks, tail = divmod(length, chunksize)

    # Full-sized chunks come first ...
    copied = 0
    while copied < fullchunks:
        data = src.read(chunksize)
        if len(data) < chunksize:
            raise IOError("end of file reached")
        dst.write(data)
        copied += 1

    # ... followed by whatever is left over.
    if tail != 0:
        data = src.read(tail)
        if len(data) < tail:
            raise IOError("end of file reached")
        dst.write(data)
    return
0175 
# One inner tuple per character of the mode string. Within an inner
# tuple the (bits, character) pairs are tried in order and the first
# pair whose bits are all set in the mode supplies the character.
filemode_table = (
    # file type
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)

def filemode(mode):
    """Return the -rwxrwxrwx style string for a file's mode.
       Used by TarFile.list()
    """
    chars = []
    for alternatives in filemode_table:
        symbol = "-"                    # used when no alternative matches
        for bits, char in alternatives:
            if mode & bits == bits:
                symbol = char
                break
        chars.append(symbol)
    return "".join(chars)
0217 
# Member names inside an archive always use "/" as separator, so on
# platforms with a different os.sep the normalized path is translated.
if os.sep == "/":
    normpath = os.path.normpath
else:
    def normpath(path):
        return os.path.normpath(path).replace(os.sep, "/")
0222 
class TarError(Exception):
    """Base class of all exceptions defined by this module."""

class ExtractError(TarError):
    """General exception for extract errors."""

class ReadError(TarError):
    """Exception for unreadable tar archives."""

class CompressionError(TarError):
    """Exception for unavailable compression methods."""

class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""
0238 
0239 #---------------------------
0240 # internal stream interface
0241 #---------------------------
0242 class _LowLevelFile:
0243     """Low-level file object. Supports reading and writing.
0244        It is used instead of a regular file object for streaming
0245        access.
0246     """
0247 
0248     def __init__(self, name, mode):
0249         mode = {
0250             "r": os.O_RDONLY,
0251             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
0252         }[mode]
0253         if hasattr(os, "O_BINARY"):
0254             mode |= os.O_BINARY
0255         self.fd = os.open(name, mode)
0256 
0257     def close(self):
0258         os.close(self.fd)
0259 
0260     def read(self, size):
0261         return os.read(self.fd, size)
0262 
0263     def write(self, s):
0264         os.write(self.fd, s)
0265 
0266 class _Stream:
0267     """Class that serves as an adapter between TarFile and
0268        a stream-like object.  The stream-like object only
0269        needs to have a read() or write() method and is accessed
0270        blockwise.  Use of gzip or bzip2 compression is possible.
0271        A stream-like object could be for example: sys.stdin,
0272        sys.stdout, a socket, a tape device etc.
0273 
0274        _Stream is intended to be used only internally.
0275     """
0276 
0277     def __init__(self, name, mode, type, fileobj, bufsize):
0278         """Construct a _Stream object.
0279         """
0280         self._extfileobj = True
0281         if fileobj is None:
0282             fileobj = _LowLevelFile(name, mode)
0283             self._extfileobj = False
0284 
0285         self.name    = name or ""
0286         self.mode    = mode
0287         self.type    = type
0288         self.fileobj = fileobj
0289         self.bufsize = bufsize
0290         self.buf     = ""
0291         self.pos     = 0L
0292         self.closed  = False
0293 
0294         if type == "gz":
0295             try:
0296                 import zlib
0297             except ImportError:
0298                 raise CompressionError, "zlib module is not available"
0299             self.zlib = zlib
0300             self.crc = zlib.crc32("")
0301             if mode == "r":
0302                 self._init_read_gz()
0303             else:
0304                 self._init_write_gz()
0305 
0306         if type == "bz2":
0307             try:
0308                 import bz2
0309             except ImportError:
0310                 raise CompressionError, "bz2 module is not available"
0311             if mode == "r":
0312                 self.dbuf = ""
0313                 self.cmp = bz2.BZ2Decompressor()
0314             else:
0315                 self.cmp = bz2.BZ2Compressor()
0316 
0317     def __del__(self):
0318         if not self.closed:
0319             self.close()
0320 
0321     def _init_write_gz(self):
0322         """Initialize for writing with gzip compression.
0323         """
0324         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
0325                                             -self.zlib.MAX_WBITS,
0326                                             self.zlib.DEF_MEM_LEVEL,
0327                                             0)
0328         timestamp = struct.pack("<L", long(time.time()))
0329         self.__write("\037\213\010\010%s\002\377" % timestamp)
0330         if self.name.endswith(".gz"):
0331             self.name = self.name[:-3]
0332         self.__write(self.name + NUL)
0333 
0334     def write(self, s):
0335         """Write string s to the stream.
0336         """
0337         if self.type == "gz":
0338             self.crc = self.zlib.crc32(s, self.crc)
0339         self.pos += len(s)
0340         if self.type != "tar":
0341             s = self.cmp.compress(s)
0342         self.__write(s)
0343 
0344     def __write(self, s):
0345         """Write string s to the stream if a whole new block
0346            is ready to be written.
0347         """
0348         self.buf += s
0349         while len(self.buf) > self.bufsize:
0350             self.fileobj.write(self.buf[:self.bufsize])
0351             self.buf = self.buf[self.bufsize:]
0352 
0353     def close(self):
0354         """Close the _Stream object. No operation should be
0355            done on it afterwards.
0356         """
0357         if self.closed:
0358             return
0359 
0360         if self.mode == "w" and self.type != "tar":
0361             self.buf += self.cmp.flush()
0362         if self.mode == "w" and self.buf:
0363             self.fileobj.write(self.buf)
0364             self.buf = ""
0365             if self.type == "gz":
0366                 self.fileobj.write(struct.pack("<l", self.crc))
0367                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
0368 
0369         if not self._extfileobj:
0370             self.fileobj.close()
0371 
0372         self.closed = True
0373 
0374     def _init_read_gz(self):
0375         """Initialize for reading a gzip compressed fileobj.
0376         """
0377         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
0378         self.dbuf = ""
0379 
0380         # taken from gzip.GzipFile with some alterations
0381         if self.__read(2) != "\037\213":
0382             raise ReadError, "not a gzip file"
0383         if self.__read(1) != "\010":
0384             raise CompressionError, "unsupported compression method"
0385 
0386         flag = ord(self.__read(1))
0387         self.__read(6)
0388 
0389         if flag & 4:
0390             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
0391             self.read(xlen)
0392         if flag & 8:
0393             while True:
0394                 s = self.__read(1)
0395                 if not s or s == NUL:
0396                     break
0397         if flag & 16:
0398             while True:
0399                 s = self.__read(1)
0400                 if not s or s == NUL:
0401                     break
0402         if flag & 2:
0403             self.__read(2)
0404 
0405     def tell(self):
0406         """Return the stream's file pointer position.
0407         """
0408         return self.pos
0409 
0410     def seek(self, pos=0):
0411         """Set the stream's file pointer to pos. Negative seeking
0412            is forbidden.
0413         """
0414         if pos - self.pos >= 0:
0415             blocks, remainder = divmod(pos - self.pos, self.bufsize)
0416             for i in xrange(blocks):
0417                 self.read(self.bufsize)
0418             self.read(remainder)
0419         else:
0420             raise StreamError, "seeking backwards is not allowed"
0421         return self.pos
0422 
0423     def read(self, size=None):
0424         """Return the next size number of bytes from the stream.
0425            If size is not defined, return all bytes of the stream
0426            up to EOF.
0427         """
0428         if size is None:
0429             t = []
0430             while True:
0431                 buf = self._read(self.bufsize)
0432                 if not buf:
0433                     break
0434                 t.append(buf)
0435             buf = "".join(t)
0436         else:
0437             buf = self._read(size)
0438         self.pos += len(buf)
0439         return buf
0440 
0441     def _read(self, size):
0442         """Return size bytes from the stream.
0443         """
0444         if self.type == "tar":
0445             return self.__read(size)
0446 
0447         c = len(self.dbuf)
0448         t = [self.dbuf]
0449         while c < size:
0450             buf = self.__read(self.bufsize)
0451             if not buf:
0452                 break
0453             buf = self.cmp.decompress(buf)
0454             t.append(buf)
0455             c += len(buf)
0456         t = "".join(t)
0457         self.dbuf = t[size:]
0458         return t[:size]
0459 
0460     def __read(self, size):
0461         """Return size bytes from stream. If internal buffer is empty,
0462            read another block from the stream.
0463         """
0464         c = len(self.buf)
0465         t = [self.buf]
0466         while c < size:
0467             buf = self.fileobj.read(self.bufsize)
0468             if not buf:
0469                 break
0470             t.append(buf)
0471             c += len(buf)
0472         t = "".join(t)
0473         self.buf = t[size:]
0474         return t[:size]
0475 # class _Stream
0476 
0477 #------------------------
0478 # Extraction file object
0479 #------------------------
0480 class ExFileObject(object):
0481     """File-like object for reading an archive member.
0482        Is returned by TarFile.extractfile(). Support for
0483        sparse files included.
0484     """
0485 
0486     def __init__(self, tarfile, tarinfo):
0487         self.fileobj = tarfile.fileobj
0488         self.name    = tarinfo.name
0489         self.mode    = "r"
0490         self.closed  = False
0491         self.offset  = tarinfo.offset_data
0492         self.size    = tarinfo.size
0493         self.pos     = 0L
0494         self.linebuffer = ""
0495         if tarinfo.issparse():
0496             self.sparse = tarinfo.sparse
0497             self.read = self._readsparse
0498         else:
0499             self.read = self._readnormal
0500 
0501     def __read(self, size):
0502         """Overloadable read method.
0503         """
0504         return self.fileobj.read(size)
0505 
0506     def readline(self, size=-1):
0507         """Read a line with approx. size. If size is negative,
0508            read a whole line. readline() and read() must not
0509            be mixed up (!).
0510         """
0511         if size < 0:
0512             size = sys.maxint
0513 
0514         nl = self.linebuffer.find("\n")
0515         if nl >= 0:
0516             nl = min(nl, size)
0517         else:
0518             size -= len(self.linebuffer)
0519             while (nl < 0 and size > 0):
0520                 buf = self.read(min(size, 100))
0521                 if not buf:
0522                     break
0523                 self.linebuffer += buf
0524                 size -= len(buf)
0525                 nl = self.linebuffer.find("\n")
0526             if nl == -1:
0527                 s = self.linebuffer
0528                 self.linebuffer = ""
0529                 return s
0530         buf = self.linebuffer[:nl]
0531         self.linebuffer = self.linebuffer[nl + 1:]
0532         while buf[-1:] == "\r":
0533             buf = buf[:-1]
0534         return buf + "\n"
0535 
0536     def readlines(self):
0537         """Return a list with all (following) lines.
0538         """
0539         result = []
0540         while True:
0541             line = self.readline()
0542             if not line: break
0543             result.append(line)
0544         return result
0545 
0546     def _readnormal(self, size=None):
0547         """Read operation for regular files.
0548         """
0549         if self.closed:
0550             raise ValueError, "file is closed"
0551         self.fileobj.seek(self.offset + self.pos)
0552         bytesleft = self.size - self.pos
0553         if size is None:
0554             bytestoread = bytesleft
0555         else:
0556             bytestoread = min(size, bytesleft)
0557         self.pos += bytestoread
0558         return self.__read(bytestoread)
0559 
0560     def _readsparse(self, size=None):
0561         """Read operation for sparse files.
0562         """
0563         if self.closed:
0564             raise ValueError, "file is closed"
0565 
0566         if size is None:
0567             size = self.size - self.pos
0568 
0569         data = []
0570         while size > 0:
0571             buf = self._readsparsesection(size)
0572             if not buf:
0573                 break
0574             size -= len(buf)
0575             data.append(buf)
0576         return "".join(data)
0577 
0578     def _readsparsesection(self, size):
0579         """Read a single section of a sparse file.
0580         """
0581         section = self.sparse.find(self.pos)
0582 
0583         if section is None:
0584             return ""
0585 
0586         toread = min(size, section.offset + section.size - self.pos)
0587         if isinstance(section, _data):
0588             realpos = section.realpos + self.pos - section.offset
0589             self.pos += toread
0590             self.fileobj.seek(self.offset + realpos)
0591             return self.__read(toread)
0592         else:
0593             self.pos += toread
0594             return NUL * toread
0595 
0596     def tell(self):
0597         """Return the current file position.
0598         """
0599         return self.pos
0600 
0601     def seek(self, pos, whence=0):
0602         """Seek to a position in the file.
0603         """
0604         self.linebuffer = ""
0605         if whence == 0:
0606             self.pos = min(max(pos, 0), self.size)
0607         if whence == 1:
0608             if pos < 0:
0609                 self.pos = max(self.pos + pos, 0)
0610             else:
0611                 self.pos = min(self.pos + pos, self.size)
0612         if whence == 2:
0613             self.pos = max(min(self.size + pos, self.size), 0)
0614 
0615     def close(self):
0616         """Close the file object.
0617         """
0618         self.closed = True
0619 #class ExFileObject
0620 
0621 #------------------
0622 # Exported Classes
0623 #------------------
0624 class TarInfo(object):
0625     """Informational class which holds the details about an
0626        archive member given by a tar header block.
0627        TarInfo objects are returned by TarFile.getmember(),
0628        TarFile.getmembers() and TarFile.gettarinfo() and are
0629        usually created internally.
0630     """
0631 
0632     def __init__(self, name=""):
0633         """Construct a TarInfo object. name is the optional name
0634            of the member.
0635         """
0636 
0637         self.name     = name       # member name (dirnames must end with '/')
0638         self.mode     = 0666       # file permissions
0639         self.uid      = 0          # user id
0640         self.gid      = 0          # group id
0641         self.size     = 0          # file size
0642         self.mtime    = 0          # modification time
0643         self.chksum   = 0          # header checksum
0644         self.type     = REGTYPE    # member type
0645         self.linkname = ""         # link name
0646         self.uname    = "user"     # user name
0647         self.gname    = "group"    # group name
0648         self.devmajor = 0          #-
0649         self.devminor = 0          #-for use with CHRTYPE and BLKTYPE
0650         self.prefix   = ""         # prefix to filename or holding information
0651                                    # about sparse files
0652 
0653         self.offset   = 0          # the tar header starts here
0654         self.offset_data = 0       # the file's data starts here
0655 
0656     def __repr__(self):
0657         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
0658 
0659     def frombuf(cls, buf):
0660         """Construct a TarInfo object from a 512 byte string buffer.
0661         """
0662         tarinfo = cls()
0663         tarinfo.name   = nts(buf[0:100])
0664         tarinfo.mode   = int(buf[100:108], 8)
0665         tarinfo.uid    = int(buf[108:116],8)
0666         tarinfo.gid    = int(buf[116:124],8)
0667 
0668         # There are two possible codings for the size field we
0669         # have to discriminate, see comment in tobuf() below.
0670         if buf[124] != chr(0200):
0671             tarinfo.size = long(buf[124:136], 8)
0672         else:
0673             tarinfo.size = 0L
0674             for i in range(11):
0675                 tarinfo.size <<= 8
0676                 tarinfo.size += ord(buf[125 + i])
0677 
0678         tarinfo.mtime  = long(buf[136:148], 8)
0679         tarinfo.chksum = int(buf[148:156], 8)
0680         tarinfo.type   = buf[156:157]
0681         tarinfo.linkname = nts(buf[157:257])
0682         tarinfo.uname  = nts(buf[265:297])
0683         tarinfo.gname  = nts(buf[297:329])
0684         try:
0685             tarinfo.devmajor = int(buf[329:337], 8)
0686             tarinfo.devminor = int(buf[337:345], 8)
0687         except ValueError:
0688             tarinfo.devmajor = tarinfo.devmajor = 0
0689         tarinfo.prefix = buf[345:500]
0690 
0691         # The prefix field is used for filenames > 100 in
0692         # the POSIX standard.
0693         # name = prefix + '/' + name
0694         if tarinfo.type != GNUTYPE_SPARSE:
0695             tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name))
0696 
0697         # Directory names should have a '/' at the end.
0698         if tarinfo.isdir() and tarinfo.name[-1:] != "/":
0699             tarinfo.name += "/"
0700         return tarinfo
0701 
0702     frombuf = classmethod(frombuf)
0703 
0704     def tobuf(self):
0705         """Return a tar header block as a 512 byte string.
0706         """
0707         # Prefer the size to be encoded as 11 octal ascii digits
0708         # which is the most portable. If the size exceeds this
0709         # limit (>= 8 GB), encode it as an 88-bit value which is
0710         # a GNU tar feature.
0711         if self.size <= MAXSIZE_MEMBER:
0712             size = "%011o" % self.size
0713         else:
0714             s = self.size
0715             size = ""
0716             for i in range(11):
0717                 size = chr(s & 0377) + size
0718                 s >>= 8
0719             size = chr(0200) + size
0720 
0721         # The following code was contributed by Detlef Lannert.
0722         parts = []
0723         for value, fieldsize in (
0724                 (self.name, 100),
0725                 ("%07o" % (self.mode & 07777), 8),
0726                 ("%07o" % self.uid, 8),
0727                 ("%07o" % self.gid, 8),
0728                 (size, 12),
0729                 ("%011o" % self.mtime, 12),
0730                 ("        ", 8),
0731                 (self.type, 1),
0732                 (self.linkname, 100),
0733                 (MAGIC, 6),
0734                 (VERSION, 2),
0735                 (self.uname, 32),
0736                 (self.gname, 32),
0737                 ("%07o" % self.devmajor, 8),
0738                 ("%07o" % self.devminor, 8),
0739                 (self.prefix, 155)
0740             ):
0741             l = len(value)
0742             parts.append(value[:fieldsize] + (fieldsize - l) * NUL)
0743 
0744         buf = "".join(parts)
0745         chksum = calc_chksum(buf)
0746         buf = buf[:148] + "%06o\0" % chksum + buf[155:]
0747         buf += (BLOCKSIZE - len(buf)) * NUL
0748         self.buf = buf
0749         return buf
0750 
0751     def isreg(self):
0752         return self.type in REGULAR_TYPES
0753     def isfile(self):
0754         return self.isreg()
0755     def isdir(self):
0756         return self.type == DIRTYPE
0757     def issym(self):
0758         return self.type == SYMTYPE
0759     def islnk(self):
0760         return self.type == LNKTYPE
0761     def ischr(self):
0762         return self.type == CHRTYPE
0763     def isblk(self):
0764         return self.type == BLKTYPE
0765     def isfifo(self):
0766         return self.type == FIFOTYPE
0767     def issparse(self):
0768         return self.type == GNUTYPE_SPARSE
0769     def isdev(self):
0770         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
0771 # class TarInfo
0772 
0773 class TarFile(object):
0774     """The TarFile Class provides an interface to tar archives.
0775     """
0776 
0777     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
0778 
0779     dereference = False         # If true, add content of linked file to the
0780                                 # tar file, else the link.
0781 
0782     ignore_zeros = False        # If true, skips empty or invalid blocks and
0783                                 # continues processing.
0784 
0785     errorlevel = 0              # If 0, fatal errors only appear in debug
0786                                 # messages (if debug >= 0). If > 0, errors
0787                                 # are passed to the caller as exceptions.
0788 
0789     posix = False               # If True, generates POSIX.1-1990-compliant
0790                                 # archives (no GNU extensions!)
0791 
0792     fileobject = ExFileObject
0793 
0794     def __init__(self, name=None, mode="r", fileobj=None):
0795         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
0796            read from an existing archive, 'a' to append data to an existing
0797            file or 'w' to create a new file overwriting an existing one. `mode'
0798            defaults to 'r'.
0799            If `fileobj' is given, it is used for reading or writing data. If it
0800            can be determined, `mode' is overridden by `fileobj's mode.
0801            `fileobj' is not closed, when TarFile is closed.
0802         """
0803         self.name = name
0804 
0805         if len(mode) > 1 or mode not in "raw":
0806             raise ValueError, "mode must be 'r', 'a' or 'w'"
0807         self._mode = mode
0808         self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
0809 
0810         if not fileobj:
0811             fileobj = file(self.name, self.mode)
0812             self._extfileobj = False
0813         else:
0814             if self.name is None and hasattr(fileobj, "name"):
0815                 self.name = fileobj.name
0816             if hasattr(fileobj, "mode"):
0817                 self.mode = fileobj.mode
0818             self._extfileobj = True
0819         self.fileobj = fileobj
0820 
0821         # Init datastructures
0822         self.closed      = False
0823         self.members     = []       # list of members as TarInfo objects
0824         self._loaded     = False    # flag if all members have been read
0825         self.offset      = 0L       # current position in the archive file
0826         self.inodes      = {}       # dictionary caching the inodes of
0827                                     # archive members already added
0828 
0829         if self._mode == "r":
0830             self.firstmember = None
0831             self.firstmember = self.next()
0832 
0833         if self._mode == "a":
0834             # Move to the end of the archive,
0835             # before the first empty block.
0836             self.firstmember = None
0837             while True:
0838                 try:
0839                     tarinfo = self.next()
0840                 except ReadError:
0841                     self.fileobj.seek(0)
0842                     break
0843                 if tarinfo is None:
0844                     self.fileobj.seek(- BLOCKSIZE, 1)
0845                     break
0846 
0847         if self._mode in "aw":
0848             self._loaded = True
0849 
0850     #--------------------------------------------------------------------------
0851     # Below are the classmethods which act as alternate constructors to the
0852     # TarFile class. The open() method is the only one that is needed for
0853     # public use; it is the "super"-constructor and is able to select an
0854     # adequate "sub"-constructor for a particular compression using the mapping
0855     # from OPEN_METH.
0856     #
0857     # This concept allows one to subclass TarFile without losing the comfort of
0858     # the super-constructor. A sub-constructor is registered and made available
0859     # by adding it to the mapping in OPEN_METH.
0860 
0861     def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
0862         """Open a tar archive for reading, writing or appending. Return
0863            an appropriate TarFile class.
0864 
0865            mode:
0866            'r'          open for reading with transparent compression
0867            'r:'         open for reading exclusively uncompressed
0868            'r:gz'       open for reading with gzip compression
0869            'r:bz2'      open for reading with bzip2 compression
0870            'a' or 'a:'  open for appending
0871            'w' or 'w:'  open for writing without compression
0872            'w:gz'       open for writing with gzip compression
0873            'w:bz2'      open for writing with bzip2 compression
0874            'r|'         open an uncompressed stream of tar blocks for reading
0875            'r|gz'       open a gzip compressed stream of tar blocks
0876            'r|bz2'      open a bzip2 compressed stream of tar blocks
0877            'w|'         open an uncompressed stream for writing
0878            'w|gz'       open a gzip compressed stream for writing
0879            'w|bz2'      open a bzip2 compressed stream for writing
0880         """
0881 
0882         if not name and not fileobj:
0883             raise ValueError, "nothing to open"
0884 
0885         if ":" in mode:
0886             filemode, comptype = mode.split(":", 1)
0887             filemode = filemode or "r"
0888             comptype = comptype or "tar"
0889 
0890             # Select the *open() function according to
0891             # given compression.
0892             if comptype in cls.OPEN_METH:
0893                 func = getattr(cls, cls.OPEN_METH[comptype])
0894             else:
0895                 raise CompressionError, "unknown compression type %r" % comptype
0896             return func(name, filemode, fileobj)
0897 
0898         elif "|" in mode:
0899             filemode, comptype = mode.split("|", 1)
0900             filemode = filemode or "r"
0901             comptype = comptype or "tar"
0902 
0903             if filemode not in "rw":
0904                 raise ValueError, "mode must be 'r' or 'w'"
0905 
0906             t = cls(name, filemode,
0907                     _Stream(name, filemode, comptype, fileobj, bufsize))
0908             t._extfileobj = False
0909             return t
0910 
0911         elif mode == "r":
0912             # Find out which *open() is appropriate for opening the file.
0913             for comptype in cls.OPEN_METH:
0914                 func = getattr(cls, cls.OPEN_METH[comptype])
0915                 try:
0916                     return func(name, "r", fileobj)
0917                 except (ReadError, CompressionError):
0918                     continue
0919             raise ReadError, "file could not be opened successfully"
0920 
0921         elif mode in "aw":
0922             return cls.taropen(name, mode, fileobj)
0923 
0924         raise ValueError, "undiscernible mode"
0925 
0926     open = classmethod(open)
0927 
0928     def taropen(cls, name, mode="r", fileobj=None):
0929         """Open uncompressed tar archive name for reading or writing.
0930         """
0931         if len(mode) > 1 or mode not in "raw":
0932             raise ValueError, "mode must be 'r', 'a' or 'w'"
0933         return cls(name, mode, fileobj)
0934 
0935     taropen = classmethod(taropen)
0936 
0937     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
0938         """Open gzip compressed tar archive name for reading or writing.
0939            Appending is not allowed.
0940         """
0941         if len(mode) > 1 or mode not in "rw":
0942             raise ValueError, "mode must be 'r' or 'w'"
0943 
0944         try:
0945             import gzip
0946             gzip.GzipFile
0947         except (ImportError, AttributeError):
0948             raise CompressionError, "gzip module is not available"
0949 
0950         pre, ext = os.path.splitext(name)
0951         pre = os.path.basename(pre)
0952         if ext == ".tgz":
0953             ext = ".tar"
0954         if ext == ".gz":
0955             ext = ""
0956         tarname = pre + ext
0957 
0958         if fileobj is None:
0959             fileobj = file(name, mode + "b")
0960 
0961         if mode != "r":
0962             name = tarname
0963 
0964         try:
0965             t = cls.taropen(tarname, mode,
0966                 gzip.GzipFile(name, mode, compresslevel, fileobj)
0967             )
0968         except IOError:
0969             raise ReadError, "not a gzip file"
0970         t._extfileobj = False
0971         return t
0972 
0973     gzopen = classmethod(gzopen)
0974 
0975     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
0976         """Open bzip2 compressed tar archive name for reading or writing.
0977            Appending is not allowed.
0978         """
0979         if len(mode) > 1 or mode not in "rw":
0980             raise ValueError, "mode must be 'r' or 'w'."
0981 
0982         try:
0983             import bz2
0984         except ImportError:
0985             raise CompressionError, "bz2 module is not available"
0986 
0987         pre, ext = os.path.splitext(name)
0988         pre = os.path.basename(pre)
0989         if ext == ".tbz2":
0990             ext = ".tar"
0991         if ext == ".bz2":
0992             ext = ""
0993         tarname = pre + ext
0994 
0995         if fileobj is not None:
0996             raise ValueError, "no support for external file objects"
0997 
0998         try:
0999             t = cls.taropen(tarname, mode, bz2.BZ2File(name, mode, compresslevel=compresslevel))
1000         except IOError:
1001             raise ReadError, "not a bzip2 file"
1002         t._extfileobj = False
1003         return t
1004 
1005     bz2open = classmethod(bz2open)
1006 
    # All *open() methods are registered here.
    # Keys are the compression names that open() parses out of its mode
    # string (an empty compression part is mapped to "tar" there); values
    # are the names of the classmethods that open() looks up with getattr().
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
1013 
1014     #--------------------------------------------------------------------------
1015     # The public methods which TarFile provides:
1016 
1017     def close(self):
1018         """Close the TarFile. In write-mode, two finishing zero blocks are
1019            appended to the archive.
1020         """
1021         if self.closed:
1022             return
1023 
1024         if self._mode in "aw":
1025             self.fileobj.write(NUL * (BLOCKSIZE * 2))
1026             self.offset += (BLOCKSIZE * 2)
1027             # fill up the end with zero-blocks
1028             # (like option -b20 for tar does)
1029             blocks, remainder = divmod(self.offset, RECORDSIZE)
1030             if remainder > 0:
1031                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1032 
1033         if not self._extfileobj:
1034             self.fileobj.close()
1035         self.closed = True
1036 
1037     def getmember(self, name):
1038         """Return a TarInfo object for member `name'. If `name' can not be
1039            found in the archive, KeyError is raised. If a member occurs more
1040            than once in the archive, its last occurence is assumed to be the
1041            most up-to-date version.
1042         """
1043         tarinfo = self._getmember(name)
1044         if tarinfo is None:
1045             raise KeyError, "filename %r not found" % name
1046         return tarinfo
1047 
1048     def getmembers(self):
1049         """Return the members of the archive as a list of TarInfo objects. The
1050            list has the same order as the members in the archive.
1051         """
1052         self._check()
1053         if not self._loaded:    # if we want to obtain a list of
1054             self._load()        # all members, we first have to
1055                                 # scan the whole archive.
1056         return self.members
1057 
1058     def getnames(self):
1059         """Return the members of the archive as a list of their names. It has
1060            the same order as the list returned by getmembers().
1061         """
1062         return [tarinfo.name for tarinfo in self.getmembers()]
1063 
1064     def gettarinfo(self, name=None, arcname=None, fileobj=None):
1065         """Create a TarInfo object for either the file `name' or the file
1066            object `fileobj' (using os.fstat on its file descriptor). You can
1067            modify some of the TarInfo's attributes before you add it using
1068            addfile(). If given, `arcname' specifies an alternative name for the
1069            file in the archive.
1070         """
1071         self._check("aw")
1072 
1073         # When fileobj is given, replace name by
1074         # fileobj's real name.
1075         if fileobj is not None:
1076             name = fileobj.name
1077 
1078         # Building the name of the member in the archive.
1079         # Backward slashes are converted to forward slashes,
1080         # Absolute paths are turned to relative paths.
1081         if arcname is None:
1082             arcname = name
1083         arcname = normpath(arcname)
1084         drv, arcname = os.path.splitdrive(arcname)
1085         while arcname[0:1] == "/":
1086             arcname = arcname[1:]
1087 
1088         # Now, fill the TarInfo object with
1089         # information specific for the file.
1090         tarinfo = TarInfo()
1091 
1092         # Use os.stat or os.lstat, depending on platform
1093         # and if symlinks shall be resolved.
1094         if fileobj is None:
1095             if hasattr(os, "lstat") and not self.dereference:
1096                 statres = os.lstat(name)
1097             else:
1098                 statres = os.stat(name)
1099         else:
1100             statres = os.fstat(fileobj.fileno())
1101         linkname = ""
1102 
1103         stmd = statres.st_mode
1104         if stat.S_ISREG(stmd):
1105             inode = (statres.st_ino, statres.st_dev)
1106             if inode in self.inodes and not self.dereference:
1107                 # Is it a hardlink to an already
1108                 # archived file?
1109                 type = LNKTYPE
1110                 linkname = self.inodes[inode]
1111             else:
1112                 # The inode is added only if its valid.
1113                 # For win32 it is always 0.
1114                 type = REGTYPE
1115                 if inode[0]:
1116                     self.inodes[inode] = arcname
1117         elif stat.S_ISDIR(stmd):
1118             type = DIRTYPE
1119             if arcname[-1:] != "/":
1120                 arcname += "/"
1121         elif stat.S_ISFIFO(stmd):
1122             type = FIFOTYPE
1123         elif stat.S_ISLNK(stmd):
1124             type = SYMTYPE
1125             linkname = os.readlink(name)
1126         elif stat.S_ISCHR(stmd):
1127             type = CHRTYPE
1128         elif stat.S_ISBLK(stmd):
1129             type = BLKTYPE
1130         else:
1131             return None
1132 
1133         # Fill the TarInfo object with all
1134         # information we can get.
1135         tarinfo.name  = arcname
1136         tarinfo.mode  = stmd
1137         tarinfo.uid   = statres.st_uid
1138         tarinfo.gid   = statres.st_gid
1139         if stat.S_ISDIR(stmd):
1140             # For a directory, the size must be 0
1141             tarinfo.size  = 0
1142         else:
1143             tarinfo.size = statres.st_size
1144         tarinfo.mtime = statres.st_mtime
1145         tarinfo.type  = type
1146         tarinfo.linkname = linkname
1147         if pwd:
1148             try:
1149                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1150             except KeyError:
1151                 pass
1152         if grp:
1153             try:
1154                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1155             except KeyError:
1156                 pass
1157 
1158         if type in (CHRTYPE, BLKTYPE):
1159             if hasattr(os, "major") and hasattr(os, "minor"):
1160                 tarinfo.devmajor = os.major(statres.st_rdev)
1161                 tarinfo.devminor = os.minor(statres.st_rdev)
1162         return tarinfo
1163 
    def list(self, verbose=True):
        """Print a table of contents to sys.stdout. If `verbose' is False, only
           the names of the members are printed. If it is True, an `ls -l'-like
           output is produced: mode, owner/group, size (or major,minor for
           device members), modification time, name and, for links, the
           link target.
        """
        self._check()

        # One output line per member. Every print statement ends with a
        # comma so that the fields stay on one line; the bare print at the
        # end of the loop body terminates that line.
        for tarinfo in self:
            if verbose:
                # filemode() is a helper defined elsewhere in this module;
                # presumably it renders the mode bits as an ls-style string.
                print filemode(tarinfo.mode),
                # Fall back to the numeric ids if no names are stored.
                print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                                 tarinfo.gname or tarinfo.gid),
                if tarinfo.ischr() or tarinfo.isblk():
                    # Devices show "major,minor" in place of a size.
                    print "%10s" % ("%d,%d" \
                                    % (tarinfo.devmajor, tarinfo.devminor)),
                else:
                    print "%10d" % tarinfo.size,
                # Year, month, day, hour, minute, second in local time.
                print "%d-%02d-%02d %02d:%02d:%02d" \
                      % time.localtime(tarinfo.mtime)[:6],

            print tarinfo.name,

            if verbose:
                if tarinfo.issym():
                    print "->", tarinfo.linkname,
                if tarinfo.islnk():
                    print "link to", tarinfo.linkname,
            print
1192 
1193     def add(self, name, arcname=None, recursive=True):
1194         """Add the file `name' to the archive. `name' may be any type of file
1195            (directory, fifo, symbolic link, etc.). If given, `arcname'
1196            specifies an alternative name for the file in the archive.
1197            Directories are added recursively by default. This can be avoided by
1198            setting `recursive' to False.
1199         """
1200         self._check("aw")
1201 
1202         if arcname is None:
1203             arcname = name
1204 
1205         # Skip if somebody tries to archive the archive...
1206         if self.name is not None \
1207             and os.path.abspath(name) == os.path.abspath(self.name):
1208             self._dbg(2, "tarfile: Skipped %r" % name)
1209             return
1210 
1211         # Special case: The user wants to add the current
1212         # working directory.
1213         if name == ".":
1214             if recursive:
1215                 if arcname == ".":
1216                     arcname = ""
1217                 for f in os.listdir("."):
1218                     self.add(f, os.path.join(arcname, f))
1219             return
1220 
1221         self._dbg(1, name)
1222 
1223         # Create a TarInfo object from the file.
1224         tarinfo = self.gettarinfo(name, arcname)
1225 
1226         if tarinfo is None:
1227             self._dbg(1, "tarfile: Unsupported type %r" % name)
1228             return
1229 
1230         # Append the tar header and data to the archive.
1231         if tarinfo.isreg():
1232             f = file(name, "rb")
1233             self.addfile(tarinfo, f)
1234             f.close()
1235 
1236         if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
1237             tarinfo.size = 0L
1238             self.addfile(tarinfo)
1239 
1240         if tarinfo.isdir():
1241             self.addfile(tarinfo)
1242             if recursive:
1243                 for f in os.listdir(name):
1244                     self.add(os.path.join(name, f), os.path.join(arcname, f))
1245 
    def addfile(self, tarinfo, fileobj=None):
        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
           given, tarinfo.size bytes are read from it and added to the archive.
           You can create TarInfo objects using gettarinfo().
           On Windows platforms, `fileobj' should always be opened with mode
           'rb' to avoid irritation about the file size.

           Note that `tarinfo' is modified in place: its name and linkname
           are normalized and, if they are too long, shortened or split.
           In posix mode ValueError is raised for members that are too
           large or whose name or linkname does not fit into the header.
        """
        self._check("aw")

        tarinfo.name = normpath(tarinfo.name)
        if tarinfo.isdir():
            # directories should end with '/'
            tarinfo.name += "/"

        if tarinfo.linkname:
            tarinfo.linkname = normpath(tarinfo.linkname)

        if tarinfo.size > MAXSIZE_MEMBER:
            if self.posix:
                raise ValueError, "file is too large (>= 8 GB)"
            else:
                self._dbg(2, "tarfile: Created GNU tar largefile header")


        if len(tarinfo.linkname) > LENGTH_LINK:
            if self.posix:
                raise ValueError, "linkname is too long (>%d)" \
                                  % (LENGTH_LINK)
            else:
                # Hand the full linkname to _create_gnulong() (defined
                # elsewhere; presumably it emits a GNU longlink pseudo
                # member) and keep a truncated copy in the real header.
                self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
                tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
                self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")

        if len(tarinfo.name) > LENGTH_NAME:
            if self.posix:
                # Split the name at a "/" into a prefix and a name part.
                # Start with the longest candidate prefix and cut it back
                # to the last slash it contains.
                prefix = tarinfo.name[:LENGTH_PREFIX + 1]
                while prefix and prefix[-1] != "/":
                    prefix = prefix[:-1]

                # The name part is whatever follows that slash; the slash
                # itself is then dropped from the prefix.
                name = tarinfo.name[len(prefix):]
                prefix = prefix[:-1]

                if not prefix or len(name) > LENGTH_NAME:
                    raise ValueError, "name is too long (>%d)" \
                                      % (LENGTH_NAME)

                tarinfo.name   = name
                tarinfo.prefix = prefix
            else:
                # Same scheme as for long linknames above.
                self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
                tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
                self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")

        # Write the header and account for it as one block.
        self.fileobj.write(tarinfo.tobuf())
        self.offset += BLOCKSIZE

        # If there's data to follow, append it.
        if fileobj is not None:
            copyfileobj(fileobj, self.fileobj, tarinfo.size)
            # Pad the data with NULs up to a full block.
            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
            if remainder > 0:
                self.fileobj.write(NUL * (BLOCKSIZE - remainder))
                blocks += 1
            self.offset += blocks * BLOCKSIZE

        self.members.append(tarinfo)
1312 
1313     def extract(self, member, path=""):
1314         """Extract a member from the archive to the current working directory,
1315            using its full name. Its file information is extracted as accurately
1316            as possible. `member' may be a filename or a TarInfo object. You can
1317            specify a different directory using `path'.
1318         """
1319         self._check("r")
1320 
1321         if isinstance(member, TarInfo):
1322             tarinfo = member
1323         else:
1324             tarinfo = self.getmember(member)
1325 
1326         # Prepare the link target for makelink().
1327         if tarinfo.islnk():
1328             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
1329 
1330         try:
1331             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
1332         except EnvironmentError, e:
1333             if self.errorlevel > 0:
1334                 raise
1335             else:
1336                 if e.filename is None:
1337                     self._dbg(1, "tarfile: %s" % e.strerror)
1338                 else:
1339                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
1340         except ExtractError, e:
1341             if self.errorlevel > 1:
1342                 raise
1343             else:
1344                 self._dbg(1, "tarfile: %s" % e)
1345 
1346     def extractfile(self, member):
1347         """Extract a member from the archive as a file object. `member' may be
1348            a filename or a TarInfo object. If `member' is a regular file, a
1349            file-like object is returned. If `member' is a link, a file-like
1350            object is constructed from the link's target. If `member' is none of
1351            the above, None is returned.
1352            The file-like object is read-only and provides the following
1353            methods: read(), readline(), readlines(), seek() and tell()
1354         """
1355         self._check("r")
1356 
1357         if isinstance(member, TarInfo):
1358             tarinfo = member
1359         else:
1360             tarinfo = self.getmember(member)
1361 
1362         if tarinfo.isreg():
1363             return self.fileobject(self, tarinfo)
1364 
1365         elif tarinfo.type not in SUPPORTED_TYPES:
1366             # If a member's type is unknown, it is treated as a
1367             # regular file.
1368             return self.fileobject(self, tarinfo)
1369 
1370         elif tarinfo.islnk() or tarinfo.issym():
1371             if isinstance(self.fileobj, _Stream):
1372                 # A small but ugly workaround for the case that someone tries
1373                 # to extract a (sym)link as a file-object from a non-seekable
1374                 # stream of tar blocks.
1375                 raise StreamError, "cannot extract (sym)link as file object"
1376             else:
1377                 # A (sym)link's file object is it's target's file object.
1378                 return self.extractfile(self._getmember(tarinfo.linkname,
1379                                                         tarinfo))
1380         else:
1381             # If there's no data associated with the member (directory, chrdev,
1382             # blkdev, etc.), return None instead of a file object.
1383             return None
1384 
1385     def _extract_member(self, tarinfo, targetpath):
1386         """Extract the TarInfo object tarinfo to a physical
1387            file called targetpath.
1388         """
1389         # Fetch the TarInfo object for the given name
1390         # and build the destination pathname, replacing
1391         # forward slashes to platform specific separators.
1392         if targetpath[-1:] == "/":
1393             targetpath = targetpath[:-1]
1394         targetpath = os.path.normpath(targetpath)
1395 
1396         # Create all upper directories.
1397         upperdirs = os.path.dirname(targetpath)
1398         if upperdirs and not os.path.exists(upperdirs):
1399             ti = TarInfo()
1400             ti.name  = upperdirs
1401             ti.type  = DIRTYPE
1402             ti.mode  = 0777
1403             ti.mtime = tarinfo.mtime
1404             ti.uid   = tarinfo.uid
1405             ti.gid   = tarinfo.gid
1406             ti.uname = tarinfo.uname
1407             ti.gname = tarinfo.gname
1408             try:
1409                 self._extract_member(ti, ti.name)
1410             except:
1411                 pass
1412 
1413         if tarinfo.islnk() or tarinfo.issym():
1414             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
1415         else:
1416             self._dbg(1, tarinfo.name)
1417 
1418         if tarinfo.isreg():
1419             self.makefile(tarinfo, targetpath)
1420         elif tarinfo.isdir():
1421             self.makedir(tarinfo, targetpath)
1422         elif tarinfo.isfifo():
1423             self.makefifo(tarinfo, targetpath)
1424         elif tarinfo.ischr() or tarinfo.isblk():
1425             self.makedev(tarinfo, targetpath)
1426         elif tarinfo.islnk() or tarinfo.issym():
1427             self.makelink(tarinfo, targetpath)
1428         elif tarinfo.type not in SUPPORTED_TYPES:
1429             self.makeunknown(tarinfo, targetpath)
1430         else:
1431             self.makefile(tarinfo, targetpath)
1432 
1433         self.chown(tarinfo, targetpath)
1434         if not tarinfo.issym():
1435             self.chmod(tarinfo, targetpath)
1436             self.utime(tarinfo, targetpath)
1437 
1438     #--------------------------------------------------------------------------
1439     # Below are the different file methods. They are called via
1440     # _extract_member() when extract() is called. They can be replaced in a
1441     # subclass to implement other functionality.
1442 
1443     def makedir(self, tarinfo, targetpath):
1444         """Make a directory called targetpath.
1445         """
1446         try:
1447             os.mkdir(targetpath)
1448         except EnvironmentError, e:
1449             if e.errno != errno.EEXIST:
1450                 raise
1451 
1452     def makefile(self, tarinfo, targetpath):
1453         """Make a file called targetpath.
1454         """
1455         source = self.extractfile(tarinfo)
1456         target = file(targetpath, "wb")
1457         copyfileobj(source, target)
1458         source.close()
1459         target.close()
1460 
1461     def makeunknown(self, tarinfo, targetpath):
1462         """Make a file from a TarInfo object with an unknown type
1463            at targetpath.
1464         """
1465         self.makefile(tarinfo, targetpath)
1466         self._dbg(1, "tarfile: Unknown file type %r, " \
1467                      "extracted as regular file." % tarinfo.type)
1468 
1469     def makefifo(self, tarinfo, targetpath):
1470         """Make a fifo called targetpath.
1471         """
1472         if hasattr(os, "mkfifo"):
1473             os.mkfifo(targetpath)
1474         else:
1475             raise ExtractError, "fifo not supported by system"
1476 
1477     def makedev(self, tarinfo, targetpath):
1478         """Make a character or block device called targetpath.
1479         """
1480         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
1481             raise ExtractError, "special devices not supported by system"
1482 
1483         mode = tarinfo.mode
1484         if tarinfo.isblk():
1485             mode |= stat.S_IFBLK
1486         else:
1487             mode |= stat.S_IFCHR
1488 
1489         os.mknod(targetpath, mode,
1490                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
1491 
    def makelink(self, tarinfo, targetpath):
        """Make a (symbolic) link called targetpath. If it cannot be created
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.

           IOError is raised if neither the link nor a copy can be created.
        """
        linkpath = tarinfo.linkname
        try:
            if tarinfo.issym():
                os.symlink(linkpath, targetpath)
            else:
                # See extract().
                os.link(tarinfo._link_target, targetpath)
        except AttributeError:
            # os.symlink or os.link is not available on this platform.
            # NOTE(review): a missing tarinfo._link_target (member not
            # routed through extract()) would land here as well.
            if tarinfo.issym():
                # A symlink target is relative to the directory of the
                # link itself; turn it into a member name.
                linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                        linkpath)
                linkpath = normpath(linkpath)

            try:
                # First choice: extract the referenced archive member
                # under the link's name.
                self._extract_member(self.getmember(linkpath), targetpath)
            except (EnvironmentError, KeyError), e:
                # Second choice: copy the referenced file from the file
                # system.
                linkpath = os.path.normpath(linkpath)
                try:
                    shutil.copy2(linkpath, targetpath)
                except EnvironmentError, e:
                    raise IOError, "link could not be created"
1518 
1519     def chown(self, tarinfo, targetpath):
1520         """Set owner of targetpath according to tarinfo.
1521         """
1522         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
1523             # We have to be root to do so.
1524             try:
1525                 g = grp.getgrnam(tarinfo.gname)[2]
1526             except KeyError:
1527                 try:
1528                     g = grp.getgrgid(tarinfo.gid)[2]
1529                 except KeyError:
1530                     g = os.getgid()
1531             try:
1532                 u = pwd.getpwnam(tarinfo.uname)[2]
1533             except KeyError:
1534                 try:
1535                     u = pwd.getpwuid(tarinfo.uid)[2]
1536                 except KeyError:
1537                     u = os.getuid()
1538             try:
1539                 if tarinfo.issym() and hasattr(os, "lchown"):
1540                     os.lchown(targetpath, u, g)
1541                 else:
1542                     if sys.platform != "os2emx":
1543                         os.chown(targetpath, u, g)
1544             except EnvironmentError, e:
1545                 raise ExtractError, "could not change owner"
1546 
1547     def chmod(self, tarinfo, targetpath):
1548         """Set file permissions of targetpath according to tarinfo.
1549         """
1550         if hasattr(os, 'chmod'):
1551             try:
1552                 os.chmod(targetpath, tarinfo.mode)
1553             except EnvironmentError, e:
1554                 raise ExtractError, "could not change mode"
1555 
1556     def utime(self, tarinfo, targetpath):
1557         """Set modification time of targetpath according to tarinfo.
1558         """
1559         if not hasattr(os, 'utime'):
1560             return
1561         if sys.platform == "win32" and tarinfo.isdir():
1562             # According to msdn.microsoft.com, it is an error (EACCES)
1563             # to use utime() on directories.
1564             return
1565         try:
1566             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
1567         except EnvironmentError, e:
1568             raise ExtractError, "could not change modification time"
1569 
1570     #--------------------------------------------------------------------------
1571 
1572     def next(self):
1573         """Return the next member of the archive as a TarInfo object, when
1574            TarFile is opened for reading. Return None if there is no more
1575            available.
1576         """
1577         self._check("ra")
1578         if self.firstmember is not None:
1579             m = self.firstmember
1580             self.firstmember = None
1581             return m
1582 
1583         # Read the next block.
1584         self.fileobj.seek(self.offset)
1585         while True:
1586             buf = self.fileobj.read(BLOCKSIZE)
1587             if not buf:
1588                 return None
1589             try:
1590                 tarinfo = TarInfo.frombuf(buf)
1591             except ValueError:
1592                 if self.ignore_zeros:
1593                     if buf.count(NUL) == BLOCKSIZE:
1594                         adj = "empty"
1595                     else:
1596                         adj = "invalid"
1597                     self._dbg(2, "0x%X: %s block" % (self.offset, adj))
1598                     self.offset += BLOCKSIZE
1599                     continue
1600                 else:
1601                     # Block is empty or unreadable.
1602                     if self.offset == 0:
1603                         # If the first block is invalid. That does not
1604                         # look like a tar archive we can handle.
1605                         raise ReadError,"empty, unreadable or compressed file"
1606                     return None
1607             break
1608 
1609         # We shouldn't rely on this checksum, because some tar programs
1610         # calculate it differently and it is merely validating the
1611         # header block. We could just as well skip this part, which would
1612         # have a slight effect on performance...
1613         if tarinfo.chksum != calc_chksum(buf):
1614             self._dbg(1, "tarfile: Bad Checksum %r" % tarinfo.name)
1615 
1616         # Set the TarInfo object's offset to the current position of the
1617         # TarFile and set self.offset to the position where the data blocks
1618         # should begin.
1619         tarinfo.offset = self.offset
1620         self.offset += BLOCKSIZE
1621 
1622         # Check if the TarInfo object has a typeflag for which a callback
1623         # method is registered in the TYPE_METH. If so, then call it.
1624         if tarinfo.type in self.TYPE_METH:
1625             return self.TYPE_METH[tarinfo.type](self, tarinfo)
1626 
1627         tarinfo.offset_data = self.offset
1628         if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
1629             # Skip the following data blocks.
1630             self.offset += self._block(tarinfo.size)
1631 
1632         if tarinfo.isreg() and tarinfo.name[:-1] == "/":
1633             # some old tar programs don't know DIRTYPE
1634             tarinfo.type = DIRTYPE
1635 
1636         self.members.append(tarinfo)
1637         return tarinfo
1638 
1639     #--------------------------------------------------------------------------
1640     # Below are some methods which are called for special typeflags in the
1641     # next() method, e.g. for unwrapping GNU longname/longlink blocks. They
1642     # are registered in TYPE_METH below. You can register your own methods
1643     # with this mapping.
1644     # A registered method is called with a TarInfo object as only argument.
1645     #
1646     # During its execution the method MUST perform the following tasks:
1647     # 1. set tarinfo.offset_data to the position where the data blocks begin,
1648     #    if there is data to follow.
1649     # 2. set self.offset to the position where the next member's header will
1650     #    begin.
1651     # 3. append the tarinfo object to self.members, if it is supposed to appear
1652     #    as a member of the TarFile object.
1653     # 4. return tarinfo or another valid TarInfo object.
1654 
1655     def proc_gnulong(self, tarinfo):
1656         """Evaluate the blocks that hold a GNU longname
1657            or longlink member.
1658         """
1659         buf = ""
1660         count = tarinfo.size
1661         while count > 0:
1662             block = self.fileobj.read(BLOCKSIZE)
1663             buf += block
1664             self.offset += BLOCKSIZE
1665             count -= BLOCKSIZE
1666 
1667         # Fetch the next header
1668         next = self.next()
1669 
1670         next.offset = tarinfo.offset
1671         if tarinfo.type == GNUTYPE_LONGNAME:
1672             next.name = nts(buf)
1673         elif tarinfo.type == GNUTYPE_LONGLINK:
1674             next.linkname = nts(buf)
1675 
1676         return next
1677 
    def proc_sparse(self, tarinfo):
        """Analyze a GNU sparse header plus extra headers.

           Builds a _ringbuffer of _data and _hole sections describing the
           layout of the sparse file and stores it in tarinfo.sparse.
           tarinfo.offset_data is set to the start of the data blocks,
           self.offset is advanced past them, and tarinfo.size is replaced
           by the original file size read from the header. The tarinfo is
           appended to self.members and returned.
        """
        # Work on the raw header block as rebuilt by tarinfo.tobuf().
        buf = tarinfo.tobuf()
        sp = _ringbuffer()
        # Byte position of the first sparse struct inside the header block.
        pos = 386
        # lastpos: end of the last section seen in the original file.
        # realpos: running position inside the data stored in the archive.
        lastpos = 0L
        realpos = 0L
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                # Each struct is two 12-byte octal fields: offset, numbytes.
                offset = int(buf[pos:pos + 12], 8)
                numbytes = int(buf[pos + 12:pos + 24], 8)
            except ValueError:
                # A field that is not an octal number ends the list.
                break
            if offset > lastpos:
                # Gap between the previous section and this one.
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        # Byte 482 is the "more headers follow" flag, bytes 483-494 hold
        # the original file size as an octal number.
        isextended = ord(buf[482])
        origsize = int(buf[483:495], 8)

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = self.fileobj.read(BLOCKSIZE)
            self.offset += BLOCKSIZE
            pos = 0
            # An extension block holds up to 21 structs, starting at byte 0.
            for i in xrange(21):
                try:
                    offset = int(buf[pos:pos + 12], 8)
                    numbytes = int(buf[pos + 12:pos + 24], 8)
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            # Byte 504 of an extension block is its own isextended flag.
            isextended = ord(buf[504])

        # Anything between the last section and the original size is a hole.
        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        tarinfo.sparse = sp

        # The data blocks are skipped using the size from the header,
        # before tarinfo.size is overwritten with the original size.
        tarinfo.offset_data = self.offset
        self.offset += self._block(tarinfo.size)
        tarinfo.size = origsize

        self.members.append(tarinfo)
        return tarinfo
1735 
    # The type mapping for the next() method. The keys are single character
    # strings, the typeflag. The values are methods which are called when
    # next() encounters such a typeflag.
    # next() looks the entry up via self.TYPE_METH and calls it as
    # method(self, tarinfo); its return value becomes next()'s return value.
    TYPE_METH = {
        GNUTYPE_LONGNAME: proc_gnulong,
        GNUTYPE_LONGLINK: proc_gnulong,
        GNUTYPE_SPARSE:   proc_sparse
    }
1744 
1745     #--------------------------------------------------------------------------
1746     # Little helper methods:
1747 
1748     def _block(self, count):
1749         """Round up a byte count by BLOCKSIZE and return it,
1750            e.g. _block(834) => 1024.
1751         """
1752         blocks, remainder = divmod(count, BLOCKSIZE)
1753         if remainder:
1754             blocks += 1
1755         return blocks * BLOCKSIZE
1756 
1757     def _getmember(self, name, tarinfo=None):
1758         """Find an archive member by name from bottom to top.
1759            If tarinfo is given, it is used as the starting point.
1760         """
1761         # Ensure that all members have been loaded.
1762         members = self.getmembers()
1763 
1764         if tarinfo is None:
1765             end = len(members)
1766         else:
1767             end = members.index(tarinfo)
1768 
1769         for i in xrange(end - 1, -1, -1):
1770             if name == members[i].name:
1771                 return members[i]
1772 
1773     def _load(self):
1774         """Read through the entire archive file and look for readable
1775            members.
1776         """
1777         while True:
1778             tarinfo = self.next()
1779             if tarinfo is None:
1780                 break
1781         self._loaded = True
1782 
1783     def _check(self, mode=None):
1784         """Check if TarFile is still open, and if the operation's mode
1785            corresponds to TarFile's mode.
1786         """
1787         if self.closed:
1788             raise IOError, "%s is closed" % self.__class__.__name__
1789         if mode is not None and self._mode not in mode:
1790             raise IOError, "bad operation for mode %r" % self._mode
1791 
1792     def __iter__(self):
1793         """Provide an iterator object.
1794         """
1795         if self._loaded:
1796             return iter(self.members)
1797         else:
1798             return TarIter(self)
1799 
1800     def _create_gnulong(self, name, type):
1801         """Write a GNU longname/longlink member to the TarFile.
1802            It consists of an extended tar header, with the length
1803            of the longname as size, followed by data blocks,
1804            which contain the longname as a null terminated string.
1805         """
1806         name += NUL
1807 
1808         tarinfo = TarInfo()
1809         tarinfo.name = "././@LongLink"
1810         tarinfo.type = type
1811         tarinfo.mode = 0
1812         tarinfo.size = len(name)
1813 
1814         # write extended header
1815         self.fileobj.write(tarinfo.tobuf())
1816         self.offset += BLOCKSIZE
1817         # write name blocks
1818         self.fileobj.write(name)
1819         blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
1820         if remainder > 0:
1821             self.fileobj.write(NUL * (BLOCKSIZE - remainder))
1822             blocks += 1
1823         self.offset += blocks * BLOCKSIZE
1824 
1825     def _dbg(self, level, msg):
1826         """Write debugging output to sys.stderr.
1827         """
1828         if level <= self.debug:
1829             print >> sys.stderr, msg
1830 # class TarFile
1831 
1832 class TarIter:
1833     """Iterator Class.
1834 
1835        for tarinfo in TarFile(...):
1836            suite...
1837     """
1838 
1839     def __init__(self, tarfile):
1840         """Construct a TarIter object.
1841         """
1842         self.tarfile = tarfile
1843     def __iter__(self):
1844         """Return iterator object.
1845         """
1846         return self
1847     def next(self):
1848         """Return the next item using TarFile's next() method.
1849            When all members have been read, set TarFile as _loaded.
1850         """
1851         tarinfo = self.tarfile.next()
1852         if not tarinfo:
1853             self.tarfile._loaded = True
1854             raise StopIteration
1855         return tarinfo
1856 
1857 # Helper classes for sparse file support
class _section:
    """Common base of _data and _hole: a range of size bytes
       starting at offset.
    """
    def __init__(self, offset, size):
        self.offset, self.size = offset, size

    def __contains__(self, offset):
        # True when offset lies in [self.offset, self.offset + self.size).
        start = self.offset
        return start <= offset < start + self.size
1866 
class _data(_section):
    """A data section of a sparse file. In addition to offset and
       size it carries realpos, as passed in by the creator.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
1873 
class _hole(_section):
    """A hole section of a sparse file. Adds nothing to _section;
       the class itself tells holes and data apart.
    """
1878 
class _ringbuffer(list):
    """List that remembers the index of the last hit, so that the
       next search starts there instead of at the beginning.
    """
    def __init__(self):
        self.idx = 0

    def find(self, offset):
        """Return the first item containing offset, searching circularly
           from the last hit. Return None after one full round without
           a match.
        """
        start = self.idx
        pos = start
        while True:
            candidate = self[pos]
            if offset in candidate:
                # Remember where we found it for the next search.
                self.idx = pos
                return candidate
            # Advance, wrapping around at the end of the list.
            pos = (pos + 1) % len(self)
            if pos == start:
                # End of File
                return None
1899 
1900 #---------------------------------------------
1901 # zipfile compatible TarFile class
1902 #---------------------------------------------
1903 TAR_PLAIN = 0           # zipfile.ZIP_STORED
1904 TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.

       The wrapped TarFile is available as self.tarfile. compression
       selects between TarFile.taropen (TAR_PLAIN) and TarFile.gzopen
       (TAR_GZIPPED); any other value raises ValueError.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        if compression == TAR_PLAIN:
            self.tarfile = TarFile.taropen(file, mode)
        elif compression == TAR_GZIPPED:
            self.tarfile = TarFile.gzopen(file, mode)
        else:
            raise ValueError("unknown compression constant")
        if mode[0:1] == "r":
            # Give every member the attribute names zipfile users expect.
            for m in self.tarfile.getmembers():
                m.filename = m.name
                m.file_size = m.size
                m.date_time = time.gmtime(m.mtime)[:6]
    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [m.name for m in self.infolist()]
    def infolist(self):
        """Return the members whose type is one of REGULAR_TYPES."""
        return [m for m in self.tarfile.getmembers()
                if m.type in REGULAR_TYPES]
    def printdir(self):
        self.tarfile.list()
    def testzip(self):
        # No integrity test is performed; always returns None.
        return
    def getinfo(self, name):
        return self.tarfile.getmember(name)
    def read(self, name):
        """Return the complete contents of the member called name."""
        return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
    def write(self, filename, arcname=None, compress_type=None):
        # compress_type is accepted for zipfile compatibility and ignored.
        self.tarfile.add(filename, arcname)
    def writestr(self, zinfo, bytes):
        """Add the string bytes to the archive, using the filename and
           date_time attributes of zinfo.
        """
        import StringIO
        import calendar
        # addfile() needs a TarInfo, which a zipfile-style info object is
        # not, so build one here. The size comes from the data actually
        # supplied rather than from zinfo.file_size.
        tinfo = TarInfo()
        tinfo.name = zinfo.filename
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO.StringIO(bytes))
    def close(self):
        self.tarfile.close()
#class TarFileCompat
1948 
1949 #--------------------
1950 # exported functions
1951 #--------------------
def is_tarfile(name):
    """Tell whether name can be opened as a tar archive by this module:
       True if opening (and closing) it works, False if that raises
       TarError.
    """
    try:
        open(name).close()
    except TarError:
        return False
    return True
1962 
# Module-level alias for TarFile.open. From here on the global name
# "open" in this module refers to TarFile.open rather than the builtin;
# is_tarfile() above relies on that when it calls open(name).
open = TarFile.open
1964 

Generated by PyXR 0.9.4
SourceForge.net Logo