PyXR

c:\python24\lib \ mimetypes.py



0001 """Guess the MIME type of a file.
0002 
0003 This module defines two useful functions:
0004 
0005 guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.
0006 
0007 guess_extension(type, strict=1) -- guess the extension for a given MIME type.
0008 
0009 It also contains the following, for tuning the behavior:
0010 
0011 Data:
0012 
0013 knownfiles -- list of files to parse
0014 inited -- flag set when init() has been called
0015 suffix_map -- dictionary mapping suffixes to suffixes
0016 encodings_map -- dictionary mapping suffixes to encodings
0017 types_map -- dictionary mapping suffixes to types
0018 
0019 Functions:
0020 
0021 init([files]) -- parse a list of files, default knownfiles
0022 read_mime_types(file) -- parse one file, return a dictionary or None
0023 """
0024 
0025 import os
0026 import posixpath
0027 import urllib
0028 
0029 __all__ = [
0030     "guess_type","guess_extension","guess_all_extensions",
0031     "add_type","read_mime_types","init"
0032 ]
0033 
0034 knownfiles = [
0035     "/etc/mime.types",
0036     "/usr/local/etc/httpd/conf/mime.types",
0037     "/usr/local/lib/netscape/mime.types",
0038     "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
0039     "/usr/local/etc/mime.types",                # Apache 1.3
0040     ]
0041 
0042 inited = False
0043 
0044 
class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.

    Instance attributes:

    encodings_map -- copy of the module-level suffix -> encoding table
    suffix_map    -- copy of the module-level suffix -> suffix table
    types_map     -- pair of dicts (non-strict, strict), suffix -> type
    types_map_inv -- pair of dicts (non-strict, strict), type -> [suffixes]

    The pairs are indexed with the boolean `strict' flag, so index
    False (0) holds the non-standard types and True (1) the standard ones.
    """

    def __init__(self, filenames=(), strict=True):
        """Seed the datastore from the module tables, then read `filenames'.

        Each name in `filenames' is passed to read() together with
        `strict'.
        """
        # The module-level tables may have to be published by init() first.
        if not inited:
            init()
        self.encodings_map = encodings_map.copy()
        self.suffix_map = suffix_map.copy()
        self.types_map = ({}, {}) # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for (ext, mime_type) in types_map.items():
            self.add_type(mime_type, ext, True)
        for (ext, mime_type) in common_types.items():
            self.add_type(mime_type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file based on its URL.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are first tried case sensitive, then
        case insensitive.

        The suffixes .tgz, .taz and .tz (case sensitive!) are all
        mapped to '.tar.gz'.  (This is table-driven too, using the
        dictionary suffix_map.)

        A malformed data: URL (one without a comma) yields (None, None).

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        scheme, url = urllib.splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            if semi >= 0:
                type = url[:semi]
            else:
                type = url[:comma]
            if '=' in type or '/' not in type:
                type = 'text/plain'
            return type, None           # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand aliases such as .tgz -> .tar.gz until none applies.
        while ext in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext])
        # Peel off one encoding suffix (.gz, .Z) to expose the type suffix.
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        # Standard types are always consulted first; the non-standard
        # table is only a fallback when the caller is lenient.
        tables = [self.types_map[True]]
        if not strict:
            tables.append(self.types_map[False])
        lowered = ext.lower()
        for table in tables:
            if ext in table:
                return table[ext], encoding
            if lowered in table:
                return table[lowered], encoding
        return None, encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().
        The list is empty when no extension is known, and it is a fresh
        list that the caller may modify freely.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        # Copy: appending the non-strict extensions below must not leak
        # into the stored strict list (that would change later strict
        # answers), nor should callers receive our internal list.
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.

        Errors from open() propagate to the caller.  The file is closed
        even when parsing fails.
        """
        fp = open(filename)
        try:
            self.readfp(fp, strict)
        finally:
            fp.close()

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method.  Each non-empty line is
        "type suffix suffix ..."; everything from the first word that
        starts with '#' to the end of the line is ignored.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while 1:
            line = fp.readline()
            if not line:
                break
            words = line.split()
            # split() never yields empty words, so word[0] is safe.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)
0217 
def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Returns a tuple (type, encoding).  `type' is a string of the form
    type/subtype, usable for a MIME Content-type header, or None when
    it cannot be guessed (no suffix or an unknown one).  `encoding' is
    the name of the program used to encode the file (e.g. compress or
    gzip), or None when there is no encoding.

    All mappings are table driven.  Encoding suffixes are matched case
    sensitively; type suffixes are tried case sensitively first and
    then case insensitively.  The suffixes .tgz, .taz and .tz (case
    sensitive!) are all mapped to ".tar.gz" through the dictionary
    suffix_map.

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    # init() rebinds the module-level name `guess_type' to the bound
    # method of the shared database, so the call below reaches that
    # method instead of recursing into this bootstrap function.
    init()
    return guess_type(url, strict)
0238 
0239 
def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Returns a list of strings giving the possible filename extensions,
    each including the leading dot ('.').  The extensions are not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  The list is empty when no extension is known for
    `type'.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # init() rebinds the module-level name `guess_all_extensions' to the
    # bound method of the shared database, so the call below reaches
    # that method instead of recursing into this bootstrap function.
    init()
    return guess_all_extensions(type, strict)
0255 
def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Returns one filename extension as a string, including the leading
    dot ('.'), or None when no extension can be guessed for `type'.
    The extension is not guaranteed to have been associated with any
    particular data stream, but would be mapped to the MIME type
    `type' by guess_type().

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    # init() rebinds the module-level name `guess_extension' to the
    # bound method of the shared database, so the call below reaches
    # that method instead of recursing into this bootstrap function.
    init()
    return guess_extension(type, strict)
0270 
def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    An extension that is already known gets `type' as its new type,
    replacing the old one.  A type that is already known gets `ext'
    added to its list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    # init() rebinds the module-level name `add_type' to the bound
    # method of the shared database, so the call below reaches that
    # method instead of recursing into this bootstrap function.
    init()
    return add_type(type, ext, strict)
0285 
0286 
def init(files=None):
    """(Re)build the shared MIME database and publish it at module level.

    `files' is a list of mime.types-style files to parse; when it is
    None the module-level `knownfiles' list is used.  Entries that are
    not existing regular files are skipped silently.

    After the call, the module-level tables (encodings_map, suffix_map,
    types_map, common_types) refer to the new database's dictionaries,
    and the module-level functions guess_type, guess_extension,
    guess_all_extensions and add_type are rebound to its methods.
    """
    global guess_all_extensions, guess_extension, guess_type
    global suffix_map, types_map, encodings_map, common_types
    global add_type, inited
    # Must be set before MimeTypes() is constructed: its __init__ calls
    # init() whenever the flag is still false.
    inited = True
    db = MimeTypes()
    if files is None:
        files = knownfiles
    for path in files:
        if os.path.isfile(path):
            fp = open(path)
            # Close the handle ourselves instead of relying on garbage
            # collection, including when parsing raises.
            try:
                db.readfp(fp)
            finally:
                fp.close()
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    guess_all_extensions = db.guess_all_extensions
    guess_extension = db.guess_extension
    guess_type = db.guess_type
    add_type = db.add_type
    common_types = db.types_map[False]
0306 
0307 
def read_mime_types(file):
    """Parse one mime.types-format file and return its suffix -> type map.

    `file' is the pathname to open.  Returns None when the file cannot
    be opened (IOError).  Otherwise returns the strict types dictionary
    of a fresh MimeTypes instance after the file has been read into it,
    so the built-in standard types are included as well.
    """
    try:
        f = open(file)
    except IOError:
        return None
    # Close the handle deterministically, even if parsing raises.
    try:
        db = MimeTypes()
        db.readfp(f, True)
    finally:
        f.close()
    return db.types_map[True]
0316 
0317 
# Shorthand suffixes that stand for a compressed tar archive; each one is
# rewritten to '.tar.gz' before the type lookup.
suffix_map = dict.fromkeys(('.tgz', '.taz', '.tz'), '.tar.gz')

# Suffixes that name an encoding (compression) layered over the real type.
encodings_map = {'.gz': 'gzip', '.Z': 'compress'}
0328 
0329 # Before adding new types, make sure they are either registered with IANA, at
0330 # http://www.isi.edu/in-notes/iana/assignments/media-types
0331 # or extensions, i.e. using the x- prefix
0332 
0333 # If you add to these, please keep them sorted!
# Standard suffix -> MIME type table (consulted for strict lookups).
types_map = {
    '.a'      : 'application/octet-stream',
    '.ai'     : 'application/postscript',
    '.aif'    : 'audio/x-aiff',
    '.aifc'   : 'audio/x-aiff',
    '.aiff'   : 'audio/x-aiff',
    '.au'     : 'audio/basic',
    '.avi'    : 'video/x-msvideo',
    '.bat'    : 'text/plain',
    '.bcpio'  : 'application/x-bcpio',
    '.bin'    : 'application/octet-stream',
    '.bmp'    : 'image/x-ms-bmp',
    '.c'      : 'text/plain',
    # '.cdf' is also seen as 'application/x-cdf'; a dict can hold only one
    # value per key, and this is the one that was effective.
    '.cdf'    : 'application/x-netcdf',
    '.cpio'   : 'application/x-cpio',
    '.csh'    : 'application/x-csh',
    '.css'    : 'text/css',
    '.dll'    : 'application/octet-stream',
    '.doc'    : 'application/msword',
    '.dot'    : 'application/msword',
    '.dvi'    : 'application/x-dvi',
    '.eml'    : 'message/rfc822',
    '.eps'    : 'application/postscript',
    '.etx'    : 'text/x-setext',
    '.exe'    : 'application/octet-stream',
    '.gif'    : 'image/gif',
    '.gtar'   : 'application/x-gtar',
    '.h'      : 'text/plain',
    '.hdf'    : 'application/x-hdf',
    '.htm'    : 'text/html',
    '.html'   : 'text/html',
    '.ief'    : 'image/ief',
    '.jpe'    : 'image/jpeg',
    '.jpeg'   : 'image/jpeg',
    '.jpg'    : 'image/jpeg',
    '.js'     : 'application/x-javascript',
    '.ksh'    : 'text/plain',
    '.latex'  : 'application/x-latex',
    '.m1v'    : 'video/mpeg',
    '.man'    : 'application/x-troff-man',
    '.me'     : 'application/x-troff-me',
    '.mht'    : 'message/rfc822',
    '.mhtml'  : 'message/rfc822',
    '.mif'    : 'application/x-mif',
    '.mov'    : 'video/quicktime',
    '.movie'  : 'video/x-sgi-movie',
    '.mp2'    : 'audio/mpeg',
    '.mp3'    : 'audio/mpeg',
    '.mpa'    : 'video/mpeg',
    '.mpe'    : 'video/mpeg',
    '.mpeg'   : 'video/mpeg',
    '.mpg'    : 'video/mpeg',
    '.ms'     : 'application/x-troff-ms',
    '.nc'     : 'application/x-netcdf',
    '.nws'    : 'message/rfc822',
    '.o'      : 'application/octet-stream',
    '.obj'    : 'application/octet-stream',
    '.oda'    : 'application/oda',
    '.p12'    : 'application/x-pkcs12',
    '.p7c'    : 'application/pkcs7-mime',
    '.pbm'    : 'image/x-portable-bitmap',
    '.pdf'    : 'application/pdf',
    '.pfx'    : 'application/x-pkcs12',
    '.pgm'    : 'image/x-portable-graymap',
    '.pl'     : 'text/plain',
    '.png'    : 'image/png',
    '.pnm'    : 'image/x-portable-anymap',
    '.pot'    : 'application/vnd.ms-powerpoint',
    '.ppa'    : 'application/vnd.ms-powerpoint',
    '.ppm'    : 'image/x-portable-pixmap',
    '.pps'    : 'application/vnd.ms-powerpoint',
    '.ppt'    : 'application/vnd.ms-powerpoint',
    '.ps'     : 'application/postscript',
    '.pwz'    : 'application/vnd.ms-powerpoint',
    '.py'     : 'text/x-python',
    '.pyc'    : 'application/x-python-code',
    '.pyo'    : 'application/x-python-code',
    '.qt'     : 'video/quicktime',
    '.ra'     : 'audio/x-pn-realaudio',
    '.ram'    : 'application/x-pn-realaudio',
    '.ras'    : 'image/x-cmu-raster',
    '.rdf'    : 'application/xml',
    '.rgb'    : 'image/x-rgb',
    '.roff'   : 'application/x-troff',
    '.rtx'    : 'text/richtext',
    '.sgm'    : 'text/x-sgml',
    '.sgml'   : 'text/x-sgml',
    '.sh'     : 'application/x-sh',
    '.shar'   : 'application/x-shar',
    '.snd'    : 'audio/basic',
    '.so'     : 'application/octet-stream',
    '.src'    : 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc' : 'application/x-sv4crc',
    '.swf'    : 'application/x-shockwave-flash',
    '.t'      : 'application/x-troff',
    '.tar'    : 'application/x-tar',
    '.tcl'    : 'application/x-tcl',
    '.tex'    : 'application/x-tex',
    '.texi'   : 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif'    : 'image/tiff',
    '.tiff'   : 'image/tiff',
    '.tr'     : 'application/x-troff',
    '.tsv'    : 'text/tab-separated-values',
    '.txt'    : 'text/plain',
    '.ustar'  : 'application/x-ustar',
    '.vcf'    : 'text/x-vcard',
    '.wav'    : 'audio/x-wav',
    '.wiz'    : 'application/msword',
    '.xbm'    : 'image/x-xbitmap',
    '.xlb'    : 'application/vnd.ms-excel',
    # '.xls' is also seen as 'application/excel'; a dict can hold only one
    # value per key, and this is the one that was effective.
    '.xls'    : 'application/vnd.ms-excel',
    '.xml'    : 'text/xml',
    '.xpm'    : 'image/x-xpixmap',
    '.xsl'    : 'application/xml',
    '.xwd'    : 'image/x-xwindowdump',
    '.zip'    : 'application/zip',
    }
0457 
0458 # These are non-standard types, commonly found in the wild.  They will only
0459 # match if strict=0 flag is given to the API methods.
0460 
0461 # Please sort these too
# Suffix -> type table for the non-standard types; init() publishes the
# database's non-strict dictionary under this name.
common_types = {
    '.jpg': 'image/jpg',
    '.mid': 'audio/midi',
    '.midi': 'audio/midi',
    '.pct': 'image/pict',
    '.pic': 'image/pict',
    '.pict': 'image/pict',
    '.rtf': 'application/rtf',
    '.xul': 'text/xul',
}
0472 
0473 
if __name__ == '__main__':
    # Command-line front end: guess a type (default) or an extension (-e)
    # for every positional argument.
    import sys
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        """Print the usage text, then `msg' if given, and exit with `code'."""
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error:
        # sys.exc_info()[1] is the exception instance being handled.
        usage(1, sys.exc_info()[1])

    strict = 1
    extension = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = 0
        elif opt in ('-e', '--extension'):
            extension = 1

    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if guess:
                print(guess)
            else:
                print("I don't know anything about type %s" % (gtype,))
        else:
            guess, encoding = guess_type(gtype, strict)
            if guess:
                print('type: %s encoding: %s' % (guess, encoding))
            else:
                print("I don't know anything about type %s" % (gtype,))
0519 

Generated by PyXR 0.9.4
SourceForge.net Logo