PyXR

c:\python24\lib \ email \ Utils.py


0001 # Copyright (C) 2001-2004 Python Software Foundation
0002 # Author: Barry Warsaw
0003 # Contact: email-sig@python.org
0004 
0005 """Miscellaneous utilities."""
0006 
0007 import os
0008 import re
0009 import time
0010 import base64
0011 import random
0012 import socket
0013 import warnings
0014 from cStringIO import StringIO
0015 
0016 from email._parseaddr import quote
0017 from email._parseaddr import AddressList as _AddressList
0018 from email._parseaddr import mktime_tz
0019 
0020 # We need workarounds for bugs in these methods in older Pythons (see below)
0021 from email._parseaddr import parsedate as _parsedate
0022 from email._parseaddr import parsedate_tz as _parsedate_tz
0023 
0024 from quopri import decodestring as _qdecode
0025 
0026 # Intrapackage imports
0027 from email.Encoders import _bencode, _qencode
0028 
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'

# Any character matched here makes formataddr() wrap the real name in
# double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in the real name.
escapesre = re.compile(r'[][\\()"]')
0036 
0037 
0038 
0039 # Helpers
0040 
0041 def _identity(s):
0042     return s
0043 
0044 
0045 def _bdecode(s):
0046     # We can't quite use base64.encodestring() since it tacks on a "courtesy
0047     # newline".  Blech!
0048     if not s:
0049         return s
0050     value = base64.decodestring(s)
0051     if not s.endswith('\n') and value.endswith('\n'):
0052         return value[:-1]
0053     return value
0054 
0055 
0056 
def fix_eols(s):
    r"""Return s with every line ending normalised to CRLF (\r\n)."""
    # First pass: a bare LF (one not already preceded by CR) becomes CRLF.
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a bare CR (one not followed by LF) becomes CRLF.
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)
0064 
0065 
0066 
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is a string suitable
    for an RFC 2822 From, To or Cc header.

    When the real name is false (empty or None) the address is returned
    unmodified.  Otherwise the result has the form 'realname <address>';
    the real name is wrapped in double quotes if it contains a character
    matched by specialsre, and characters matched by escapesre are
    preceded by a backslash.
    """
    realname, addr_spec = pair
    if not realname:
        return addr_spec
    # Decide on quoting from the raw name, before any escaping is applied.
    if specialsre.search(realname):
        wrap = '"'
    else:
        wrap = ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrap, escaped, wrap, addr_spec)
0083 
0084 
0085 
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) pairs parsed from fieldvalues.

    fieldvalues is a sequence of header value strings; they are joined
    with ', ' and parsed as a single address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
0091 
0092 
0093 
# Matches text of the form =?charset?encoding?atom?= and exposes the three
# fields as the named groups 'charset', 'encoding' and 'atom'.  The encoding
# letter is matched case-insensitively (re.IGNORECASE).
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
0103 
0104 
0105 
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in the format specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval   -- floating point time value as accepted by time.gmtime()
                 and time.localtime(); the current time is used when it
                 is None.
    localtime -- when true, timeval is rendered in the local timezone
                 (with daylight savings time taken into account) and the
                 zone is written as a numeric +HHMM / -HHMM offset.
    usegmt    -- only consulted when localtime is false: write the zone
                 as the ascii string "GMT" (as HTTP needs) instead of
                 the numeric "-0000".
    """
    # strftime() is deliberately not used: it honors the locale, while
    # RFC 2822 requires the English day and month abbreviations.
    weekdays = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        zone = '-0000'
        if usegmt:
            zone = 'GMT'
    else:
        stamp = time.localtime(timeval)
        # Use the DST offset only if the local zone defines DST and it is
        # in effect for this particular time.
        seconds_west = time.timezone
        if time.daylight and stamp.tm_isdst:
            seconds_west = time.altzone
        whole_hours, leftover = divmod(abs(seconds_west), 3600)
        # The offsets above count seconds *west* of UTC, whereas the
        # header zone is expressed east of UTC, so the sign is flipped.
        sign = '+'
        if seconds_west > 0:
            sign = '-'
        zone = '%s%02d%02d' % (sign, whole_hours, leftover // 60)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        weekdays[stamp.tm_wday],
        stamp.tm_mday,
        months[stamp.tm_mon - 1],
        stamp.tm_year, stamp.tm_hour, stamp.tm_min, stamp.tm_sec,
        zone)
0156 
0157 
0158 
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is assembled from the current UTC time, the process id, a
    random integer below 100000 and the host name from socket.getfqdn().
    If idstring is given, it is appended (after a dot) to the part before
    the '@' to strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    nonce = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, pid, nonce, suffix, host)
0178 
0179 
0180 
0181 # These functions are in the standalone mimelib version only because they've
0182 # subsequently been fixed in the latest Python versions.  We use this to work
0183 # around broken older Pythons.
def parsedate(data):
    """Parse the date string data with email._parseaddr.parsedate().

    Returns None without calling the parser when data is false (None or
    an empty string); otherwise returns whatever the parser returns.
    """
    if data:
        return _parsedate(data)
    return None
0188 
0189 
def parsedate_tz(data):
    """Parse the date string data with email._parseaddr.parsedate_tz().

    Returns None without calling the parser when data is false (None or
    an empty string); otherwise returns whatever the parser returns.
    """
    if data:
        return _parsedate_tz(data)
    return None
0194 
0195 
def parseaddr(addr):
    """Parse addr and return its first address as a pair.

    The pair is the first entry of the address list parsed from addr.
    When nothing can be parsed, the pair ('', '') is returned.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
0201 
0202 
0203 # rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove one layer of surrounding quotes from a string.

    A string enclosed in double quotes loses them, and the backslash
    escapes \\\\ and \\" inside it are undone.  A string enclosed in angle
    brackets just loses the brackets.  Anything else, including strings
    shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        # Order matters: collapse doubled backslashes before unescaping
        # the quotes.
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
0212 
0213 
0214 
0215 # RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is expected to look like charset'language'value, with the value
    percent-encoded.  Returns a 3-tuple (charset, language, value) in
    which value has been percent-decoded.

    If s does not contain the two apostrophes that delimit charset and
    language, the whole string is treated as the value and charset and
    language are returned as None.
    """
    # unquote() lives in urllib on the Pythons this module was written
    # for and in urllib.parse on later ones.
    try:
        from urllib import unquote as _url_unquote
    except ImportError:
        from urllib.parse import unquote as _url_unquote
    parts = s.split("'", 2)
    # Fewer than three parts means zero or one apostrophe.  The
    # one-apostrophe case used to reach the 3-way unpacking below and
    # raise ValueError.
    if len(parts) < 3:
        return None, None, _url_unquote(s)
    charset, language, text = parts
    return charset, language, _url_unquote(text)
0224 
0225 
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always percent-quoted (no characters are treated as safe).  If
    neither charset nor language is given, the quoted string is returned
    on its own.  Otherwise the result has the form
    charset'language'quoted-s, with the empty string standing in for
    whichever of charset and language was not given.
    """
    # quote() lives in urllib on the Pythons this module was written for
    # and in urllib.parse on later ones.
    try:
        from urllib import quote as _url_quote
    except ImportError:
        from urllib.parse import quote as _url_quote
    quoted = _url_quote(s, safe='')
    if charset is None and language is None:
        return quoted
    # A missing charset used to be formatted as the literal text "None".
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, quoted)
0240 
0241 
# Matches a parameter name ending in '*', optionally followed by a decimal
# segment number and another optional '*' (e.g. name*, name*0, name*0*).
# Group 'name' is the part before the first '*'; group 'num' is the segment
# number, or None when there is none.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
0243 
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples.  The first pair is copied to the
    result untouched; every later pair is treated as (name, value).

    A name that does not match rfc2231_continuation yields
    (name, '"value"') with the value unquoted and then re-quoted.  Names
    that do match are grouped by their base name; each group's segments
    are sorted by segment number, joined, passed through decode_rfc2231()
    and appended after the ordinary parameters as
    (name, (charset, language, '"value"')).
    """
    # The leading pair is passed through verbatim.
    first_name, first_value = params[0]
    decoded = [(first_name, first_value)]
    # Maps a parameter's base name to its list of (segment number, text).
    segments_by_name = {}
    for raw_name, raw_value in params[1:]:
        text = unquote(raw_value)
        match = rfc2231_continuation.match(raw_name)
        if not match:
            decoded.append((raw_name, '"%s"' % quote(text)))
            continue
        base, index = match.group('name', 'num')
        if index is not None:
            index = int(index)
        segments_by_name.setdefault(base, []).append((index, text))
    for base, segments in segments_by_name.items():
        # Put the segments in segment-number order before joining them.
        segments.sort()
        pieces = []
        for index, piece in segments:
            pieces.append(piece)
        charset, language, text = decode_rfc2231(EMPTYSTRING.join(pieces))
        decoded.append((base, (charset, language, '"%s"' % quote(text))))
    return decoded
0279 
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value to a single unquoted string.

    A non-tuple value is simply passed through unquote().  A tuple is
    read as (charset, language, text): the text is unquoted and converted
    to unicode using the tuple's charset ('us-ascii' when that is false)
    and the given errors policy.  If Python does not know the charset
    (LookupError), fallback_charset is used for the conversion instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    text = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(text, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(text, fallback_charset, errors)
0292
Generated by PyXR 0.9.4