PyXR

c:\python24\lib \ email \ Generator.py


0001 # Copyright (C) 2001-2004 Python Software Foundation
0002 # Author: Barry Warsaw
0003 # Contact: email-sig@python.org
0004 
0005 """Classes to generate plain text from a message object tree."""
0006 
0007 import re
0008 import sys
0009 import time
0010 import random
0011 import warnings
0012 from cStringIO import StringIO
0013 
0014 from email.Header import Header
0015 
# Joins a MIME main type and subtype into a `_handle_<main>_<sub>' suffix.
UNDERSCORE = '_'
# Line separator used when joining flattened parts and blocks.
NL = '\n'

# Matches `From ' at the start of any line, i.e. a Unix From_ line.
fcre = re.compile(r'^From ', re.M)
0020 
def _is8bitstring(s):
    """Return True if s is a byte string that is not pure us-ascii.

    Anything that is not a `str' instance (including unicode objects) yields
    False.
    """
    if not isinstance(s, str):
        return False
    try:
        # Decoding is only attempted for its failure mode; the result is
        # discarded.
        unicode(s, 'us-ascii')
    except UnicodeError:
        return True
    else:
        return False
0028 
0029 
0030 
class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    # For backwards compatibility, but this is slower
    def __call__(self, msg, unixfrom=False):
        """Deprecated spelling of flatten(); warns and then delegates."""
        warnings.warn('__call__() deprecated; use flatten()',
                      DeprecationWarning, 2)
        self.flatten(msg, unixfrom)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write the headers and then the body of msg to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output file, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        # Dashes are not legal in method names, so map them to underscores.
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, followed by the blank separator line."""
        for h, v in msg.items():
            # The trailing comma suppresses the newline; the following print
            # supplies the single space between the colon and the value.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen,
                    header_name=h, continuation_ws='\t').encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, body-encoding and From_-mangling it.

        Nothing is written when the payload is None.  Raises TypeError when
        the (possibly charset-encoded) payload is not a string.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        cset = msg.get_charset()
        if cset is not None:
            payload = cset.body_encode(payload)
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited subparts, epilogue.

        A string payload is written out verbatim.  When msg has no boundary
        parameter, a fresh one is generated and stored on msg so that the
        headers written afterwards agree with the body.
        """
        # The trick here is to write out each part separately, so that a
        # newly generated boundary can be checked against all of them.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if boundary is None:
            # Only craft (and set) a boundary when the message has none.  The
            # new boundary is guaranteed not to appear in any of the message
            # texts.  We don't call set_boundary() unconditionally because,
            # while it preserves header order, it doesn't preserve
            # newlines/continuations in headers.  This is no big deal in
            # practice, but turns out to be inconvenient for the unittest
            # suite.
            boundary = _make_boundary(NL.join(msgtexts))
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            print >> self._fp, msg.preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            self._fp.write(msg.epilogue)

    def _handle_message_delivery_status(self, msg):
        """Write each header block of a delivery-status payload."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the body of a message/* part.

        A list payload has its first element flattened (without a From_
        line) and written.  Any other payload is written as-is.
        """
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        #
        # The payload can be a plain string instead, e.g. when only the
        # headers of the enclosing message were parsed.  Indexing such a
        # payload raises TypeError, so write the string through untouched.
        payload = msg.get_payload()
        if isinstance(payload, list):
            s = StringIO()
            g = self.clone(s)
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)
0275 
0276 
0277 
# Placeholder text written in place of a non-text part when the caller
# supplies no format of its own.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        # Fall back to the module-level template when none was given.
        if fmt is None:
            fmt = _FMT
        self._fmt = fmt

    def _dispatch(self, msg):
        """Print the decoded text parts and a placeholder for the others."""
        for subpart in msg.walk():
            kind = subpart.get_content_maintype()
            if kind == 'multipart':
                # Containers produce no output of their own; walk() also
                # yields their children.
                continue
            if kind == 'text':
                print >> self, subpart.get_payload(decode=True)
                continue
            # Anything else is replaced by the expanded format string.
            print >> self, self._fmt % {
                'type'       : subpart.get_content_type(),
                'maintype'   : subpart.get_content_maintype(),
                'subtype'    : subpart.get_content_subtype(),
                'filename'   : subpart.get_filename('[no filename]'),
                'description': subpart.get('Content-Description',
                                           '[no description]'),
                'encoding'   : subpart.get('Content-Transfer-Encoding',
                                           '[no encoding]'),
                }
0333 
0334 
0335 
0336 # Helper
0337 _width = len(repr(sys.maxint-1))
0338 _fmt = '%%0%dd' % _width
0339 
0340 def _make_boundary(text=None):
0341     # Craft a random boundary.  If text is given, ensure that the chosen
0342     # boundary doesn't appear in the text.
0343     token = random.randrange(sys.maxint)
0344     boundary = ('=' * 15) + (_fmt % token) + '=='
0345     if text is None:
0346         return boundary
0347     b = boundary
0348     counter = 0
0349     while True:
0350         cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
0351         if not cre.search(text):
0352             break
0353         b = boundary + '.' + str(counter)
0354         counter += 1
0355     return b
0356
Generated by PyXR 0.9.4