0001 # Copyright (C) 2001-2004 Python Software Foundation 0002 # Author: Barry Warsaw 0003 # Contact: email-sig@python.org 0004 0005 """Classes to generate plain text from a message object tree.""" 0006 0007 import re 0008 import sys 0009 import time 0010 import random 0011 import warnings 0012 from cStringIO import StringIO 0013 0014 from email.Header import Header 0015 0016 UNDERSCORE = '_' 0017 NL = '\n' 0018 0019 fcre = re.compile(r'^From ', re.MULTILINE) 0020 0021 def _is8bitstring(s): 0022 if isinstance(s, str): 0023 try: 0024 unicode(s, 'us-ascii') 0025 except UnicodeError: 0026 return True 0027 return False 0028 0029 0030 0031 class Generator: 0032 """Generates output from a Message object tree. 0033 0034 This basic generator writes the message to the given file object as plain 0035 text. 0036 """ 0037 # 0038 # Public interface 0039 # 0040 0041 def __init__(self, outfp, mangle_from_=True, maxheaderlen=78): 0042 """Create the generator for message flattening. 0043 0044 outfp is the output file-like object for writing the message to. It 0045 must have a write() method. 0046 0047 Optional mangle_from_ is a flag that, when True (the default), escapes 0048 From_ lines in the body of the message by putting a `>' in front of 0049 them. 0050 0051 Optional maxheaderlen specifies the longest length for a non-continued 0052 header. When a header line is longer (in characters, with tabs 0053 expanded to 8 spaces) than maxheaderlen, the header will split as 0054 defined in the Header class. Set maxheaderlen to zero to disable 0055 header wrapping. The default is 78, as recommended (but not required) 0056 by RFC 2822. 0057 """ 0058 self._fp = outfp 0059 self._mangle_from_ = mangle_from_ 0060 self._maxheaderlen = maxheaderlen 0061 0062 def write(self, s): 0063 # Just delegate to the file object 0064 self._fp.write(s) 0065 0066 def flatten(self, msg, unixfrom=False): 0067 """Print the message object tree rooted at msg to the output file 0068 specified when the Generator instance was created. 0069 0070 unixfrom is a flag that forces the printing of a Unix From_ delimiter 0071 before the first object in the message tree. If the original message 0072 has no From_ delimiter, a `standard' one is crafted. By default, this 0073 is False to inhibit the printing of any From_ delimiter. 0074 0075 Note that for subobjects, no From_ line is printed. 0076 """ 0077 if unixfrom: 0078 ufrom = msg.get_unixfrom() 0079 if not ufrom: 0080 ufrom = 'From nobody ' + time.ctime(time.time()) 0081 print >> self._fp, ufrom 0082 self._write(msg) 0083 0084 # For backwards compatibility, but this is slower 0085 def __call__(self, msg, unixfrom=False): 0086 warnings.warn('__call__() deprecated; use flatten()', 0087 DeprecationWarning, 2) 0088 self.flatten(msg, unixfrom) 0089 0090 def clone(self, fp): 0091 """Clone this generator with the exact same options.""" 0092 return self.__class__(fp, self._mangle_from_, self._maxheaderlen) 0093 0094 # 0095 # Protected interface - undocumented ;/ 0096 # 0097 0098 def _write(self, msg): 0099 # We can't write the headers yet because of the following scenario: 0100 # say a multipart message includes the boundary string somewhere in 0101 # its body. We'd have to calculate the new boundary /before/ we write 0102 # the headers so that we can write the correct Content-Type: 0103 # parameter. 0104 # 0105 # The way we do this, so as to make the _handle_*() methods simpler, 0106 # is to cache any subpart writes into a StringIO. The we write the 0107 # headers and the StringIO contents. That way, subpart handlers can 0108 # Do The Right Thing, and can still modify the Content-Type: header if 0109 # necessary. 0110 oldfp = self._fp 0111 try: 0112 self._fp = sfp = StringIO() 0113 self._dispatch(msg) 0114 finally: 0115 self._fp = oldfp 0116 # Write the headers. First we see if the message object wants to 0117 # handle that itself. If not, we'll do it generically. 0118 meth = getattr(msg, '_write_headers', None) 0119 if meth is None: 0120 self._write_headers(msg) 0121 else: 0122 meth(self) 0123 self._fp.write(sfp.getvalue()) 0124 0125 def _dispatch(self, msg): 0126 # Get the Content-Type: for the message, then try to dispatch to 0127 # self._handle_<maintype>_<subtype>(). If there's no handler for the 0128 # full MIME type, then dispatch to self._handle_<maintype>(). If 0129 # that's missing too, then dispatch to self._writeBody(). 0130 main = msg.get_content_maintype() 0131 sub = msg.get_content_subtype() 0132 specific = UNDERSCORE.join((main, sub)).replace('-', '_') 0133 meth = getattr(self, '_handle_' + specific, None) 0134 if meth is None: 0135 generic = main.replace('-', '_') 0136 meth = getattr(self, '_handle_' + generic, None) 0137 if meth is None: 0138 meth = self._writeBody 0139 meth(msg) 0140 0141 # 0142 # Default handlers 0143 # 0144 0145 def _write_headers(self, msg): 0146 for h, v in msg.items(): 0147 print >> self._fp, '%s:' % h, 0148 if self._maxheaderlen == 0: 0149 # Explicit no-wrapping 0150 print >> self._fp, v 0151 elif isinstance(v, Header): 0152 # Header instances know what to do 0153 print >> self._fp, v.encode() 0154 elif _is8bitstring(v): 0155 # If we have raw 8bit data in a byte string, we have no idea 0156 # what the encoding is. There is no safe way to split this 0157 # string. If it's ascii-subset, then we could do a normal 0158 # ascii split, but if it's multibyte then we could break the 0159 # string. There's no way to know so the least harm seems to 0160 # be to not split the string and risk it being too long. 0161 print >> self._fp, v 0162 else: 0163 # Header's got lots of smarts, so use it. 0164 print >> self._fp, Header( 0165 v, maxlinelen=self._maxheaderlen, 0166 header_name=h, continuation_ws='\t').encode() 0167 # A blank line always separates headers from body 0168 print >> self._fp 0169 0170 # 0171 # Handlers for writing types and subtypes 0172 # 0173 0174 def _handle_text(self, msg): 0175 payload = msg.get_payload() 0176 if payload is None: 0177 return 0178 cset = msg.get_charset() 0179 if cset is not None: 0180 payload = cset.body_encode(payload) 0181 if not isinstance(payload, basestring): 0182 raise TypeError('string payload expected: %s' % type(payload)) 0183 if self._mangle_from_: 0184 payload = fcre.sub('>From ', payload) 0185 self._fp.write(payload) 0186 0187 # Default body handler 0188 _writeBody = _handle_text 0189 0190 def _handle_multipart(self, msg): 0191 # The trick here is to write out each part separately, merge them all 0192 # together, and then make sure that the boundary we've chosen isn't 0193 # present in the payload. 0194 msgtexts = [] 0195 subparts = msg.get_payload() 0196 if subparts is None: 0197 subparts = [] 0198 elif isinstance(subparts, basestring): 0199 # e.g. a non-strict parse of a message with no starting boundary. 0200 self._fp.write(subparts) 0201 return 0202 elif not isinstance(subparts, list): 0203 # Scalar payload 0204 subparts = [subparts] 0205 for part in subparts: 0206 s = StringIO() 0207 g = self.clone(s) 0208 g.flatten(part, unixfrom=False) 0209 msgtexts.append(s.getvalue()) 0210 # Now make sure the boundary we've selected doesn't appear in any of 0211 # the message texts. 0212 alltext = NL.join(msgtexts) 0213 # BAW: What about boundaries that are wrapped in double-quotes? 0214 boundary = msg.get_boundary(failobj=_make_boundary(alltext)) 0215 # If we had to calculate a new boundary because the body text 0216 # contained that string, set the new boundary. We don't do it 0217 # unconditionally because, while set_boundary() preserves order, it 0218 # doesn't preserve newlines/continuations in headers. This is no big 0219 # deal in practice, but turns out to be inconvenient for the unittest 0220 # suite. 0221 if msg.get_boundary() <> boundary: 0222 msg.set_boundary(boundary) 0223 # If there's a preamble, write it out, with a trailing CRLF 0224 if msg.preamble is not None: 0225 print >> self._fp, msg.preamble 0226 # dash-boundary transport-padding CRLF 0227 print >> self._fp, '--' + boundary 0228 # body-part 0229 if msgtexts: 0230 self._fp.write(msgtexts.pop(0)) 0231 # *encapsulation 0232 # --> delimiter transport-padding 0233 # --> CRLF body-part 0234 for body_part in msgtexts: 0235 # delimiter transport-padding CRLF 0236 print >> self._fp, '\n--' + boundary 0237 # body-part 0238 self._fp.write(body_part) 0239 # close-delimiter transport-padding 0240 self._fp.write('\n--' + boundary + '--') 0241 if msg.epilogue is not None: 0242 print >> self._fp 0243 self._fp.write(msg.epilogue) 0244 0245 def _handle_message_delivery_status(self, msg): 0246 # We can't just write the headers directly to self's file object 0247 # because this will leave an extra newline between the last header 0248 # block and the boundary. Sigh. 0249 blocks = [] 0250 for part in msg.get_payload(): 0251 s = StringIO() 0252 g = self.clone(s) 0253 g.flatten(part, unixfrom=False) 0254 text = s.getvalue() 0255 lines = text.split('\n') 0256 # Strip off the unnecessary trailing empty line 0257 if lines and lines[-1] == '': 0258 blocks.append(NL.join(lines[:-1])) 0259 else: 0260 blocks.append(text) 0261 # Now join all the blocks with an empty line. This has the lovely 0262 # effect of separating each block with an empty line, but not adding 0263 # an extra one after the last one. 0264 self._fp.write(NL.join(blocks)) 0265 0266 def _handle_message(self, msg): 0267 s = StringIO() 0268 g = self.clone(s) 0269 # The payload of a message/rfc822 part should be a multipart sequence 0270 # of length 1. The zeroth element of the list should be the Message 0271 # object for the subpart. Extract that object, stringify it, and 0272 # write it out. 0273 g.flatten(msg.get_payload(0), unixfrom=False) 0274 self._fp.write(s.getvalue()) 0275 0276 0277 0278 _FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]' 0279 0280 class DecodedGenerator(Generator): 0281 """Generator a text representation of a message. 0282 0283 Like the Generator base class, except that non-text parts are substituted 0284 with a format string representing the part. 0285 """ 0286 def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None): 0287 """Like Generator.__init__() except that an additional optional 0288 argument is allowed. 0289 0290 Walks through all subparts of a message. If the subpart is of main 0291 type `text', then it prints the decoded payload of the subpart. 0292 0293 Otherwise, fmt is a format string that is used instead of the message 0294 payload. fmt is expanded with the following keywords (in 0295 %(keyword)s format): 0296 0297 type : Full MIME type of the non-text part 0298 maintype : Main MIME type of the non-text part 0299 subtype : Sub-MIME type of the non-text part 0300 filename : Filename of the non-text part 0301 description: Description associated with the non-text part 0302 encoding : Content transfer encoding of the non-text part 0303 0304 The default value for fmt is None, meaning 0305 0306 [Non-text (%(type)s) part of message omitted, filename %(filename)s] 0307 """ 0308 Generator.__init__(self, outfp, mangle_from_, maxheaderlen) 0309 if fmt is None: 0310 self._fmt = _FMT 0311 else: 0312 self._fmt = fmt 0313 0314 def _dispatch(self, msg): 0315 for part in msg.walk(): 0316 maintype = part.get_content_maintype() 0317 if maintype == 'text': 0318 print >> self, part.get_payload(decode=True) 0319 elif maintype == 'multipart': 0320 # Just skip this 0321 pass 0322 else: 0323 print >> self, self._fmt % { 0324 'type' : part.get_content_type(), 0325 'maintype' : part.get_content_maintype(), 0326 'subtype' : part.get_content_subtype(), 0327 'filename' : part.get_filename('[no filename]'), 0328 'description': part.get('Content-Description', 0329 '[no description]'), 0330 'encoding' : part.get('Content-Transfer-Encoding', 0331 '[no encoding]'), 0332 } 0333 0334 0335 0336 # Helper 0337 _width = len(repr(sys.maxint-1)) 0338 _fmt = '%%0%dd' % _width 0339 0340 def _make_boundary(text=None): 0341 # Craft a random boundary. If text is given, ensure that the chosen 0342 # boundary doesn't appear in the text. 0343 token = random.randrange(sys.maxint) 0344 boundary = ('=' * 15) + (_fmt % token) + '==' 0345 if text is None: 0346 return boundary 0347 b = boundary 0348 counter = 0 0349 while True: 0350 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE) 0351 if not cre.search(text): 0352 break 0353 b = boundary + '.' + str(counter) 0354 counter += 1 0355 return b 0356
Generated by PyXR 0.9.4