# Copyright (C) 2002-2004 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org

"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.Header
module.
"""

import re
from binascii import b2a_base64, a2b_base64
from email.Utils import fix_eols

CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Number of characters of RFC chrome wrapped around each encoded chunk by
# header_encode(): "=?" + "?b?" + "?=".  See also Charset.py
MISC_LEN = 7


# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    groups_of_3, leftover = divmod(len(s), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    # Thanks, Tim!
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n


def _max_unencoded(max_encoded):
    """Return how many raw bytes may be encoded into max_encoded characters.

    The result is always a whole number of 3-byte groups, because a partial
    group still produces 4 output characters and would push the encoded text
    past max_encoded.  A limit too small to hold even one 4-character group
    is treated as room for exactly one group, so that callers never get a
    zero or negative chunk size (which would make range() raise ValueError
    or silently skip all of the input).
    """
    return max(3, (max_encoded // 4) * 3)


def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    Defined in RFC 2047, this Base64 encoding is identical to normal Base64
    encoding, except that each line must be intelligently wrapped (respecting
    the Base64 encoding), and subsequent lines must start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at, at most, maxlinelen characters (defaults to 76
    characters).  If maxlinelen is too small to hold the chrome plus a single
    4-character base64 group, each line carries exactly one group and is
    therefore longer than maxlinelen; the header text is never dropped.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
    # length, after the RFC chrome is added in.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    max_unencoded = _max_unencoded(max_encoded)

    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # b2a_base64() appends a newline; it is not part of the encoded-word.
        if chunk.endswith(NL):
            chunk = chunk[:-1]
        # Add the chrome
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)


def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  If maxlinelen is smaller than one 4-character base64
    group, each line carries exactly one group.

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty string is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    encvec = []
    # Whole 3-byte groups per line: keeps every line within maxlinelen and
    # means only the final line can end in '=' padding.
    max_unencoded = _max_unencoded(maxlinelen)
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)


# For convenience and backwards compatibility w/ standard base64 module
body_encode = encode
encodestring = encode


def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    An empty string is returned unchanged.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.
    """
    if not s:
        return s

    dec = a2b_base64(s)
    if convert_eols:
        return dec.replace(CRLF, convert_eols)
    return dec


# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode

# Generated by PyXR 0.9.4