#! /usr/bin/env python

"""RFC 3548: Base16, Base32, Base64 Data Encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support

import re
import struct
import binascii


__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode', 'decode', 'encodestring', 'decodestring',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]

# Identity translation table: position i holds byte value i.
_translation = bytearray(range(256))
EMPTYSTRING = b''


def _ordinal(c):
    """Return the byte value of c.

    c is either a one-character string or an integer.  Indexing a byte
    string yields the former on some interpreters and the latter on others,
    so both are accepted here.
    """
    if isinstance(c, int):
        return c
    return ord(c)


def _translate(s, altchars):
    """Return a copy of s with single characters substituted.

    altchars is a dictionary mapping each character to replace onto its
    replacement; every other character is left untouched.
    """
    table = bytearray(_translation)     # private copy of the identity table
    for k, v in altchars.items():
        table[_ordinal(k)] = _ordinal(v)
    return s.translate(bytes(table))



# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Encode a string using Base64.

    s is the string to encode.  Optional altchars must be a string of at least
    length 2 (additional characters are ignored) which specifies an
    alternative alphabet for the '+' and '/' characters.  This allows an
    application to e.g. generate url or filesystem safe Base64 strings.

    The encoded string is returned.
    """
    # Strip off the trailing newline
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is not None:
        return _translate(encoded, {b'+': altchars[0], b'/': altchars[1]})
    return encoded


def b64decode(s, altchars=None):
    """Decode a Base64 encoded string.

    s is the string to decode.  Optional altchars must be a string of at least
    length 2 (additional characters are ignored) which specifies the
    alternative alphabet used instead of the '+' and '/' characters.

    The decoded string is returned.  A TypeError is raised when binascii
    rejects the input, for example because s is incorrectly padded.
    """
    if altchars is not None:
        s = _translate(s, {altchars[0]: b'+', altchars[1]: b'/'})
    try:
        return binascii.a2b_base64(s)
    except binascii.Error as msg:
        # Transform this exception for consistency
        raise TypeError(msg)


def standard_b64encode(s):
    """Encode a string using the standard Base64 alphabet.

    s is the string to encode.  The encoded string is returned.
    """
    return b64encode(s)

def standard_b64decode(s):
    """Decode a string encoded with the standard Base64 alphabet.

    s is the string to decode.  The decoded string is returned.  Errors are
    reported as described for b64decode().
    """
    return b64decode(s)

def urlsafe_b64encode(s):
    """Encode a string using a url-safe Base64 alphabet.

    s is the string to encode.  The encoded string is returned.  The alphabet
    uses '-' instead of '+' and '_' instead of '/'.
    """
    return b64encode(s, b'-_')

def urlsafe_b64decode(s):
    """Decode a string encoded with the url-safe Base64 alphabet.

    s is the string to decode.  The decoded string is returned.  Errors are
    reported as described for b64decode().

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    return b64decode(s, b'-_')



# Base32 encoding/decoding must be done in Python
_B32CHARS = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'

# Maps each 5-bit value (0..31) onto its one-character Base32 digit.
_b32alphabet = dict((_i, _B32CHARS[_i:_i + 1]) for _i in range(32))

# Encoding table indexed by 5-bit value.  It is built by explicit index so it
# does not depend on the iteration order of the dictionary.
_b32tab = [_b32alphabet[_i] for _i in range(32)]
# Decoding table: Base32 digit -> 5-bit value.
_b32rev = dict((_v, _k) for _k, _v in _b32alphabet.items())


def b32encode(s):
    """Encode a string using Base32.

    s is the string to encode.  The encoded string is returned.
    """
    parts = []
    quanta, leftover = divmod(len(s), 5)
    # Pad the last quantum with zero bits if necessary.  A new string is
    # bound rather than using += so a mutable argument is never modified.
    if leftover:
        s = s + (b'\0' * (5 - leftover))
        quanta += 1
    for i in range(quanta):
        # c1 and c2 are 16 bits wide, c3 is 8 bits wide.  The intent of this
        # code is to process the 40 bits in units of 5 bits.  So we take the 1
        # leftover bit of c1 and tack it onto c2.  Then we take the 2 leftover
        # bits of c2 and tack them onto c3.  The shifts and masks are intended
        # to give us values of exactly 5 bits in width.
        c1, c2, c3 = struct.unpack('!HHB', s[i*5:(i+1)*5])
        c2 += (c1 & 1) << 16 # 17 bits wide
        c3 += (c2 & 3) << 8  # 10 bits wide
        parts.extend([_b32tab[c1 >> 11],         # bits 1 - 5
                      _b32tab[(c1 >> 6) & 0x1f], # bits 6 - 10
                      _b32tab[(c1 >> 1) & 0x1f], # bits 11 - 15
                      _b32tab[c2 >> 12],         # bits 16 - 20 (1 - 5)
                      _b32tab[(c2 >> 7) & 0x1f], # bits 21 - 25 (6 - 10)
                      _b32tab[(c2 >> 2) & 0x1f], # bits 26 - 30 (11 - 15)
                      _b32tab[c3 >> 5],          # bits 31 - 35 (1 - 5)
                      _b32tab[c3 & 0x1f],        # bits 36 - 40 (1 - 5)
                      ])
    encoded = EMPTYSTRING.join(parts)
    # Adjust for any leftover partial quanta
    if leftover == 1:
        return encoded[:-6] + b'======'
    elif leftover == 2:
        return encoded[:-4] + b'===='
    elif leftover == 3:
        return encoded[:-3] + b'==='
    elif leftover == 4:
        return encoded[:-1] + b'='
    return encoded


def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the letter O
    (oh), and for optional mapping of the digit 1 (one) to either the letter I
    (eye) or letter L (el).  The optional argument map01 when not None,
    specifies which letter the digit 1 should be mapped to (when map01 is not
    None, the digit 0 is always mapped to the letter O).  For security
    purposes the default is None, so that 0 and 1 are not allowed in the
    input.

    The decoded string is returned.  A TypeError is raised if s is
    incorrectly padded or if there are non-alphabet characters present in the
    string.
    """
    if len(s) % 8:
        raise TypeError('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The flag map01 will be either
    # False, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01:
        s = _translate(s, {b'0': b'O', b'1': map01})
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    unpadded = s.rstrip(b'=')
    padchars = len(s) - len(unpadded)
    # Now decode the full quanta
    parts = []
    acc = 0
    shift = 35
    for i in range(len(unpadded)):
        # Take a one-element slice so each digit is a string key of _b32rev.
        val = _b32rev.get(unpadded[i:i + 1])
        if val is None:
            raise TypeError('Non-base32 digit found')
        acc += val << shift
        shift -= 5
        if shift < 0:
            # A quantum is always 40 bits, i.e. exactly 10 hex digits.  The
            # fixed-width format keeps leading zero bytes that hex() drops.
            parts.append(binascii.unhexlify('%010x' % acc))
            acc = 0
            shift = 35
    # Process the last, partial quanta
    last = binascii.unhexlify('%010x' % acc)
    if padchars == 0:
        last = b''                      # No partial quantum at all
    elif padchars == 1:
        last = last[:-1]
    elif padchars == 3:
        last = last[:-2]
    elif padchars == 4:
        last = last[:-3]
    elif padchars == 6:
        last = last[:-4]
    else:
        raise TypeError('Incorrect padding')
    parts.append(last)
    return EMPTYSTRING.join(parts)



# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
# lowercase.  The RFC also recommends against accepting input case
# insensitively.
def b16encode(s):
    """Encode a string using Base16.

    s is the string to encode.  The encoded string is returned.
    """
    return binascii.hexlify(s).upper()


def b16decode(s, casefold=False):
    """Decode a Base16 encoded string.

    s is the string to decode.  Optional casefold is a flag specifying whether
    a lowercase alphabet is acceptable as input.  For security purposes, the
    default is False.

    The decoded string is returned.  A TypeError is raised if s has an odd
    number of digits or if there are non-alphabet characters present in the
    string.
    """
    if casefold:
        s = s.upper()
    if re.search(b'[^0-9A-F]', s):
        raise TypeError('Non-base16 digit found')
    try:
        return binascii.unhexlify(s)
    except binascii.Error as msg:
        # Transform this exception for consistency with b64decode()
        raise TypeError(msg)



# Legacy interface.  This code could be cleaned up since I don't believe
# binascii has any line length limitations.  It just doesn't seem worth it
# though.

MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file.

    Data is read from input in chunks of MAXBINSIZE bytes; each chunk is
    written to output as one Base64 line.
    """
    while True:
        s = input.read(MAXBINSIZE)
        if not s:
            break
        # A short read is not necessarily end of file: keep reading until the
        # chunk is full or the input is exhausted.
        while len(s) < MAXBINSIZE:
            ns = input.read(MAXBINSIZE-len(s))
            if not ns:
                break
            s += ns
        line = binascii.b2a_base64(s)
        output.write(line)


def decode(input, output):
    """Decode a file.

    Each line read from input is Base64 decoded and written to output.
    """
    while True:
        line = input.readline()
        if not line:
            break
        s = binascii.a2b_base64(line)
        output.write(s)


def encodestring(s):
    """Encode a string.

    The result consists of newline-terminated Base64 lines, each encoding at
    most MAXBINSIZE bytes of s.
    """
    pieces = []
    for i in range(0, len(s), MAXBINSIZE):
        chunk = s[i : i + MAXBINSIZE]
        pieces.append(binascii.b2a_base64(chunk))
    return EMPTYSTRING.join(pieces)


def decodestring(s):
    """Decode a string."""
    return binascii.a2b_base64(s)



# Useable as a script...
def test():
    """Small test program.

    Parses the command line in sys.argv: -e encodes (the default), -d and -u
    decode, and -t runs test1() and returns.  Input is the named file, or
    standard input when no file or '-' is given; output goes to standard
    output.  An invalid option prints the error and a usage message on
    standard error and exits with status 2.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr directly instead of rebinding sys.stdout.
        sys.stderr.write('%s\n' % (msg,))
        sys.stderr.write("""usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'\n""" % sys.argv[0])
        sys.exit(2)
    func = encode
    for opt, _ in opts:
        if opt == '-e':
            func = encode
        elif opt in ('-d', '-u'):
            func = decode
        elif opt == '-t':
            test1()
            return
    # encode()/decode() move byte strings.  Where the standard streams are
    # text wrappers they expose the underlying binary stream as .buffer;
    # otherwise the stream itself is used.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    if args and args[0] != '-':
        # The with block closes the input file even if func() raises.
        with open(args[0], 'rb') as infile:
            func(infile, out)
    else:
        func(getattr(sys.stdin, 'buffer', sys.stdin), out)


def test1():
    """Round-trip a sample string and print original, encoded and decoded."""
    s0 = b"Aladdin:open sesame"
    s1 = encodestring(s0)
    s2 = decodestring(s1)
    print('%s %r %s' % (s0, s1, s2))


if __name__ == '__main__':
    test()
# Generated by PyXR 0.9.4