# Tests for codec error-handling callbacks: the built-in handlers
# ("strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"),
# user-registered handlers, the UnicodeError exception classes and the
# argument/result checking done around handler calls.
# This is Python 2 code (u"" literals, unichr, xrange, str.decode).

import test.test_support, unittest
import sys, codecs, htmlentitydefs, unicodedata

class PosReturn:
    # this can be used for configurable callbacks
    #
    # The handler always returns the replacement u"<?>" together with the
    # position stored in self.pos, so a test can choose where the codec
    # resumes (including invalid positions).

    def __init__(self):
        self.pos = 0

    def handle(self, exc):
        oldpos = self.pos
        realpos = oldpos
        # a negative position counts from the end of the input
        if realpos<0:
            realpos = len(exc.object) + realpos
        # if we don't advance this time, terminate on the next call
        # otherwise we'd get an endless loop
        if realpos <= exc.start:
            self.pos = len(exc.object)
        return (u"<?>", oldpos)

class CodecCallbackTest(unittest.TestCase):

    def test_xmlcharrefreplace(self):
        # replace unencodable characters with numeric character entities.
        # For ascii, latin-1 and charmaps this is completely implemented
        # in C and should be reasonably fast.
        s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
        self.assertEqual(
            s.encode("ascii", "xmlcharrefreplace"),
            "&#12473;&#12497;&#12514; &#228;nd eggs"
        )
        self.assertEqual(
            s.encode("latin-1", "xmlcharrefreplace"),
            "&#12473;&#12497;&#12514; \xe4nd eggs"
        )

    def test_xmlcharnamereplace(self):
        # This time use a named character entity for unencodable
        # characters, if one is available.

        def xmlcharnamereplace(exc):
            if not isinstance(exc, UnicodeEncodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = []
            for c in exc.object[exc.start:exc.end]:
                try:
                    l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
                except KeyError:
                    # no named entity: fall back to a numeric reference
                    l.append(u"&#%d;" % ord(c))
            return (u"".join(l), exc.end)

        codecs.register_error(
            "test.xmlcharnamereplace", xmlcharnamereplace)

        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
        sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
        self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
        sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
        self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
        sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
        self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)

    def test_uninamereplace(self):
        # We're using the names from the unicode database this time,
        # and we're doing "syntax highlighting" here, i.e. we include
        # the replaced text in ANSI escape sequences. For this it is
        # useful that the error handler is not called for every single
        # unencodable character, but for a complete sequence of
        # unencodable characters, otherwise we would output many
        # unnecessary escape sequences.

        def uninamereplace(exc):
            if not isinstance(exc, UnicodeEncodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = []
            for c in exc.object[exc.start:exc.end]:
                l.append(unicodedata.name(c, u"0x%x" % ord(c)))
            return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)

        codecs.register_error(
            "test.uninamereplace", uninamereplace)

        sin = u"\xac\u1234\u20ac\u8000"
        sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
        self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)

        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
        self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)

        sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
        self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)

    def test_backslashescape(self):
        # Does the same as the "unicode-escape" encoding, but with different
        # base encodings.
        sin = u"a\xac\u1234\u20ac\u8000"
        if sys.maxunicode > 0xffff:
            sin += unichr(sys.maxunicode)
        sout = "a\\xac\\u1234\\u20ac\\u8000"
        if sys.maxunicode > 0xffff:
            sout += "\\U%08x" % sys.maxunicode
        self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)

        sout = "a\xac\\u1234\\u20ac\\u8000"
        if sys.maxunicode > 0xffff:
            sout += "\\U%08x" % sys.maxunicode
        self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)

        sout = "a\xac\\u1234\xa4\\u8000"
        if sys.maxunicode > 0xffff:
            sout += "\\U%08x" % sys.maxunicode
        self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)

    def test_relaxedutf8(self):
        # This is the test for a decoding callback handler,
        # that relaxes the UTF-8 minimal encoding restriction.
        # A null byte that is encoded as "\xc0\x80" will be
        # decoded as a null byte. All other illegal sequences
        # will be handled strictly.
        def relaxedutf8(exc):
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
                return (u"\x00", exc.start+2) # retry after two bytes
            else:
                raise exc

        codecs.register_error(
            "test.relaxedutf8", relaxedutf8)

        sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
        sout = u"a\x00b\x00c\xfc\x00\x00"
        self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
        sin = "\xc0\x80\xc0\x81"
        self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")

    def test_charmapencode(self):
        # For charmap encodings the replacement string will be
        # mapped through the encoding again. This means, that
        # to be able to use e.g. the "replace" handler, the
        # charmap has to have a mapping for "?".
        charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
        sin = u"abc"
        sout = "AABBCC"
        self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)

        sin = u"abcA"
        self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)

        charmap[ord("?")] = "XYZ"
        sin = u"abcDEF"
        sout = "AABBCCXYZXYZXYZ"
        self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)

        # a unicode (rather than str) mapping target is expected to be
        # rejected with a TypeError
        charmap[ord("?")] = u"XYZ"
        self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)

    def test_callbacks(self):
        # handler1 works for encoding and decoding: it replaces the
        # offending range with the bracketed ordinals of its items and
        # resumes right after the range.
        def handler1(exc):
            if not isinstance(exc, UnicodeEncodeError) \
               and not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
            return (u"[%s]" % u"".join(l), exc.end)

        codecs.register_error("test.handler1", handler1)

        # handler2 is decode-only and resumes one position later than
        # handler1 does.
        def handler2(exc):
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
            return (u"[%s]" % u"".join(l), exc.end+1) # skip one character

        codecs.register_error("test.handler2", handler2)

        s = "\x00\x81\x7f\x80\xff"

        self.assertEqual(
            s.decode("ascii", "test.handler1"),
            u"\x00[<129>]\x7f[<128>][<255>]"
        )
        self.assertEqual(
            s.decode("ascii", "test.handler2"),
            u"\x00[<129>][<128>]"
        )

        # The second escape is deliberately malformed.  The backslash is
        # written explicitly ("\\u3xxx") instead of relying on "\u" being
        # left untouched in a byte string literal; the bytes are the same.
        self.assertEqual(
            "\\u3042\\u3xxx".decode("unicode-escape", "test.handler1"),
            u"\u3042[<92><117><51><120>]xx"
        )

        self.assertEqual(
            "\\u3042\\u3xx".decode("unicode-escape", "test.handler1"),
            u"\u3042[<92><117><51><120><120>]"
        )

        self.assertEqual(
            codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
            u"z[<98>][<99>]"
        )

        self.assertEqual(
            u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
            u"g[<252><223>]rk"
        )

        self.assertEqual(
            u"g\xfc\xdf".encode("ascii", "test.handler1"),
            u"g[<252><223>]"
        )

    def test_longstrings(self):
        # test long strings to check for memory overflow problems
        errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
        # register the handlers under different names,
        # to prevent the codec from recognizing the name
        for err in errors:
            codecs.register_error("test." + err, codecs.lookup_error(err))
        l = 1000
        errors += [ "test." + err for err in errors ]
        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
            for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
                for err in errors:
                    # only crashes matter here; encoding errors are expected
                    try:
                        uni.encode(enc, err)
                    except UnicodeError:
                        pass

    def check_exceptionobjectargs(self, exctype, args, msg):
        # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
        # check with one missing argument
        self.assertRaises(TypeError, exctype, *args[:-1])
        # check with one argument too much
        self.assertRaises(TypeError, exctype, *(args + ["too much"]))
        # check with one argument of the wrong type
        wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
        for i in xrange(len(args)):
            for wrongarg in wrongargs:
                if type(wrongarg) is type(args[i]):
                    continue
                # build argument array
                callargs = []
                for j in xrange(len(args)):
                    if i==j:
                        callargs.append(wrongarg)
                    else:
                        # NOTE(review): this fills every other slot with
                        # args[i], not args[j], so more than one argument
                        # may have the wrong type.  args[j] looks like the
                        # intent; left unchanged because the stricter form
                        # depends on the interpreter rejecting each single
                        # wrong argument (e.g. a float for start/end).
                        callargs.append(args[i])
                self.assertRaises(TypeError, exctype, *callargs)

        # check with the correct number and type of arguments
        exc = exctype(*args)
        self.assertEquals(str(exc), msg)

    def test_unicodeencodeerror(self):
        self.check_exceptionobjectargs(
            UnicodeEncodeError,
            ["ascii", u"g\xfcrk", 1, 2, "ouch"],
            "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
        )
        self.check_exceptionobjectargs(
            UnicodeEncodeError,
            ["ascii", u"g\xfcrk", 1, 4, "ouch"],
            "'ascii' codec can't encode characters in position 1-3: ouch"
        )
        self.check_exceptionobjectargs(
            UnicodeEncodeError,
            ["ascii", u"\xfcx", 0, 1, "ouch"],
            "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
        )
        self.check_exceptionobjectargs(
            UnicodeEncodeError,
            ["ascii", u"\u0100x", 0, 1, "ouch"],
            "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
        )
        self.check_exceptionobjectargs(
            UnicodeEncodeError,
            ["ascii", u"\uffffx", 0, 1, "ouch"],
            "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
        )
        if sys.maxunicode > 0xffff:
            self.check_exceptionobjectargs(
                UnicodeEncodeError,
                ["ascii", u"\U00010000x", 0, 1, "ouch"],
                "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
            )

    def test_unicodedecodeerror(self):
        self.check_exceptionobjectargs(
            UnicodeDecodeError,
            ["ascii", "g\xfcrk", 1, 2, "ouch"],
            "'ascii' codec can't decode byte 0xfc in position 1: ouch"
        )
        self.check_exceptionobjectargs(
            UnicodeDecodeError,
            ["ascii", "g\xfcrk", 1, 3, "ouch"],
            "'ascii' codec can't decode bytes in position 1-2: ouch"
        )

    def test_unicodetranslateerror(self):
        self.check_exceptionobjectargs(
            UnicodeTranslateError,
            [u"g\xfcrk", 1, 2, "ouch"],
            "can't translate character u'\\xfc' in position 1: ouch"
        )
        self.check_exceptionobjectargs(
            UnicodeTranslateError,
            [u"g\u0100rk", 1, 2, "ouch"],
            "can't translate character u'\\u0100' in position 1: ouch"
        )
        self.check_exceptionobjectargs(
            UnicodeTranslateError,
            [u"g\uffffrk", 1, 2, "ouch"],
            "can't translate character u'\\uffff' in position 1: ouch"
        )
        if sys.maxunicode > 0xffff:
            self.check_exceptionobjectargs(
                UnicodeTranslateError,
                [u"g\U00010000rk", 1, 2, "ouch"],
                "can't translate character u'\\U00010000' in position 1: ouch"
            )
        self.check_exceptionobjectargs(
            UnicodeTranslateError,
            [u"g\xfcrk", 1, 3, "ouch"],
            "can't translate characters in position 1-2: ouch"
        )

    def test_badandgoodstrictexceptions(self):
        # "strict" complains about a non-exception passed in
        self.assertRaises(
            TypeError,
            codecs.strict_errors,
            42
        )
        # "strict" complains about the wrong exception type
        self.assertRaises(
            Exception,
            codecs.strict_errors,
            Exception("ouch")
        )

        # If the correct exception is passed in, "strict" raises it
        self.assertRaises(
            UnicodeEncodeError,
            codecs.strict_errors,
            UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
        )

    def test_badandgoodignoreexceptions(self):
        # "ignore" complains about a non-exception passed in
        self.assertRaises(
           TypeError,
           codecs.ignore_errors,
           42
        )
        # "ignore" complains about the wrong exception type
        self.assertRaises(
           TypeError,
           codecs.ignore_errors,
           UnicodeError("ouch")
        )
        # If the correct exception is passed in, "ignore" returns an empty replacement
        self.assertEquals(
            codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
            (u"", 1)
        )
        self.assertEquals(
            codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
            (u"", 1)
        )
        self.assertEquals(
            codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
            (u"", 1)
        )

    def test_badandgoodreplaceexceptions(self):
        # "replace" complains about a non-exception passed in
        self.assertRaises(
           TypeError,
           codecs.replace_errors,
           42
        )
        # "replace" complains about the wrong exception type
        self.assertRaises(
           TypeError,
           codecs.replace_errors,
           UnicodeError("ouch")
        )
        # With the correct exception, "replace" returns a replacement
        # character: u"?" when encoding, u"\ufffd" otherwise
        self.assertEquals(
            codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
            (u"?", 1)
        )
        self.assertEquals(
            codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
            (u"\ufffd", 1)
        )
        self.assertEquals(
            codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
            (u"\ufffd", 1)
        )

    def test_badandgoodxmlcharrefreplaceexceptions(self):
        # "xmlcharrefreplace" complains about a non-exception passed in
        self.assertRaises(
           TypeError,
           codecs.xmlcharrefreplace_errors,
           42
        )
        # "xmlcharrefreplace" complains about the wrong exception types
        self.assertRaises(
           TypeError,
           codecs.xmlcharrefreplace_errors,
           UnicodeError("ouch")
        )
        # "xmlcharrefreplace" can only be used for encoding
        self.assertRaises(
            TypeError,
            codecs.xmlcharrefreplace_errors,
            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
        )
        self.assertRaises(
            TypeError,
            codecs.xmlcharrefreplace_errors,
            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
        )
        # Use the correct exception
        self.assertEquals(
            codecs.xmlcharrefreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
            (u"&#%d;" % 0x3042, 1)
        )

    def test_badandgoodbackslashreplaceexceptions(self):
        # "backslashreplace" complains about a non-exception passed in
        self.assertRaises(
           TypeError,
           codecs.backslashreplace_errors,
           42
        )
        # "backslashreplace" complains about the wrong exception types
        self.assertRaises(
           TypeError,
           codecs.backslashreplace_errors,
           UnicodeError("ouch")
        )
        # "backslashreplace" can only be used for encoding
        self.assertRaises(
            TypeError,
            codecs.backslashreplace_errors,
            UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
        )
        self.assertRaises(
            TypeError,
            codecs.backslashreplace_errors,
            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
        )
        # Use the correct exception
        self.assertEquals(
            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
            (u"\\u3042", 1)
        )
        self.assertEquals(
            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
            (u"\\x00", 1)
        )
        self.assertEquals(
            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
            (u"\\xff", 1)
        )
        self.assertEquals(
            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
            (u"\\u0100", 1)
        )
        self.assertEquals(
            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
            (u"\\uffff", 1)
        )
        if sys.maxunicode>0xffff:
            self.assertEquals(
                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
                (u"\\U00010000", 1)
            )
            self.assertEquals(
                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
                (u"\\U0010ffff", 1)
            )

    def test_badhandlerresults(self):
        # Each of these handler return values is malformed (not a tuple,
        # wrong length, or wrong item types) and must be rejected with a
        # TypeError by the codec.
        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
        encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")

        for res in results:
            # The handler must accept the exception argument; a
            # zero-argument lambda would fail with an arity TypeError
            # before the bad result is ever returned to the codec.
            codecs.register_error("test.badhandler", lambda exc: res)
            for enc in encs:
                self.assertRaises(
                    TypeError,
                    u"\u3042".encode,
                    enc,
                    "test.badhandler"
                )
            for (enc, bytes) in (
                ("ascii", "\xff"),
                ("utf-8", "\xff"),
                ("utf-7", "+x-")
            ):
                self.assertRaises(
                    TypeError,
                    bytes.decode,
                    enc,
                    "test.badhandler"
                )

    def test_lookup(self):
        # every built-in handler must be retrievable under its name
        self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
        self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
        self.assertEquals(codecs.replace_errors, codecs.lookup_error("replace"))
        self.assertEquals(
            codecs.xmlcharrefreplace_errors,
            codecs.lookup_error("xmlcharrefreplace")
        )
        self.assertEquals(
            codecs.backslashreplace_errors,
            codecs.lookup_error("backslashreplace")
        )

    def test_unencodablereplacement(self):
        # the replacement returned by the handler is itself unencodable
        # in the target encodings, so encoding must still fail
        def unencrepl(exc):
            if isinstance(exc, UnicodeEncodeError):
                return (u"\u4242", exc.end)
            else:
                raise TypeError("don't know how to handle %r" % exc)
        codecs.register_error("test.unencreplhandler", unencrepl)
        for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
            self.assertRaises(
                UnicodeEncodeError,
                u"\u4242".encode,
                enc,
                "test.unencreplhandler"
            )

    def test_badregistercall(self):
        # enhance coverage of:
        # Modules/_codecsmodule.c::register_error()
        # Python/codecs.c::PyCodec_RegisterError()
        self.assertRaises(TypeError, codecs.register_error, 42)
        self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)

    def test_unknownhandler(self):
        # enhance coverage of:
        # Modules/_codecsmodule.c::lookup_error()
        self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")

    def test_xmlcharrefvalues(self):
        # enhance coverage of:
        # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
        # and inline implementations
        v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
        if sys.maxunicode>=100000:
            v += (100000, 500000, 1000000)
        s = u"".join([unichr(x) for x in v])
        codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
        for enc in ("ascii", "iso-8859-15"):
            for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
                s.encode(enc, err)

    def test_decodehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
        # and callers
        self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")

        def baddecodereturn1(exc):
            return 42
        codecs.register_error("test.baddecodereturn1", baddecodereturn1)
        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")

        def baddecodereturn2(exc):
            return (u"?", None)
        codecs.register_error("test.baddecodereturn2", baddecodereturn2)
        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")

        handler = PosReturn()
        codecs.register_error("test.posreturn", handler.handle)

        # Valid negative position
        handler.pos = -1
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")

        # Valid negative position
        handler.pos = -2
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")

        # Negative position out of bounds
        handler.pos = -3
        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")

        # Valid positive position
        handler.pos = 1
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")

        # Largest valid positive position (one beyond end of input)
        handler.pos = 2
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")

        # Invalid positive position
        handler.pos = 3
        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")

        # Restart at the "0"
        handler.pos = 6
        self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")

        # mapping whose lookup always fails with ValueError
        class D(dict):
            def __getitem__(self, key):
                raise ValueError
        self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
        self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
        self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})

    def test_encodehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and callers
        self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")

        def badencodereturn1(exc):
            return 42
        codecs.register_error("test.badencodereturn1", badencodereturn1)
        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")

        def badencodereturn2(exc):
            return (u"?", None)
        codecs.register_error("test.badencodereturn2", badencodereturn2)
        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")

        handler = PosReturn()
        codecs.register_error("test.posreturn", handler.handle)

        # Valid negative position
        handler.pos = -1
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")

        # Valid negative position
        handler.pos = -2
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")

        # Negative position out of bounds
        handler.pos = -3
        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")

        # Valid positive position
        handler.pos = 1
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")

        # Largest valid positive position (one beyond end of input)
        handler.pos = 2
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")

        # Invalid positive position
        handler.pos = 3
        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")

        handler.pos = 0

        # mapping whose lookup always fails with ValueError
        class D(dict):
            def __getitem__(self, key):
                raise ValueError
        for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
            self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
            self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
            self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})

    def test_translatehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and callers
        # (Unfortunately the errors argument is not directly accessible
        # from Python, so we can't test that much)
        class D(dict):
            def __getitem__(self, key):
                raise ValueError
        self.assertRaises(ValueError, u"\xff".translate, D())
        self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
        self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})

    def test_bug828737(self):
        # translate() with replacements longer than one character, on
        # inputs of growing size; only checks that the calls complete
        charmap = {
            ord("&"): u"&amp;",
            ord("<"): u"&lt;",
            ord(">"): u"&gt;",
            ord('"'): u"&quot;",
        }

        for n in (1, 10, 100, 1000):
            text = u'abc<def>ghi'*n
            text.translate(charmap)

def test_main():
    test.test_support.run_unittest(CodecCallbackTest)

if __name__ == "__main__":
    test_main()

# Generated by PyXR 0.9.4