PyXR

c:\python24\lib \ test \ test_codeccallbacks.py



0001 import test.test_support, unittest
0002 import sys, codecs, htmlentitydefs, unicodedata
0003 
class PosReturn:
    """Configurable codec error callback that resumes at a preset position.

    ``handle`` always answers with the replacement text ``<?>`` together
    with whatever value ``pos`` held when it was called, so a caller can
    set ``pos`` beforehand to steer where the codec restarts.
    """

    def __init__(self):
        self.pos = 0

    def handle(self, exc):
        """Return ``(u"<?>", pos)`` for *exc*, arming termination if needed."""
        requested = self.pos
        # Translate a negative position into an offset from the start of input.
        if requested < 0:
            absolute = len(exc.object) + requested
        else:
            absolute = requested
        # A restart position that does not move past the failing spot makes
        # no progress; point the *next* call at the end of the input so that
        # we do not end up in an endless loop.
        if not absolute > exc.start:
            self.pos = len(exc.object)
        return (u"<?>", requested)
0020 
0021 class CodecCallbackTest(unittest.TestCase):
0022 
0023     def test_xmlcharrefreplace(self):
0024         # replace unencodable characters with numeric character entities.
0025         # For ascii, latin-1 and charmaps this is completely implemented
0026         # in C and should be reasonably fast.
0027         s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
0028         self.assertEqual(
0029             s.encode("ascii", "xmlcharrefreplace"),
0030             "&#12473;&#12497;&#12514; &#228;nd eggs"
0031         )
0032         self.assertEqual(
0033             s.encode("latin-1", "xmlcharrefreplace"),
0034             "&#12473;&#12497;&#12514; \xe4nd eggs"
0035         )
0036 
0037     def test_xmlcharnamereplace(self):
0038         # This time use a named character entity for unencodable
0039         # characters, if one is available.
0040 
0041         def xmlcharnamereplace(exc):
0042             if not isinstance(exc, UnicodeEncodeError):
0043                 raise TypeError("don't know how to handle %r" % exc)
0044             l = []
0045             for c in exc.object[exc.start:exc.end]:
0046                 try:
0047                     l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
0048                 except KeyError:
0049                     l.append(u"&#%d;" % ord(c))
0050             return (u"".join(l), exc.end)
0051 
0052         codecs.register_error(
0053             "test.xmlcharnamereplace", xmlcharnamereplace)
0054 
0055         sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
0056         sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
0057         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
0058         sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
0059         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
0060         sout = "\xab&real;\xbb = &lang;&#4660;\xa4&rang;"
0061         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
0062 
0063     def test_uninamereplace(self):
0064         # We're using the names from the unicode database this time,
0065         # and we're doing "syntax highlighting" here, i.e. we include
0066         # the replaced text in ANSI escape sequences. For this it is
0067         # useful that the error handler is not called for every single
0068         # unencodable character, but for a complete sequence of
0069         # unencodable characters, otherwise we would output many
0070         # unnecessary escape sequences.
0071 
0072         def uninamereplace(exc):
0073             if not isinstance(exc, UnicodeEncodeError):
0074                 raise TypeError("don't know how to handle %r" % exc)
0075             l = []
0076             for c in exc.object[exc.start:exc.end]:
0077                 l.append(unicodedata.name(c, u"0x%x" % ord(c)))
0078             return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
0079 
0080         codecs.register_error(
0081             "test.uninamereplace", uninamereplace)
0082 
0083         sin = u"\xac\u1234\u20ac\u8000"
0084         sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
0085         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
0086 
0087         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
0088         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
0089 
0090         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
0091         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
0092 
0093     def test_backslashescape(self):
0094         # Does the same as the "unicode-escape" encoding, but with different
0095         # base encodings.
0096         sin = u"a\xac\u1234\u20ac\u8000"
0097         if sys.maxunicode > 0xffff:
0098             sin += unichr(sys.maxunicode)
0099         sout = "a\\xac\\u1234\\u20ac\\u8000"
0100         if sys.maxunicode > 0xffff:
0101             sout += "\\U%08x" % sys.maxunicode
0102         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
0103 
0104         sout = "a\xac\\u1234\\u20ac\\u8000"
0105         if sys.maxunicode > 0xffff:
0106             sout += "\\U%08x" % sys.maxunicode
0107         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
0108 
0109         sout = "a\xac\\u1234\xa4\\u8000"
0110         if sys.maxunicode > 0xffff:
0111             sout += "\\U%08x" % sys.maxunicode
0112         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
0113 
0114     def test_relaxedutf8(self):
0115         # This is the test for a decoding callback handler,
0116         # that relaxes the UTF-8 minimal encoding restriction.
0117         # A null byte that is encoded as "\xc0\x80" will be
0118         # decoded as a null byte. All other illegal sequences
0119         # will be handled strictly.
0120         def relaxedutf8(exc):
0121             if not isinstance(exc, UnicodeDecodeError):
0122                 raise TypeError("don't know how to handle %r" % exc)
0123             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
0124                 return (u"\x00", exc.start+2) # retry after two bytes
0125             else:
0126                 raise exc
0127 
0128         codecs.register_error(
0129             "test.relaxedutf8", relaxedutf8)
0130 
0131         sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
0132         sout = u"a\x00b\x00c\xfc\x00\x00"
0133         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
0134         sin = "\xc0\x80\xc0\x81"
0135         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
0136 
0137     def test_charmapencode(self):
0138         # For charmap encodings the replacement string will be
0139         # mapped through the encoding again. This means, that
0140         # to be able to use e.g. the "replace" handler, the
0141         # charmap has to have a mapping for "?".
0142         charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
0143         sin = u"abc"
0144         sout = "AABBCC"
0145         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
0146 
0147         sin = u"abcA"
0148         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
0149 
0150         charmap[ord("?")] = "XYZ"
0151         sin = u"abcDEF"
0152         sout = "AABBCCXYZXYZXYZ"
0153         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
0154 
0155         charmap[ord("?")] = u"XYZ"
0156         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
0157 
0158         charmap[ord("?")] = u"XYZ"
0159         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
0160 
0161     def test_callbacks(self):
0162         def handler1(exc):
0163             if not isinstance(exc, UnicodeEncodeError) \
0164                and not isinstance(exc, UnicodeDecodeError):
0165                 raise TypeError("don't know how to handle %r" % exc)
0166             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
0167             return (u"[%s]" % u"".join(l), exc.end)
0168 
0169         codecs.register_error("test.handler1", handler1)
0170 
0171         def handler2(exc):
0172             if not isinstance(exc, UnicodeDecodeError):
0173                 raise TypeError("don't know how to handle %r" % exc)
0174             l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
0175             return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
0176 
0177         codecs.register_error("test.handler2", handler2)
0178 
0179         s = "\x00\x81\x7f\x80\xff"
0180 
0181         self.assertEqual(
0182             s.decode("ascii", "test.handler1"),
0183             u"\x00[<129>]\x7f[<128>][<255>]"
0184         )
0185         self.assertEqual(
0186             s.decode("ascii", "test.handler2"),
0187             u"\x00[<129>][<128>]"
0188         )
0189 
0190         self.assertEqual(
0191             "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
0192             u"\u3042[<92><117><51><120>]xx"
0193         )
0194 
0195         self.assertEqual(
0196             "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
0197             u"\u3042[<92><117><51><120><120>]"
0198         )
0199 
0200         self.assertEqual(
0201             codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
0202             u"z[<98>][<99>]"
0203         )
0204 
0205         self.assertEqual(
0206             u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
0207             u"g[<252><223>]rk"
0208         )
0209 
0210         self.assertEqual(
0211             u"g\xfc\xdf".encode("ascii", "test.handler1"),
0212             u"g[<252><223>]"
0213         )
0214 
0215     def test_longstrings(self):
0216         # test long strings to check for memory overflow problems
0217         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
0218         # register the handlers under different names,
0219         # to prevent the codec from recognizing the name
0220         for err in errors:
0221             codecs.register_error("test." + err, codecs.lookup_error(err))
0222         l = 1000
0223         errors += [ "test." + err for err in errors ]
0224         for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
0225             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
0226                 for err in errors:
0227                     try:
0228                         uni.encode(enc, err)
0229                     except UnicodeError:
0230                         pass
0231 
0232     def check_exceptionobjectargs(self, exctype, args, msg):
0233         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
0234         # check with one missing argument
0235         self.assertRaises(TypeError, exctype, *args[:-1])
0236         # check with one argument too much
0237         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
0238         # check with one argument of the wrong type
0239         wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
0240         for i in xrange(len(args)):
0241             for wrongarg in wrongargs:
0242                 if type(wrongarg) is type(args[i]):
0243                     continue
0244                 # build argument array
0245                 callargs = []
0246                 for j in xrange(len(args)):
0247                     if i==j:
0248                         callargs.append(wrongarg)
0249                     else:
0250                         callargs.append(args[i])
0251                 self.assertRaises(TypeError, exctype, *callargs)
0252 
0253         # check with the correct number and type of arguments
0254         exc = exctype(*args)
0255         self.assertEquals(str(exc), msg)
0256 
0257     def test_unicodeencodeerror(self):
0258         self.check_exceptionobjectargs(
0259             UnicodeEncodeError,
0260             ["ascii", u"g\xfcrk", 1, 2, "ouch"],
0261             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
0262         )
0263         self.check_exceptionobjectargs(
0264             UnicodeEncodeError,
0265             ["ascii", u"g\xfcrk", 1, 4, "ouch"],
0266             "'ascii' codec can't encode characters in position 1-3: ouch"
0267         )
0268         self.check_exceptionobjectargs(
0269             UnicodeEncodeError,
0270             ["ascii", u"\xfcx", 0, 1, "ouch"],
0271             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
0272         )
0273         self.check_exceptionobjectargs(
0274             UnicodeEncodeError,
0275             ["ascii", u"\u0100x", 0, 1, "ouch"],
0276             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
0277         )
0278         self.check_exceptionobjectargs(
0279             UnicodeEncodeError,
0280             ["ascii", u"\uffffx", 0, 1, "ouch"],
0281             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
0282         )
0283         if sys.maxunicode > 0xffff:
0284             self.check_exceptionobjectargs(
0285                 UnicodeEncodeError,
0286                 ["ascii", u"\U00010000x", 0, 1, "ouch"],
0287                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
0288             )
0289 
0290     def test_unicodedecodeerror(self):
0291         self.check_exceptionobjectargs(
0292             UnicodeDecodeError,
0293             ["ascii", "g\xfcrk", 1, 2, "ouch"],
0294             "'ascii' codec can't decode byte 0xfc in position 1: ouch"
0295         )
0296         self.check_exceptionobjectargs(
0297             UnicodeDecodeError,
0298             ["ascii", "g\xfcrk", 1, 3, "ouch"],
0299             "'ascii' codec can't decode bytes in position 1-2: ouch"
0300         )
0301 
0302     def test_unicodetranslateerror(self):
0303         self.check_exceptionobjectargs(
0304             UnicodeTranslateError,
0305             [u"g\xfcrk", 1, 2, "ouch"],
0306             "can't translate character u'\\xfc' in position 1: ouch"
0307         )
0308         self.check_exceptionobjectargs(
0309             UnicodeTranslateError,
0310             [u"g\u0100rk", 1, 2, "ouch"],
0311             "can't translate character u'\\u0100' in position 1: ouch"
0312         )
0313         self.check_exceptionobjectargs(
0314             UnicodeTranslateError,
0315             [u"g\uffffrk", 1, 2, "ouch"],
0316             "can't translate character u'\\uffff' in position 1: ouch"
0317         )
0318         if sys.maxunicode > 0xffff:
0319             self.check_exceptionobjectargs(
0320                 UnicodeTranslateError,
0321                 [u"g\U00010000rk", 1, 2, "ouch"],
0322                 "can't translate character u'\\U00010000' in position 1: ouch"
0323             )
0324         self.check_exceptionobjectargs(
0325             UnicodeTranslateError,
0326             [u"g\xfcrk", 1, 3, "ouch"],
0327             "can't translate characters in position 1-2: ouch"
0328         )
0329 
0330     def test_badandgoodstrictexceptions(self):
0331         # "strict" complains about a non-exception passed in
0332         self.assertRaises(
0333             TypeError,
0334             codecs.strict_errors,
0335             42
0336         )
0337         # "strict" complains about the wrong exception type
0338         self.assertRaises(
0339             Exception,
0340             codecs.strict_errors,
0341             Exception("ouch")
0342         )
0343 
0344         # If the correct exception is passed in, "strict" raises it
0345         self.assertRaises(
0346             UnicodeEncodeError,
0347             codecs.strict_errors,
0348             UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
0349         )
0350 
0351     def test_badandgoodignoreexceptions(self):
0352         # "ignore" complains about a non-exception passed in
0353         self.assertRaises(
0354            TypeError,
0355            codecs.ignore_errors,
0356            42
0357         )
0358         # "ignore" complains about the wrong exception type
0359         self.assertRaises(
0360            TypeError,
0361            codecs.ignore_errors,
0362            UnicodeError("ouch")
0363         )
0364         # If the correct exception is passed in, "ignore" returns an empty replacement
0365         self.assertEquals(
0366             codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
0367             (u"", 1)
0368         )
0369         self.assertEquals(
0370             codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
0371             (u"", 1)
0372         )
0373         self.assertEquals(
0374             codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
0375             (u"", 1)
0376         )
0377 
0378     def test_badandgoodreplaceexceptions(self):
0379         # "replace" complains about a non-exception passed in
0380         self.assertRaises(
0381            TypeError,
0382            codecs.replace_errors,
0383            42
0384         )
0385         # "replace" complains about the wrong exception type
0386         self.assertRaises(
0387            TypeError,
0388            codecs.replace_errors,
0389            UnicodeError("ouch")
0390         )
0391         # If the correct exception is passed in, "replace" returns "?" or U+FFFD
0392         self.assertEquals(
0393             codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
0394             (u"?", 1)
0395         )
0396         self.assertEquals(
0397             codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
0398             (u"\ufffd", 1)
0399         )
0400         self.assertEquals(
0401             codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
0402             (u"\ufffd", 1)
0403         )
0404 
0405     def test_badandgoodxmlcharrefreplaceexceptions(self):
0406         # "xmlcharrefreplace" complains about a non-exception passed in
0407         self.assertRaises(
0408            TypeError,
0409            codecs.xmlcharrefreplace_errors,
0410            42
0411         )
0412         # "xmlcharrefreplace" complains about the wrong exception types
0413         self.assertRaises(
0414            TypeError,
0415            codecs.xmlcharrefreplace_errors,
0416            UnicodeError("ouch")
0417         )
0418         # "xmlcharrefreplace" can only be used for encoding
0419         self.assertRaises(
0420             TypeError,
0421             codecs.xmlcharrefreplace_errors,
0422             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
0423         )
0424         self.assertRaises(
0425             TypeError,
0426             codecs.xmlcharrefreplace_errors,
0427             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
0428         )
0429         # Use the correct exception
0430         self.assertEquals(
0431             codecs.xmlcharrefreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
0432             (u"&#%d;" % 0x3042, 1)
0433         )
0434 
0435     def test_badandgoodbackslashreplaceexceptions(self):
0436         # "backslashreplace" complains about a non-exception passed in
0437         self.assertRaises(
0438            TypeError,
0439            codecs.backslashreplace_errors,
0440            42
0441         )
0442         # "backslashreplace" complains about the wrong exception types
0443         self.assertRaises(
0444            TypeError,
0445            codecs.backslashreplace_errors,
0446            UnicodeError("ouch")
0447         )
0448         # "backslashreplace" can only be used for encoding
0449         self.assertRaises(
0450             TypeError,
0451             codecs.backslashreplace_errors,
0452             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
0453         )
0454         self.assertRaises(
0455             TypeError,
0456             codecs.backslashreplace_errors,
0457             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
0458         )
0459         # Use the correct exception
0460         self.assertEquals(
0461             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
0462             (u"\\u3042", 1)
0463         )
0464         self.assertEquals(
0465             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
0466             (u"\\x00", 1)
0467         )
0468         self.assertEquals(
0469             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
0470             (u"\\xff", 1)
0471         )
0472         self.assertEquals(
0473             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
0474             (u"\\u0100", 1)
0475         )
0476         self.assertEquals(
0477             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
0478             (u"\\uffff", 1)
0479         )
0480         if sys.maxunicode>0xffff:
0481             self.assertEquals(
0482                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
0483                 (u"\\U00010000", 1)
0484             )
0485             self.assertEquals(
0486                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
0487                 (u"\\U0010ffff", 1)
0488             )
0489 
0490     def test_badhandlerresults(self):
0491         results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
0492         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
0493 
0494         for res in results:
0495             codecs.register_error("test.badhandler", lambda: res)
0496             for enc in encs:
0497                 self.assertRaises(
0498                     TypeError,
0499                     u"\u3042".encode,
0500                     enc,
0501                     "test.badhandler"
0502                 )
0503             for (enc, bytes) in (
0504                 ("ascii", "\xff"),
0505                 ("utf-8", "\xff"),
0506                 ("utf-7", "+x-")
0507             ):
0508                 self.assertRaises(
0509                     TypeError,
0510                     bytes.decode,
0511                     enc,
0512                     "test.badhandler"
0513                 )
0514 
0515     def test_lookup(self):
0516         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
0517         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
0518         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
0519         self.assertEquals(
0520             codecs.xmlcharrefreplace_errors,
0521             codecs.lookup_error("xmlcharrefreplace")
0522         )
0523         self.assertEquals(
0524             codecs.backslashreplace_errors,
0525             codecs.lookup_error("backslashreplace")
0526         )
0527 
0528     def test_unencodablereplacement(self):
0529         def unencrepl(exc):
0530             if isinstance(exc, UnicodeEncodeError):
0531                 return (u"\u4242", exc.end)
0532             else:
0533                 raise TypeError("don't know how to handle %r" % exc)
0534         codecs.register_error("test.unencreplhandler", unencrepl)
0535         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
0536             self.assertRaises(
0537                 UnicodeEncodeError,
0538                 u"\u4242".encode,
0539                 enc,
0540                 "test.unencreplhandler"
0541             )
0542 
0543     def test_badregistercall(self):
0544         # enhance coverage of:
0545         # Modules/_codecsmodule.c::register_error()
0546         # Python/codecs.c::PyCodec_RegisterError()
0547         self.assertRaises(TypeError, codecs.register_error, 42)
0548         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
0549 
0550     def test_unknownhandler(self):
0551         # enhance coverage of:
0552         # Modules/_codecsmodule.c::lookup_error()
0553         self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
0554 
0555     def test_xmlcharrefvalues(self):
0556         # enhance coverage of:
0557         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
0558         # and inline implementations
0559         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
0560         if sys.maxunicode>=100000:
0561             v += (100000, 500000, 1000000)
0562         s = u"".join([unichr(x) for x in v])
0563         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
0564         for enc in ("ascii", "iso-8859-15"):
0565             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
0566                 s.encode(enc, err)
0567 
0568     def test_decodehelper(self):
0569         # enhance coverage of:
0570         # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
0571         # and callers
0572         self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")
0573 
0574         def baddecodereturn1(exc):
0575             return 42
0576         codecs.register_error("test.baddecodereturn1", baddecodereturn1)
0577         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
0578         self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
0579         self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
0580         self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
0581         self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
0582         self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
0583 
0584         def baddecodereturn2(exc):
0585             return (u"?", None)
0586         codecs.register_error("test.baddecodereturn2", baddecodereturn2)
0587         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
0588 
0589         handler = PosReturn()
0590         codecs.register_error("test.posreturn", handler.handle)
0591 
0592         # Valid negative position
0593         handler.pos = -1
0594         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
0595 
0596         # Valid negative position
0597         handler.pos = -2
0598         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
0599 
0600         # Negative position out of bounds
0601         handler.pos = -3
0602         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
0603 
0604         # Valid positive position
0605         handler.pos = 1
0606         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
0607 
0608         # Largest valid positive position (one beyond end of input)
0609         handler.pos = 2
0610         self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")
0611 
0612         # Invalid positive position
0613         handler.pos = 3
0614         self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")
0615 
0616         # Restart at the "0"
0617         handler.pos = 6
0618         self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
0619 
0620         class D(dict):
0621             def __getitem__(self, key):
0622                 raise ValueError
0623         self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
0624         self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
0625         self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
0626 
0627     def test_encodehelper(self):
0628         # enhance coverage of:
0629         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
0630         # and callers
0631         self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
0632 
0633         def badencodereturn1(exc):
0634             return 42
0635         codecs.register_error("test.badencodereturn1", badencodereturn1)
0636         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
0637 
0638         def badencodereturn2(exc):
0639             return (u"?", None)
0640         codecs.register_error("test.badencodereturn2", badencodereturn2)
0641         self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
0642 
0643         handler = PosReturn()
0644         codecs.register_error("test.posreturn", handler.handle)
0645 
0646         # Valid negative position
0647         handler.pos = -1
0648         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
0649 
0650         # Valid negative position
0651         handler.pos = -2
0652         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
0653 
0654         # Negative position out of bounds
0655         handler.pos = -3
0656         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
0657 
0658         # Valid positive position
0659         handler.pos = 1
0660         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
0661 
0662         # Largest valid positive position (one beyond end of input)
0663         handler.pos = 2
0664         self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
0665 
0666         # Invalid positive position
0667         handler.pos = 3
0668         self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
0669 
0670         handler.pos = 0
0671 
0672         class D(dict):
0673             def __getitem__(self, key):
0674                 raise ValueError
0675         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
0676             self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
0677             self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
0678             self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
0679 
0680     def test_translatehelper(self):
0681         # enhance coverage of:
0682         # Objects/unicodeobject.c::unicode_translate_call_errorhandler()
0683         # and callers
0684         # (Unfortunately the errors argument is not directly accessible
0685         # from Python, so we can't test that much)
0686         class D(dict):
0687             def __getitem__(self, key):
0688                 raise ValueError
0689         self.assertRaises(ValueError, u"\xff".translate, D())
0690         self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
0691         self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
0692 
0693     def test_bug828737(self):
0694         charmap = {
0695             ord("&"): u"&amp;",
0696             ord("<"): u"&lt;",
0697             ord(">"): u"&gt;",
0698             ord('"'): u"&quot;",
0699         }
0700 
0701         for n in (1, 10, 100, 1000):
0702             text = u'abc<def>ghi'*n
0703             text.translate(charmap)
0704 
def test_main():
    """Run all CodecCallbackTest cases through test.test_support.run_unittest()."""
    suite_class = CodecCallbackTest
    test.test_support.run_unittest(suite_class)
0707 
0708 if __name__ == "__main__":
0709     test_main()
0710 

Generated by PyXR 0.9.4
SourceForge.net Logo