PyXR

c:\python24\lib \ test \ test_ucn.py



0001 """ Test script for the Unicode implementation.
0002 
0003 Written by Bill Tutt.
0004 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
0005 
0006 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
0007 
0008 """#"
0009 
0010 import unittest
0011 
0012 from test import test_support
0013 
0014 class UnicodeNamesTest(unittest.TestCase):
0015 
0016     def checkletter(self, name, code):
0017         # Helper that put all \N escapes inside eval'd raw strings,
0018         # to make sure this script runs even if the compiler
0019         # chokes on \N escapes
0020         res = eval(ur'u"\N{%s}"' % name)
0021         self.assertEqual(res, code)
0022         return res
0023 
0024     def test_general(self):
0025         # General and case insensitivity test:
0026         chars = [
0027             "LATIN CAPITAL LETTER T",
0028             "LATIN SMALL LETTER H",
0029             "LATIN SMALL LETTER E",
0030             "SPACE",
0031             "LATIN SMALL LETTER R",
0032             "LATIN CAPITAL LETTER E",
0033             "LATIN SMALL LETTER D",
0034             "SPACE",
0035             "LATIN SMALL LETTER f",
0036             "LATIN CAPITAL LeTtEr o",
0037             "LATIN SMaLl LETTER x",
0038             "SPACE",
0039             "LATIN SMALL LETTER A",
0040             "LATIN SMALL LETTER T",
0041             "LATIN SMALL LETTER E",
0042             "SPACE",
0043             "LATIN SMALL LETTER T",
0044             "LATIN SMALL LETTER H",
0045             "LATIN SMALL LETTER E",
0046             "SpAcE",
0047             "LATIN SMALL LETTER S",
0048             "LATIN SMALL LETTER H",
0049             "LATIN small LETTER e",
0050             "LATIN small LETTER e",
0051             "LATIN SMALL LETTER P",
0052             "FULL STOP"
0053         ]
0054         string = u"The rEd fOx ate the sheep."
0055 
0056         self.assertEqual(
0057             u"".join([self.checkletter(*args) for args in zip(chars, string)]),
0058             string
0059         )
0060 
0061     def test_ascii_letters(self):
0062         import unicodedata
0063 
0064         for char in "".join(map(chr, xrange(ord("a"), ord("z")))):
0065             name = "LATIN SMALL LETTER %s" % char.upper()
0066             code = unicodedata.lookup(name)
0067             self.assertEqual(unicodedata.name(code), name)
0068 
0069     def test_hangul_syllables(self):
0070         self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
0071         self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
0072         self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
0073         self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
0074         self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
0075         self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
0076         self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
0077         self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
0078         self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
0079         self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
0080         self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
0081         self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
0082         self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
0083 
0084         import unicodedata
0085         self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
0086 
0087     def test_cjk_unified_ideographs(self):
0088         self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
0089         self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
0090         self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
0091         self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
0092         self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
0093         self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
0094 
0095     def test_bmp_characters(self):
0096         import unicodedata
0097         count = 0
0098         for code in xrange(0x10000):
0099             char = unichr(code)
0100             name = unicodedata.name(char, None)
0101             if name is not None:
0102                 self.assertEqual(unicodedata.lookup(name), char)
0103                 count += 1
0104 
0105     def test_misc_symbols(self):
0106         self.checkletter("PILCROW SIGN", u"\u00b6")
0107         self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
0108         self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
0109         self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
0110 
0111     def test_errors(self):
0112         import unicodedata
0113         self.assertRaises(TypeError, unicodedata.name)
0114         self.assertRaises(TypeError, unicodedata.name, u'xx')
0115         self.assertRaises(TypeError, unicodedata.lookup)
0116         self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
0117 
0118     def test_strict_eror_handling(self):
0119         # bogus character name
0120         self.assertRaises(
0121             UnicodeError,
0122             unicode, "\\N{blah}", 'unicode-escape', 'strict'
0123         )
0124         # long bogus character name
0125         self.assertRaises(
0126             UnicodeError,
0127             unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
0128         )
0129         # missing closing brace
0130         self.assertRaises(
0131             UnicodeError,
0132             unicode, "\\N{SPACE", 'unicode-escape', 'strict'
0133         )
0134         # missing opening brace
0135         self.assertRaises(
0136             UnicodeError,
0137             unicode, "\\NSPACE", 'unicode-escape', 'strict'
0138         )
0139 
def test_main():
    """Run every test in UnicodeNamesTest via test_support.run_unittest."""
    test_support.run_unittest(UnicodeNamesTest)
0142 
# Script entry point: run the suite when this file is executed directly
# rather than imported.
if __name__ == "__main__":
    test_main()
0145 

Generated by PyXR 0.9.4
SourceForge.net Logo