0001 """ Test script for the Unicode implementation. 0002 0003 Written by Bill Tutt. 0004 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com) 0005 0006 (c) Copyright CNRI, All Rights Reserved. NO WARRANTY. 0007 0008 """#" 0009 0010 import unittest 0011 0012 from test import test_support 0013 0014 class UnicodeNamesTest(unittest.TestCase): 0015 0016 def checkletter(self, name, code): 0017 # Helper that put all \N escapes inside eval'd raw strings, 0018 # to make sure this script runs even if the compiler 0019 # chokes on \N escapes 0020 res = eval(ur'u"\N{%s}"' % name) 0021 self.assertEqual(res, code) 0022 return res 0023 0024 def test_general(self): 0025 # General and case insensitivity test: 0026 chars = [ 0027 "LATIN CAPITAL LETTER T", 0028 "LATIN SMALL LETTER H", 0029 "LATIN SMALL LETTER E", 0030 "SPACE", 0031 "LATIN SMALL LETTER R", 0032 "LATIN CAPITAL LETTER E", 0033 "LATIN SMALL LETTER D", 0034 "SPACE", 0035 "LATIN SMALL LETTER f", 0036 "LATIN CAPITAL LeTtEr o", 0037 "LATIN SMaLl LETTER x", 0038 "SPACE", 0039 "LATIN SMALL LETTER A", 0040 "LATIN SMALL LETTER T", 0041 "LATIN SMALL LETTER E", 0042 "SPACE", 0043 "LATIN SMALL LETTER T", 0044 "LATIN SMALL LETTER H", 0045 "LATIN SMALL LETTER E", 0046 "SpAcE", 0047 "LATIN SMALL LETTER S", 0048 "LATIN SMALL LETTER H", 0049 "LATIN small LETTER e", 0050 "LATIN small LETTER e", 0051 "LATIN SMALL LETTER P", 0052 "FULL STOP" 0053 ] 0054 string = u"The rEd fOx ate the sheep." 0055 0056 self.assertEqual( 0057 u"".join([self.checkletter(*args) for args in zip(chars, string)]), 0058 string 0059 ) 0060 0061 def test_ascii_letters(self): 0062 import unicodedata 0063 0064 for char in "".join(map(chr, xrange(ord("a"), ord("z")))): 0065 name = "LATIN SMALL LETTER %s" % char.upper() 0066 code = unicodedata.lookup(name) 0067 self.assertEqual(unicodedata.name(code), name) 0068 0069 def test_hangul_syllables(self): 0070 self.checkletter("HANGUL SYLLABLE GA", u"\uac00") 0071 self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8") 0072 self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0") 0073 self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8") 0074 self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0") 0075 self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88") 0076 self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370") 0077 self.checkletter("HANGUL SYLLABLE YI", u"\uc758") 0078 self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40") 0079 self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28") 0080 self.checkletter("HANGUL SYLLABLE PAN", u"\ud310") 0081 self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8") 0082 self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3") 0083 0084 import unicodedata 0085 self.assertRaises(ValueError, unicodedata.name, u"\ud7a4") 0086 0087 def test_cjk_unified_ideographs(self): 0088 self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400") 0089 self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5") 0090 self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00") 0091 self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5") 0092 self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000") 0093 self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6") 0094 0095 def test_bmp_characters(self): 0096 import unicodedata 0097 count = 0 0098 for code in xrange(0x10000): 0099 char = unichr(code) 0100 name = unicodedata.name(char, None) 0101 if name is not None: 0102 self.assertEqual(unicodedata.lookup(name), char) 0103 count += 1 0104 0105 def test_misc_symbols(self): 0106 self.checkletter("PILCROW SIGN", u"\u00b6") 0107 self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD") 0108 self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F") 0109 self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41") 0110 0111 def test_errors(self): 0112 import unicodedata 0113 self.assertRaises(TypeError, unicodedata.name) 0114 self.assertRaises(TypeError, unicodedata.name, u'xx') 0115 self.assertRaises(TypeError, unicodedata.lookup) 0116 self.assertRaises(KeyError, unicodedata.lookup, u'unknown') 0117 0118 def test_strict_eror_handling(self): 0119 # bogus character name 0120 self.assertRaises( 0121 UnicodeError, 0122 unicode, "\\N{blah}", 'unicode-escape', 'strict' 0123 ) 0124 # long bogus character name 0125 self.assertRaises( 0126 UnicodeError, 0127 unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict' 0128 ) 0129 # missing closing brace 0130 self.assertRaises( 0131 UnicodeError, 0132 unicode, "\\N{SPACE", 'unicode-escape', 'strict' 0133 ) 0134 # missing opening brace 0135 self.assertRaises( 0136 UnicodeError, 0137 unicode, "\\NSPACE", 'unicode-escape', 'strict' 0138 ) 0139 0140 def test_main(): 0141 test_support.run_unittest(UnicodeNamesTest) 0142 0143 if __name__ == "__main__": 0144 test_main() 0145
Generated by PyXR 0.9.4