0001 from test.test_support import verbose, TestFailed, TestSkipped, verify 0002 import sys 0003 import os 0004 from unicodedata import normalize 0005 0006 TESTDATAFILE = "NormalizationTest-3.2.0" + os.extsep + "txt" 0007 0008 # This search allows using a build directory just inside the source 0009 # directory, and saving just one copy of the test data in the source 0010 # tree, rather than having a copy in each build directory. 0011 # There might be a better way to do this. 0012 0013 for path in [os.path.curdir, os.path.pardir]: 0014 fn = os.path.join(path, TESTDATAFILE) 0015 skip_expected = not os.path.exists(fn) 0016 if not skip_expected: 0017 TESTDATAFILE = fn 0018 break 0019 0020 class RangeError: 0021 pass 0022 0023 def NFC(str): 0024 return normalize("NFC", str) 0025 0026 def NFKC(str): 0027 return normalize("NFKC", str) 0028 0029 def NFD(str): 0030 return normalize("NFD", str) 0031 0032 def NFKD(str): 0033 return normalize("NFKD", str) 0034 0035 def unistr(data): 0036 data = [int(x, 16) for x in data.split(" ")] 0037 for x in data: 0038 if x > sys.maxunicode: 0039 raise RangeError 0040 return u"".join([unichr(x) for x in data]) 0041 0042 def test_main(): 0043 if skip_expected: 0044 raise TestSkipped(TESTDATAFILE + " not found, download from " + 0045 "http://www.unicode.org/Public/3.2-Update/" + TESTDATAFILE) 0046 0047 part1_data = {} 0048 for line in open(TESTDATAFILE): 0049 if '#' in line: 0050 line = line.split('#')[0] 0051 line = line.strip() 0052 if not line: 0053 continue 0054 if line.startswith("@Part"): 0055 part = line 0056 continue 0057 try: 0058 c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]] 0059 except RangeError: 0060 # Skip unsupported characters 0061 continue 0062 0063 if verbose: 0064 print line 0065 0066 # Perform tests 0067 verify(c2 == NFC(c1) == NFC(c2) == NFC(c3), line) 0068 verify(c4 == NFC(c4) == NFC(c5), line) 0069 verify(c3 == NFD(c1) == NFD(c2) == NFD(c3), line) 0070 verify(c5 == NFD(c4) == NFD(c5), line) 0071 verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), 0072 line) 0073 verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), 0074 line) 0075 0076 # Record part 1 data 0077 if part == "@Part1": 0078 part1_data[c1] = 1 0079 0080 # Perform tests for all other data 0081 for c in range(sys.maxunicode+1): 0082 X = unichr(c) 0083 if X in part1_data: 0084 continue 0085 assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c 0086 0087 # Check for bug 834676 0088 normalize('NFC',u'\ud55c\uae00') 0089 0090 if __name__ == "__main__": 0091 test_main() 0092
Generated by PyXR 0.9.4