PyXR

c:\python24\lib\test\test_normalization.py



0001 from test.test_support import verbose, TestFailed, TestSkipped, verify
0002 import sys
0003 import os
0004 from unicodedata import normalize
0005 
# File name of the normalization test data; replaced below by the path at
# which the file was actually found, if any.
TESTDATAFILE = "NormalizationTest-3.2.0" + os.extsep + "txt"

# Look in the current directory first and then in its parent.  Checking the
# parent lets a build directory that sits just inside the source directory
# share the single copy of the test data kept in the source tree, instead of
# needing its own copy.  There might be a better way to do this.

for path in (os.path.curdir, os.path.pardir):
    fn = os.path.join(path, TESTDATAFILE)
    if os.path.exists(fn):
        # Found: remember where, and tell test_main() not to skip.
        skip_expected = False
        TESTDATAFILE = fn
        break
    # Not found at this location; stays True if no location has the file.
    skip_expected = True
0019 
class RangeError(Exception):
    """Raised by unistr() when a code point is greater than sys.maxunicode.

    Derives from Exception so that it is a proper exception class rather
    than a bare class that merely happens to be raised.
    """
0022 
def NFC(str):
    """Return `str` converted to normal form NFC by unicodedata.normalize."""
    form = "NFC"
    return normalize(form, str)
0025 
def NFKC(str):
    """Return `str` converted to normal form NFKC by unicodedata.normalize."""
    form = "NFKC"
    return normalize(form, str)
0028 
def NFD(str):
    """Return `str` converted to normal form NFD by unicodedata.normalize."""
    form = "NFD"
    return normalize(form, str)
0031 
def NFKD(str):
    """Return `str` converted to normal form NFKD by unicodedata.normalize."""
    form = "NFKD"
    return normalize(form, str)
0034 
def unistr(data):
    """Build a unicode string from space-separated hexadecimal code points.

    `data` is split on single spaces and each field is parsed as a base-16
    integer.  Raises RangeError if any parsed value is greater than
    sys.maxunicode; otherwise returns the characters joined in order.
    """
    # Phase 1: parse every field before any range check is made.
    codepoints = []
    for field in data.split(" "):
        codepoints.append(int(field, 16))

    # Phase 2: reject the whole string if one code point is unsupported.
    limit = sys.maxunicode
    for codepoint in codepoints:
        if codepoint > limit:
            raise RangeError

    # Phase 3: all values are in range, so build the string.
    return u"".join(map(unichr, codepoints))
0041 
def test_main():
    """Check unicodedata.normalize against the normalization test data file.

    Each data line holds semicolon-separated columns of space-separated
    hexadecimal code points; the first five (c1..c5) are converted with
    unistr() and the NFC, NFD, NFKC and NFKD forms of the columns are
    compared with the expected columns.  Lines whose code points exceed
    sys.maxunicode are skipped.  Afterwards every code point that did not
    appear as c1 of a line in the "@Part1" section is checked to be left
    unchanged by all four normalization forms.

    Raises TestSkipped if the data file was not found, and TestFailed
    (through verify) if any check does not hold.
    """
    if skip_expected:
        raise TestSkipped(TESTDATAFILE + " not found, download from " +
                    "http://www.unicode.org/Public/3.2-Update/" + TESTDATAFILE)

    # Section header most recently seen; None until the first "@Part" line so
    # that data lines appearing before any header do not hit an unbound name.
    part = None
    part1_data = {}

    datafile = open(TESTDATAFILE)
    try:
        for line in datafile:
            # Drop trailing comments and surrounding whitespace.
            if '#' in line:
                line = line.split('#')[0]
            line = line.strip()
            if not line:
                continue
            if line.startswith("@Part"):
                part = line
                continue
            try:
                # The text after the last ';' is not a column, hence [:-1].
                c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
            except RangeError:
                # Skip unsupported characters
                continue

            if verbose:
                # Parenthesised single argument: same output as "print line".
                print(line)

            # Perform tests
            verify(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
            verify(c4 ==  NFC(c4) ==  NFC(c5), line)
            verify(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
            verify(c5 ==  NFD(c4) ==  NFD(c5), line)
            verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5),
                   line)
            verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5),
                   line)

            # Record part 1 data
            if part == "@Part1":
                part1_data[c1] = 1
    finally:
        # Close the data file even when a check fails part-way through.
        datafile.close()

    # Perform tests for all other data
    for c in range(sys.maxunicode+1):
        X = unichr(c)
        if X in part1_data:
            continue
        # Use verify rather than assert so the check still runs under -O and
        # fails the same way as the per-line checks above.
        verify(X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c)

    # Check for bug 834676
    normalize('NFC',u'\ud55c\uae00')
0089 
# Run the test directly when this file is executed as a script.
if __name__ == "__main__":
    test_main()
0092 

Generated by PyXR 0.9.4
SourceForge.net Logo