PyXR

c:\python24\lib\test\regex_tests.py



# Regex test suite and benchmark suite v1.5a2
# Due to the use of r"aw" strings, this file will
# only work with Python 1.5 or higher.
0004 
# The three possible outcomes for each pattern in the test table:
#   SUCCEED      -- the pattern compiles and matches the subject string
#   FAIL         -- the pattern compiles but does not match
#   SYNTAX_ERROR -- the pattern is rejected at compile time
SUCCEED, FAIL, SYNTAX_ERROR = range(3)
0007 
# Benchmark suite (needs expansion)
#
# The benchmark suite measures speed only; it does not check correctness.
# Each entry is a 2-tuple:
#   element 0: the regex pattern
#           1: a string the pattern is matched against
# The benchmarking code embeds the second string inside several sizes of
# padding, to test how regex matching performs on large strings.

benchmarks = [
    ('Python', 'Python'),                      # Simple text literal
    ('.*Python', 'Python'),                    # Bad text literal
    ('.*Python.*', 'Python'),                  # Worse text literal
    ('.*\\(Python\\)', 'Python'),              # Bad text literal with grouping

    # Ungrouped alternation.  A bare '(' is a literal character in this
    # syntax (groups are written '\\(' ... '\\)'), so the pattern must not
    # begin with one or the first alternative becomes the text '(Python'.
    ('Python\\|Perl\\|Tcl', 'Perl'),           # Alternation
    ('\\(Python\\|Perl\\|Tcl\\)', 'Perl'),     # Grouped alternation
    ('\\(Python\\)\\1', 'PythonPython'),       # Backreference
#   ('\\([0a-z][a-z]*,\\)+', 'a5,b7,c9,'),     # Disable the fastmap optimization
    ('\\([a-z][a-z0-9]*,\\)+', 'a5,b7,c9,'),   # A few sets
]
0028 
# Test suite (for verifying correctness)
#
# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
# complete tuple are:
# element 0: a string containing the pattern
#         1: the string to match against the pattern
#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match), and "g1", "g2", ...
#            up to "g10", which contain the contents of each group, or the
#            string 'None' if the group wasn't given a value.
#         4: the expected result of evaluating the expression.
#            If the two don't match, an error is reported.
#
# If the regex isn't expected to work, the latter two elements can be omitted.
#
# Every backslash in a pattern is written doubled ('\\(' rather than '\(')
# so that no string literal relies on an unrecognized escape sequence.

tests = [
    ('abc', 'abc', SUCCEED,
     'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED,
     'found', 'abc'),
    ('abc', 'ababc', SUCCEED,
     'found', 'abc'),
    ('ab*c', 'abc', SUCCEED,
     'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED,
     'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED,
     'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED,
     'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED,
     'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED,
     'found', 'abbbbc'),
    ('ab?bc', 'abbc', SUCCEED,
     'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED,
     'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED,
     'found', 'abc'),
    ('^abc$', 'abc', SUCCEED,
     'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED,
     'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED,
     'found', 'abc'),
    ('^', 'abc', SUCCEED,
     'found+"-"', '-'),
    ('$', 'abc', SUCCEED,
     'found+"-"', '-'),
    ('a.c', 'abc', SUCCEED,
     'found', 'abc'),
    ('a.c', 'axc', SUCCEED,
     'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED,
     'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED,
     'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED,
     'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED,
     'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED,
     'found', 'a-'),
    ('a[b-]', 'a-', SUCCEED,
     'found', 'a-'),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a\\', '-', SYNTAX_ERROR),
    ('abc\\)', '-', SYNTAX_ERROR),
    ('\\(abc', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED,
     'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED,
     'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED,
     'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED,
     'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED,
     'found', 'adc'),
    ('\\ba\\b', 'a-', SUCCEED,
     '"-"', '-'),
    ('\\ba\\b', '-a', SUCCEED,
     '"-"', '-'),
    ('\\ba\\b', '-a-', SUCCEED,
     '"-"', '-'),
    ('\\by\\b', 'xy', FAIL),
    ('\\by\\b', 'yz', FAIL),
    ('\\by\\b', 'xyz', FAIL),
    ('ab\\|cd', 'abc', SUCCEED,
     'found', 'ab'),
    ('ab\\|cd', 'abcd', SUCCEED,
     'found', 'ab'),
    ('\\(\\)ef', 'def', SUCCEED,
     'found+"-"+g1', 'ef-'),
    ('$b', 'b', FAIL),
    ('a(b', 'a(b', SUCCEED,
     'found+"-"+g1', 'a(b-None'),
    ('a(*b', 'ab', SUCCEED,
     'found', 'ab'),
    ('a(*b', 'a((b', SUCCEED,
     'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED,
     'found', 'a\\b'),
    ('\\(\\(a\\)\\)', 'abc', SUCCEED,
     'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('\\(a\\)b\\(c\\)', 'abc', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED,
     'found', 'abc'),
    ('\\(a+\\|b\\)*', 'ab', SUCCEED,
     'found+"-"+g1', 'ab-b'),
    ('\\(a+\\|b\\)+', 'ab', SUCCEED,
     'found+"-"+g1', 'ab-b'),
    ('\\(a+\\|b\\)?', 'ab', SUCCEED,
     'found+"-"+g1', 'a-a'),
    ('\\)\\(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED,
     'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED,
     'found', ''),
    ('a\\|b\\|c\\|d\\|e', 'e', SUCCEED,
     'found', 'e'),
    ('\\(a\\|b\\|c\\|d\\|e\\)f', 'ef', SUCCEED,
     'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED,
     'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED,
     'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED,
     'found', 'a'),
    ('\\(ab\\|cd\\)e', 'abcde', SUCCEED,
     'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED,
     'found', 'hij'),
    ('^\\(ab\\|cd\\)e', 'abcde', FAIL,
     'xg1y', 'xy'),
    ('\\(abc\\|\\)ef', 'abcdef', SUCCEED,
     'found+"-"+g1', 'ef-'),
    ('\\(a\\|b\\)c*d', 'abcd', SUCCEED,
     'found+"-"+g1', 'bcd-b'),
    ('\\(ab\\|ab*\\)bc', 'abc', SUCCEED,
     'found+"-"+g1', 'abc-a'),
    ('a\\([bc]*\\)c*', 'abc', SUCCEED,
     'found+"-"+g1', 'abc-bc'),
    ('a\\([bc]*\\)\\(c*d\\)', 'abcd', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a\\([bc]+\\)\\(c*d\\)', 'abcd', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a\\([bc]*\\)\\(c+d\\)', 'abcd', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED,
     'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('\\(ab\\|a\\)b*c', 'abc', SUCCEED,
     'found+"-"+g1', 'abc-ab'),
    ('\\(\\(a\\)\\(b\\)c\\)\\(d\\)', 'abcd', SUCCEED,
     'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED,
     'found', 'alpha'),
    ('^a\\(bc+\\|b[eh]\\)g\\|.h$', 'abh', SUCCEED,
     'found+"-"+g1', 'bh-None'),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'effgz', SUCCEED,
     'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'ij', SUCCEED,
     'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'effg', FAIL),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'bcdd', FAIL),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'reffgz', SUCCEED,
     'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('\\(\\(\\(\\(\\(\\(\\(\\(\\(a\\)\\)\\)\\)\\)\\)\\)\\)\\)', 'a', SUCCEED,
     'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED,
     'found', 'multiple words'),
    ('\\(.*\\)c\\(.*\\)', 'abcde', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('(\\(.*\\), \\(.*\\))', '(a, b)', SUCCEED,
     'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED,
     'found', 'ac'),
    ('\\(abc\\)\\1', 'abcabc', SUCCEED,
     'g1', 'abc'),
    ('\\([a-c]*\\)\\1', 'abcabc', SUCCEED,
     'g1', 'abc'),
    ('^\\(.+\\)?B', 'AB', SUCCEED,
     'g1', 'A'),
    ('\\(a+\\).\\1$', 'aaaaa', SUCCEED,
     'found+"-"+g1', 'aaaaa-aa'),
    ('^\\(a+\\).\\1$', 'aaaa', FAIL),
    ('\\(abc\\)\\1', 'abcabc', SUCCEED,
     'found+"-"+g1', 'abcabc-abc'),
    ('\\([a-c]+\\)\\1', 'abcabc', SUCCEED,
     'found+"-"+g1', 'abcabc-abc'),
    ('\\(a\\)\\1', 'aa', SUCCEED,
     'found+"-"+g1', 'aa-a'),
    ('\\(a+\\)\\1', 'aa', SUCCEED,
     'found+"-"+g1', 'aa-a'),
    ('\\(a+\\)+\\1', 'aa', SUCCEED,
     'found+"-"+g1', 'aa-a'),
    ('\\(a\\).+\\1', 'aba', SUCCEED,
     'found+"-"+g1', 'aba-a'),
    ('\\(a\\)ba*\\1', 'aba', SUCCEED,
     'found+"-"+g1', 'aba-a'),
    ('\\(aa\\|a\\)a\\1$', 'aaa', SUCCEED,
     'found+"-"+g1', 'aaa-a'),
    ('\\(a\\|aa\\)a\\1$', 'aaa', SUCCEED,
     'found+"-"+g1', 'aaa-a'),
    ('\\(a+\\)a\\1$', 'aaa', SUCCEED,
     'found+"-"+g1', 'aaa-a'),
    ('\\([abc]*\\)\\1', 'abcabc', SUCCEED,
     'found+"-"+g1', 'abcabc-abc'),
    ('\\(a\\)\\(b\\)c\\|ab', 'ab', SUCCEED,
     'found+"-"+g1+"-"+g2', 'ab-None-None'),
    ('\\(a\\)+x', 'aaax', SUCCEED,
     'found+"-"+g1', 'aaax-a'),
    ('\\([ac]\\)+x', 'aacx', SUCCEED,
     'found+"-"+g1', 'aacx-c'),
    ('\\([^/]*/\\)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED,
     'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
    ('\\([^.]*\\)\\.\\([^:]*\\):[T ]+\\(.*\\)', 'track1.title:TBlah blah blah', SUCCEED,
     'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
    ('\\([^N]*N\\)+', 'abNNxyzN', SUCCEED,
     'found+"-"+g1', 'abNNxyzN-xyzN'),
    ('\\([^N]*N\\)+', 'abNNxyz', SUCCEED,
     'found+"-"+g1', 'abNN-N'),
    ('\\([abc]*\\)x', 'abcx', SUCCEED,
     'found+"-"+g1', 'abcx-abc'),
    ('\\([abc]*\\)x', 'abc', FAIL),
    ('\\([xyz]*\\)x', 'abcx', SUCCEED,
     'found+"-"+g1', 'x-'),
    ('\\(a\\)+b\\|aac', 'aac', SUCCEED,
     'found+"-"+g1', 'aac-None'),
    # The \< and \> operators.  The backslash is doubled like everywhere
    # else in this table; the resulting pattern strings are unchanged.
    ('\\<a', 'a', SUCCEED, 'found', 'a'),
    ('\\<a', '!', FAIL),
    ('a\\<b', 'ab', FAIL),
    ('a\\>', 'ab', FAIL),
    ('a\\>', 'a!', SUCCEED, 'found', 'a'),
    ('a\\>', 'a', SUCCEED, 'found', 'a'),
]
0288 

Generated by PyXR 0.9.4
SourceForge.net Logo