PyXR

c:\python24\lib \ test \ re_tests.py



0001 #!/usr/bin/env python
0002 # -*- mode: python -*-
0003 
0004 # Re test suite and benchmark suite v1.5
0005 
0006 # The 3 possible outcomes for each pattern
0007 [SUCCEED, FAIL, SYNTAX_ERROR] = range(3)  # SUCCEED == 0, FAIL == 1, SYNTAX_ERROR == 2
0008 
0009 # Benchmark suite (needs expansion)
0010 #
0011 # The benchmark suite does not test correctness, just speed.  The
0012 # first element of each tuple is the regex pattern; the second is a
0013 # string to match it against.  The benchmarking code will embed the
0014 # second string inside several sizes of padding, to test how regex
0015 # matching performs on large strings.
0016 
0017 benchmarks = [  # list of (regex pattern, string to match) pairs
0018 
0019     # Alternatives that share a common prefix ('P')
0020     ('Python|Perl', 'Perl'),    # Alternation
0021     ('(Python|Perl)', 'Perl'),  # Grouped alternation
0022 
0023     ('Python|Perl|Tcl', 'Perl'),        # Alternation with a third branch
0024     ('(Python|Perl|Tcl)', 'Perl'),      # Grouped alternation with a third branch
0025 
0026     ('(Python)\\1', 'PythonPython'),    # Backreference to group 1
0027     ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # Disable the fastmap optimization
0028     ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'), # A few sets
0029 
0030     ('Python', 'Python'),               # Simple text literal
0031     ('.*Python', 'Python'),             # Bad text literal: literal behind a leading .*
0032     ('.*Python.*', 'Python'),           # Worse text literal: .* on both sides
0033     ('.*(Python)', 'Python'),           # Bad text literal with grouping
0034 
0035 ]
0036 
0037 # Test suite (for verifying correctness)
0038 #
0039 # The test suite is a list of 5- or 3-tuples.  The 5 parts of a
0040 # complete tuple are:
0041 # element 0: a string containing the pattern
0042 #         1: the string to match against the pattern
0043 #         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
0044 #         3: a string that will be eval()'ed to produce a test string.
0045 #            This is an arbitrary Python expression; the available
0046 #            variables are "found" (the whole match), and "g1", "g2", ...
0047 #            up to "g99" contain the contents of each group, or the
0048 #            string 'None' if the group wasn't given a value, or the
0049 #            string 'Error' if the group index was out of range;
0050 #            also "groups", the return value of m.group() (a tuple).
0051 #         4: The expected result of evaluating the expression.
0052 #            If the two don't match, an error is reported.
0053 #
0054 # If the regex isn't expected to work, the latter two elements can be omitted.
0055 
0056 tests = [
0057     # Test ?P< and ?P= extensions
0058     ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
0059     ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
0060     ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
0061     ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Contains an illegal char
0062 
0063     # Same tests, for the ?P= form
0064     ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
0065     ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
0066     ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
0067     ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group
0068 
0069     ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
0070     ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),
0071 
0072     # Test octal escapes
0073     ('\\1', 'a', SYNTAX_ERROR),    # Backreference
0074     ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
0075     ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
0076     ('\\141', 'a', SUCCEED, 'found', 'a'),
0077     ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
0078 
0079     # Test \0 is handled everywhere
0080     (r'\0', '\0', SUCCEED, 'found', '\0'),
0081     (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
0082     (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
0083     (r'[^a\0]', '\0', FAIL),
0084 
0085     # Test various letter escapes
0086     (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
0087     (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
0088     # NOTE: not an error under PCRE/PRE:
0089     # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
0090     (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
0091     (r'\xff', '\377', SUCCEED, 'found', chr(255)),
0092     # new \x semantics: \x takes exactly two hex digits, so the longer forms below no longer match
0093     (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
0094     (r'\x00f', '\017', FAIL, 'found', chr(15)),
0095     (r'\x00fe', '\376', FAIL, 'found', chr(254)),
0096     # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
0097     # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
0098     # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),
0099 
0100     (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
0101      SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),
0102 
0103     # Test that . only matches \n in DOTALL mode
0104     ('a.b', 'acb', SUCCEED, 'found', 'acb'),
0105     ('a.b', 'a\nb', FAIL),
0106     ('a.*b', 'acc\nccb', FAIL),
0107     ('a.{4,5}b', 'acc\nccb', FAIL),
0108     ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
0109     ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
0110     ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
0111     ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
0112     ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
0113 
0114     (')', '', SYNTAX_ERROR),           # Unmatched right bracket
0115     ('', '', SUCCEED, 'found', ''),    # Empty pattern
0116     ('abc', 'abc', SUCCEED, 'found', 'abc'),
0117     ('abc', 'xbc', FAIL),
0118     ('abc', 'axc', FAIL),
0119     ('abc', 'abx', FAIL),
0120     ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
0121     ('abc', 'ababc', SUCCEED, 'found', 'abc'),
0122     ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
0123     ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
0124     ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
0125     ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0126     ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
0127     ('ab+bc', 'abc', FAIL),
0128     ('ab+bc', 'abq', FAIL),
0129     ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0130     ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
0131     ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
0132     ('ab?bc', 'abbbbc', FAIL),
0133     ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
0134     ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
0135     ('^abc$', 'abcc', FAIL),
0136     ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
0137     ('^abc$', 'aabc', FAIL),
0138     ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
0139     ('^', 'abc', SUCCEED, 'found+"-"', '-'),
0140     ('$', 'abc', SUCCEED, 'found+"-"', '-'),
0141     ('a.c', 'abc', SUCCEED, 'found', 'abc'),
0142     ('a.c', 'axc', SUCCEED, 'found', 'axc'),
0143     ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
0144     ('a.*c', 'axyzd', FAIL),
0145     ('a[bc]d', 'abc', FAIL),
0146     ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
0147     ('a[b-d]e', 'abd', FAIL),
0148     ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
0149     ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
0150     ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
0151     ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
0152     # NOTE: not an error under PCRE/PRE:
0153     # ('a[b-]', 'a-', SYNTAX_ERROR),
0154     ('a[]b', '-', SYNTAX_ERROR),
0155     ('a[', '-', SYNTAX_ERROR),
0156     ('a\\', '-', SYNTAX_ERROR),
0157     ('abc)', '-', SYNTAX_ERROR),
0158     ('(abc', '-', SYNTAX_ERROR),
0159     ('a]', 'a]', SUCCEED, 'found', 'a]'),
0160     ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
0161     ('a[\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
0162     ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
0163     ('a[^bc]d', 'abd', FAIL),
0164     ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
0165     ('a[^-b]c', 'a-c', FAIL),
0166     ('a[^]b]c', 'a]c', FAIL),
0167     ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
0168     ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
0169     ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
0170     ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
0171     ('\\by\\b', 'xy', FAIL),
0172     ('\\by\\b', 'yz', FAIL),
0173     ('\\by\\b', 'xyz', FAIL),
0174     ('x\\b', 'xyz', FAIL),
0175     ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
0176     ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
0177     ('z\\B', 'xyz', FAIL),
0178     ('\\Bx', 'xyz', FAIL),
0179     ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
0180     ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
0181     ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
0182     ('\\By\\B', 'xy', FAIL),
0183     ('\\By\\B', 'yz', FAIL),
0184     ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
0185     ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
0186     ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
0187     ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
0188     ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
0189     ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
0190     ('$b', 'b', FAIL),
0191     ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
0192     ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
0193     ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
0194     ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
0195     ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
0196     ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
0197     ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
0198     ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
0199     ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
0200     ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
0201     (')(', '-', SYNTAX_ERROR),
0202     ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
0203     ('abc', '', FAIL),
0204     ('a*', '', SUCCEED, 'found', ''),
0205     ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
0206     ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
0207     ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
0208     ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
0209     ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
0210     ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
0211     ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
0212     ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
0213     ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
0214     ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
0215     ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
0216     ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
0217     ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
0218     ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
0219     ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
0220     ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
0221     ('a[bcd]+dcdcde', 'adcdcde', FAIL),
0222     ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
0223     ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
0224     ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
0225     ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
0226     ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
0227     ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
0228     ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
0229     ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
0230     ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
0231     ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
0232     ('multiple words of text', 'uh-uh', FAIL),
0233     ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
0234     ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
0235     ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
0236     ('[k]', 'ab', FAIL),
0237     ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
0238     ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
0239     ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
0240     ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
0241     ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
0242     ('^(a+).\\1$', 'aaaa', FAIL),
0243     ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
0244     ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
0245     ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
0246     ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
0247     ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
0248     ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
0249     ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
0250     ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
0251     ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
0252     ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
0253     ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
0254     ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
0255     ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
0256     ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
0257     ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
0258     ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
0259     ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
0260     ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
0261     ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
0262     ('([abc]*)x', 'abc', FAIL),
0263     ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
0264     ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),
0265 
0266     # Test symbolic groups
0267 
0268     ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
0269     ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
0270     ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
0271     ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),
0272 
0273     # Test octal escapes/memory references
0274 
0275     ('\\1', 'a', SYNTAX_ERROR),
0276     ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
0277     ('\\141', 'a', SUCCEED, 'found', 'a'),
0278     ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),
0279 
0280     # All tests from Perl
0281 
0282     ('abc', 'abc', SUCCEED, 'found', 'abc'),
0283     ('abc', 'xbc', FAIL),
0284     ('abc', 'axc', FAIL),
0285     ('abc', 'abx', FAIL),
0286     ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
0287     ('abc', 'ababc', SUCCEED, 'found', 'abc'),
0288     ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
0289     ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
0290     ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
0291     ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0292     ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0293     ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
0294     ('ab+bc', 'abc', FAIL),
0295     ('ab+bc', 'abq', FAIL),
0296     ('ab{1,}bc', 'abq', FAIL),
0297     ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0298     ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0299     ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0300     ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
0301     ('ab{4,5}bc', 'abbbbc', FAIL),
0302     ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
0303     ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
0304     ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
0305     ('ab?bc', 'abbbbc', FAIL),
0306     ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
0307     ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
0308     ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
0309     ('^abc$', 'abcc', FAIL),
0310     ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
0311     ('^abc$', 'aabc', FAIL),
0312     ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
0313     ('^', 'abc', SUCCEED, 'found', ''),
0314     ('$', 'abc', SUCCEED, 'found', ''),
0315     ('a.c', 'abc', SUCCEED, 'found', 'abc'),
0316     ('a.c', 'axc', SUCCEED, 'found', 'axc'),
0317     ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
0318     ('a.*c', 'axyzd', FAIL),
0319     ('a[bc]d', 'abc', FAIL),
0320     ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
0321     ('a[b-d]e', 'abd', FAIL),
0322     ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
0323     ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
0324     ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
0325     ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
0326     ('a[b-a]', '-', SYNTAX_ERROR),
0327     ('a[]b', '-', SYNTAX_ERROR),
0328     ('a[', '-', SYNTAX_ERROR),
0329     ('a]', 'a]', SUCCEED, 'found', 'a]'),
0330     ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
0331     ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
0332     ('a[^bc]d', 'abd', FAIL),
0333     ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
0334     ('a[^-b]c', 'a-c', FAIL),
0335     ('a[^]b]c', 'a]c', FAIL),
0336     ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
0337     ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
0338     ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
0339     ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
0340     ('*a', '-', SYNTAX_ERROR),
0341     ('(*)b', '-', SYNTAX_ERROR),
0342     ('$b', 'b', FAIL),
0343     ('a\\', '-', SYNTAX_ERROR),
0344     ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
0345     ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
0346     ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
0347     ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
0348     ('abc)', '-', SYNTAX_ERROR),
0349     ('(abc', '-', SYNTAX_ERROR),
0350     ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
0351     ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
0352     ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
0353     ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
0354     ('a**', '-', SYNTAX_ERROR),
0355     ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
0356     ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
0357     ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
0358     ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
0359     ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
0360     ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
0361     ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
0362     (')(', '-', SYNTAX_ERROR),
0363     ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
0364     ('abc', '', FAIL),
0365     ('a*', '', SUCCEED, 'found', ''),
0366     ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
0367     ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
0368     ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
0369     ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
0370     ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
0371     ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
0372     ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
0373     ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
0374     ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
0375     ('^(ab|cd)e', 'abcde', FAIL),
0376     ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
0377     ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
0378     ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
0379     ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
0380     ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
0381     ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
0382     ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
0383     ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
0384     ('a[bcd]+dcdcde', 'adcdcde', FAIL),
0385     ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
0386     ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
0387     ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
0388     ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
0389     ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
0390     ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
0391     ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
0392     ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
0393     ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
0394     ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
0395     ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
0396 # Python does not have the same rules for \\41 so this is a syntax error
0397 #    ('((((((((((a))))))))))\\41', 'aa', FAIL),
0398 #    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
0399     ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
0400     ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
0401     ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
0402     ('multiple words of text', 'uh-uh', FAIL),
0403     ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
0404     ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
0405     ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
0406     ('[k]', 'ab', FAIL),
0407     ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
0408     ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
0409     ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
0410     ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
0411     ('(?i)abc', 'XBC', FAIL),
0412     ('(?i)abc', 'AXC', FAIL),
0413     ('(?i)abc', 'ABX', FAIL),
0414     ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
0415     ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
0416     ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
0417     ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
0418     ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
0419     ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
0420     ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
0421     ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
0422     ('(?i)ab+bc', 'ABC', FAIL),
0423     ('(?i)ab+bc', 'ABQ', FAIL),
0424     ('(?i)ab{1,}bc', 'ABQ', FAIL),
0425     ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
0426     ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
0427     ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
0428     ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
0429     ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
0430     ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
0431     ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
0432     ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
0433     ('(?i)ab??bc', 'ABBBBC', FAIL),
0434     ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
0435     ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
0436     ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
0437     ('(?i)^abc$', 'ABCC', FAIL),
0438     ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
0439     ('(?i)^abc$', 'AABC', FAIL),
0440     ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
0441     ('(?i)^', 'ABC', SUCCEED, 'found', ''),
0442     ('(?i)$', 'ABC', SUCCEED, 'found', ''),
0443     ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
0444     ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
0445     ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
0446     ('(?i)a.*c', 'AXYZD', FAIL),
0447     ('(?i)a[bc]d', 'ABC', FAIL),
0448     ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
0449     ('(?i)a[b-d]e', 'ABD', FAIL),
0450     ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
0451     ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
0452     ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
0453     ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
0454     ('(?i)a[b-a]', '-', SYNTAX_ERROR),
0455     ('(?i)a[]b', '-', SYNTAX_ERROR),
0456     ('(?i)a[', '-', SYNTAX_ERROR),
0457     ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
0458     ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
0459     ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
0460     ('(?i)a[^bc]d', 'ABD', FAIL),
0461     ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
0462     ('(?i)a[^-b]c', 'A-C', FAIL),
0463     ('(?i)a[^]b]c', 'A]C', FAIL),
0464     ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
0465     ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
0466     ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
0467     ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
0468     ('(?i)*a', '-', SYNTAX_ERROR),
0469     ('(?i)(*)b', '-', SYNTAX_ERROR),
0470     ('(?i)$b', 'B', FAIL),
0471     ('(?i)a\\', '-', SYNTAX_ERROR),
0472     ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
0473     ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
0474     ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
0475     ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
0476     ('(?i)abc)', '-', SYNTAX_ERROR),
0477     ('(?i)(abc', '-', SYNTAX_ERROR),
0478     ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
0479     ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
0480     ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
0481     ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
0482     ('(?i)a**', '-', SYNTAX_ERROR),
0483     ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
0484     ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
0485     ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
0486     ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
0487     ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
0488     ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
0489     ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
0490     ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
0491     ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
0492     ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
0493     ('(?i))(', '-', SYNTAX_ERROR),
0494     ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
0495     ('(?i)abc', '', FAIL),
0496     ('(?i)a*', '', SUCCEED, 'found', ''),
0497     ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
0498     ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
0499     ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
0500     ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
0501     ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
0502     ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
0503     ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
0504     ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
0505     ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
0506     ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
0507     ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
0508     ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
0509     ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
0510     ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
0511     ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
0512     ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
0513     ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
0514     ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
0515     ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
0516     ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
0517     ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
0518     ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
0519     ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
0520     ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
0521     ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
0522     ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
0523     ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
0524     ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
0525     ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
0526     ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
0527     #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
0528     #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
0529     ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
0530     ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
0531     ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
0532     ('(?i)multiple words of text', 'UH-UH', FAIL),
0533     ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
0534     ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
0535     ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
0536     ('(?i)[k]', 'AB', FAIL),
0537 #    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
0538 #    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
0539     ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
0540     ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
0541     ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
0542     ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
0543     ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
0544     ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
0545     ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
0546     ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
0547     ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
0548     ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
0549     ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
0550 
0551     # lookbehind: split by : but not if it is escaped by -.
0552     ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de' ),
0553     # escaping with \ as we know it
0554     ('(?<!\\\):(.*?)(?<!\\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de' ),
0555     # terminating with ' and escaping with ? as in edifact
0556     ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de" ),
0557 
0558     # Comments using the (?#...) syntax
0559 
0560     ('w(?# comment', 'w', SYNTAX_ERROR),
0561     ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),
0562 
0563     # Check odd placement of embedded pattern modifiers
0564 
0565     # not an error under PCRE/PRE:
0566     ('w(?i)', 'W', SUCCEED, 'found', 'W'),
0567     # ('w(?i)', 'W', SYNTAX_ERROR),
0568 
0569     # Comments using the x embedded pattern modifier
0570 
0571     ("""(?x)w# comment 1
0572         x y
0573         # comment 2
0574         z""", 'wxyz', SUCCEED, 'found', 'wxyz'),
0575 
0576     # using the m embedded pattern modifier
0577 
0578     ('^abc', """jkl
0579 abc
0580 xyz""", FAIL),
0581     ('(?m)^abc', """jkl
0582 abc
0583 xyz""", SUCCEED, 'found', 'abc'),
0584 
0585     ('(?m)abc$', """jkl
0586 xyzabc
0587 123""", SUCCEED, 'found', 'abc'),
0588 
0589     # using the s embedded pattern modifier
0590 
0591     ('a.b', 'a\nb', FAIL),
0592     ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),
0593 
0594     # test \w, etc. both inside and outside character classes
0595 
0596     ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
0597     ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
0598     ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
0599     ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
0600     ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
0601     # not an error under PCRE/PRE:
0602     # ('[\\d-x]', '-', SYNTAX_ERROR),
0603     (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
0604     (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
0605 
0606     (r'\xff', '\377', SUCCEED, 'found', chr(255)),
0607     # new \x semantics: \x takes exactly two hex digits, so \x00ff is \x00 followed by a literal 'ff'
0608     (r'\x00ff', '\377', FAIL),
0609     # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
0610     (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
0611     ('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
0612     (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
0613     (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
0614 
0615     #
0616     # post-1.5.2 additions
0617 
0618     # xmllib problem
0619     (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
0620     # bug 110866: reference to undefined group
0621     (r'((.)\1+)', '', SYNTAX_ERROR),
0622     # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
0623     (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
0624     # bug 112468: various expected syntax errors
0625     (r'(', '', SYNTAX_ERROR),
0626     (r'[\41]', '!', SUCCEED, 'found', '!'),
0627     # bug 114033: nothing to repeat
0628     (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
0629     # bug 115040: rescan if flags are modified inside pattern
0630     (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
0631     # bug 115618: negative lookbehind (the (?<!...) assertion)
0632     (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
0633     # bug 116251: character class bug
0634     (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
0635     # bug 123769+127259: non-greedy backtracking bug
0636     (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
0637     (r'a[ ]*?\ (\d+).*', 'a   10', SUCCEED, 'found', 'a   10'),
0638     (r'a[ ]*?\ (\d+).*', 'a    10', SUCCEED, 'found', 'a    10'),
0639     # bug 127259: \Z shouldn't depend on multiline mode
0640     (r'(?ms).*?x\s*\Z(.*)','xx\nx\n', SUCCEED, 'g1', ''),
0641     # bug 128899: uppercase literals under the ignorecase flag
0642     (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
0643     (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
0644     (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
0645     (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
0646     # bug 130748: ^* should be an error (nothing to repeat)
0647     (r'^*', '', SYNTAX_ERROR),
0648     # bug 133283: minimizing repeat problem
0649     (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
0650     # bug 477728: minimizing repeat problem
0651     (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
0652     # bug 483789: minimizing repeat problem
0653     (r'a[^>]*?b', 'a>b', FAIL),
0654     # bug 490573: minimizing repeat problem
0655     (r'^a*?$', 'foo', FAIL),
0656     # bug 470582: nested groups problem
0657     (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
0658     # another minimizing repeat problem (capturing groups in assertions)
0659     ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
0660     ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
0661     ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
0662 ]
0663 
0664 try:
0665     u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")  # literal is built via eval so a SyntaxError surfaces here, at run time
0666 except SyntaxError:
0667     pass  # the u'...' literal was rejected: the extra cases below are not added
0668 else:
0669     tests.extend([  # same tuple layout as the entries in the main tests list
0670     # bug 410271: \b broken under locales
0671     (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
0672     (r'(?u)\b.\b', u, SUCCEED, 'found', u),  # u is both the subject string and the expected match
0673     (r'(?u)\w', u, SUCCEED, 'found', u),
0674     ])
0675 

Generated by PyXR 0.9.4
SourceForge.net Logo