#!/usr/bin/env python
# -*- mode: python -*-

# Re test suite and benchmark suite v1.5

# The 3 possible outcomes for each pattern.  The names are bound to the
# integers 0, 1 and 2 and are used as element 2 of every entry in "tests".
[SUCCEED, FAIL, SYNTAX_ERROR] = range(3)

# Benchmark suite (needs expansion)
#
# The benchmark suite does not test correctness, just speed.  The
# first element of each tuple is the regex pattern; the second is a
# string to match it against.  The benchmarking code will embed the
# second string inside several sizes of padding, to test how regex
# matching performs on large strings.

benchmarks = [

    # test common prefix
    ('Python|Perl', 'Perl'),                # Alternation
    ('(Python|Perl)', 'Perl'),              # Grouped alternation

    ('Python|Perl|Tcl', 'Perl'),            # Alternation
    ('(Python|Perl|Tcl)', 'Perl'),          # Grouped alternation

    ('(Python)\\1', 'PythonPython'),        # Backreference
    ('([0a-z][a-z0-9]*,)+', 'a5,b7,c9,'),   # Disable the fastmap optimization
    ('([a-z][a-z0-9]*,)+', 'a5,b7,c9,'),    # A few sets

    ('Python', 'Python'),                   # Simple text literal
    ('.*Python', 'Python'),                 # Bad text literal
    ('.*Python.*', 'Python'),               # Worse text literal
    ('.*(Python)', 'Python'),               # Bad text literal with grouping

]

# Test suite (for verifying correctness)
#
# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
# complete tuple are:
# element 0: a string containing the pattern
#         1: the string to match against the pattern
#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match), and "g1", "g2", ...
#            up to "g99" contain the contents of each group, or the
#            string 'None' if the group wasn't given a value, or the
#            string 'Error' if the group index was out of range;
#            also "groups", the return value of m.group() (a tuple).
#         4: The expected result of evaluating the expression.
#            If the two don't match, an error is reported.
#
# If the regex isn't expected to work, the latter two elements can be omitted.
#
# Patterns written as ordinary (non-raw) string literals double every
# backslash that is meant to reach the regex engine, so that no literal
# relies on an unrecognised escape sequence being passed through.

tests = [
    # Test ?P< and ?P= extensions
    ('(?P<foo_123', '', SYNTAX_ERROR),      # Unterminated group identifier
    ('(?P<1>a)', '', SYNTAX_ERROR),         # Begins with a digit
    ('(?P<!>a)', '', SYNTAX_ERROR),         # Begins with an illegal char
    ('(?P<foo!>a)', '', SYNTAX_ERROR),      # Contains an illegal char

    # Same tests, for the ?P= form
    ('(?P<foo_123>a)(?P=foo_123', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=1)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=!)', 'aa', SYNTAX_ERROR),
    ('(?P<foo_123>a)(?P=foo_124', 'aa', SYNTAX_ERROR),  # Backref to undefined group

    ('(?P<foo_123>a)', 'a', SUCCEED, 'g1', 'a'),
    ('(?P<foo_123>a)(?P=foo_123)', 'aa', SUCCEED, 'g1', 'a'),

    # Test octal escapes
    ('\\1', 'a', SYNTAX_ERROR),    # Backreference
    ('[\\1]', '\1', SUCCEED, 'found', '\1'),  # Character
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # Test \0 is handled everywhere
    (r'\0', '\0', SUCCEED, 'found', '\0'),
    (r'[\0a]', '\0', SUCCEED, 'found', '\0'),
    (r'[a\0]', '\0', SUCCEED, 'found', '\0'),
    (r'[^a\0]', '\0', FAIL),

    # Test various letter escapes
    (r'\a[\b]\f\n\r\t\v', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    (r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
    # NOTE: not an error under PCRE/PRE:
    # (r'\u', '', SYNTAX_ERROR),    # A Perl escape
    (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
    (r'\x00f', '\017', FAIL, 'found', chr(15)),
    (r'\x00fe', '\376', FAIL, 'found', chr(254)),
    # (r'\x00ffffffffffffff', '\377', SUCCEED, 'found', chr(255)),
    # (r'\x00f', '\017', SUCCEED, 'found', chr(15)),
    # (r'\x00fe', '\376', SUCCEED, 'found', chr(254)),

    (r"^\w+=(\\[\000-\277]|[^\n\\])*", "SRC=eval.c g.c blah blah blah \\\\\n\tapes.c",
     SUCCEED, 'found', "SRC=eval.c g.c blah blah blah \\\\"),

    # Test that . only matches \n in DOTALL mode
    ('a.b', 'acb', SUCCEED, 'found', 'acb'),
    ('a.b', 'a\nb', FAIL),
    ('a.*b', 'acc\nccb', FAIL),
    ('a.{4,5}b', 'acc\nccb', FAIL),
    ('a.b', 'a\rb', SUCCEED, 'found', 'a\rb'),
    ('a.b(?s)', 'a\nb', SUCCEED, 'found', 'a\nb'),
    ('a.*(?s)b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.{4,5}b', 'acc\nccb', SUCCEED, 'found', 'acc\nccb'),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    (')', '', SYNTAX_ERROR),           # Unmatched right bracket
    ('', '', SUCCEED, 'found', ''),    # Empty pattern
    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found+"-"', '-'),
    ('$', 'abc', SUCCEED, 'found+"-"', '-'),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[\\-b]', 'a-', SUCCEED, 'found', 'a-'),
    # NOTE: not an error under PCRE/PRE:
    # ('a[b-]', 'a-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a\\', '-', SYNTAX_ERROR),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    # Backslash doubled: the pattern text is still a[\]]b, but the literal
    # no longer contains the unrecognised escape sequence "\]".
    ('a[\\]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('\\ba\\b', 'a-', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a', SUCCEED, '"-"', '-'),
    ('\\ba\\b', '-a-', SUCCEED, '"-"', '-'),
    ('\\by\\b', 'xy', FAIL),
    ('\\by\\b', 'yz', FAIL),
    ('\\by\\b', 'xyz', FAIL),
    ('x\\b', 'xyz', FAIL),
    ('x\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('\\Bz', 'xyz', SUCCEED, '"-"', '-'),
    ('z\\B', 'xyz', FAIL),
    ('\\Bx', 'xyz', FAIL),
    ('\\Ba\\B', 'a-', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a', FAIL, '"-"', '-'),
    ('\\Ba\\B', '-a-', FAIL, '"-"', '-'),
    ('\\By\\B', 'xy', FAIL),
    ('\\By\\B', 'yz', FAIL),
    ('\\By\\b', 'xy', SUCCEED, '"-"', '-'),
    ('\\by\\B', 'yz', SUCCEED, '"-"', '-'),
    ('\\By\\B', 'xyz', SUCCEED, '"-"', '-'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('$b', 'b', FAIL),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL, 'xg1y', 'xy'),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),
    ('(a+).\\1$', 'aaaaa', SUCCEED, 'found+"-"+g1', 'aaaaa-aa'),
    ('^(a+).\\1$', 'aaaa', FAIL),
    ('(abc)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('([a-c]+)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a+)+\\1', 'aa', SUCCEED, 'found+"-"+g1', 'aa-a'),
    ('(a).+\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(a)ba*\\1', 'aba', SUCCEED, 'found+"-"+g1', 'aba-a'),
    ('(aa|a)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a|aa)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('(a+)a\\1$', 'aaa', SUCCEED, 'found+"-"+g1', 'aaa-a'),
    ('([abc]*)\\1', 'abcabc', SUCCEED, 'found+"-"+g1', 'abcabc-abc'),
    ('(a)(b)c|ab', 'ab', SUCCEED, 'found+"-"+g1+"-"+g2', 'ab-None-None'),
    ('(a)+x', 'aaax', SUCCEED, 'found+"-"+g1', 'aaax-a'),
    ('([ac])+x', 'aacx', SUCCEED, 'found+"-"+g1', 'aacx-c'),
    ('([^/]*/)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED, 'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
    ('([^.]*)\\.([^:]*):[T ]+(.*)', 'track1.title:TBlah blah blah', SUCCEED, 'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
    ('([^N]*N)+', 'abNNxyzN', SUCCEED, 'found+"-"+g1', 'abNNxyzN-xyzN'),
    ('([^N]*N)+', 'abNNxyz', SUCCEED, 'found+"-"+g1', 'abNN-N'),
    ('([abc]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'abcx-abc'),
    ('([abc]*)x', 'abc', FAIL),
    ('([xyz]*)x', 'abcx', SUCCEED, 'found+"-"+g1', 'x-'),
    ('(a)+b|aac', 'aac', SUCCEED, 'found+"-"+g1', 'aac-None'),

    # Test symbolic groups

    ('(?P<i d>aaa)a', 'aaaa', SYNTAX_ERROR),
    ('(?P<id>aaa)a', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aaa'),
    ('(?P<id>aa)(?P=id)', 'aaaa', SUCCEED, 'found+"-"+id', 'aaaa-aa'),
    ('(?P<id>aa)(?P=xd)', 'aaaa', SYNTAX_ERROR),

    # Test octal escapes/memory references

    ('\\1', 'a', SYNTAX_ERROR),
    ('\\09', chr(0) + '9', SUCCEED, 'found', chr(0) + '9'),
    ('\\141', 'a', SUCCEED, 'found', 'a'),
    ('(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)\\119', 'abcdefghijklk9', SUCCEED, 'found+"-"+g11', 'abcdefghijklk9-k'),

    # All tests from Perl

    ('abc', 'abc', SUCCEED, 'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED, 'found', 'abc'),
    ('abc', 'ababc', SUCCEED, 'found', 'abc'),
    ('ab*c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{0,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab{1,}bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{1,3}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{3,4}bc', 'abbbbc', SUCCEED, 'found', 'abbbbc'),
    ('ab{4,5}bc', 'abbbbc', FAIL),
    ('ab?bc', 'abbc', SUCCEED, 'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}bc', 'abc', SUCCEED, 'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED, 'found', 'abc'),
    ('ab{0,1}c', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED, 'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED, 'found', 'abc'),
    ('^', 'abc', SUCCEED, 'found', ''),
    ('$', 'abc', SUCCEED, 'found', ''),
    ('a.c', 'abc', SUCCEED, 'found', 'abc'),
    ('a.c', 'axc', SUCCEED, 'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED, 'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED, 'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED, 'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED, 'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-]', 'a-', SUCCEED, 'found', 'a-'),
    ('a[b-a]', '-', SYNTAX_ERROR),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED, 'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED, 'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED, 'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED, 'found', 'adc'),
    ('ab|cd', 'abc', SUCCEED, 'found', 'ab'),
    ('ab|cd', 'abcd', SUCCEED, 'found', 'ab'),
    ('()ef', 'def', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('*a', '-', SYNTAX_ERROR),
    ('(*)b', '-', SYNTAX_ERROR),
    ('$b', 'b', FAIL),
    ('a\\', '-', SYNTAX_ERROR),
    ('a\\(b', 'a(b', SUCCEED, 'found+"-"+g1', 'a(b-Error'),
    ('a\\(*b', 'ab', SUCCEED, 'found', 'ab'),
    ('a\\(*b', 'a((b', SUCCEED, 'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED, 'found', 'a\\b'),
    ('abc)', '-', SYNTAX_ERROR),
    ('(abc', '-', SYNTAX_ERROR),
    ('((a))', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('(a)b(c)', 'abc', SUCCEED, 'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a{1,}b{1,}c', 'aabbabc', SUCCEED, 'found', 'abc'),
    ('a**', '-', SYNTAX_ERROR),
    ('a.+?c', 'abcabc', SUCCEED, 'found', 'abc'),
    ('(a+|b)*', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){0,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)+', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b){1,}', 'ab', SUCCEED, 'found+"-"+g1', 'ab-b'),
    ('(a+|b)?', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    ('(a+|b){0,1}', 'ab', SUCCEED, 'found+"-"+g1', 'a-a'),
    (')(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED, 'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED, 'found', ''),
    ('([abc])*d', 'abbbcd', SUCCEED, 'found+"-"+g1', 'abbbcd-c'),
    ('([abc])*bcd', 'abcd', SUCCEED, 'found+"-"+g1', 'abcd-a'),
    ('a|b|c|d|e', 'e', SUCCEED, 'found', 'e'),
    ('(a|b|c|d|e)f', 'ef', SUCCEED, 'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED, 'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED, 'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED, 'found', 'a'),
    ('(ab|cd)e', 'abcde', SUCCEED, 'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED, 'found', 'hij'),
    ('^(ab|cd)e', 'abcde', FAIL),
    ('(abc|)ef', 'abcdef', SUCCEED, 'found+"-"+g1', 'ef-'),
    ('(a|b)c*d', 'abcd', SUCCEED, 'found+"-"+g1', 'bcd-b'),
    ('(ab|ab*)bc', 'abc', SUCCEED, 'found+"-"+g1', 'abc-a'),
    ('a([bc]*)c*', 'abc', SUCCEED, 'found+"-"+g1', 'abc-bc'),
    ('a([bc]*)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]+)(c*d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a([bc]*)(c+d)', 'abcd', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED, 'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('(ab|a)b*c', 'abc', SUCCEED, 'found+"-"+g1', 'abc-ab'),
    ('((a)(b)c)(d)', 'abcd', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED, 'found', 'alpha'),
    ('^a(bc+|b[eh])g|.h$', 'abh', SUCCEED, 'found+"-"+g1', 'bh-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'ij', SUCCEED, 'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('(bc+d$|ef*g.|h?i(j|k))', 'effg', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'bcdd', FAIL),
    ('(bc+d$|ef*g.|h?i(j|k))', 'reffgz', SUCCEED, 'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('((((((((((a))))))))))', 'a', SUCCEED, 'g10', 'a'),
    ('((((((((((a))))))))))\\10', 'aa', SUCCEED, 'found', 'aa'),
    # Python does not have the same rules for \\41 so this is a syntax error
    #    ('((((((((((a))))))))))\\41', 'aa', FAIL),
    #    ('((((((((((a))))))))))\\41', 'a!', SUCCEED, 'found', 'a!'),
    ('((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(?i)((((((((((a))))))))))\\41', '', SYNTAX_ERROR),
    ('(((((((((a)))))))))', 'a', SUCCEED, 'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED, 'found', 'multiple words'),
    ('(.*)c(.*)', 'abcde', SUCCEED, 'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('\\((.*), (.*)\\)', '(a, b)', SUCCEED, 'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED, 'found', 'ac'),
    ('(abc)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('([a-c]*)\\1', 'abcabc', SUCCEED, 'g1', 'abc'),
    ('(?i)abc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'XBC', FAIL),
    ('(?i)abc', 'AXC', FAIL),
    ('(?i)abc', 'ABX', FAIL),
    ('(?i)abc', 'XABCY', SUCCEED, 'found', 'ABC'),
    ('(?i)abc', 'ABABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab*bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab*?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{0,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab+?bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab+bc', 'ABC', FAIL),
    ('(?i)ab+bc', 'ABQ', FAIL),
    ('(?i)ab{1,}bc', 'ABQ', FAIL),
    ('(?i)ab+bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{1,3}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{3,4}?bc', 'ABBBBC', SUCCEED, 'found', 'ABBBBC'),
    ('(?i)ab{4,5}?bc', 'ABBBBC', FAIL),
    ('(?i)ab??bc', 'ABBC', SUCCEED, 'found', 'ABBC'),
    ('(?i)ab??bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?bc', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab??bc', 'ABBBBC', FAIL),
    ('(?i)ab??c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)ab{0,1}?c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'ABCC', FAIL),
    ('(?i)^abc', 'ABCC', SUCCEED, 'found', 'ABC'),
    ('(?i)^abc$', 'AABC', FAIL),
    ('(?i)abc$', 'AABC', SUCCEED, 'found', 'ABC'),
    ('(?i)^', 'ABC', SUCCEED, 'found', ''),
    ('(?i)$', 'ABC', SUCCEED, 'found', ''),
    ('(?i)a.c', 'ABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.c', 'AXC', SUCCEED, 'found', 'AXC'),
    ('(?i)a.*?c', 'AXYZC', SUCCEED, 'found', 'AXYZC'),
    ('(?i)a.*c', 'AXYZD', FAIL),
    ('(?i)a[bc]d', 'ABC', FAIL),
    ('(?i)a[bc]d', 'ABD', SUCCEED, 'found', 'ABD'),
    ('(?i)a[b-d]e', 'ABD', FAIL),
    ('(?i)a[b-d]e', 'ACE', SUCCEED, 'found', 'ACE'),
    ('(?i)a[b-d]', 'AAC', SUCCEED, 'found', 'AC'),
    ('(?i)a[-b]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-]', 'A-', SUCCEED, 'found', 'A-'),
    ('(?i)a[b-a]', '-', SYNTAX_ERROR),
    ('(?i)a[]b', '-', SYNTAX_ERROR),
    ('(?i)a[', '-', SYNTAX_ERROR),
    ('(?i)a]', 'A]', SUCCEED, 'found', 'A]'),
    ('(?i)a[]]b', 'A]B', SUCCEED, 'found', 'A]B'),
    ('(?i)a[^bc]d', 'AED', SUCCEED, 'found', 'AED'),
    ('(?i)a[^bc]d', 'ABD', FAIL),
    ('(?i)a[^-b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)a[^-b]c', 'A-C', FAIL),
    ('(?i)a[^]b]c', 'A]C', FAIL),
    ('(?i)a[^]b]c', 'ADC', SUCCEED, 'found', 'ADC'),
    ('(?i)ab|cd', 'ABC', SUCCEED, 'found', 'AB'),
    ('(?i)ab|cd', 'ABCD', SUCCEED, 'found', 'AB'),
    ('(?i)()ef', 'DEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)*a', '-', SYNTAX_ERROR),
    ('(?i)(*)b', '-', SYNTAX_ERROR),
    ('(?i)$b', 'B', FAIL),
    ('(?i)a\\', '-', SYNTAX_ERROR),
    ('(?i)a\\(b', 'A(B', SUCCEED, 'found+"-"+g1', 'A(B-Error'),
    ('(?i)a\\(*b', 'AB', SUCCEED, 'found', 'AB'),
    ('(?i)a\\(*b', 'A((B', SUCCEED, 'found', 'A((B'),
    ('(?i)a\\\\b', 'A\\B', SUCCEED, 'found', 'A\\B'),
    ('(?i)abc)', '-', SYNTAX_ERROR),
    ('(?i)(abc', '-', SYNTAX_ERROR),
    ('(?i)((a))', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'A-A-A'),
    ('(?i)(a)b(c)', 'ABC', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABC-A-C'),
    ('(?i)a+b+c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a{1,}b{1,}c', 'AABBABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a**', '-', SYNTAX_ERROR),
    ('(?i)a.+?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.*?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)a.{0,5}?c', 'ABCABC', SUCCEED, 'found', 'ABC'),
    ('(?i)(a+|b)*', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){0,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)+', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b){1,}', 'AB', SUCCEED, 'found+"-"+g1', 'AB-B'),
    ('(?i)(a+|b)?', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}', 'AB', SUCCEED, 'found+"-"+g1', 'A-A'),
    ('(?i)(a+|b){0,1}?', 'AB', SUCCEED, 'found+"-"+g1', '-None'),
    ('(?i))(', '-', SYNTAX_ERROR),
    ('(?i)[^ab]*', 'CDE', SUCCEED, 'found', 'CDE'),
    ('(?i)abc', '', FAIL),
    ('(?i)a*', '', SUCCEED, 'found', ''),
    ('(?i)([abc])*d', 'ABBBCD', SUCCEED, 'found+"-"+g1', 'ABBBCD-C'),
    ('(?i)([abc])*bcd', 'ABCD', SUCCEED, 'found+"-"+g1', 'ABCD-A'),
    ('(?i)a|b|c|d|e', 'E', SUCCEED, 'found', 'E'),
    ('(?i)(a|b|c|d|e)f', 'EF', SUCCEED, 'found+"-"+g1', 'EF-E'),
    ('(?i)abcd*efg', 'ABCDEFG', SUCCEED, 'found', 'ABCDEFG'),
    ('(?i)ab*', 'XABYABBBZ', SUCCEED, 'found', 'AB'),
    ('(?i)ab*', 'XAYABBBZ', SUCCEED, 'found', 'A'),
    ('(?i)(ab|cd)e', 'ABCDE', SUCCEED, 'found+"-"+g1', 'CDE-CD'),
    ('(?i)[abhgefdc]ij', 'HIJ', SUCCEED, 'found', 'HIJ'),
    ('(?i)^(ab|cd)e', 'ABCDE', FAIL),
    ('(?i)(abc|)ef', 'ABCDEF', SUCCEED, 'found+"-"+g1', 'EF-'),
    ('(?i)(a|b)c*d', 'ABCD', SUCCEED, 'found+"-"+g1', 'BCD-B'),
    ('(?i)(ab|ab*)bc', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-A'),
    ('(?i)a([bc]*)c*', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-BC'),
    ('(?i)a([bc]*)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]+)(c*d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-BC-D'),
    ('(?i)a([bc]*)(c+d)', 'ABCD', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCD-B-CD'),
    ('(?i)a[bcd]*dcdcde', 'ADCDCDE', SUCCEED, 'found', 'ADCDCDE'),
    ('(?i)a[bcd]+dcdcde', 'ADCDCDE', FAIL),
    ('(?i)(ab|a)b*c', 'ABC', SUCCEED, 'found+"-"+g1', 'ABC-AB'),
    ('(?i)((a)(b)c)(d)', 'ABCD', SUCCEED, 'g1+"-"+g2+"-"+g3+"-"+g4', 'ABC-A-B-D'),
    ('(?i)[a-zA-Z_][a-zA-Z0-9_]*', 'ALPHA', SUCCEED, 'found', 'ALPHA'),
    ('(?i)^a(bc+|b[eh])g|.h$', 'ABH', SUCCEED, 'found+"-"+g1', 'BH-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'IJ', SUCCEED, 'found+"-"+g1+"-"+g2', 'IJ-IJ-J'),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'EFFG', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'BCDD', FAIL),
    ('(?i)(bc+d$|ef*g.|h?i(j|k))', 'REFFGZ', SUCCEED, 'found+"-"+g1+"-"+g2', 'EFFGZ-EFFGZ-None'),
    ('(?i)((((((((((a))))))))))', 'A', SUCCEED, 'g10', 'A'),
    ('(?i)((((((((((a))))))))))\\10', 'AA', SUCCEED, 'found', 'AA'),
    #('(?i)((((((((((a))))))))))\\41', 'AA', FAIL),
    #('(?i)((((((((((a))))))))))\\41', 'A!', SUCCEED, 'found', 'A!'),
    ('(?i)(((((((((a)))))))))', 'A', SUCCEED, 'found', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))', 'A', SUCCEED, 'g1', 'A'),
    ('(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))', 'C', SUCCEED, 'g1', 'C'),
    ('(?i)multiple words of text', 'UH-UH', FAIL),
    ('(?i)multiple words', 'MULTIPLE WORDS, YEAH', SUCCEED, 'found', 'MULTIPLE WORDS'),
    ('(?i)(.*)c(.*)', 'ABCDE', SUCCEED, 'found+"-"+g1+"-"+g2', 'ABCDE-AB-DE'),
    ('(?i)\\((.*), (.*)\\)', '(A, B)', SUCCEED, 'g2+"-"+g1', 'B-A'),
    ('(?i)[k]', 'AB', FAIL),
    #    ('(?i)abcd', 'ABCD', SUCCEED, 'found+"-"+\\found+"-"+\\\\found', 'ABCD-$&-\\ABCD'),
    #    ('(?i)a(bc)d', 'ABCD', SUCCEED, 'g1+"-"+\\g1+"-"+\\\\g1', 'BC-$1-\\BC'),
    ('(?i)a[-]?c', 'AC', SUCCEED, 'found', 'AC'),
    ('(?i)(abc)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('(?i)([a-c]*)\\1', 'ABCABC', SUCCEED, 'g1', 'ABC'),
    ('a(?!b).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?=c|d).', 'abad', SUCCEED, 'found', 'ad'),
    ('a(?:b|c|d)(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)*(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|c|d)+?(.)', 'ace', SUCCEED, 'g1', 'e'),
    ('a(?:b|(c|e){1,2}?|d)+?(.)', 'ace', SUCCEED, 'g1 + g2', 'ce'),
    ('^(.+)?B', 'AB', SUCCEED, 'g1', 'A'),

    # lookbehind: split by : but not if it is escaped by -.
    ('(?<!-):(.*?)(?<!-):', 'a:bc-:de:f', SUCCEED, 'g1', 'bc-:de'),
    # escaping with \ as we know it
    # (raw literal: same pattern text, (?<!\\):(.*?)(?<!\\): , without
    # depending on the unrecognised escape sequence "\)")
    (r'(?<!\\):(.*?)(?<!\\):', 'a:bc\\:de:f', SUCCEED, 'g1', 'bc\\:de'),
    # terminating with ' and escaping with ? as in edifact
    ("(?<!\\?)'(.*?)(?<!\\?)'", "a'bc?'de'f", SUCCEED, 'g1', "bc?'de"),

    # Comments using the (?#...) syntax

    ('w(?# comment', 'w', SYNTAX_ERROR),
    ('w(?# comment 1)xy(?# comment 2)z', 'wxyz', SUCCEED, 'found', 'wxyz'),

    # Check odd placement of embedded pattern modifiers

    # not an error under PCRE/PRE:
    ('w(?i)', 'W', SUCCEED, 'found', 'W'),
    # ('w(?i)', 'W', SYNTAX_ERROR),

    # Comments using the x embedded pattern modifier

    ("""(?x)w# comment 1
        x y
        # comment 2
        z""", 'wxyz', SUCCEED, 'found', 'wxyz'),

    # using the m embedded pattern modifier
    # (the continuation lines of these subject strings must start in
    # column 0, otherwise "abc" would not sit at the start of a line)

    ('^abc', """jkl
abc
xyz""", FAIL),
    ('(?m)^abc', """jkl
abc
xyz""", SUCCEED, 'found', 'abc'),

    ('(?m)abc$', """jkl
xyzabc
123""", SUCCEED, 'found', 'abc'),

    # using the s embedded pattern modifier

    ('a.b', 'a\nb', FAIL),
    ('(?s)a.b', 'a\nb', SUCCEED, 'found', 'a\nb'),

    # test \w, etc. both inside and outside character classes

    ('\\w+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('[\\w]+', '--ab_cd0123--', SUCCEED, 'found', 'ab_cd0123'),
    ('\\D+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\D]+', '1234abc5678', SUCCEED, 'found', 'abc'),
    ('[\\da-fA-F]+', '123abc', SUCCEED, 'found', '123abc'),
    # not an error under PCRE/PRE:
    # ('[\\d-x]', '-', SYNTAX_ERROR),
    (r'([\s]*)([\S]*)([\s]*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),
    (r'(\s*)(\S*)(\s*)', ' testing!1972', SUCCEED, 'g3+g2+g1', 'testing!1972 '),

    (r'\xff', '\377', SUCCEED, 'found', chr(255)),
    # new \x semantics
    (r'\x00ff', '\377', FAIL),
    # (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
    (r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    # Non-raw variant: real control characters followed by backslash + "g".
    # The trailing backslash is doubled so the literal has no invalid escape.
    ('\t\n\v\r\f\a\\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
    (r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
    (r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),

    #
    # post-1.5.2 additions

    # xmllib problem
    (r'(([a-z]+):)?([a-z]+)$', 'smil', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-smil'),
    # bug 110866: reference to undefined group
    (r'((.)\1+)', '', SYNTAX_ERROR),
    # bug 111869: search (PRE/PCRE fails on this one, SRE doesn't)
    (r'.*d', 'abc\nabd', SUCCEED, 'found', 'abd'),
    # bug 112468: various expected syntax errors
    (r'(', '', SYNTAX_ERROR),
    (r'[\41]', '!', SUCCEED, 'found', '!'),
    # bug 114033: nothing to repeat
    (r'(x?)?', 'x', SUCCEED, 'found', 'x'),
    # bug 115040: rescan if flags are modified inside pattern
    (r' (?x)foo ', 'foo', SUCCEED, 'found', 'foo'),
    # bug 115618: negative lookbehind (the pattern uses (?<!...))
    (r'(?<!abc)(d.f)', 'abcdefdof', SUCCEED, 'found', 'dof'),
    # bug 116251: character class bug
    (r'[\w-]+', 'laser_beam', SUCCEED, 'found', 'laser_beam'),
    # bug 123769+127259: non-greedy backtracking bug
    (r'.*?\S *:', 'xx:', SUCCEED, 'found', 'xx:'),
    # NOTE(review): the next two entries are identical as extracted; they
    # presumably differed only in the number of spaces inside 'a 10' before
    # whitespace was collapsed -- confirm against the upstream file.
    (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
    (r'a[ ]*?\ (\d+).*', 'a 10', SUCCEED, 'found', 'a 10'),
    # bug 127259: \Z shouldn't depend on multiline mode
    (r'(?ms).*?x\s*\Z(.*)', 'xx\nx\n', SUCCEED, 'g1', ''),
    # bug 128899: uppercase literals under the ignorecase flag
    (r'(?i)M+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)m+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[M]+', 'MMM', SUCCEED, 'found', 'MMM'),
    (r'(?i)[m]+', 'MMM', SUCCEED, 'found', 'MMM'),
    # bug 130748: ^* should be an error (nothing to repeat)
    (r'^*', '', SYNTAX_ERROR),
    # bug 133283: minimizing repeat problem
    (r'"(?:\\"|[^"])*?"', r'"\""', SUCCEED, 'found', r'"\""'),
    # bug 477728: minimizing repeat problem
    (r'^.*?$', 'one\ntwo\nthree\n', FAIL),
    # bug 483789: minimizing repeat problem
    (r'a[^>]*?b', 'a>b', FAIL),
    # bug 490573: minimizing repeat problem
    (r'^a*?$', 'foo', FAIL),
    # bug 470582: nested groups problem
    (r'^((a)c)?(ab)$', 'ab', SUCCEED, 'g1+"-"+g2+"-"+g3', 'None-None-ab'),
    # another minimizing repeat problem (capturing groups in assertions)
    ('^([ab]*?)(?=(b)?)c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?!(b))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
    ('^([ab]*?)(?<!(a))c', 'abc', SUCCEED, 'g1+"-"+g2', 'ab-None'),
]

# Append the unicode-dependent cases only when the interpreter can build a
# unicode literal.  The literal is evaluated from a constant string (never
# external input), so a SyntaxError simply means "no u'' literal support
# here" and the extra cases are skipped on purpose.
try:
    u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
except SyntaxError:
    pass
else:
    tests.extend([
        # bug 410271: \b broken under locales
        (r'\b.\b', 'a', SUCCEED, 'found', 'a'),
        (r'(?u)\b.\b', u, SUCCEED, 'found', u),
        (r'(?u)\w', u, SUCCEED, 'found', u),
    ])
# Generated by PyXR 0.9.4