# Regex test suite and benchmark suite v1.5a2
# Due to the use of r"aw" strings, this file will
# only work with Python 1.5 or higher.

# The 3 possible outcomes for each pattern
SUCCEED, FAIL, SYNTAX_ERROR = range(3)

# Benchmark suite (needs expansion)
#
# The benchmark suite does not test correctness, just speed.  The
# first element of each tuple is the regex pattern; the second is a
# string to match it against.  The benchmarking code will embed the
# second string inside several sizes of padding, to test how regex
# matching performs on large strings.
#
# Patterns use the Emacs-style syntax exercised throughout this file:
# grouping is written \( \) and alternation \|, while a bare "(" is an
# ordinary literal character.

benchmarks = [
    ('Python', 'Python'),                     # Simple text literal
    ('.*Python', 'Python'),                   # Bad text literal
    ('.*Python.*', 'Python'),                 # Worse text literal
    ('.*\\(Python\\)', 'Python'),             # Bad text literal with grouping

    # Plain alternation: no grouping operators, so no parentheses at all
    # (a leading bare "(" would be matched as a literal character).
    ('Python\\|Perl\\|Tcl', 'Perl'),          # Alternation
    ('\\(Python\\|Perl\\|Tcl\\)', 'Perl'),    # Grouped alternation
    ('\\(Python\\)\\1', 'PythonPython'),      # Backreference
    # ('\\([0a-z][a-z]*,\\)+', 'a5,b7,c9,'),  # Disable the fastmap optimization
    ('\\([a-z][a-z0-9]*,\\)+', 'a5,b7,c9,'),  # A few sets
]

# Test suite (for verifying correctness)
#
# The test suite is a list of 5- or 3-tuples.  The 5 parts of a
# complete tuple are:
# element 0: a string containing the pattern
#         1: the string to match against the pattern
#         2: the expected result (SUCCEED, FAIL, SYNTAX_ERROR)
#         3: a string that will be eval()'ed to produce a test string.
#            This is an arbitrary Python expression; the available
#            variables are "found" (the whole match), and "g1", "g2", ...
#            up to "g10" contain the contents of each group, or the
#            string 'None' if the group wasn't given a value.
#         4: The expected result of evaluating the expression.
#            If the two don't match, an error is reported.
#
# If the regex isn't expected to work, the latter two elements can be omitted.
#
# Every backslash in a pattern is written doubled ('\\(' is the two
# characters \( ), so no literal relies on Python passing an unknown
# escape sequence through unchanged.

tests = [
    ('abc', 'abc', SUCCEED,
     'found', 'abc'),
    ('abc', 'xbc', FAIL),
    ('abc', 'axc', FAIL),
    ('abc', 'abx', FAIL),
    ('abc', 'xabcy', SUCCEED,
     'found', 'abc'),
    ('abc', 'ababc', SUCCEED,
     'found', 'abc'),
    ('ab*c', 'abc', SUCCEED,
     'found', 'abc'),
    ('ab*bc', 'abc', SUCCEED,
     'found', 'abc'),
    ('ab*bc', 'abbc', SUCCEED,
     'found', 'abbc'),
    ('ab*bc', 'abbbbc', SUCCEED,
     'found', 'abbbbc'),
    ('ab+bc', 'abbc', SUCCEED,
     'found', 'abbc'),
    ('ab+bc', 'abc', FAIL),
    ('ab+bc', 'abq', FAIL),
    ('ab+bc', 'abbbbc', SUCCEED,
     'found', 'abbbbc'),
    ('ab?bc', 'abbc', SUCCEED,
     'found', 'abbc'),
    ('ab?bc', 'abc', SUCCEED,
     'found', 'abc'),
    ('ab?bc', 'abbbbc', FAIL),
    ('ab?c', 'abc', SUCCEED,
     'found', 'abc'),
    ('^abc$', 'abc', SUCCEED,
     'found', 'abc'),
    ('^abc$', 'abcc', FAIL),
    ('^abc', 'abcc', SUCCEED,
     'found', 'abc'),
    ('^abc$', 'aabc', FAIL),
    ('abc$', 'aabc', SUCCEED,
     'found', 'abc'),
    ('^', 'abc', SUCCEED,
     'found+"-"', '-'),
    ('$', 'abc', SUCCEED,
     'found+"-"', '-'),
    ('a.c', 'abc', SUCCEED,
     'found', 'abc'),
    ('a.c', 'axc', SUCCEED,
     'found', 'axc'),
    ('a.*c', 'axyzc', SUCCEED,
     'found', 'axyzc'),
    ('a.*c', 'axyzd', FAIL),
    ('a[bc]d', 'abc', FAIL),
    ('a[bc]d', 'abd', SUCCEED,
     'found', 'abd'),
    ('a[b-d]e', 'abd', FAIL),
    ('a[b-d]e', 'ace', SUCCEED,
     'found', 'ace'),
    ('a[b-d]', 'aac', SUCCEED,
     'found', 'ac'),
    ('a[-b]', 'a-', SUCCEED,
     'found', 'a-'),
    ('a[b-]', 'a-', SUCCEED,
     'found', 'a-'),
    ('a[]b', '-', SYNTAX_ERROR),
    ('a[', '-', SYNTAX_ERROR),
    ('a\\', '-', SYNTAX_ERROR),
    ('abc\\)', '-', SYNTAX_ERROR),
    ('\\(abc', '-', SYNTAX_ERROR),
    ('a]', 'a]', SUCCEED,
     'found', 'a]'),
    ('a[]]b', 'a]b', SUCCEED,
     'found', 'a]b'),
    ('a[^bc]d', 'aed', SUCCEED,
     'found', 'aed'),
    ('a[^bc]d', 'abd', FAIL),
    ('a[^-b]c', 'adc', SUCCEED,
     'found', 'adc'),
    ('a[^-b]c', 'a-c', FAIL),
    ('a[^]b]c', 'a]c', FAIL),
    ('a[^]b]c', 'adc', SUCCEED,
     'found', 'adc'),
    ('\\ba\\b', 'a-', SUCCEED,
     '"-"', '-'),
    ('\\ba\\b', '-a', SUCCEED,
     '"-"', '-'),
    ('\\ba\\b', '-a-', SUCCEED,
     '"-"', '-'),
    ('\\by\\b', 'xy', FAIL),
    ('\\by\\b', 'yz', FAIL),
    ('\\by\\b', 'xyz', FAIL),
    ('ab\\|cd', 'abc', SUCCEED,
     'found', 'ab'),
    ('ab\\|cd', 'abcd', SUCCEED,
     'found', 'ab'),
    ('\\(\\)ef', 'def', SUCCEED,
     'found+"-"+g1', 'ef-'),
    ('$b', 'b', FAIL),
    ('a(b', 'a(b', SUCCEED,
     'found+"-"+g1', 'a(b-None'),
    ('a(*b', 'ab', SUCCEED,
     'found', 'ab'),
    ('a(*b', 'a((b', SUCCEED,
     'found', 'a((b'),
    ('a\\\\b', 'a\\b', SUCCEED,
     'found', 'a\\b'),
    ('\\(\\(a\\)\\)', 'abc', SUCCEED,
     'found+"-"+g1+"-"+g2', 'a-a-a'),
    ('\\(a\\)b\\(c\\)', 'abc', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abc-a-c'),
    ('a+b+c', 'aabbabc', SUCCEED,
     'found', 'abc'),
    ('\\(a+\\|b\\)*', 'ab', SUCCEED,
     'found+"-"+g1', 'ab-b'),
    ('\\(a+\\|b\\)+', 'ab', SUCCEED,
     'found+"-"+g1', 'ab-b'),
    ('\\(a+\\|b\\)?', 'ab', SUCCEED,
     'found+"-"+g1', 'a-a'),
    ('\\)\\(', '-', SYNTAX_ERROR),
    ('[^ab]*', 'cde', SUCCEED,
     'found', 'cde'),
    ('abc', '', FAIL),
    ('a*', '', SUCCEED,
     'found', ''),
    ('a\\|b\\|c\\|d\\|e', 'e', SUCCEED,
     'found', 'e'),
    ('\\(a\\|b\\|c\\|d\\|e\\)f', 'ef', SUCCEED,
     'found+"-"+g1', 'ef-e'),
    ('abcd*efg', 'abcdefg', SUCCEED,
     'found', 'abcdefg'),
    ('ab*', 'xabyabbbz', SUCCEED,
     'found', 'ab'),
    ('ab*', 'xayabbbz', SUCCEED,
     'found', 'a'),
    ('\\(ab\\|cd\\)e', 'abcde', SUCCEED,
     'found+"-"+g1', 'cde-cd'),
    ('[abhgefdc]ij', 'hij', SUCCEED,
     'found', 'hij'),
    # NOTE(review): this FAIL entry still carries the two optional result
    # fields; presumably the harness ignores them on failure -- confirm.
    ('^\\(ab\\|cd\\)e', 'abcde', FAIL,
     'xg1y', 'xy'),
    ('\\(abc\\|\\)ef', 'abcdef', SUCCEED,
     'found+"-"+g1', 'ef-'),
    ('\\(a\\|b\\)c*d', 'abcd', SUCCEED,
     'found+"-"+g1', 'bcd-b'),
    ('\\(ab\\|ab*\\)bc', 'abc', SUCCEED,
     'found+"-"+g1', 'abc-a'),
    ('a\\([bc]*\\)c*', 'abc', SUCCEED,
     'found+"-"+g1', 'abc-bc'),
    ('a\\([bc]*\\)\\(c*d\\)', 'abcd', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a\\([bc]+\\)\\(c*d\\)', 'abcd', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcd-bc-d'),
    ('a\\([bc]*\\)\\(c+d\\)', 'abcd', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcd-b-cd'),
    ('a[bcd]*dcdcde', 'adcdcde', SUCCEED,
     'found', 'adcdcde'),
    ('a[bcd]+dcdcde', 'adcdcde', FAIL),
    ('\\(ab\\|a\\)b*c', 'abc', SUCCEED,
     'found+"-"+g1', 'abc-ab'),
    ('\\(\\(a\\)\\(b\\)c\\)\\(d\\)', 'abcd', SUCCEED,
     'g1+"-"+g2+"-"+g3+"-"+g4', 'abc-a-b-d'),
    ('[a-zA-Z_][a-zA-Z0-9_]*', 'alpha', SUCCEED,
     'found', 'alpha'),
    ('^a\\(bc+\\|b[eh]\\)g\\|.h$', 'abh', SUCCEED,
     'found+"-"+g1', 'bh-None'),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'effgz', SUCCEED,
     'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'ij', SUCCEED,
     'found+"-"+g1+"-"+g2', 'ij-ij-j'),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'effg', FAIL),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'bcdd', FAIL),
    ('\\(bc+d$\\|ef*g.\\|h?i\\(j\\|k\\)\\)', 'reffgz', SUCCEED,
     'found+"-"+g1+"-"+g2', 'effgz-effgz-None'),
    ('\\(\\(\\(\\(\\(\\(\\(\\(\\(a\\)\\)\\)\\)\\)\\)\\)\\)\\)', 'a', SUCCEED,
     'found', 'a'),
    ('multiple words of text', 'uh-uh', FAIL),
    ('multiple words', 'multiple words, yeah', SUCCEED,
     'found', 'multiple words'),
    ('\\(.*\\)c\\(.*\\)', 'abcde', SUCCEED,
     'found+"-"+g1+"-"+g2', 'abcde-ab-de'),
    ('(\\(.*\\), \\(.*\\))', '(a, b)', SUCCEED,
     'g2+"-"+g1', 'b-a'),
    ('[k]', 'ab', FAIL),
    ('a[-]?c', 'ac', SUCCEED,
     'found', 'ac'),
    ('\\(abc\\)\\1', 'abcabc', SUCCEED,
     'g1', 'abc'),
    ('\\([a-c]*\\)\\1', 'abcabc', SUCCEED,
     'g1', 'abc'),
    ('^\\(.+\\)?B', 'AB', SUCCEED,
     'g1', 'A'),
    ('\\(a+\\).\\1$', 'aaaaa', SUCCEED,
     'found+"-"+g1', 'aaaaa-aa'),
    ('^\\(a+\\).\\1$', 'aaaa', FAIL),
    ('\\(abc\\)\\1', 'abcabc', SUCCEED,
     'found+"-"+g1', 'abcabc-abc'),
    ('\\([a-c]+\\)\\1', 'abcabc', SUCCEED,
     'found+"-"+g1', 'abcabc-abc'),
    ('\\(a\\)\\1', 'aa', SUCCEED,
     'found+"-"+g1', 'aa-a'),
    ('\\(a+\\)\\1', 'aa', SUCCEED,
     'found+"-"+g1', 'aa-a'),
    ('\\(a+\\)+\\1', 'aa', SUCCEED,
     'found+"-"+g1', 'aa-a'),
    ('\\(a\\).+\\1', 'aba', SUCCEED,
     'found+"-"+g1', 'aba-a'),
    ('\\(a\\)ba*\\1', 'aba', SUCCEED,
     'found+"-"+g1', 'aba-a'),
    ('\\(aa\\|a\\)a\\1$', 'aaa', SUCCEED,
     'found+"-"+g1', 'aaa-a'),
    ('\\(a\\|aa\\)a\\1$', 'aaa', SUCCEED,
     'found+"-"+g1', 'aaa-a'),
    ('\\(a+\\)a\\1$', 'aaa', SUCCEED,
     'found+"-"+g1', 'aaa-a'),
    ('\\([abc]*\\)\\1', 'abcabc', SUCCEED,
     'found+"-"+g1', 'abcabc-abc'),
    ('\\(a\\)\\(b\\)c\\|ab', 'ab', SUCCEED,
     'found+"-"+g1+"-"+g2', 'ab-None-None'),
    ('\\(a\\)+x', 'aaax', SUCCEED,
     'found+"-"+g1', 'aaax-a'),
    ('\\([ac]\\)+x', 'aacx', SUCCEED,
     'found+"-"+g1', 'aacx-c'),
    ('\\([^/]*/\\)*sub1/', 'd:msgs/tdir/sub1/trial/away.cpp', SUCCEED,
     'found+"-"+g1', 'd:msgs/tdir/sub1/-tdir/'),
    ('\\([^.]*\\)\\.\\([^:]*\\):[T ]+\\(.*\\)', 'track1.title:TBlah blah blah', SUCCEED,
     'found+"-"+g1+"-"+g2+"-"+g3', 'track1.title:TBlah blah blah-track1-title-Blah blah blah'),
    ('\\([^N]*N\\)+', 'abNNxyzN', SUCCEED,
     'found+"-"+g1', 'abNNxyzN-xyzN'),
    ('\\([^N]*N\\)+', 'abNNxyz', SUCCEED,
     'found+"-"+g1', 'abNN-N'),
    ('\\([abc]*\\)x', 'abcx', SUCCEED,
     'found+"-"+g1', 'abcx-abc'),
    ('\\([abc]*\\)x', 'abc', FAIL),
    ('\\([xyz]*\\)x', 'abcx', SUCCEED,
     'found+"-"+g1', 'x-'),
    ('\\(a\\)+b\\|aac', 'aac', SUCCEED,
     'found+"-"+g1', 'aac-None'),
    # Word-boundary operators \< and \>; the backslash is doubled like every
    # other pattern here, since "\<" and "\>" are not valid string escapes.
    ('\\<a', 'a', SUCCEED, 'found', 'a'),
    ('\\<a', '!', FAIL),
    ('a\\<b', 'ab', FAIL),
    ('a\\>', 'ab', FAIL),
    ('a\\>', 'a!', SUCCEED, 'found', 'a'),
    ('a\\>', 'a', SUCCEED, 'found', 'a'),
]
# Generated by PyXR 0.9.4