0001 import sys 0002 sys.path = ['.'] + sys.path 0003 0004 from test.test_support import verbose, run_unittest 0005 import re 0006 from sre import Scanner 0007 import sys, os, traceback 0008 from weakref import proxy 0009 0010 # Misc tests from Tim Peters' re.doc 0011 0012 # WARNING: Don't change details in these tests if you don't know 0013 # what you're doing. Some of these tests were carefuly modeled to 0014 # cover most of the code. 0015 0016 import unittest 0017 0018 class ReTests(unittest.TestCase): 0019 0020 def test_weakref(self): 0021 s = 'QabbbcR' 0022 x = re.compile('ab+c') 0023 y = proxy(x) 0024 self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR')) 0025 0026 def test_search_star_plus(self): 0027 self.assertEqual(re.search('x*', 'axx').span(0), (0, 0)) 0028 self.assertEqual(re.search('x*', 'axx').span(), (0, 0)) 0029 self.assertEqual(re.search('x+', 'axx').span(0), (1, 3)) 0030 self.assertEqual(re.search('x+', 'axx').span(), (1, 3)) 0031 self.assertEqual(re.search('x', 'aaa'), None) 0032 self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0)) 0033 self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) 0034 self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) 0035 self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) 0036 self.assertEqual(re.match('a+', 'xxx'), None) 0037 0038 def bump_num(self, matchobj): 0039 int_value = int(matchobj.group(0)) 0040 return str(int_value + 1) 0041 0042 def test_basic_re_sub(self): 0043 self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x') 0044 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'), 0045 '9.3 -3 24x100y') 0046 self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3), 0047 '9.3 -3 23x99y') 0048 0049 self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n') 0050 self.assertEqual(re.sub('.', r"\n", 'x'), '\n') 0051 0052 s = r"\1\1" 0053 self.assertEqual(re.sub('(.)', s, 'x'), 'xx') 0054 self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s) 0055 self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s) 0056 0057 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx') 0058 self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx') 0059 self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx') 0060 self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx') 0061 0062 self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'), 0063 '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D') 0064 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a') 0065 self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), 0066 (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7))) 0067 0068 self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest') 0069 0070 def test_bug_449964(self): 0071 # fails for group followed by other escape 0072 self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'), 0073 'xx\bxx\b') 0074 0075 def test_bug_449000(self): 0076 # Test for sub() on escaped characters 0077 self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'), 0078 'abc\ndef\n') 0079 self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'), 0080 'abc\ndef\n') 0081 self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'), 0082 'abc\ndef\n') 0083 self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'), 0084 'abc\ndef\n') 0085 0086 def test_sub_template_numeric_escape(self): 0087 # bug 776311 and friends 0088 self.assertEqual(re.sub('x', r'\0', 'x'), '\0') 0089 self.assertEqual(re.sub('x', r'\000', 'x'), '\000') 0090 self.assertEqual(re.sub('x', r'\001', 'x'), '\001') 0091 self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8') 0092 self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9') 0093 self.assertEqual(re.sub('x', r'\111', 'x'), '\111') 0094 self.assertEqual(re.sub('x', r'\117', 'x'), '\117') 0095 0096 self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111') 0097 self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1') 0098 0099 self.assertEqual(re.sub('x', r'\00', 'x'), '\x00') 0100 self.assertEqual(re.sub('x', r'\07', 'x'), '\x07') 0101 self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8') 0102 self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9') 0103 self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a') 0104 0105 self.assertEqual(re.sub('x', r'\400', 'x'), '\0') 0106 self.assertEqual(re.sub('x', r'\777', 'x'), '\377') 0107 0108 self.assertRaises(re.error, re.sub, 'x', r'\1', 'x') 0109 self.assertRaises(re.error, re.sub, 'x', r'\8', 'x') 0110 self.assertRaises(re.error, re.sub, 'x', r'\9', 'x') 0111 self.assertRaises(re.error, re.sub, 'x', r'\11', 'x') 0112 self.assertRaises(re.error, re.sub, 'x', r'\18', 'x') 0113 self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x') 0114 self.assertRaises(re.error, re.sub, 'x', r'\90', 'x') 0115 self.assertRaises(re.error, re.sub, 'x', r'\99', 'x') 0116 self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8' 0117 self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x') 0118 self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1' 0119 self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0' 0120 0121 # in python2.3 (etc), these loop endlessly in sre_parser.py 0122 self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x') 0123 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'), 0124 'xz8') 0125 self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'), 0126 'xza') 0127 0128 def test_qualified_re_sub(self): 0129 self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb') 0130 self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa') 0131 0132 def test_bug_114660(self): 0133 self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'), 0134 'hello there') 0135 0136 def test_bug_462270(self): 0137 # Test for empty sub() behaviour, see SF bug #462270 0138 self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-') 0139 self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d') 0140 0141 def test_symbolic_refs(self): 0142 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx') 0143 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx') 0144 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx') 0145 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx') 0146 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx') 0147 self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx') 0148 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx') 0149 self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx') 0150 self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx') 0151 0152 def test_re_subn(self): 0153 self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2)) 0154 self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1)) 0155 self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0)) 0156 self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4)) 0157 self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2)) 0158 0159 def test_re_split(self): 0160 self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c']) 0161 self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c']) 0162 self.assertEqual(re.split("(:*)", ":a:b::c"), 0163 ['', ':', 'a', ':', 'b', '::', 'c']) 0164 self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c']) 0165 self.assertEqual(re.split("(:)*", ":a:b::c"), 0166 ['', ':', 'a', ':', 'b', ':', 'c']) 0167 self.assertEqual(re.split("([b:]+)", ":a:b::c"), 0168 ['', ':', 'a', ':b::', 'c']) 0169 self.assertEqual(re.split("(b)|(:+)", ":a:b::c"), 0170 ['', None, ':', 'a', None, ':', '', 'b', None, '', 0171 None, '::', 'c']) 0172 self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"), 0173 ['', 'a', '', '', 'c']) 0174 0175 def test_qualified_re_split(self): 0176 self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c']) 0177 self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d']) 0178 self.assertEqual(re.split("(:)", ":a:b::c", 2), 0179 ['', ':', 'a', ':', 'b::c']) 0180 self.assertEqual(re.split("(:*)", ":a:b::c", 2), 0181 ['', ':', 'a', ':', 'b::c']) 0182 0183 def test_re_findall(self): 0184 self.assertEqual(re.findall(":+", "abc"), []) 0185 self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"]) 0186 self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"]) 0187 self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""), 0188 (":", ":"), 0189 (":", "::")]) 0190 0191 def test_bug_117612(self): 0192 self.assertEqual(re.findall(r"(a|(b))", "aba"), 0193 [("a", ""),("b", "b"),("a", "")]) 0194 0195 def test_re_match(self): 0196 self.assertEqual(re.match('a', 'a').groups(), ()) 0197 self.assertEqual(re.match('(a)', 'a').groups(), ('a',)) 0198 self.assertEqual(re.match(r'(a)', 'a').group(0), 'a') 0199 self.assertEqual(re.match(r'(a)', 'a').group(1), 'a') 0200 self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a')) 0201 0202 pat = re.compile('((a)|(b))(c)?') 0203 self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None)) 0204 self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None)) 0205 self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c')) 0206 self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c')) 0207 self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c')) 0208 0209 # A single group 0210 m = re.match('(a)', 'a') 0211 self.assertEqual(m.group(0), 'a') 0212 self.assertEqual(m.group(0), 'a') 0213 self.assertEqual(m.group(1), 'a') 0214 self.assertEqual(m.group(1, 1), ('a', 'a')) 0215 0216 pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?') 0217 self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None)) 0218 self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'), 0219 (None, 'b', None)) 0220 self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c')) 0221 0222 def test_re_groupref_exists(self): 0223 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(), 0224 ('(', 'a')) 0225 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(), 0226 (None, 'a')) 0227 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None) 0228 self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None) 0229 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(), 0230 ('a', 'b')) 0231 self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(), 0232 (None, 'd')) 0233 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(), 0234 (None, 'd')) 0235 self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(), 0236 ('a', '')) 0237 0238 def test_re_groupref(self): 0239 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(), 0240 ('|', 'a')) 0241 self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(), 0242 (None, 'a')) 0243 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None) 0244 self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None) 0245 self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(), 0246 ('a', 'a')) 0247 self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(), 0248 (None, None)) 0249 0250 def test_groupdict(self): 0251 self.assertEqual(re.match('(?P<first>first) (?P<second>second)', 0252 'first second').groupdict(), 0253 {'first':'first', 'second':'second'}) 0254 0255 def test_expand(self): 0256 self.assertEqual(re.match("(?P<first>first) (?P<second>second)", 0257 "first second") 0258 .expand(r"\2 \1 \g<second> \g<first>"), 0259 "second first second first") 0260 0261 def test_repeat_minmax(self): 0262 self.assertEqual(re.match("^(\w){1}$", "abc"), None) 0263 self.assertEqual(re.match("^(\w){1}?$", "abc"), None) 0264 self.assertEqual(re.match("^(\w){1,2}$", "abc"), None) 0265 self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None) 0266 0267 self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c") 0268 self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c") 0269 self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c") 0270 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") 0271 self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c") 0272 self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c") 0273 self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c") 0274 self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c") 0275 0276 self.assertEqual(re.match("^x{1}$", "xxx"), None) 0277 self.assertEqual(re.match("^x{1}?$", "xxx"), None) 0278 self.assertEqual(re.match("^x{1,2}$", "xxx"), None) 0279 self.assertEqual(re.match("^x{1,2}?$", "xxx"), None) 0280 0281 self.assertNotEqual(re.match("^x{3}$", "xxx"), None) 0282 self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None) 0283 self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None) 0284 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None) 0285 self.assertNotEqual(re.match("^x{3}?$", "xxx"), None) 0286 self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None) 0287 self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None) 0288 self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None) 0289 0290 def test_getattr(self): 0291 self.assertEqual(re.match("(a)", "a").pos, 0) 0292 self.assertEqual(re.match("(a)", "a").endpos, 1) 0293 self.assertEqual(re.match("(a)", "a").string, "a") 0294 self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1))) 0295 self.assertNotEqual(re.match("(a)", "a").re, None) 0296 0297 def test_special_escapes(self): 0298 self.assertEqual(re.search(r"\b(b.)\b", 0299 "abcd abc bcd bx").group(1), "bx") 0300 self.assertEqual(re.search(r"\B(b.)\B", 0301 "abc bcd bc abxd").group(1), "bx") 0302 self.assertEqual(re.search(r"\b(b.)\b", 0303 "abcd abc bcd bx", re.LOCALE).group(1), "bx") 0304 self.assertEqual(re.search(r"\B(b.)\B", 0305 "abc bcd bc abxd", re.LOCALE).group(1), "bx") 0306 self.assertEqual(re.search(r"\b(b.)\b", 0307 "abcd abc bcd bx", re.UNICODE).group(1), "bx") 0308 self.assertEqual(re.search(r"\B(b.)\B", 0309 "abc bcd bc abxd", re.UNICODE).group(1), "bx") 0310 self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") 0311 self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") 0312 self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None) 0313 self.assertEqual(re.search(r"\b(b.)\b", 0314 u"abcd abc bcd bx").group(1), "bx") 0315 self.assertEqual(re.search(r"\B(b.)\B", 0316 u"abc bcd bc abxd").group(1), "bx") 0317 self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc") 0318 self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc") 0319 self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None) 0320 self.assertEqual(re.search(r"\d\D\w\W\s\S", 0321 "1aa! a").group(0), "1aa! a") 0322 self.assertEqual(re.search(r"\d\D\w\W\s\S", 0323 "1aa! a", re.LOCALE).group(0), "1aa! a") 0324 self.assertEqual(re.search(r"\d\D\w\W\s\S", 0325 "1aa! a", re.UNICODE).group(0), "1aa! a") 0326 0327 def test_ignore_case(self): 0328 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") 0329 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") 0330 0331 def test_bigcharset(self): 0332 self.assertEqual(re.match(u"([\u2222\u2223])", 0333 u"\u2222").group(1), u"\u2222") 0334 self.assertEqual(re.match(u"([\u2222\u2223])", 0335 u"\u2222", re.UNICODE).group(1), u"\u2222") 0336 0337 def test_anyall(self): 0338 self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0), 0339 "a\nb") 0340 self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0), 0341 "a\n\nb") 0342 0343 def test_non_consuming(self): 0344 self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a") 0345 self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a") 0346 self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a") 0347 self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a") 0348 self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a") 0349 self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a") 0350 self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a") 0351 0352 self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a") 0353 self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a") 0354 self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a") 0355 self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a") 0356 0357 def test_ignore_case(self): 0358 self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b") 0359 self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb") 0360 self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b") 0361 self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb") 0362 self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a") 0363 self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa") 0364 self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a") 0365 self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa") 0366 0367 def test_category(self): 0368 self.assertEqual(re.match(r"(\s)", " ").group(1), " ") 0369 0370 def test_getlower(self): 0371 import _sre 0372 self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) 0373 self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) 0374 self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) 0375 0376 self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") 0377 self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC") 0378 0379 def test_not_literal(self): 0380 self.assertEqual(re.search("\s([^a])", " b").group(1), "b") 0381 self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb") 0382 0383 def test_search_coverage(self): 0384 self.assertEqual(re.search("\s(b)", " b").group(1), "b") 0385 self.assertEqual(re.search("a\s", "a ").group(0), "a ") 0386 0387 def test_re_escape(self): 0388 p="" 0389 for i in range(0, 256): 0390 p = p + chr(i) 0391 self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None, 0392 True) 0393 self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1)) 0394 0395 pat=re.compile(re.escape(p)) 0396 self.assertEqual(pat.match(p) is not None, True) 0397 self.assertEqual(pat.match(p).span(), (0,256)) 0398 0399 def test_pickling(self): 0400 import pickle 0401 self.pickle_test(pickle) 0402 import cPickle 0403 self.pickle_test(cPickle) 0404 0405 def pickle_test(self, pickle): 0406 oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)') 0407 s = pickle.dumps(oldpat) 0408 newpat = pickle.loads(s) 0409 self.assertEqual(oldpat, newpat) 0410 0411 def test_constants(self): 0412 self.assertEqual(re.I, re.IGNORECASE) 0413 self.assertEqual(re.L, re.LOCALE) 0414 self.assertEqual(re.M, re.MULTILINE) 0415 self.assertEqual(re.S, re.DOTALL) 0416 self.assertEqual(re.X, re.VERBOSE) 0417 0418 def test_flags(self): 0419 for flag in [re.I, re.M, re.X, re.S, re.L]: 0420 self.assertNotEqual(re.compile('^pattern$', flag), None) 0421 0422 def test_sre_character_literals(self): 0423 for i in [0, 8, 16, 32, 64, 127, 128, 255]: 0424 self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None) 0425 self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None) 0426 self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None) 0427 self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None) 0428 self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None) 0429 self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None) 0430 self.assertRaises(re.error, re.match, "\911", "") 0431 0432 def test_sre_character_class_literals(self): 0433 for i in [0, 8, 16, 32, 64, 127, 128, 255]: 0434 self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None) 0435 self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None) 0436 self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None) 0437 self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None) 0438 self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None) 0439 self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None) 0440 self.assertRaises(re.error, re.match, "[\911]", "") 0441 0442 def test_bug_113254(self): 0443 self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1) 0444 self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1) 0445 self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1)) 0446 0447 def test_bug_527371(self): 0448 # bug described in patches 527371/672491 0449 self.assertEqual(re.match(r'(a)?a','a').lastindex, None) 0450 self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1) 0451 self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a') 0452 self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a') 0453 self.assertEqual(re.match("((a))", "a").lastindex, 1) 0454 0455 def test_bug_545855(self): 0456 # bug 545855 -- This pattern failed to cause a compile error as it 0457 # should, instead provoking a TypeError. 0458 self.assertRaises(re.error, re.compile, 'foo[a-') 0459 0460 def test_bug_418626(self): 0461 # bugs 418626 at al. -- Testing Greg Chapman's addition of op code 0462 # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of 0463 # pattern '*?' on a long string. 0464 self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001) 0465 self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0), 0466 20003) 0467 self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001) 0468 # non-simple '*?' still used to hit the recursion limit, before the 0469 # non-recursive scheme was implemented. 0470 self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001) 0471 0472 def test_bug_612074(self): 0473 pat=u"["+re.escape(u"\u2039")+u"]" 0474 self.assertEqual(re.compile(pat) and 1, 1) 0475 0476 def test_stack_overflow(self): 0477 # nasty cases that used to overflow the straightforward recursive 0478 # implementation of repeated groups. 0479 self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x') 0480 self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x') 0481 self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x') 0482 0483 def test_scanner(self): 0484 def s_ident(scanner, token): return token 0485 def s_operator(scanner, token): return "op%s" % token 0486 def s_float(scanner, token): return float(token) 0487 def s_int(scanner, token): return int(token) 0488 0489 scanner = Scanner([ 0490 (r"[a-zA-Z_]\w*", s_ident), 0491 (r"\d+\.\d*", s_float), 0492 (r"\d+", s_int), 0493 (r"=|\+|-|\*|/", s_operator), 0494 (r"\s+", None), 0495 ]) 0496 0497 self.assertNotEqual(scanner.scanner.scanner("").pattern, None) 0498 0499 self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"), 0500 (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5, 0501 'op+', 'bar'], '')) 0502 0503 def test_bug_448951(self): 0504 # bug 448951 (similar to 429357, but with single char match) 0505 # (Also test greedy matches.) 0506 for op in '','?','*': 0507 self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(), 0508 (None, None)) 0509 self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(), 0510 ('a:', 'a')) 0511 0512 def test_bug_725106(self): 0513 # capturing groups in alternatives in repeats 0514 self.assertEqual(re.match('^((a)|b)*', 'abc').groups(), 0515 ('b', 'a')) 0516 self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(), 0517 ('c', 'b')) 0518 self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(), 0519 ('b', None)) 0520 self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(), 0521 ('b', None)) 0522 self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(), 0523 ('b', 'a')) 0524 self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(), 0525 ('c', 'b')) 0526 self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(), 0527 ('b', None)) 0528 self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(), 0529 ('b', None)) 0530 0531 def test_bug_725149(self): 0532 # mark_stack_base restoring before restoring marks 0533 self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(), 0534 ('a', None)) 0535 self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(), 0536 ('a', None, None)) 0537 0538 def test_bug_764548(self): 0539 # bug 764548, re.compile() barfs on str/unicode subclasses 0540 try: 0541 unicode 0542 except NameError: 0543 return # no problem if we have no unicode 0544 class my_unicode(unicode): pass 0545 pat = re.compile(my_unicode("abc")) 0546 self.assertEqual(pat.match("xyz"), None) 0547 0548 def test_finditer(self): 0549 iter = re.finditer(r":+", "a:b::c:::d") 0550 self.assertEqual([item.group(0) for item in iter], 0551 [":", "::", ":::"]) 0552 0553 def test_bug_926075(self): 0554 try: 0555 unicode 0556 except NameError: 0557 return # no problem if we have no unicode 0558 self.assert_(re.compile('bug_926075') is not 0559 re.compile(eval("u'bug_926075'"))) 0560 0561 def test_bug_931848(self): 0562 try: 0563 unicode 0564 except NameError: 0565 pass 0566 pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"') 0567 self.assertEqual(re.compile(pattern).split("a.b.c"), 0568 ['a','b','c']) 0569 0570 def test_bug_581080(self): 0571 iter = re.finditer(r"\s", "a b") 0572 self.assertEqual(iter.next().span(), (1,2)) 0573 self.assertRaises(StopIteration, iter.next) 0574 0575 scanner = re.compile(r"\s").scanner("a b") 0576 self.assertEqual(scanner.search().span(), (1, 2)) 0577 self.assertEqual(scanner.search(), None) 0578 0579 def test_bug_817234(self): 0580 iter = re.finditer(r".*", "asdf") 0581 self.assertEqual(iter.next().span(), (0, 4)) 0582 self.assertEqual(iter.next().span(), (4, 4)) 0583 self.assertRaises(StopIteration, iter.next) 0584 0585 0586 def run_re_tests(): 0587 from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR 0588 if verbose: 0589 print 'Running re_tests test suite' 0590 else: 0591 # To save time, only run the first and last 10 tests 0592 #tests = tests[:10] + tests[-10:] 0593 pass 0594 0595 for t in tests: 0596 sys.stdout.flush() 0597 pattern = s = outcome = repl = expected = None 0598 if len(t) == 5: 0599 pattern, s, outcome, repl, expected = t 0600 elif len(t) == 3: 0601 pattern, s, outcome = t 0602 else: 0603 raise ValueError, ('Test tuples should have 3 or 5 fields', t) 0604 0605 try: 0606 obj = re.compile(pattern) 0607 except re.error: 0608 if outcome == SYNTAX_ERROR: pass # Expected a syntax error 0609 else: 0610 print '=== Syntax error:', t 0611 except KeyboardInterrupt: raise KeyboardInterrupt 0612 except: 0613 print '*** Unexpected error ***', t 0614 if verbose: 0615 traceback.print_exc(file=sys.stdout) 0616 else: 0617 try: 0618 result = obj.search(s) 0619 except re.error, msg: 0620 print '=== Unexpected exception', t, repr(msg) 0621 if outcome == SYNTAX_ERROR: 0622 # This should have been a syntax error; forget it. 0623 pass 0624 elif outcome == FAIL: 0625 if result is None: pass # No match, as expected 0626 else: print '=== Succeeded incorrectly', t 0627 elif outcome == SUCCEED: 0628 if result is not None: 0629 # Matched, as expected, so now we compute the 0630 # result string and compare it to our expected result. 0631 start, end = result.span(0) 0632 vardict={'found': result.group(0), 0633 'groups': result.group(), 0634 'flags': result.re.flags} 0635 for i in range(1, 100): 0636 try: 0637 gi = result.group(i) 0638 # Special hack because else the string concat fails: 0639 if gi is None: 0640 gi = "None" 0641 except IndexError: 0642 gi = "Error" 0643 vardict['g%d' % i] = gi 0644 for i in result.re.groupindex.keys(): 0645 try: 0646 gi = result.group(i) 0647 if gi is None: 0648 gi = "None" 0649 except IndexError: 0650 gi = "Error" 0651 vardict[i] = gi 0652 repl = eval(repl, vardict) 0653 if repl != expected: 0654 print '=== grouping error', t, 0655 print repr(repl) + ' should be ' + repr(expected) 0656 else: 0657 print '=== Failed incorrectly', t 0658 0659 # Try the match on a unicode string, and check that it 0660 # still succeeds. 0661 try: 0662 result = obj.search(unicode(s, "latin-1")) 0663 if result is None: 0664 print '=== Fails on unicode match', t 0665 except NameError: 0666 continue # 1.5.2 0667 except TypeError: 0668 continue # unicode test case 0669 0670 # Try the match on a unicode pattern, and check that it 0671 # still succeeds. 0672 obj=re.compile(unicode(pattern, "latin-1")) 0673 result = obj.search(s) 0674 if result is None: 0675 print '=== Fails on unicode pattern match', t 0676 0677 # Try the match with the search area limited to the extent 0678 # of the match and see if it still succeeds. \B will 0679 # break (because it won't match at the end or start of a 0680 # string), so we'll ignore patterns that feature it. 0681 0682 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \ 0683 and result is not None: 0684 obj = re.compile(pattern) 0685 result = obj.search(s, result.start(0), result.end(0) + 1) 0686 if result is None: 0687 print '=== Failed on range-limited match', t 0688 0689 # Try the match with IGNORECASE enabled, and check that it 0690 # still succeeds. 0691 obj = re.compile(pattern, re.IGNORECASE) 0692 result = obj.search(s) 0693 if result is None: 0694 print '=== Fails on case-insensitive match', t 0695 0696 # Try the match with LOCALE enabled, and check that it 0697 # still succeeds. 0698 obj = re.compile(pattern, re.LOCALE) 0699 result = obj.search(s) 0700 if result is None: 0701 print '=== Fails on locale-sensitive match', t 0702 0703 # Try the match with UNICODE locale enabled, and check 0704 # that it still succeeds. 0705 obj = re.compile(pattern, re.UNICODE) 0706 result = obj.search(s) 0707 if result is None: 0708 print '=== Fails on unicode-sensitive match', t 0709 0710 def test_main(): 0711 run_unittest(ReTests) 0712 run_re_tests() 0713 0714 if __name__ == "__main__": 0715 test_main() 0716
Generated by PyXR 0.9.4