PyXR

c:\python24\lib\test\test_re.py



0001 import sys
0002 sys.path = ['.'] + sys.path
0003 
0004 from test.test_support import verbose, run_unittest
0005 import re
0006 from sre import Scanner
0007 import sys, os, traceback
0008 from weakref import proxy
0009 
0010 # Misc tests from Tim Peters' re.doc
0011 
0012 # WARNING: Don't change details in these tests if you don't know
0013 # what you're doing. Some of these tests were carefully modeled to
0014 # cover most of the code.
0015 
0016 import unittest
0017 
0018 class ReTests(unittest.TestCase):
0019 
0020     def test_weakref(self):
0021         s = 'QabbbcR'
0022         x = re.compile('ab+c')
0023         y = proxy(x)
0024         self.assertEqual(x.findall('QabbbcR'), y.findall('QabbbcR'))
0025 
0026     def test_search_star_plus(self):
             # Check the spans reported by search() and match() for '*' (which
             # may match the empty string) and '+' (which needs at least one
             # character).  span() without an argument must agree with span(0).
0027         self.assertEqual(re.search('x*', 'axx').span(0), (0, 0))
0028         self.assertEqual(re.search('x*', 'axx').span(), (0, 0))
0029         self.assertEqual(re.search('x+', 'axx').span(0), (1, 3))
0030         self.assertEqual(re.search('x+', 'axx').span(), (1, 3))
0031         self.assertEqual(re.search('x', 'aaa'), None)
             # match() only tries at the start of the string.
0032         self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0))
0033         self.assertEqual(re.match('a*', 'xxx').span(), (0, 0))
0034         self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3))
0035         self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3))
0036         self.assertEqual(re.match('a+', 'xxx'), None)
0037 
0038     def bump_num(self, matchobj):
0039         int_value = int(matchobj.group(0))
0040         return str(int_value + 1)
0041 
0042     def test_basic_re_sub(self):
             # Exercise re.sub() with string and callable replacements, an
             # optional count, backslash escapes in the template, and numeric
             # and named group references.
0043         self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
0044         self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
0045                          '9.3 -3 24x100y')
             # A count of 3 leaves the later numbers untouched.
0046         self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
0047                          '9.3 -3 23x99y')
0048 
             # The return value of a callable is inserted as-is, while a string
             # template has its backslash escapes processed.
0049         self.assertEqual(re.sub('.', lambda m: r"\n", 'x'), '\\n')
0050         self.assertEqual(re.sub('.', r"\n", 'x'), '\n')
0051 
             # The same text as a template, escaped, and returned by a callable.
0052         s = r"\1\1"
0053         self.assertEqual(re.sub('(.)', s, 'x'), 'xx')
0054         self.assertEqual(re.sub('(.)', re.escape(s), 'x'), s)
0055         self.assertEqual(re.sub('(.)', lambda m: s, 'x'), s)
0056 
             # \g<name> and \g<number> references to the same group.
0057         self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<a>', 'xx'), 'xxxx')
0058         self.assertEqual(re.sub('(?P<a>x)', '\g<a>\g<1>', 'xx'), 'xxxx')
0059         self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
0060         self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
0061 
             # Per the expected value, the control-character escapes are
             # translated while the other backslash sequences stay literal.
0062         self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
0063                          '\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
0064         self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
0065         self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
0066                          (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
0067 
             # A pattern that matches the empty string at the start still
             # triggers one substitution.
0068         self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
0069 
0070     def test_bug_449964(self):
             # A \g<1> group reference directly followed by another escape
             # (here \b) in the replacement template must expand correctly.
0071         # fails for group followed by other escape
0072         self.assertEqual(re.sub(r'(?P<unk>x)', '\g<1>\g<1>\\b', 'xx'),
0073                          'xx\bxx\b')
0074 
0075     def test_bug_449000(self):
0076         # Test for sub() on escaped characters
             # All four raw/non-raw spellings of the pattern and the
             # replacement must produce the same result.
0077         self.assertEqual(re.sub(r'\r\n', r'\n', 'abc\r\ndef\r\n'),
0078                          'abc\ndef\n')
0079         self.assertEqual(re.sub('\r\n', r'\n', 'abc\r\ndef\r\n'),
0080                          'abc\ndef\n')
0081         self.assertEqual(re.sub(r'\r\n', '\n', 'abc\r\ndef\r\n'),
0082                          'abc\ndef\n')
0083         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
0084                          'abc\ndef\n')
0085 
0086     def test_sub_template_numeric_escape(self):
             # Numeric escapes in a replacement template: which ones are read
             # as octal character codes, which as group references, and which
             # must be rejected because the group does not exist.
0087         # bug 776311 and friends
             # A leading zero or three octal digits give a character code; a
             # non-octal digit ends the escape and is kept as literal text.
0088         self.assertEqual(re.sub('x', r'\0', 'x'), '\0')
0089         self.assertEqual(re.sub('x', r'\000', 'x'), '\000')
0090         self.assertEqual(re.sub('x', r'\001', 'x'), '\001')
0091         self.assertEqual(re.sub('x', r'\008', 'x'), '\0' + '8')
0092         self.assertEqual(re.sub('x', r'\009', 'x'), '\0' + '9')
0093         self.assertEqual(re.sub('x', r'\111', 'x'), '\111')
0094         self.assertEqual(re.sub('x', r'\117', 'x'), '\117')
0095 
             # At most three octal digits are consumed.
0096         self.assertEqual(re.sub('x', r'\1111', 'x'), '\1111')
0097         self.assertEqual(re.sub('x', r'\1111', 'x'), '\111' + '1')
0098 
0099         self.assertEqual(re.sub('x', r'\00', 'x'), '\x00')
0100         self.assertEqual(re.sub('x', r'\07', 'x'), '\x07')
0101         self.assertEqual(re.sub('x', r'\08', 'x'), '\0' + '8')
0102         self.assertEqual(re.sub('x', r'\09', 'x'), '\0' + '9')
0103         self.assertEqual(re.sub('x', r'\0a', 'x'), '\0' + 'a')
0104 
             # Per the expected values, octal codes above 0377 keep only their
             # low eight bits.
0105         self.assertEqual(re.sub('x', r'\400', 'x'), '\0')
0106         self.assertEqual(re.sub('x', r'\777', 'x'), '\377')
0107 
             # The pattern 'x' has no groups, so anything read as a group
             # reference must raise re.error.
0108         self.assertRaises(re.error, re.sub, 'x', r'\1', 'x')
0109         self.assertRaises(re.error, re.sub, 'x', r'\8', 'x')
0110         self.assertRaises(re.error, re.sub, 'x', r'\9', 'x')
0111         self.assertRaises(re.error, re.sub, 'x', r'\11', 'x')
0112         self.assertRaises(re.error, re.sub, 'x', r'\18', 'x')
0113         self.assertRaises(re.error, re.sub, 'x', r'\1a', 'x')
0114         self.assertRaises(re.error, re.sub, 'x', r'\90', 'x')
0115         self.assertRaises(re.error, re.sub, 'x', r'\99', 'x')
0116         self.assertRaises(re.error, re.sub, 'x', r'\118', 'x') # r'\11' + '8'
0117         self.assertRaises(re.error, re.sub, 'x', r'\11a', 'x')
0118         self.assertRaises(re.error, re.sub, 'x', r'\181', 'x') # r'\18' + '1'
0119         self.assertRaises(re.error, re.sub, 'x', r'\800', 'x') # r'\80' + '0'
0120 
             # With eleven groups in the pattern, \11 is a valid reference to
             # group 11 and any following character is literal text.
0121         # in python2.3 (etc), these loop endlessly in sre_parser.py
0122         self.assertEqual(re.sub('(((((((((((x)))))))))))', r'\11', 'x'), 'x')
0123         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\118', 'xyz'),
0124                          'xz8')
0125         self.assertEqual(re.sub('((((((((((y))))))))))(.)', r'\11a', 'xyz'),
0126                          'xza')
0127 
0128     def test_qualified_re_sub(self):
0129         self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
0130         self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
0131 
0132     def test_bug_114660(self):
0133         self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello  there'),
0134                          'hello there')
0135 
0136     def test_bug_462270(self):
0137         # Test for empty sub() behaviour, see SF bug #462270
0138         self.assertEqual(re.sub('x*', '-', 'abxd'), '-a-b-d-')
0139         self.assertEqual(re.sub('x+', '-', 'abxd'), 'ab-d')
0140 
0141     def test_symbolic_refs(self):
             # Malformed or unusable \g<...> references in a replacement
             # template must raise instead of being silently accepted.
             # Unterminated or syntactically invalid group names:
0142         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a', 'xx')
0143         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<', 'xx')
0144         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g', 'xx')
0145         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<a a>', 'xx')
0146         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<1a1>', 'xx')
             # A well-formed name that the pattern does not define:
0147         self.assertRaises(IndexError, re.sub, '(?P<a>x)', '\g<ab>', 'xx')
             # References to a group that exists but did not take part in the
             # match:
0148         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\g<b>', 'xx')
0149         self.assertRaises(re.error, re.sub, '(?P<a>x)|(?P<b>y)', '\\2', 'xx')
             # Negative group numbers are rejected.
0150         self.assertRaises(re.error, re.sub, '(?P<a>x)', '\g<-1>', 'xx')
0151 
0152     def test_re_subn(self):
0153         self.assertEqual(re.subn("(?i)b+", "x", "bbbb BBBB"), ('x x', 2))
0154         self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
0155         self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
0156         self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
0157         self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
0158 
0159     def test_re_split(self):
             # re.split() with plain, repeated, capturing and non-capturing
             # separators.  Capturing groups in the separator put their text
             # (or None for a group that did not match) into the result list.
0160         self.assertEqual(re.split(":", ":a:b::c"), ['', 'a', 'b', '', 'c'])
0161         self.assertEqual(re.split(":*", ":a:b::c"), ['', 'a', 'b', 'c'])
0162         self.assertEqual(re.split("(:*)", ":a:b::c"),
0163                          ['', ':', 'a', ':', 'b', '::', 'c'])
0164         self.assertEqual(re.split("(?::*)", ":a:b::c"), ['', 'a', 'b', 'c'])
             # A repeated group only reports its last repetition.
0165         self.assertEqual(re.split("(:)*", ":a:b::c"),
0166                          ['', ':', 'a', ':', 'b', ':', 'c'])
0167         self.assertEqual(re.split("([b:]+)", ":a:b::c"),
0168                          ['', ':', 'a', ':b::', 'c'])
             # Two alternative groups: the one not used shows up as None.
0169         self.assertEqual(re.split("(b)|(:+)", ":a:b::c"),
0170                          ['', None, ':', 'a', None, ':', '', 'b', None, '',
0171                           None, '::', 'c'])
0172         self.assertEqual(re.split("(?:b)|(?::+)", ":a:b::c"),
0173                          ['', 'a', '', '', 'c'])
0174 
0175     def test_qualified_re_split(self):
             # re.split() with a maximum number of splits (third argument):
             # after two splits the rest of the string is returned unsplit.
0176         self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
0177         self.assertEqual(re.split(':', 'a:b:c:d', 2), ['a', 'b', 'c:d'])
0178         self.assertEqual(re.split("(:)", ":a:b::c", 2),
0179                          ['', ':', 'a', ':', 'b::c'])
0180         self.assertEqual(re.split("(:*)", ":a:b::c", 2),
0181                          ['', ':', 'a', ':', 'b::c'])
0182 
0183     def test_re_findall(self):
             # findall() returns a list of strings for zero or one group and a
             # list of tuples when the pattern has several groups.
0184         self.assertEqual(re.findall(":+", "abc"), [])
0185         self.assertEqual(re.findall(":+", "a:b::c:::d"), [":", "::", ":::"])
0186         self.assertEqual(re.findall("(:+)", "a:b::c:::d"), [":", "::", ":::"])
0187         self.assertEqual(re.findall("(:)(:*)", "a:b::c:::d"), [(":", ""),
0188                                                                (":", ":"),
0189                                                                (":", "::")])
0190 
0191     def test_bug_117612(self):
             # In findall() results a nested group that did not take part in
             # the match is reported as an empty string.
0192         self.assertEqual(re.findall(r"(a|(b))", "aba"),
0193                          [("a", ""),("b", "b"),("a", "")])
0194 
0195     def test_re_match(self):
             # Match object accessors: groups(), group() with one or several
             # indices, the default value for unmatched groups, and access by
             # group name.
0196         self.assertEqual(re.match('a', 'a').groups(), ())
0197         self.assertEqual(re.match('(a)', 'a').groups(), ('a',))
0198         self.assertEqual(re.match(r'(a)', 'a').group(0), 'a')
0199         self.assertEqual(re.match(r'(a)', 'a').group(1), 'a')
0200         self.assertEqual(re.match(r'(a)', 'a').group(1, 1), ('a', 'a'))
0201 
             # Groups that did not participate are None, or the default
             # passed to groups().
0202         pat = re.compile('((a)|(b))(c)?')
0203         self.assertEqual(pat.match('a').groups(), ('a', 'a', None, None))
0204         self.assertEqual(pat.match('b').groups(), ('b', None, 'b', None))
0205         self.assertEqual(pat.match('ac').groups(), ('a', 'a', None, 'c'))
0206         self.assertEqual(pat.match('bc').groups(), ('b', None, 'b', 'c'))
0207         self.assertEqual(pat.match('bc').groups(""), ('b', "", 'b', 'c'))
0208 
0209         # A single group
0210         m = re.match('(a)', 'a')
0211         self.assertEqual(m.group(0), 'a')
0212         self.assertEqual(m.group(0), 'a')
0213         self.assertEqual(m.group(1), 'a')
0214         self.assertEqual(m.group(1, 1), ('a', 'a'))
0215 
             # Named groups can be addressed by number, by name, or a mix.
0216         pat = re.compile('(?:(?P<a1>a)|(?P<b2>b))(?P<c3>c)?')
0217         self.assertEqual(pat.match('a').group(1, 2, 3), ('a', None, None))
0218         self.assertEqual(pat.match('b').group('a1', 'b2', 'c3'),
0219                          (None, 'b', None))
0220         self.assertEqual(pat.match('ac').group(1, 'b2', 3), ('a', None, 'c'))
0221 
0222     def test_re_groupref_exists(self):
             # Conditional patterns (?(1)yes|no): the branch taken depends on
             # whether group 1 took part in the match.
             # A closing parenthesis is required exactly when an opening one
             # was captured.
0223         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a)').groups(),
0224                          ('(', 'a'))
0225         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a').groups(),
0226                          (None, 'a'))
0227         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', 'a)'), None)
0228         self.assertEqual(re.match('^(\()?([^()]+)(?(1)\))$', '(a'), None)
             # Conditionals with both branches, and with an empty "yes" branch.
0229         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'ab').groups(),
0230                          ('a', 'b'))
0231         self.assertEqual(re.match('^(?:(a)|c)((?(1)b|d))$', 'cd').groups(),
0232                          (None, 'd'))
0233         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'cd').groups(),
0234                          (None, 'd'))
0235         self.assertEqual(re.match('^(?:(a)|c)((?(1)|d))$', 'a').groups(),
0236                          ('a', ''))
0237 
0238     def test_re_groupref(self):
             # Backreferences (\1) inside the pattern, including references to
             # an optional group that did not take part in the match.
0239         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a|').groups(),
0240                          ('|', 'a'))
0241         self.assertEqual(re.match(r'^(\|)?([^()]+)\1?$', 'a').groups(),
0242                          (None, 'a'))
0243         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', 'a|'), None)
0244         self.assertEqual(re.match(r'^(\|)?([^()]+)\1$', '|a'), None)
0245         self.assertEqual(re.match(r'^(?:(a)|c)(\1)$', 'aa').groups(),
0246                          ('a', 'a'))
0247         self.assertEqual(re.match(r'^(?:(a)|c)(\1)?$', 'c').groups(),
0248                          (None, None))
0249 
0250     def test_groupdict(self):
0251         self.assertEqual(re.match('(?P<first>first) (?P<second>second)',
0252                                   'first second').groupdict(),
0253                          {'first':'first', 'second':'second'})
0254 
0255     def test_expand(self):
0256         self.assertEqual(re.match("(?P<first>first) (?P<second>second)",
0257                                   "first second")
0258                                   .expand(r"\2 \1 \g<second> \g<first>"),
0259                          "second first second first")
0260 
0261     def test_repeat_minmax(self):
             # Counted repeats {m}, {m,n} and their non-greedy forms, applied
             # to a capturing group and to a plain literal.
             # Too few allowed repetitions to cover the three-character input:
0262         self.assertEqual(re.match("^(\w){1}$", "abc"), None)
0263         self.assertEqual(re.match("^(\w){1}?$", "abc"), None)
0264         self.assertEqual(re.match("^(\w){1,2}$", "abc"), None)
0265         self.assertEqual(re.match("^(\w){1,2}?$", "abc"), None)
0266 
             # When the repeat can cover the input, the group holds the last
             # repetition ('c').
0267         self.assertEqual(re.match("^(\w){3}$", "abc").group(1), "c")
0268         self.assertEqual(re.match("^(\w){1,3}$", "abc").group(1), "c")
0269         self.assertEqual(re.match("^(\w){1,4}$", "abc").group(1), "c")
0270         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
0271         self.assertEqual(re.match("^(\w){3}?$", "abc").group(1), "c")
0272         self.assertEqual(re.match("^(\w){1,3}?$", "abc").group(1), "c")
0273         self.assertEqual(re.match("^(\w){1,4}?$", "abc").group(1), "c")
0274         self.assertEqual(re.match("^(\w){3,4}?$", "abc").group(1), "c")
0275 
             # The same checks with a repeated literal instead of a group.
0276         self.assertEqual(re.match("^x{1}$", "xxx"), None)
0277         self.assertEqual(re.match("^x{1}?$", "xxx"), None)
0278         self.assertEqual(re.match("^x{1,2}$", "xxx"), None)
0279         self.assertEqual(re.match("^x{1,2}?$", "xxx"), None)
0280 
0281         self.assertNotEqual(re.match("^x{3}$", "xxx"), None)
0282         self.assertNotEqual(re.match("^x{1,3}$", "xxx"), None)
0283         self.assertNotEqual(re.match("^x{1,4}$", "xxx"), None)
0284         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
0285         self.assertNotEqual(re.match("^x{3}?$", "xxx"), None)
0286         self.assertNotEqual(re.match("^x{1,3}?$", "xxx"), None)
0287         self.assertNotEqual(re.match("^x{1,4}?$", "xxx"), None)
0288         self.assertNotEqual(re.match("^x{3,4}?$", "xxx"), None)
0289 
0290     def test_getattr(self):
             # Read-only attributes of a match object: search bounds, subject
             # string, per-group spans (regs) and the pattern object (re).
0291         self.assertEqual(re.match("(a)", "a").pos, 0)
0292         self.assertEqual(re.match("(a)", "a").endpos, 1)
0293         self.assertEqual(re.match("(a)", "a").string, "a")
0294         self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
0295         self.assertNotEqual(re.match("(a)", "a").re, None)
0296 
0297     def test_special_escapes(self):
             # Boundary assertions (\b, \B, ^, $, \A, \Z) and the character
             # categories (\d \D \w \W \s \S), checked with default flags, with
             # re.LOCALE and re.UNICODE, and against unicode subject strings.
0298         self.assertEqual(re.search(r"\b(b.)\b",
0299                                    "abcd abc bcd bx").group(1), "bx")
0300         self.assertEqual(re.search(r"\B(b.)\B",
0301                                    "abc bcd bc abxd").group(1), "bx")
0302         self.assertEqual(re.search(r"\b(b.)\b",
0303                                    "abcd abc bcd bx", re.LOCALE).group(1), "bx")
0304         self.assertEqual(re.search(r"\B(b.)\B",
0305                                    "abc bcd bc abxd", re.LOCALE).group(1), "bx")
0306         self.assertEqual(re.search(r"\b(b.)\b",
0307                                    "abcd abc bcd bx", re.UNICODE).group(1), "bx")
0308         self.assertEqual(re.search(r"\B(b.)\B",
0309                                    "abc bcd bc abxd", re.UNICODE).group(1), "bx")
             # With re.M, ^ and $ match at line breaks, but \A and \Z still
             # only match at the very start and end of the string.
0310         self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
0311         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
0312         self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
             # The same checks against unicode subject strings.
0313         self.assertEqual(re.search(r"\b(b.)\b",
0314                                    u"abcd abc bcd bx").group(1), "bx")
0315         self.assertEqual(re.search(r"\B(b.)\B",
0316                                    u"abc bcd bc abxd").group(1), "bx")
0317         self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
0318         self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
0319         self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
             # One character of each category in sequence.
0320         self.assertEqual(re.search(r"\d\D\w\W\s\S",
0321                                    "1aa! a").group(0), "1aa! a")
0322         self.assertEqual(re.search(r"\d\D\w\W\s\S",
0323                                    "1aa! a", re.LOCALE).group(0), "1aa! a")
0324         self.assertEqual(re.search(r"\d\D\w\W\s\S",
0325                                    "1aa! a", re.UNICODE).group(0), "1aa! a")
0326 
0327     def test_ignore_case(self):
             # re.I must match a lower-case pattern against upper-case str and
             # unicode subjects.
             # NOTE(review): a later method in this class is also named
             # test_ignore_case; Python keeps only the last definition in a
             # class body, so this one is replaced and never runs as a test.
0328         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
0329         self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
0330 
0331     def test_bigcharset(self):
             # A character class holding code points above U+00FF must match,
             # both with default flags and with re.UNICODE.
0332         self.assertEqual(re.match(u"([\u2222\u2223])",
0333                                   u"\u2222").group(1), u"\u2222")
0334         self.assertEqual(re.match(u"([\u2222\u2223])",
0335                                   u"\u2222", re.UNICODE).group(1), u"\u2222")
0336 
0337     def test_anyall(self):
             # With re.DOTALL, '.' also matches newline characters, alone and
             # under a '*' repeat.
0338         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
0339                          "a\nb")
0340         self.assertEqual(re.match("a.*b", "a\n\nb", re.DOTALL).group(0),
0341                          "a\n\nb")
0342 
0343     def test_non_consuming(self):
             # Lookahead assertions do not consume input: in every case
             # group 1 is just "a", whatever the assertion looked at.
             # Positive lookahead (?=...):
0344         self.assertEqual(re.match("(a(?=\s[^a]))", "a b").group(1), "a")
0345         self.assertEqual(re.match("(a(?=\s[^a]*))", "a b").group(1), "a")
0346         self.assertEqual(re.match("(a(?=\s[abc]))", "a b").group(1), "a")
0347         self.assertEqual(re.match("(a(?=\s[abc]*))", "a bc").group(1), "a")
0348         self.assertEqual(re.match(r"(a)(?=\s\1)", "a a").group(1), "a")
0349         self.assertEqual(re.match(r"(a)(?=\s\1*)", "a aa").group(1), "a")
0350         self.assertEqual(re.match(r"(a)(?=\s(abc|a))", "a a").group(1), "a")
0351 
             # Negative lookahead (?!...):
0352         self.assertEqual(re.match(r"(a(?!\s[^a]))", "a a").group(1), "a")
0353         self.assertEqual(re.match(r"(a(?!\s[abc]))", "a d").group(1), "a")
0354         self.assertEqual(re.match(r"(a)(?!\s\1)", "a b").group(1), "a")
0355         self.assertEqual(re.match(r"(a)(?!\s(abc|a))", "a b").group(1), "a")
0356 
0357     def test_ignore_case(self):
0358         self.assertEqual(re.match(r"(a\s[^a])", "a b", re.I).group(1), "a b")
0359         self.assertEqual(re.match(r"(a\s[^a]*)", "a bb", re.I).group(1), "a bb")
0360         self.assertEqual(re.match(r"(a\s[abc])", "a b", re.I).group(1), "a b")
0361         self.assertEqual(re.match(r"(a\s[abc]*)", "a bb", re.I).group(1), "a bb")
0362         self.assertEqual(re.match(r"((a)\s\2)", "a a", re.I).group(1), "a a")
0363         self.assertEqual(re.match(r"((a)\s\2*)", "a aa", re.I).group(1), "a aa")
0364         self.assertEqual(re.match(r"((a)\s(abc|a))", "a a", re.I).group(1), "a a")
0365         self.assertEqual(re.match(r"((a)\s(abc|a)*)", "a aa", re.I).group(1), "a aa")
0366 
0367     def test_category(self):
0368         self.assertEqual(re.match(r"(\s)", " ").group(1), " ")
0369 
0370     def test_getlower(self):
             # _sre.getlower() must map 'A' to 'a' with no flags, with
             # re.LOCALE and with re.UNICODE.
             # _sre is imported here because only this test uses it.
0371         import _sre
0372         self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
0373         self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
0374         self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
0375 
             # The same lowering seen through the public API, for str and
             # unicode subjects.
0376         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
0377         self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
0378 
0379     def test_not_literal(self):
             # search() with a negated single-character class ([^a]), alone
             # and repeated, following a whitespace character.
0380         self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
0381         self.assertEqual(re.search("\s([^a]*)", " bb").group(1), "bb")
0382 
0383     def test_search_coverage(self):
             # search() on patterns that start with a category (\s) and that
             # start with a literal followed by a category.
0384         self.assertEqual(re.search("\s(b)", " b").group(1), "b")
0385         self.assertEqual(re.search("a\s", "a ").group(0), "a ")
0386 
0387     def test_re_escape(self):
0388         p=""
0389         for i in range(0, 256):
0390             p = p + chr(i)
0391             self.assertEqual(re.match(re.escape(chr(i)), chr(i)) is not None,
0392                              True)
0393             self.assertEqual(re.match(re.escape(chr(i)), chr(i)).span(), (0,1))
0394 
0395         pat=re.compile(re.escape(p))
0396         self.assertEqual(pat.match(p) is not None, True)
0397         self.assertEqual(pat.match(p).span(), (0,256))
0398 
0399     def test_pickling(self):
0400         import pickle
0401         self.pickle_test(pickle)
0402         import cPickle
0403         self.pickle_test(cPickle)
0404 
0405     def pickle_test(self, pickle):
0406         oldpat = re.compile('a(?:b|(c|e){1,2}?|d)+?(.)')
0407         s = pickle.dumps(oldpat)
0408         newpat = pickle.loads(s)
0409         self.assertEqual(oldpat, newpat)
0410 
0411     def test_constants(self):
0412         self.assertEqual(re.I, re.IGNORECASE)
0413         self.assertEqual(re.L, re.LOCALE)
0414         self.assertEqual(re.M, re.MULTILINE)
0415         self.assertEqual(re.S, re.DOTALL)
0416         self.assertEqual(re.X, re.VERBOSE)
0417 
0418     def test_flags(self):
0419         for flag in [re.I, re.M, re.X, re.S, re.L]:
0420             self.assertNotEqual(re.compile('^pattern$', flag), None)
0421 
0422     def test_sre_character_literals(self):
             # Octal (\ooo) and hexadecimal (\xhh) escapes in a pattern must
             # match the character with that code, also when a further literal
             # character follows the escape.
0423         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
                 # Patterns are built with % formatting, e.g. r"\%03o" % 8
                 # gives the three-digit octal escape \010.
0424             self.assertNotEqual(re.match(r"\%03o" % i, chr(i)), None)
0425             self.assertNotEqual(re.match(r"\%03o0" % i, chr(i)+"0"), None)
0426             self.assertNotEqual(re.match(r"\%03o8" % i, chr(i)+"8"), None)
0427             self.assertNotEqual(re.match(r"\x%02x" % i, chr(i)), None)
0428             self.assertNotEqual(re.match(r"\x%02x0" % i, chr(i)+"0"), None)
0429             self.assertNotEqual(re.match(r"\x%02xz" % i, chr(i)+"z"), None)
             # \911 must be rejected when the pattern is compiled.
0430         self.assertRaises(re.error, re.match, "\911", "")
0431 
0432     def test_sre_character_class_literals(self):
             # The same octal and hexadecimal escapes as in
             # test_sre_character_literals, but placed inside a character
             # class, optionally with one more literal member in the class.
0433         for i in [0, 8, 16, 32, 64, 127, 128, 255]:
0434             self.assertNotEqual(re.match(r"[\%03o]" % i, chr(i)), None)
0435             self.assertNotEqual(re.match(r"[\%03o0]" % i, chr(i)), None)
0436             self.assertNotEqual(re.match(r"[\%03o8]" % i, chr(i)), None)
0437             self.assertNotEqual(re.match(r"[\x%02x]" % i, chr(i)), None)
0438             self.assertNotEqual(re.match(r"[\x%02x0]" % i, chr(i)), None)
0439             self.assertNotEqual(re.match(r"[\x%02xz]" % i, chr(i)), None)
             # \911 inside a class must be rejected as well.
0440         self.assertRaises(re.error, re.match, "[\911]", "")
0441 
0442     def test_bug_113254(self):
0443         self.assertEqual(re.match(r'(a)|(b)', 'b').start(1), -1)
0444         self.assertEqual(re.match(r'(a)|(b)', 'b').end(1), -1)
0445         self.assertEqual(re.match(r'(a)|(b)', 'b').span(1), (-1, -1))
0446 
0447     def test_bug_527371(self):
             # lastindex / lastgroup must ignore groups that were tried but
             # did not end up in the final match, and must report the outer
             # group when groups are nested.
0448         # bug described in patches 527371/672491
0449         self.assertEqual(re.match(r'(a)?a','a').lastindex, None)
0450         self.assertEqual(re.match(r'(a)(b)?b','ab').lastindex, 1)
0451         self.assertEqual(re.match(r'(?P<a>a)(?P<b>b)?b','ab').lastgroup, 'a')
0452         self.assertEqual(re.match("(?P<a>a(b))", "ab").lastgroup, 'a')
0453         self.assertEqual(re.match("((a))", "a").lastindex, 1)
0454 
0455     def test_bug_545855(self):
0456         # bug 545855 -- This pattern failed to cause a compile error as it
0457         # should, instead provoking a TypeError.
             # An unterminated character range must be reported as re.error.
0458         self.assertRaises(re.error, re.compile, 'foo[a-')
0459 
0460     def test_bug_418626(self):
0461         # bugs 418626 et al. -- Testing Greg Chapman's addition of op code
0462         # SRE_OP_MIN_REPEAT_ONE for eliminating recursion on simple uses of
0463         # pattern '*?' on a long string.
             # Each subject is tens of thousands of characters long, so the
             # non-greedy repeat has to advance that many times before the
             # literal tail can match.
0464         self.assertEqual(re.match('.*?c', 10000*'ab'+'cd').end(0), 20001)
0465         self.assertEqual(re.match('.*?cd', 5000*'ab'+'c'+5000*'ab'+'cde').end(0),
0466                          20003)
0467         self.assertEqual(re.match('.*?cd', 20000*'abc'+'de').end(0), 60001)
0468         # non-simple '*?' still used to hit the recursion limit, before the
0469         # non-recursive scheme was implemented.
0470         self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
0471 
0472     def test_bug_612074(self):
             # A unicode character class built from re.escape() of a non-ASCII
             # character must compile.
0473         pat=u"["+re.escape(u"\u2039")+u"]"
             # "and 1" turns a truthy compiled pattern into the value 1.
0474         self.assertEqual(re.compile(pat) and 1, 1)
0475 
0476     def test_stack_overflow(self):
0477         # nasty cases that used to overflow the straightforward recursive
0478         # implementation of repeated groups.
             # Greedy and non-greedy repeats of a capturing group over 50000
             # characters; the group must hold the last repetition.
0479         self.assertEqual(re.match('(x)*', 50000*'x').group(1), 'x')
0480         self.assertEqual(re.match('(x)*y', 50000*'x'+'y').group(1), 'x')
0481         self.assertEqual(re.match('(x)*?y', 50000*'x'+'y').group(1), 'x')
0482 
0483     def test_scanner(self):
             # Tokenize a small expression with sre.Scanner.  The lexicon is
             # a list of (pattern, action) pairs; each action receives the
             # scanner and the matched text and returns the token value.
0484         def s_ident(scanner, token): return token
0485         def s_operator(scanner, token): return "op%s" % token
0486         def s_float(scanner, token): return float(token)
0487         def s_int(scanner, token): return int(token)
0488 
             # The whitespace rule has None as its action; the expected token
             # list below contains no entry for whitespace.
0489         scanner = Scanner([
0490             (r"[a-zA-Z_]\w*", s_ident),
0491             (r"\d+\.\d*", s_float),
0492             (r"\d+", s_int),
0493             (r"=|\+|-|\*|/", s_operator),
0494             (r"\s+", None),
0495             ])
0496 
             # The combined pattern built by the Scanner must be reachable.
0497         self.assertNotEqual(scanner.scanner.scanner("").pattern, None)
0498 
             # scan() returns (tokens, unscanned_remainder); the whole input
             # is consumed here, so the remainder is ''.
0499         self.assertEqual(scanner.scan("sum = 3*foo + 312.50 + bar"),
0500                          (['sum', 'op=', 3, 'op*', 'foo', 'op+', 312.5,
0501                            'op+', 'bar'], ''))
0502 
0503     def test_bug_448951(self):
0504         # bug 448951 (similar to 429357, but with single char match)
0505         # (Also test greedy matches.)
0506         for op in '','?','*':
0507             self.assertEqual(re.match(r'((.%s):)?z'%op, 'z').groups(),
0508                              (None, None))
0509             self.assertEqual(re.match(r'((.%s):)?z'%op, 'a:z').groups(),
0510                              ('a:', 'a'))
0511 
0512     def test_bug_725106(self):
0513         # capturing groups in alternatives in repeats
             # Per the expected tuples, an inner group keeps the value from
             # the last iteration in which its alternative was the one that
             # matched.
             # Greedy repeats:
0514         self.assertEqual(re.match('^((a)|b)*', 'abc').groups(),
0515                          ('b', 'a'))
0516         self.assertEqual(re.match('^(([ab])|c)*', 'abc').groups(),
0517                          ('c', 'b'))
0518         self.assertEqual(re.match('^((d)|[ab])*', 'abc').groups(),
0519                          ('b', None))
0520         self.assertEqual(re.match('^((a)c|[ab])*', 'abc').groups(),
0521                          ('b', None))
             # The same patterns with non-greedy repeats:
0522         self.assertEqual(re.match('^((a)|b)*?c', 'abc').groups(),
0523                          ('b', 'a'))
0524         self.assertEqual(re.match('^(([ab])|c)*?d', 'abcd').groups(),
0525                          ('c', 'b'))
0526         self.assertEqual(re.match('^((d)|[ab])*?c', 'abc').groups(),
0527                          ('b', None))
0528         self.assertEqual(re.match('^((a)c|[ab])*?c', 'abc').groups(),
0529                          ('b', None))
0530 
0531     def test_bug_725149(self):
0532         # mark_stack_base restoring before restoring marks
             # Groups captured inside a lookahead that sits in a repeat which
             # ends up matching zero times must be reported as None.
0533         self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
0534                          ('a', None))
0535         self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
0536                          ('a', None, None))
0537 
0538     def test_bug_764548(self):
0539         # bug 764548, re.compile() barfs on str/unicode subclasses
             # Probe for the unicode type by name; the test is skipped when
             # the name is not defined.
0540         try:
0541             unicode
0542         except NameError:
0543             return  # no problem if we have no unicode
0544         class my_unicode(unicode): pass
             # Compiling an instance of the subclass must work, and the
             # resulting pattern must behave normally.
0545         pat = re.compile(my_unicode("abc"))
0546         self.assertEqual(pat.match("xyz"), None)
0547 
0548     def test_finditer(self):
0549         iter = re.finditer(r":+", "a:b::c:::d")
0550         self.assertEqual([item.group(0) for item in iter],
0551                          [":", "::", ":::"])
0552 
0553     def test_bug_926075(self):
             # Compiling a str pattern and a unicode pattern with the same
             # text must not return the same pattern object.
0554         try:
0555             unicode
0556         except NameError:
0557             return # no problem if we have no unicode
             # eval() creates the u'' literal at run time, after the check
             # above has confirmed that unicode exists.
0558         self.assert_(re.compile('bug_926075') is not
0559                      re.compile(eval("u'bug_926075'")))
0560 
0561     def test_bug_931848(self):
0562         try:
0563             unicode
0564         except NameError:
0565             pass
0566         pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
0567         self.assertEqual(re.compile(pattern).split("a.b.c"),
0568                          ['a','b','c'])
0569 
0570     def test_bug_581080(self):
             # After the only match has been returned, both the finditer()
             # iterator and a pattern scanner must report exhaustion instead
             # of returning the match again.
0571         iter = re.finditer(r"\s", "a b")
0572         self.assertEqual(iter.next().span(), (1,2))
0573         self.assertRaises(StopIteration, iter.next)
0574 
             # Same check through the scanner() interface, which signals the
             # end with None.
0575         scanner = re.compile(r"\s").scanner("a b")
0576         self.assertEqual(scanner.search().span(), (1, 2))
0577         self.assertEqual(scanner.search(), None)
0578 
0579     def test_bug_817234(self):
             # finditer() with a pattern that can match the empty string:
             # after the match covering the whole input there is one more,
             # empty, match at the end, and then the iterator stops.
0580         iter = re.finditer(r".*", "asdf")
0581         self.assertEqual(iter.next().span(), (0, 4))
0582         self.assertEqual(iter.next().span(), (4, 4))
0583         self.assertRaises(StopIteration, iter.next)
0584 
0585 
0586 def run_re_tests():
0587     from test.re_tests import benchmarks, tests, SUCCEED, FAIL, SYNTAX_ERROR
0588     if verbose:
0589         print 'Running re_tests test suite'
0590     else:
0591         # To save time, only run the first and last 10 tests
0592         #tests = tests[:10] + tests[-10:]
0593         pass
0594 
0595     for t in tests:
0596         sys.stdout.flush()
0597         pattern = s = outcome = repl = expected = None
0598         if len(t) == 5:
0599             pattern, s, outcome, repl, expected = t
0600         elif len(t) == 3:
0601             pattern, s, outcome = t
0602         else:
0603             raise ValueError, ('Test tuples should have 3 or 5 fields', t)
0604 
0605         try:
0606             obj = re.compile(pattern)
0607         except re.error:
0608             if outcome == SYNTAX_ERROR: pass  # Expected a syntax error
0609             else:
0610                 print '=== Syntax error:', t
0611         except KeyboardInterrupt: raise KeyboardInterrupt
0612         except:
0613             print '*** Unexpected error ***', t
0614             if verbose:
0615                 traceback.print_exc(file=sys.stdout)
0616         else:
0617             try:
0618                 result = obj.search(s)
0619             except re.error, msg:
0620                 print '=== Unexpected exception', t, repr(msg)
0621             if outcome == SYNTAX_ERROR:
0622                 # This should have been a syntax error; forget it.
0623                 pass
0624             elif outcome == FAIL:
0625                 if result is None: pass   # No match, as expected
0626                 else: print '=== Succeeded incorrectly', t
0627             elif outcome == SUCCEED:
0628                 if result is not None:
0629                     # Matched, as expected, so now we compute the
0630                     # result string and compare it to our expected result.
0631                     start, end = result.span(0)
0632                     vardict={'found': result.group(0),
0633                              'groups': result.group(),
0634                              'flags': result.re.flags}
0635                     for i in range(1, 100):
0636                         try:
0637                             gi = result.group(i)
0638                             # Special hack because else the string concat fails:
0639                             if gi is None:
0640                                 gi = "None"
0641                         except IndexError:
0642                             gi = "Error"
0643                         vardict['g%d' % i] = gi
0644                     for i in result.re.groupindex.keys():
0645                         try:
0646                             gi = result.group(i)
0647                             if gi is None:
0648                                 gi = "None"
0649                         except IndexError:
0650                             gi = "Error"
0651                         vardict[i] = gi
0652                     repl = eval(repl, vardict)
0653                     if repl != expected:
0654                         print '=== grouping error', t,
0655                         print repr(repl) + ' should be ' + repr(expected)
0656                 else:
0657                     print '=== Failed incorrectly', t
0658 
0659                 # Try the match on a unicode string, and check that it
0660                 # still succeeds.
0661                 try:
0662                     result = obj.search(unicode(s, "latin-1"))
0663                     if result is None:
0664                         print '=== Fails on unicode match', t
0665                 except NameError:
0666                     continue # 1.5.2
0667                 except TypeError:
0668                     continue # unicode test case
0669 
0670                 # Try the match on a unicode pattern, and check that it
0671                 # still succeeds.
0672                 obj=re.compile(unicode(pattern, "latin-1"))
0673                 result = obj.search(s)
0674                 if result is None:
0675                     print '=== Fails on unicode pattern match', t
0676 
0677                 # Try the match with the search area limited to the extent
0678                 # of the match and see if it still succeeds.  \B will
0679                 # break (because it won't match at the end or start of a
0680                 # string), so we'll ignore patterns that feature it.
0681 
0682                 if pattern[:2] != '\\B' and pattern[-2:] != '\\B' \
0683                                and result is not None:
0684                     obj = re.compile(pattern)
0685                     result = obj.search(s, result.start(0), result.end(0) + 1)
0686                     if result is None:
0687                         print '=== Failed on range-limited match', t
0688 
0689                 # Try the match with IGNORECASE enabled, and check that it
0690                 # still succeeds.
0691                 obj = re.compile(pattern, re.IGNORECASE)
0692                 result = obj.search(s)
0693                 if result is None:
0694                     print '=== Fails on case-insensitive match', t
0695 
0696                 # Try the match with LOCALE enabled, and check that it
0697                 # still succeeds.
0698                 obj = re.compile(pattern, re.LOCALE)
0699                 result = obj.search(s)
0700                 if result is None:
0701                     print '=== Fails on locale-sensitive match', t
0702 
0703                 # Try the match with UNICODE locale enabled, and check
0704                 # that it still succeeds.
0705                 obj = re.compile(pattern, re.UNICODE)
0706                 result = obj.search(s)
0707                 if result is None:
0708                     print '=== Fails on unicode-sensitive match', t
0709 
0710 def test_main():
         # Run the ReTests unittest cases first, then the table-driven
         # run_re_tests() pass, which reports its findings on stdout.
0711     run_unittest(ReTests)
0712     run_re_tests()
0713 
0714 if __name__ == "__main__":
         # Run the whole suite when this file is executed as a script.
0715     test_main()
0716 

Generated by PyXR 0.9.4
SourceForge.net Logo