PyXR

c:\python24\lib\sre_parse.py


0001 #
0002 # Secret Labs' Regular Expression Engine
0003 #
0004 # convert re-style regular expression to sre pattern
0005 #
0006 # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
0007 #
0008 # See the sre.py file for information on usage and redistribution.
0009 #
0010 
0011 """Internal support module for sre"""
0012 
0013 # XXX: show string offset and offending character for all errors
0014 
0015 import sys
0016 
0017 from sre_constants import *
0018 
# Characters that the parser treats as syntax rather than as literals.
SPECIAL_CHARS = ".\\[{()*+?^$|"
# Characters that start a repetition of the preceding item.
REPEAT_CHARS = "*+?{"

# The character groups below are tuples of one-character strings.  The
# parser tests Tokenizer.next for membership in them, and that attribute
# is None once the input is exhausted.
OCTDIGITS = tuple("01234567")
DIGITS = OCTDIGITS + ("8", "9")
HEXDIGITS = DIGITS + tuple("abcdefABCDEF")

WHITESPACE = (" ", "\t", "\n", "\r", "\v", "\f")

# Two-character escape token -> literal character code.
ESCAPES = {
    "\\a": (LITERAL, ord("\a")),
    "\\b": (LITERAL, ord("\b")),
    "\\f": (LITERAL, ord("\f")),
    "\\n": (LITERAL, ord("\n")),
    "\\r": (LITERAL, ord("\r")),
    "\\t": (LITERAL, ord("\t")),
    "\\v": (LITERAL, ord("\v")),
    "\\\\": (LITERAL, ord("\\")),
}

# Two-character escape token -> position assertion (AT, ...) or
# single-category character set (IN, [...]).
CATEGORIES = {
    # position assertions
    "\\A": (AT, AT_BEGINNING_STRING),  # start of string
    "\\Z": (AT, AT_END_STRING),  # end of string
    "\\b": (AT, AT_BOUNDARY),
    "\\B": (AT, AT_NON_BOUNDARY),
    # character categories and their complements
    "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
}

# Inline flag letter, as written in "(?iLmsx)", -> flag bit.
FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "t": SRE_FLAG_TEMPLATE,
    "u": SRE_FLAG_UNICODE,
}
0064 
class Pattern:
    # Master pattern object: holds the state shared by every subpattern
    # of one regular expression (flags, group numbering, group names).
    def __init__(self):
        self.flags = 0
        self.open = []        # ids of groups opened but not yet closed
        self.groups = 1       # id that the next opened group will receive
        self.groupdict = {}   # group name -> group id

    def opengroup(self, name=None):
        # Allocate the next group id, mark it as open and return it.
        # When a name is given it is registered for that id; re-using a
        # name that is already registered raises error.
        gid = self.groups
        self.groups = gid + 1
        if name is not None:
            previous = self.groupdict.get(name, None)
            if previous is not None:
                raise error("redefinition of group name %s as group %d; "
                            "was group %d" % (repr(name), gid, previous))
            self.groupdict[name] = gid
        self.open.append(gid)
        return gid

    def closegroup(self, gid):
        # Mark a previously opened group as closed.
        self.open.remove(gid)

    def checkgroup(self, gid):
        # True when gid has been allocated and is not currently open.
        if not gid < self.groups:
            return False
        return gid not in self.open
0087 
0088 class SubPattern:
0089     # a subpattern, in intermediate form
0090     def __init__(self, pattern, data=None):
0091         self.pattern = pattern
0092         if data is None:
0093             data = []
0094         self.data = data
0095         self.width = None
0096     def dump(self, level=0):
0097         nl = 1
0098         seqtypes = type(()), type([])
0099         for op, av in self.data:
0100             print level*"  " + op,; nl = 0
0101             if op == "in":
0102                 # member sublanguage
0103                 print; nl = 1
0104                 for op, a in av:
0105                     print (level+1)*"  " + op, a
0106             elif op == "branch":
0107                 print; nl = 1
0108                 i = 0
0109                 for a in av[1]:
0110                     if i > 0:
0111                         print level*"  " + "or"
0112                     a.dump(level+1); nl = 1
0113                     i = i + 1
0114             elif type(av) in seqtypes:
0115                 for a in av:
0116                     if isinstance(a, SubPattern):
0117                         if not nl: print
0118                         a.dump(level+1); nl = 1
0119                     else:
0120                         print a, ; nl = 0
0121             else:
0122                 print av, ; nl = 0
0123             if not nl: print
0124     def __repr__(self):
0125         return repr(self.data)
0126     def __len__(self):
0127         return len(self.data)
0128     def __delitem__(self, index):
0129         del self.data[index]
0130     def __getitem__(self, index):
0131         return self.data[index]
0132     def __setitem__(self, index, code):
0133         self.data[index] = code
0134     def __getslice__(self, start, stop):
0135         return SubPattern(self.pattern, self.data[start:stop])
0136     def insert(self, index, code):
0137         self.data.insert(index, code)
0138     def append(self, code):
0139         self.data.append(code)
0140     def getwidth(self):
0141         # determine the width (min, max) for this subpattern
0142         if self.width:
0143             return self.width
0144         lo = hi = 0L
0145         UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
0146         REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
0147         for op, av in self.data:
0148             if op is BRANCH:
0149                 i = sys.maxint
0150                 j = 0
0151                 for av in av[1]:
0152                     l, h = av.getwidth()
0153                     i = min(i, l)
0154                     j = max(j, h)
0155                 lo = lo + i
0156                 hi = hi + j
0157             elif op is CALL:
0158                 i, j = av.getwidth()
0159                 lo = lo + i
0160                 hi = hi + j
0161             elif op is SUBPATTERN:
0162                 i, j = av[1].getwidth()
0163                 lo = lo + i
0164                 hi = hi + j
0165             elif op in REPEATCODES:
0166                 i, j = av[2].getwidth()
0167                 lo = lo + long(i) * av[0]
0168                 hi = hi + long(j) * av[1]
0169             elif op in UNITCODES:
0170                 lo = lo + 1
0171                 hi = hi + 1
0172             elif op == SUCCESS:
0173                 break
0174         self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
0175         return self.width
0176 
class Tokenizer:
    # Splits a pattern string into tokens with one token of lookahead.
    # A token is a single character, or a backslash together with the
    # character that follows it.  self.next holds the lookahead token and
    # is None once the input is exhausted.
    def __init__(self, string):
        self.string = string
        self.index = 0
        self.__next()

    def __next(self):
        # Load the next token into self.next and advance self.index past it.
        position = self.index
        if position >= len(self.string):
            self.next = None
            return
        token = self.string[position]
        if token[0] == "\\":
            # an escape always consumes the following character as well
            try:
                escaped = self.string[position + 1]
            except IndexError:
                raise error("bogus escape (end of line)")
            token = token + escaped
        self.index = position + len(token)
        self.next = token

    def match(self, char, skip=1):
        # Return 1 if the lookahead token equals char, else 0.  On a match
        # the token is consumed unless skip is false.
        if char != self.next:
            return 0
        if skip:
            self.__next()
        return 1

    def get(self):
        # Consume and return the lookahead token (None at end of input).
        token = self.next
        self.__next()
        return token

    def tell(self):
        # Snapshot of the tokenizer state, to be passed to seek().
        return self.index, self.next

    def seek(self, index):
        # Restore a state previously returned by tell().
        self.index, self.next = index
0209 
def isident(char):
    # True when char may start an identifier: an ASCII letter or "_".
    if char == "_":
        return True
    return "a" <= char <= "z" or "A" <= char <= "Z"
0212 
def isdigit(char):
    # True when char lies in the range "0".."9".
    return not (char < "0" or char > "9")
0215 
def isname(name):
    # Check that a group name is a valid identifier: it must start with a
    # character accepted by isident(), and every following character must
    # be accepted by isident() or isdigit().
    #
    # An empty name is rejected explicitly.  Without that guard name[0]
    # raises IndexError, which callers such as the "(?P<...>" handling do
    # not translate into a pattern error.
    if not name:
        return False
    if not isident(name[0]):
        return False
    for char in name[1:]:
        if not isident(char) and not isdigit(char):
            return False
    return True
0224 
0225 def _class_escape(source, escape):
0226     # handle escape code inside character class
0227     code = ESCAPES.get(escape)
0228     if code:
0229         return code
0230     code = CATEGORIES.get(escape)
0231     if code:
0232         return code
0233     try:
0234         c = escape[1:2]
0235         if c == "x":
0236             # hexadecimal escape (exactly two digits)
0237             while source.next in HEXDIGITS and len(escape) < 4:
0238                 escape = escape + source.get()
0239             escape = escape[2:]
0240             if len(escape) != 2:
0241                 raise error, "bogus escape: %s" % repr("\\" + escape)
0242             return LITERAL, int(escape, 16) & 0xff
0243         elif c in OCTDIGITS:
0244             # octal escape (up to three digits)
0245             while source.next in OCTDIGITS and len(escape) < 4:
0246                 escape = escape + source.get()
0247             escape = escape[1:]
0248             return LITERAL, int(escape, 8) & 0xff
0249         elif c in DIGITS:
0250             raise error, "bogus escape: %s" % repr(escape)
0251         if len(escape) == 2:
0252             return LITERAL, ord(escape[1])
0253     except ValueError:
0254         pass
0255     raise error, "bogus escape: %s" % repr(escape)
0256 
def _escape(source, escape, state):
    # Handle an escape code in an expression (outside a character class).
    # Returns an entry from CATEGORIES/ESCAPES, a (LITERAL, code) pair or
    # a (GROUPREF, group) pair; raises error when the escape is not valid.
    for table in (CATEGORIES, ESCAPES):
        code = table.get(escape)
        if code:
            return code
    try:
        kind = escape[1:2]
        if kind == "x":
            # hexadecimal escape: exactly two hex digits must follow
            while len(escape) < 4 and source.next in HEXDIGITS:
                escape += source.get()
            if len(escape) != 4:
                raise ValueError
            return LITERAL, int(escape[2:], 16) & 0xff
        if kind == "0":
            # octal escape introduced by a zero
            while len(escape) < 4 and source.next in OCTDIGITS:
                escape += source.get()
            return LITERAL, int(escape[1:], 8) & 0xff
        if kind in DIGITS:
            # octal escape *or* decimal group reference (sigh)
            if source.next in DIGITS:
                escape += source.get()
                three_octal_digits = (escape[1] in OCTDIGITS and
                                      escape[2] in OCTDIGITS and
                                      source.next in OCTDIGITS)
                if three_octal_digits:
                    # got three octal digits; this is an octal escape
                    escape += source.get()
                    return LITERAL, int(escape[1:], 8) & 0xff
            # not an octal escape, so this is a group reference
            group = int(escape[1:])
            if not group < state.groups:
                raise ValueError
            if not state.checkgroup(group):
                raise error("cannot refer to open group")
            return GROUPREF, group
        if len(escape) == 2:
            # any other escaped character stands for itself
            return LITERAL, ord(escape[1])
    except ValueError:
        pass
    raise error("bogus escape: %s" % repr(escape))
0300 
def _parse_sub(source, state, nested=1):
    # Parse an alternation: a|b|c
    #
    # Returns the single alternative unchanged when there is only one.
    # Otherwise returns a new SubPattern holding any prefix shared by all
    # alternatives, followed by either a character set (when every
    # remainder is one literal) or a BRANCH entry.

    branches = []
    while 1:
        branches.append(_parse(source, state))
        if source.match("|"):
            continue
        if not nested:
            break
        if not source.next or source.match(")", 0):
            break
        raise error("pattern not properly closed")

    if len(branches) == 1:
        return branches[0]

    result = SubPattern(state)

    # Hoist leading items shared by all alternatives out of the branch,
    # one item per pass, until some alternative is empty or differs.
    while 1:
        shared = None
        for branch in branches:
            if not branch:
                break
            if shared is None:
                shared = branch[0]
            elif branch[0] != shared:
                break
        else:
            # every alternative starts with the same item
            for branch in branches:
                del branch[0]
            result.append(shared)
            continue  # check the next leading item
        break

    # If each alternative is now exactly one literal, store the whole
    # thing as a character set instead of a branch (the compiler may
    # optimize this even more).
    for branch in branches:
        if len(branch) != 1 or branch[0][0] != LITERAL:
            break
    else:
        result.append((IN, [branch[0] for branch in branches]))
        return result

    result.append((BRANCH, (None, branches)))
    return result
0359 
def _parse_sub_cond(source, state, condgroup):
    # Parse the body of a conditional backreference group: one "yes"
    # branch, optionally followed by "|" and a "no" branch.  Returns a
    # SubPattern holding a single GROUPREF_EXISTS entry.
    item_yes = _parse(source, state)
    item_no = None
    if source.match("|"):
        item_no = _parse(source, state)
        if source.match("|"):
            raise error("conditional backref with more than two branches")
    # whatever follows must be the closing parenthesis (left unconsumed)
    if source.next and not source.match(")", 0):
        raise error("pattern not properly closed")
    result = SubPattern(state)
    result.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))
    return result
0373 
def _parse(source, state):
    # parse a simple pattern
    #
    # Consumes tokens from source until end of input or until the next
    # token is "|" or ")" (which is left unconsumed for the caller), and
    # returns a SubPattern holding the parsed (opcode, argument) items.
    # state is the shared pattern state: its flags are read (and updated
    # by inline "(?flags)" groups) and its group bookkeeping is used for
    # capturing groups and references.  Raises error on malformed input.
    subpattern = SubPattern(state)

    # precompute constants into local variables
    subpatternappend = subpattern.append
    sourceget = source.get
    sourcematch = source.match
    _len = len
    PATTERNENDERS = ("|", ")")
    ASSERTCHARS = ("=", "!", "<")
    LOOKBEHINDASSERTCHARS = ("=", "!")
    REPEATCODES = (MIN_REPEAT, MAX_REPEAT)

    while 1:

        if source.next in PATTERNENDERS:
            break # end of subpattern
        this = sourceget()
        if this is None:
            break # end of pattern

        if state.flags & SRE_FLAG_VERBOSE:
            # skip whitespace and comments
            if this in WHITESPACE:
                continue
            if this == "#":
                # comment runs to the end of the line (or of the pattern)
                while 1:
                    this = sourceget()
                    if this in (None, "\n"):
                        break
                continue

        if this and this[0] not in SPECIAL_CHARS:
            # ordinary character
            subpatternappend((LITERAL, ord(this)))

        elif this == "[":
            # character set
            set = []
            setappend = set.append
##          if sourcematch(":"):
##              pass # handle character classes
            if sourcematch("^"):
                setappend((NEGATE, None))
            # check remaining characters
            # "start" is the set as it looks before any member is added;
            # comparing against it lets a "]" in first position be taken
            # as a literal member instead of closing the set
            start = set[:]
            while 1:
                this = sourceget()
                if this == "]" and set != start:
                    break
                elif this and this[0] == "\\":
                    code1 = _class_escape(source, this)
                elif this:
                    code1 = LITERAL, ord(this)
                else:
                    raise error, "unexpected end of regular expression"
                if sourcematch("-"):
                    # potential range
                    this = sourceget()
                    if this == "]":
                        # "-" directly before "]" is a literal hyphen
                        if code1[0] is IN:
                            # unwrap a category escape into its single member
                            code1 = code1[1][0]
                        setappend(code1)
                        setappend((LITERAL, ord("-")))
                        break
                    elif this:
                        if this[0] == "\\":
                            code2 = _class_escape(source, this)
                        else:
                            code2 = LITERAL, ord(this)
                        # both endpoints of a range must be plain literals
                        if code1[0] != LITERAL or code2[0] != LITERAL:
                            raise error, "bad character range"
                        lo = code1[1]
                        hi = code2[1]
                        if hi < lo:
                            raise error, "bad character range"
                        setappend((RANGE, (lo, hi)))
                    else:
                        raise error, "unexpected end of regular expression"
                else:
                    if code1[0] is IN:
                        # unwrap a category escape into its single member
                        code1 = code1[1][0]
                    setappend(code1)

            # XXX: <fl> should move set optimization to compiler!
            if _len(set)==1 and set[0][0] is LITERAL:
                subpatternappend(set[0]) # optimization
            elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
                subpatternappend((NOT_LITERAL, set[1][1])) # optimization
            else:
                # XXX: <fl> should add charmap optimization here
                subpatternappend((IN, set))

        elif this and this[0] in REPEAT_CHARS:
            # repeat previous item
            if this == "?":
                min, max = 0, 1
            elif this == "*":
                min, max = 0, MAXREPEAT

            elif this == "+":
                min, max = 1, MAXREPEAT
            elif this == "{":
                # "{lo,hi}" interval; remember the position so that a "{"
                # which does not start a valid interval can be re-read
                here = source.tell()
                min, max = 0, MAXREPEAT
                lo = hi = ""
                while source.next in DIGITS:
                    lo = lo + source.get()
                if sourcematch(","):
                    while source.next in DIGITS:
                        hi = hi + sourceget()
                else:
                    hi = lo
                if not sourcematch("}"):
                    # not an interval after all: treat "{" as a literal
                    # and rewind to just after it
                    subpatternappend((LITERAL, ord(this)))
                    source.seek(here)
                    continue
                if lo:
                    min = int(lo)
                if hi:
                    max = int(hi)
                if max < min:
                    raise error, "bad repeat interval"
            else:
                raise error, "not supported"
            # figure out which item to repeat
            if subpattern:
                # one-element slice, i.e. a SubPattern wrapping the last item
                item = subpattern[-1:]
            else:
                item = None
            if not item or (_len(item) == 1 and item[0][0] == AT):
                raise error, "nothing to repeat"
            if item[0][0] in REPEATCODES:
                raise error, "multiple repeat"
            # a trailing "?" selects the MIN_REPEAT form
            if sourcematch("?"):
                subpattern[-1] = (MIN_REPEAT, (min, max, item))
            else:
                subpattern[-1] = (MAX_REPEAT, (min, max, item))

        elif this == ".":
            subpatternappend((ANY, None))

        elif this == "(":
            # "group" selects how the contents are handled further down:
            #   0 = no group body to parse (inline flags)
            #   1 = capturing group
            #   2 = group without a number of its own
            group = 1
            name = None
            condgroup = None
            if sourcematch("?"):
                group = 0
                # options
                if sourcematch("P"):
                    # python extensions
                    if sourcematch("<"):
                        # named group: skip forward to end of name
                        name = ""
                        while 1:
                            char = sourceget()
                            if char is None:
                                raise error, "unterminated name"
                            if char == ">":
                                break
                            name = name + char
                        group = 1
                        if not isname(name):
                            raise error, "bad character in group name"
                    elif sourcematch("="):
                        # named backreference
                        name = ""
                        while 1:
                            char = sourceget()
                            if char is None:
                                raise error, "unterminated name"
                            if char == ")":
                                break
                            name = name + char
                        if not isname(name):
                            raise error, "bad character in group name"
                        gid = state.groupdict.get(name)
                        if gid is None:
                            raise error, "unknown group name"
                        subpatternappend((GROUPREF, gid))
                        continue
                    else:
                        char = sourceget()
                        if char is None:
                            raise error, "unexpected end of pattern"
                        raise error, "unknown specifier: ?P%s" % char
                elif sourcematch(":"):
                    # non-capturing group
                    group = 2
                elif sourcematch("#"):
                    # comment
                    while 1:
                        if source.next is None or source.next == ")":
                            break
                        sourceget()
                    if not sourcematch(")"):
                        raise error, "unbalanced parenthesis"
                    continue
                elif source.next in ASSERTCHARS:
                    # lookahead assertions
                    # (and lookbehind, when the next character is "<")
                    char = sourceget()
                    dir = 1
                    if char == "<":
                        if source.next not in LOOKBEHINDASSERTCHARS:
                            raise error, "syntax error"
                        dir = -1 # lookbehind
                        char = sourceget()
                    p = _parse_sub(source, state)
                    if not sourcematch(")"):
                        raise error, "unbalanced parenthesis"
                    # "=" is a positive assertion; otherwise char is "!"
                    if char == "=":
                        subpatternappend((ASSERT, (dir, p)))
                    else:
                        subpatternappend((ASSERT_NOT, (dir, p)))
                    continue
                elif sourcematch("("):
                    # conditional backreference group
                    condname = ""
                    while 1:
                        char = sourceget()
                        if char is None:
                            raise error, "unterminated name"
                        if char == ")":
                            break
                        condname = condname + char
                    group = 2
                    # the condition is either a group name or a number
                    if isname(condname):
                        condgroup = state.groupdict.get(condname)
                        if condgroup is None:
                            raise error, "unknown group name"
                    else:
                        try:
                            condgroup = int(condname)
                        except ValueError:
                            raise error, "bad character in group name"
                else:
                    # flags
                    # NOTE(review): this message is also produced when the
                    # next character is present but is not a flag letter.
                    if not source.next in FLAGS:
                        raise error, "unexpected end of pattern"
                    while source.next in FLAGS:
                        state.flags = state.flags | FLAGS[sourceget()]
            if group:
                # parse group contents
                if group == 2:
                    # anonymous group
                    group = None
                else:
                    group = state.opengroup(name)
                # NOTE(review): a numeric condition of 0 is falsy here, so
                # "(?(0)...)" is parsed by _parse_sub like an ordinary
                # group -- confirm whether that is intended.
                if condgroup:
                    p = _parse_sub_cond(source, state, condgroup)
                else:
                    p = _parse_sub(source, state)
                if not sourcematch(")"):
                    raise error, "unbalanced parenthesis"
                if group is not None:
                    state.closegroup(group)
                subpatternappend((SUBPATTERN, (group, p)))
            else:
                # inline flags: only the closing ")" may follow
                while 1:
                    char = sourceget()
                    if char is None:
                        raise error, "unexpected end of pattern"
                    if char == ")":
                        break
                    raise error, "unknown extension"

        elif this == "^":
            subpatternappend((AT, AT_BEGINNING))

        elif this == "$":
            subpattern.append((AT, AT_END))

        elif this and this[0] == "\\":
            # escape sequence: literal, category, assertion or group reference
            code = _escape(source, this, state)
            subpatternappend(code)

        else:
            raise error, "parser error"

    return subpattern
0654 
def parse(str, flags=0, pattern=None):
    # Parse an 're' pattern string into a SubPattern, i.e. a list of
    # (opcode, argument) tuples.  A Pattern state object is created
    # unless one is supplied; either way its flags and str attributes
    # are set from the arguments.

    source = Tokenizer(str)

    if pattern is None:
        pattern = Pattern()
    pattern.flags = flags
    pattern.str = str

    parsed = _parse_sub(source, pattern, 0)

    # the top-level parse must have consumed the whole pattern
    leftover = source.get()
    if leftover:
        if leftover == ")":
            raise error("unbalanced parenthesis")
        raise error("bogus characters at end of regular expression")

    if flags & SRE_FLAG_DEBUG:
        parsed.dump()

    verbose_requested = flags & SRE_FLAG_VERBOSE
    if parsed.pattern.flags & SRE_FLAG_VERBOSE and not verbose_requested:
        # the VERBOSE flag was switched on inside the pattern.  to be
        # on the safe side, we'll parse the whole thing again...
        return parse(str, parsed.pattern.flags)

    return parsed
0682 
def parse_template(source, pattern):
    # Parse an 're' replacement string into group references and
    # literals.  Returns (groups, literals): groups is a list of
    # (position, group) pairs and literals is a list with the literal
    # text at each non-group position and None at each group position.
    # Group names are resolved through pattern.groupindex.
    tokens = Tokenizer(source)
    next_token = tokens.get
    parts = []  # (LITERAL, text) and (MARK, group) entries, in order

    def add_literal(text):
        # adjacent literals are merged into a single entry
        if parts and parts[-1][0] is LITERAL:
            parts[-1] = LITERAL, parts[-1][1] + text
        else:
            parts.append((LITERAL, text))

    # build characters of the same string type as the template
    if type(source[:0]) is type(""):
        makechar = chr
    else:
        makechar = unichr

    while 1:
        this = next_token()
        if this is None:
            break  # end of replacement string
        if not (this and this[0] == "\\"):
            add_literal(this)
            continue
        # escape sequence
        c = this[1:2]
        if c == "g":
            # \g<name> or \g<number>
            name = ""
            if tokens.match("<"):
                while 1:
                    char = next_token()
                    if char is None:
                        raise error("unterminated group name")
                    if char == ">":
                        break
                    name = name + char
            if not name:
                raise error("bad group name")
            try:
                index = int(name)
                if index < 0:
                    raise error("negative group number")
            except ValueError:
                # not a number, so it has to be a known group name
                if not isname(name):
                    raise error("bad character in group name")
                try:
                    index = pattern.groupindex[name]
                except KeyError:
                    raise IndexError("unknown group name")
            parts.append((MARK, index))
        elif c == "0":
            # octal escape: a zero plus up to two more octal digits
            if tokens.next in OCTDIGITS:
                this = this + next_token()
                if tokens.next in OCTDIGITS:
                    this = this + next_token()
            add_literal(makechar(int(this[1:], 8) & 0xff))
        elif c in DIGITS:
            # three octal digits form an octal escape; anything else is
            # a one- or two-digit group reference
            isoctal = False
            if tokens.next in DIGITS:
                this = this + next_token()
                if (c in OCTDIGITS and this[2] in OCTDIGITS and
                    tokens.next in OCTDIGITS):
                    this = this + next_token()
                    isoctal = True
                    add_literal(makechar(int(this[1:], 8) & 0xff))
            if not isoctal:
                parts.append((MARK, int(this[1:])))
        else:
            # known character escapes are translated; unknown ones are
            # kept as written, backslash included
            try:
                this = makechar(ESCAPES[this][1])
            except KeyError:
                pass
            add_literal(this)

    # convert template to groups and literals lists
    groups = []
    literals = [None] * len(parts)
    for position, (kind, value) in enumerate(parts):
        if kind is MARK:
            groups.append((position, value))
            # literals[position] is already None
        else:
            literals[position] = value
    return groups, literals
0769 
def expand_template(template, match):
    # Expand a (groups, literals) template against a match object and
    # return the resulting string.  The template's own literals list is
    # copied, never modified.  Raises error when a referenced group did
    # not take part in the match or does not exist.
    fetch = match.group
    empty = match.string[:0]  # empty string of the subject's type
    slots, pieces = template
    pieces = pieces[:]
    try:
        for position, group in slots:
            text = fetch(group)
            pieces[position] = text
            if text is None:
                raise error("unmatched group")
    except IndexError:
        raise error("invalid group reference")
    return empty.join(pieces)
0783
Generated by PyXR 0.9.4