PyXR

c:\python24\lib \ regsub.py


0001 """Regexp-based split and replace using the obsolete regex module.
0002 
0003 This module is only for backward compatibility.  These operations
0004 are now provided by the new regular expression module, "re".
0005 
0006 sub(pat, repl, str):        replace first occurrence of pattern in string
0007 gsub(pat, repl, str):       replace all occurrences of pattern in string
0008 split(str, pat, maxsplit):  split string using pattern as delimiter
0009 splitx(str, pat, maxsplit): split string using pattern as delimiter plus
0010                             return delimiters
0011 """
0012 
import warnings
# Importing this module at all is deprecated, so warn as soon as it loads.
warnings.warn("the regsub module is deprecated; please use re.sub()",
              DeprecationWarning)

# Ignore further deprecation warnings about this module
# (the empty message pattern matches any warning text; passing __name__ as
# the module pattern limits the filter to warnings issued from this module).
warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)

# The obsolete regular expression module this module is built on.
import regex

# Public names; the helpers intsplit, compile, clear_cache, expand and test
# are deliberately not listed here.
__all__ = ["sub","gsub","split","splitx","capwords"]
0023 
0024 # Replace first occurrence of pattern pat in string str by replacement
0025 # repl.  If the pattern isn't found, the string is returned unchanged.
0026 # The replacement may contain references \digit to subpatterns and
0027 # escaped backslashes.  The pattern may be a string or an already
0028 # compiled pattern.
0029 
def sub(pat, repl, str):
    """Replace the first match of pat in str by the expansion of repl.

    pat is passed through compile(), so it may be a pattern string or an
    already compiled pattern.  repl is processed by expand(), which
    substitutes \\digit references with the matched groups.  When the
    pattern does not match, str is returned unchanged.
    """
    prog = compile(pat)
    if prog.search(str) < 0:
        # No match anywhere: nothing to replace.
        return str
    groups = prog.regs
    # groups[0] holds the (start, end) offsets of the whole match.
    begin, end = groups[0]
    replacement = expand(repl, groups, str)
    return str[:begin] + replacement + str[end:]
0037 
0038 
0039 # Replace all (non-overlapping) occurrences of pattern pat in string
0040 # str by replacement repl.  The same rules as for sub() apply.
0041 # Empty matches for the pattern are replaced only when not adjacent to
0042 # a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.
0043 
def gsub(pat, repl, str):
    """Replace every non-overlapping match of pat in str by repl.

    pat and repl follow the same rules as for sub().  An empty match that
    sits directly after the previous match is skipped: the search is
    retried one character further on, and the loop ends if that retry
    fails or the end of str has been reached.
    """
    prog = compile(pat)
    pieces = []             # output fragments, joined once at the end
    pos = 0                 # offset in str where the next search starts
    matched_before = False
    while True:
        if prog.search(str, pos) < 0:
            break
        groups = prog.regs
        lo, hi = groups[0]
        if matched_before and lo == hi == pos:
            # Empty match glued to the previous one: look one character on.
            if pos >= len(str):
                break
            if prog.search(str, pos + 1) < 0:
                break
            groups = prog.regs
            lo, hi = groups[0]
        # Copy the unmatched text before the match, then the replacement.
        pieces.append(str[pos:lo])
        pieces.append(expand(repl, groups, str))
        pos = hi
        matched_before = True
    # Whatever follows the last match is copied through untouched.
    pieces.append(str[pos:])
    return ''.join(pieces)
0062 
0063 
0064 # Split string str into fields separated by delimiters matching pattern
0065 # pat.  Only non-empty matches for the pattern are considered, so e.g.
0066 # split('abc', '') returns ['abc'].
0067 # The optional 3rd argument caps the number of splits (0 means no limit).
0068 
def split(str, pat, maxsplit=0):
    """Split str on non-empty matches of pat and return the list of fields.

    The delimiters themselves are dropped.  A non-zero maxsplit stops the
    splitting after that many splits; 0 means no limit.
    """
    keep_delimiters = 0
    return intsplit(str, pat, maxsplit, keep_delimiters)
0071 
0072 # Split string str into fields separated by delimiters matching pattern
0073 # pat.  Only non-empty matches for the pattern are considered, so e.g.
0074 # splitx('abc', '') returns ['abc'].  The matched delimiters are also
0075 # included in the list, between the fields they separate.
0076 # The optional 3rd argument caps the number of splits (0 means no limit).
0077 
0078 
def splitx(str, pat, maxsplit=0):
    """Split str on non-empty matches of pat, keeping the delimiters.

    The result alternates fields and the delimiter text that separated
    them.  A non-zero maxsplit stops the splitting after that many
    splits; 0 means no limit.
    """
    keep_delimiters = 1
    return intsplit(str, pat, maxsplit, keep_delimiters)
0081 
0082 # Internal function used to implement split() and splitx().
0083 
def intsplit(str, pat, maxsplit, retain):
    """Shared implementation of split() and splitx().

    Cuts str at each non-empty match of pat and returns the list of
    fields.  When retain is true, the matched delimiter text is inserted
    after each field.  A non-zero maxsplit stops after that many splits;
    the rest of str becomes the final field.
    """
    prog = compile(pat)
    fields = []
    field_start = 0         # where the field being collected begins
    scan_from = 0           # where the next search starts
    nsplits = 0
    while prog.search(str, scan_from) >= 0:
        lo, hi = prog.regs[0]
        if lo != hi:
            fields.append(str[field_start:lo])
            if retain:
                fields.append(str[lo:hi])
            field_start = scan_from = hi
            nsplits += 1
            if maxsplit and nsplits >= maxsplit:
                break
        else:
            # Empty matches never split; step one character forward and
            # give up once the end of the string is reached.
            scan_from += 1
            if scan_from >= len(str):
                break
    fields.append(str[field_start:])
    return fields
0106 
0107 
0108 # Capitalize words split using a pattern
0109 
def capwords(str, pat='[^a-zA-Z0-9_]+'):
    """Capitalize each word of str, where words are separated by pat.

    The string is split with splitx(), so the delimiters are kept and the
    rejoined result differs from str only in the capitalization of words.
    """
    pieces = splitx(str, pat)
    # splitx() alternates word, delimiter, word, ...: words sit at the
    # even indices and delimiters at the odd ones.
    pieces[::2] = [word.capitalize() for word in pieces[::2]]
    return "".join(pieces)
0115 
0116 
0117 # Internal subroutines:
0118 # compile(pat): compile a pattern, caching already compiled patterns
0119 # expand(repl, regs, str): expand \digit escapes in replacement string
0120 
0121 
0122 # Manage a cache of compiled regular expressions.
0123 #
0124 # If the pattern is a string a compiled version of it is returned.  If
0125 # the pattern has been used before we return an already compiled
0126 # version from the cache; otherwise we compile it now and save the
0127 # compiled version in the cache, along with the syntax it was compiled
0128 # with.  Instead of a string, a compiled regular expression can also
0129 # be passed.
0130 
cache = {}      # maps (pattern string, regex syntax) -> compiled pattern

def compile(pat):
    """Return a compiled pattern for pat, reusing earlier compilations.

    If pat is not a string it is assumed to be an already compiled
    pattern and is returned as is.  Otherwise the compiled pattern is
    looked up in the module-level cache under the key
    (pat, regex.get_syntax()), and is compiled and stored on a miss.

    The string test uses isinstance() so that instances of str
    subclasses are compiled too, instead of being mistaken for compiled
    patterns.
    """
    if not isinstance(pat, str):
        return pat              # Assume it is a compiled regex
    # The syntax is part of the key: the same text compiled under a
    # different regex syntax is a different pattern.
    key = (pat, regex.get_syntax())
    prog = cache.get(key)
    if prog is None:
        prog = cache[key] = regex.compile(pat)
    return prog
0142 
0143 
def clear_cache():
    """Forget all cached compiled patterns.

    The module-level name ``cache`` is rebound to a new empty dictionary;
    the previous dictionary object itself is left untouched.
    """
    global cache
    cache = dict()
0147 
0148 
0149 # Expand \digit in the replacement.
0150 # Each occurrence of \digit is replaced by the substring of str
0151 # indicated by regs[digit].  To include a literal \ in the
0152 # replacement, double it; other \ escapes are left unchanged (i.e.
0153 # the \ and the following character are both copied).
0154 
def expand(repl, regs, str):
    """Expand the backslash escapes in the replacement string repl.

    \\digit is replaced by str[a:b], where (a, b) is regs[digit].  A
    doubled backslash yields a single backslash.  Any other escape is
    copied through unchanged (backslash and following character), and a
    lone backslash at the very end of repl is kept as is.
    """
    if '\\' not in repl:
        # Fast path: nothing to expand.
        return repl
    out = []
    pos = 0
    end = len(repl)
    while pos < end:
        ch = repl[pos]
        pos += 1
        if ch != '\\' or pos >= end:
            # Ordinary character, or a backslash with nothing after it.
            out.append(ch)
            continue
        esc = repl[pos]
        pos += 1
        if esc == '\\':
            out.append(esc)
        elif '0' <= esc <= '9':
            lo, hi = regs[int(esc)]
            out.append(str[lo:hi])
        else:
            out.append('\\' + esc)
    return ''.join(out)
0175 
0176 
0177 # Test program, reads sequences "pat repl str" from stdin.
0178 # Optional argument specifies pattern used to split lines.
0179 
def test():
    """Interactive test driver: read "pat repl str" lines from stdin.

    Each input line is split into fields with split(), using sys.argv[1]
    as the delimiter pattern when given and runs of blanks, tabs and
    newlines otherwise.  Lines with exactly three fields are run through
    sub() and gsub() and the results are printed; other lines are
    reported and skipped.  The loop ends at end of input.
    """
    import sys
    if sys.argv[1:]:
        delpat = sys.argv[1]
    else:
        delpat = '[ \t\n]+'
    while 1:
        # Only prompt when a person is typing the input.
        if sys.stdin.isatty():
            sys.stderr.write('--> ')
        line = sys.stdin.readline()
        if not line:
            break
        if line[-1] == '\n':
            line = line[:-1]
        fields = split(line, delpat)
        if len(fields) != 3:
            sys.stdout.write('Sorry, not three fields\n')
            sys.stdout.write('split: %s\n' % repr(fields))
            continue
        # Reuse the fields already computed instead of splitting the line
        # a second time; "text" avoids shadowing the builtin str.
        pat, repl, text = fields
        sys.stdout.write('sub : %s\n' % repr(sub(pat, repl, text)))
        sys.stdout.write('gsub: %s\n' % repr(gsub(pat, repl, text)))
0199
Generated by PyXR 0.9.4