"""Regexp-based split and replace using the obsolete regex module.

This module is only for backward compatibility. These operations
are now provided by the new regular expression module, "re".

sub(pat, repl, str):        replace first occurrence of pattern in string
gsub(pat, repl, str):       replace all occurrences of pattern in string
split(str, pat, maxsplit):  split string using pattern as delimiter
splitx(str, pat, maxsplit): split string using pattern as delimiter plus
                            return delimiters
"""

import warnings
warnings.warn("the regsub module is deprecated; please use re.sub()",
              DeprecationWarning)

# Ignore further deprecation warnings about this module
warnings.filterwarnings("ignore", "", DeprecationWarning, __name__)

# NOTE(review): this import sits after the filter above -- presumably so
# that any deprecation warning attributed to this module while importing
# regex is suppressed; confirm before reordering.
import regex

__all__ = ["sub","gsub","split","splitx","capwords"]

# Replace first occurrence of pattern pat in string str by replacement
# repl. If the pattern isn't found, the string is returned unchanged.
# The replacement may contain references \digit to subpatterns and
# escaped backslashes. The pattern may be a string or an already
# compiled pattern.

def sub(pat, repl, str):
    r"""Return str with the first match of pat replaced by repl.

    pat  -- pattern string, or an already compiled pattern object
    repl -- replacement text; \digit refers to a matched subpattern and
            a doubled backslash stands for a literal backslash
    str  -- the string to search

    When pat does not match, str is returned unchanged.
    """
    prog = compile(pat)
    if prog.search(str) < 0:
        # A negative search result means "no match": hand the input back.
        return str
    regs = prog.regs
    # regs[0] is the (start, end) span of the whole match.
    start, end = regs[0]
    return str[:start] + expand(repl, regs, str) + str[end:]


# Replace all (non-overlapping) occurrences of pattern pat in string
# str by replacement repl. The same rules as for sub() apply.
# Empty matches for the pattern are replaced only when not adjacent to
# a previous match, so e.g. gsub('', '-', 'abc') returns '-a-b-c-'.

def gsub(pat, repl, str):
    r"""Return str with every non-overlapping match of pat replaced by repl.

    pat  -- pattern string, or an already compiled pattern object
    repl -- replacement text; \digit refers to a matched subpattern and
            a doubled backslash stands for a literal backslash
    str  -- the string to search

    An empty match is replaced only when it is not adjacent to the
    previous match.
    """
    prog = compile(pat)
    pieces = []         # output fragments, joined once at the end
    start = 0           # end of the previous match == where to search next
    first = True        # no match has been consumed yet
    while prog.search(str, start) >= 0:
        regs = prog.regs
        a, b = regs[0]
        if a == b == start and not first:
            # Empty match right where the previous match ended: skip it
            # and look one character further on, or stop if there is
            # nothing left to look at.
            if start >= len(str) or prog.search(str, start + 1) < 0:
                break
            regs = prog.regs
            a, b = regs[0]
        pieces.append(str[start:a])
        pieces.append(expand(repl, regs, str))
        start = b
        first = False
    # Whatever follows the last match is copied through untouched.
    pieces.append(str[start:])
    return ''.join(pieces)


# Split string str in fields separated by delimiters matching pattern
# pat. Only non-empty matches for the pattern are considered, so e.g.
# split('abc', '') returns ['abc'].
# The optional 3rd argument sets the number of splits that are performed.

def split(str, pat, maxsplit = 0):
    """Return the list of fields of str separated by matches of pat.

    maxsplit limits the number of splits performed; 0 (the default)
    means no limit. The delimiters themselves are not returned.
    """
    return intsplit(str, pat, maxsplit, 0)

# Split string str in fields separated by delimiters matching pattern
# pat. Only non-empty matches for the pattern are considered, so e.g.
# split('abc', '') returns ['abc']. The delimiters are also included
# in the list.
# The optional 3rd argument sets the number of splits that are performed.


def splitx(str, pat, maxsplit = 0):
    """Like split(), but the matched delimiters are kept in the list.

    Fields and delimiters alternate in the result, starting with a field.
    maxsplit limits the number of splits performed; 0 (the default)
    means no limit.
    """
    return intsplit(str, pat, maxsplit, 1)

# Internal function used to implement split() and splitx().

def intsplit(str, pat, maxsplit, retain):
    """Split str on non-empty matches of pat and return the list of pieces.

    str      -- the string to split
    pat      -- pattern string, or an already compiled pattern object
    maxsplit -- maximum number of splits; a false value means no limit
    retain   -- when true, each matched delimiter is also appended,
                right after the field it terminates
    """
    prog = compile(pat)
    res = []
    start = pos = 0     # start: beginning of current field; pos: search origin
    splitcount = 0
    while prog.search(str, pos) >= 0:
        regs = prog.regs
        a, b = regs[0]
        if a == b:
            # Empty matches never split; nudge the search origin forward
            # and give up once it runs off the end of the string.
            pos = pos + 1
            if pos >= len(str):
                break
        else:
            res.append(str[start:a])
            if retain:
                res.append(str[a:b])
            start = pos = b
            splitcount = splitcount + 1
            if maxsplit and splitcount >= maxsplit:
                break
    # The tail after the last delimiter is always the final field.
    res.append(str[start:])
    return res


# Capitalize words split using a pattern

def capwords(str, pat='[^a-zA-Z0-9_]+'):
    """Return str with each field between matches of pat capitalized.

    The delimiters matched by pat are kept as they are.
    """
    words = splitx(str, pat)
    # splitx() alternates field, delimiter, field, ...; only the fields
    # (even indexes) are capitalized.
    for i in range(0, len(words), 2):
        words[i] = words[i].capitalize()
    return "".join(words)


# Internal subroutines:
# compile(pat): compile a pattern, caching already compiled patterns
# expand(repl, regs, str): expand \digit escapes in replacement string


# Manage a cache of compiled regular expressions.
#
# If the pattern is a string a compiled version of it is returned. If
# the pattern has been used before we return an already compiled
# version from the cache; otherwise we compile it now and save the
# compiled version in the cache, along with the syntax it was compiled
# with. Instead of a string, a compiled regular expression can also
# be passed.

cache = {}

def compile(pat):
    """Return a compiled pattern for pat, reusing cached results.

    Anything that is not a string is assumed to be an already compiled
    pattern and is returned as is.
    """
    if not isinstance(pat, str):
        return pat      # Assume it is a compiled regex
    # The syntax in force is part of the key: the same pattern text is
    # cached separately for each regex syntax setting.
    key = (pat, regex.get_syntax())
    try:
        return cache[key]       # Get it from the cache
    except KeyError:
        prog = cache[key] = regex.compile(pat)
        return prog


def clear_cache():
    """Discard all cached compiled patterns."""
    global cache
    cache = {}


# Expand \digit in the replacement.
# Each occurrence of \digit is replaced by the substring of str
# indicated by regs[digit].
To include a literal \ in the 0152 # replacement, double it; other \ escapes are left unchanged (i.e. 0153 # the \ and the following character are both copied). 0154 0155 def expand(repl, regs, str): 0156 if '\\' not in repl: 0157 return repl 0158 new = '' 0159 i = 0 0160 ord0 = ord('0') 0161 while i < len(repl): 0162 c = repl[i]; i = i+1 0163 if c != '\\' or i >= len(repl): 0164 new = new + c 0165 else: 0166 c = repl[i]; i = i+1 0167 if '0' <= c <= '9': 0168 a, b = regs[ord(c)-ord0] 0169 new = new + str[a:b] 0170 elif c == '\\': 0171 new = new + c 0172 else: 0173 new = new + '\\' + c 0174 return new 0175 0176 0177 # Test program, reads sequences "pat repl str" from stdin. 0178 # Optional argument specifies pattern used to split lines. 0179 0180 def test(): 0181 import sys 0182 if sys.argv[1:]: 0183 delpat = sys.argv[1] 0184 else: 0185 delpat = '[ \t\n]+' 0186 while 1: 0187 if sys.stdin.isatty(): sys.stderr.write('--> ') 0188 line = sys.stdin.readline() 0189 if not line: break 0190 if line[-1] == '\n': line = line[:-1] 0191 fields = split(line, delpat) 0192 if len(fields) != 3: 0193 print 'Sorry, not three fields' 0194 print 'split:', repr(fields) 0195 continue 0196 [pat, repl, str] = split(line, delpat) 0197 print 'sub :', repr(sub(pat, repl, str)) 0198 print 'gsub:', repr(gsub(pat, repl, str)) 0199
# Listing generated by PyXR 0.9.4