PyXR

c:\python24\lib \ reconvert.py



0001 #! /usr/bin/env python
0002 
0003 r"""Convert old ("regex") regular expressions to new syntax ("re").
0004 
0005 When imported as a module, there are two functions, with their own
0006 doc strings:
0007 
0008   convert(s, syntax=None) -- convert a regex regular expression to re syntax
0009 
0010   quote(s) -- return a quoted string literal
0011 
0012 When used as a script, read a Python string literal (or any other
0013 expression evaluating to a string) from stdin, and write the
0014 translated expression to stdout as a string literal.  Unless stdout is
0015 a tty, no trailing \n is written to stdout.  This is done so that it
0016 can be used with Emacs C-U M-| (shell-command-on-region with argument
0017 which filters the region through the shell command).
0018 
0019 No attempt has been made at coding for performance.
0020 
0021 Translation table...
0022 
0023     \(    (     (unless RE_NO_BK_PARENS set)
0024     \)    )     (unless RE_NO_BK_PARENS set)
0025     \|    |     (unless RE_NO_BK_VBAR set)
0026     \<    \b    (not quite the same, but alla...)
0027     \>    \b    (not quite the same, but alla...)
0028     \`    \A
0029     \'    \Z
0030 
0031 Not translated...
0032 
0033     .
0034     ^
0035     $
0036     *
0037     +           (unless RE_BK_PLUS_QM set, then to \+)
0038     ?           (unless RE_BK_PLUS_QM set, then to \?)
0039     \
0040     \b
0041     \B
0042     \w
0043     \W
0044     \1 ... \9
0045 
0046 Special cases...
0047 
0048     Non-printable characters are always replaced by their 3-digit
0049     escape code (except \t, \n, \r, which use mnemonic escapes)
0050 
0051     Newline is turned into | when RE_NEWLINE_OR is set
0052 
0053 XXX To be done...
0054 
0055     [...]     (different treatment of backslashed items?)
0056     [^...]    (different treatment of backslashed items?)
0057     ^ $ * + ? (in some error contexts these are probably treated differently)
0058     \vDD  \DD (in the regex docs but only works when RE_ANSI_HEX set)
0059 
0060 """
0061 
0062 
0063 import warnings
0064 warnings.filterwarnings("ignore", ".* regex .*", DeprecationWarning, __name__,
0065                         append=1)
0066 
0067 import regex
0068 from regex_syntax import * # RE_*
0069 
0070 __all__ = ["convert","quote"]
0071 
# Default translation table, mapping a regex token (a single character or a
# backslash followed by one character) to its replacement in re syntax.
# convert() copies this table and adjusts the copy per syntax flag, so the
# entries here describe the default (syntax == 0) behaviour.
mastertable = {
    # Word-boundary and buffer-boundary escapes.
    r"\<": r"\b",
    r"\>": r"\b",
    r"\`": r"\A",
    r"\'": r"\Z",

    # Backslashed grouping/alternation operators become bare operators...
    r"\(": "(",
    r"\)": ")",
    r"\|": "|",

    # ...and the bare characters gain a backslash in the output.
    "(": r"\(",
    ")": r"\)",
    "|": r"\|",

    # Whitespace control characters are spelled with mnemonic escapes.
    "\t": r"\t",
    "\n": r"\n",
    "\r": r"\r",
}
0088 
0089 
def convert(s, syntax=None):
    """Convert a regex regular expression to re syntax.

    The first argument is the regular expression, as a string object,
    just like it would be passed to regex.compile().  (I.e., pass the
    actual string object -- string quotes must already have been
    removed and the standard escape processing has already been done,
    e.g. by eval().)

    The optional second argument is the regex syntax variant to be
    used.  This is an integer mask as passed to regex.set_syntax();
    the flag bits are defined in regex_syntax.  When not specified, or
    when None is given, the current regex syntax mask (as retrieved by
    regex.get_syntax()) is used.

    The return value is a regular expression, as a string object that
    could be passed to re.compile().  (I.e., no string quotes have
    been added -- use quote() below, or repr().)

    The input is scanned left to right.  A backslash together with the
    character that follows it forms one token; every other character
    is a token by itself.  Each token is replaced by its entry in the
    translation table, or copied unchanged when it has no entry.  A
    lone backslash at the very end of the input has no following
    character to pair with and is copied to the output unchanged.

    The conversion is not always guaranteed to be correct.  More
    syntactical analysis should be performed to detect borderline
    cases and decide what to do with them.  For example, 'x*?' is not
    translated correctly.

    """
    # Work on a private copy so the per-call adjustments below never
    # leak into the shared default table.
    table = mastertable.copy()
    if syntax is None:
        syntax = regex.get_syntax()
    if syntax & RE_NO_BK_PARENS:
        # Parentheses are left untranslated in both spellings.
        del table[r'\('], table[r'\)']
        del table['('], table[')']
    if syntax & RE_NO_BK_VBAR:
        # Likewise for the vertical bar.
        del table[r'\|']
        del table['|']
    if syntax & RE_BK_PLUS_QM:
        # Swap the bare and backslashed spellings of + and ?.
        table['+'] = r'\+'
        table['?'] = r'\?'
        table[r'\+'] = '+'
        table[r'\?'] = '?'
    if syntax & RE_NEWLINE_OR:
        # Overrides the default '\n' -> r'\n' entry.
        table['\n'] = '|'

    pieces = []
    i = 0
    end = len(s)
    while i < end:
        token = s[i]
        i = i + 1
        # Only pair the backslash with a following character when there
        # is one; a trailing backslash used to raise IndexError here.
        if token == '\\' and i < end:
            token = token + s[i]
            i = i + 1
        pieces.append(table.get(token, token))
    return "".join(pieces)
0148 
0149 
def quote(s, quote=None):
    """Convert a string object to a quoted string literal.

    This is similar to repr() but will return a "raw" string (r'...'
    or r"...") when the string contains backslashes, instead of
    doubling all backslashes.  The resulting string does *not* always
    evaluate to the same string as the original; however it will do
    just the right thing when passed into re.compile().

    The optional second argument forces the string quote; it must be
    a single character which is a valid Python string quote.  When it
    is omitted, a single quote is used unless the string contains a
    single quote and no double quote, in which case a double quote is
    used so that nothing needs escaping.

    Occurrences of the chosen quote character inside the string are
    preceded by a backslash, and characters outside the printable
    ASCII range ' '..'~' are written as a backslash followed by their
    octal code (at least three digits).

    """
    if quote is None:
        q = "'"
        # The alternative must be the *other* quote character; with both
        # set to "'" the fallback test below could never succeed.
        altq = '"'
        if q in s and altq not in s:
            q = altq
    else:
        assert quote in ('"', "'")
        q = quote
    pieces = [q]
    for c in s:
        if c == q:
            c = '\\' + c
        elif c < ' ' or c > '~':
            c = "\\%03o" % ord(c)
        pieces.append(c)
    pieces.append(q)
    res = "".join(pieces)
    # Any backslash (original or added above) switches to a raw literal
    # so that the backslashes reach re.compile() undoubled.
    if '\\' in res:
        res = 'r' + res
    return res
0180 
0181 
def main():
    """Script entry point: translate one expression from stdin to stdout.

    The whole of stdin is evaluated as a Python expression, the result is
    run through convert() and quote(), and the quoted literal is written
    to stdout.  A trailing newline is added only when stdout is a tty.
    """
    import sys
    out = sys.stdout
    # NOTE(review): eval() executes whatever expression arrives on stdin.
    # That is the documented contract of this script (any expression that
    # evaluates to a string is accepted), so only feed it trusted input.
    pattern = eval(sys.stdin.read())
    out.write(quote(convert(pattern)))
    if out.isatty():
        out.write("\n")


if __name__ == '__main__':
    main()
0193 

Generated by PyXR 0.9.4
SourceForge.net Logo