| #! /usr/bin/env python | 
 |  | 
 | r"""Convert old ("regex") regular expressions to new syntax ("re"). | 
 |  | 
 | When imported as a module, there are two functions, each with its own | 
 | docstring: | 
 |  | 
 |   convert(s, syntax=None) -- convert a regex regular expression to re syntax | 
 |  | 
 |   quote(s, quote=None) -- return a quoted string literal | 
 |  | 
 | When used as a script, read a Python string literal (or any other | 
 | expression evaluating to a string) from stdin, and write the | 
 | translated expression to stdout as a string literal.  Unless stdout is | 
 | a tty, no trailing \n is written to stdout.  This is done so that it | 
 | can be used with Emacs C-U M-| (shell-command-on-region with argument | 
 | which filters the region through the shell command). | 
 |  | 
 | No attempt has been made at coding for performance. | 
 |  | 
 | Translation table... | 
 |  | 
 |     \(    (     (unless RE_NO_BK_PARENS set) | 
 |     \)    )     (unless RE_NO_BK_PARENS set) | 
 |     \|    |     (unless RE_NO_BK_VBAR set) | 
 |     \<    \b    (not quite the same, but alla...) | 
 |     \>    \b    (not quite the same, but alla...) | 
 |     \`    \A | 
 |     \'    \Z | 
 |  | 
 | Not translated... | 
 |  | 
 |     . | 
 |     ^ | 
 |     $ | 
 |     * | 
 |     +           (unless RE_BK_PLUS_QM set, then to \+) | 
 |     ?           (unless RE_BK_PLUS_QM set, then to \?) | 
 |     \ | 
 |     \b | 
 |     \B | 
 |     \w | 
 |     \W | 
 |     \1 ... \9 | 
 |  | 
 | Special cases... | 
 |  | 
 |     Non-printable characters are always replaced by their 3-digit | 
 |     escape code (except \t, \n, \r, which use mnemonic escapes) | 
 |  | 
 |     Newline is turned into | when RE_NEWLINE_OR is set | 
 |  | 
 | XXX To be done... | 
 |  | 
 |     [...]     (different treatment of backslashed items?) | 
 |     [^...]    (different treatment of backslashed items?) | 
 |     ^ $ * + ? (in some error contexts these are probably treated differently) | 
 |     \vDD  \DD (in the regex docs but only works when RE_ANSI_HEX set) | 
 |  | 
 | """ | 
 |  | 
 |  | 
 | import warnings | 
 | warnings.filterwarnings("ignore", ".* regex .*", DeprecationWarning, __name__, | 
 |                         append=1) | 
 |  | 
 | import regex | 
 | from regex_syntax import * # RE_* | 
 |  | 
 | __all__ = ["convert","quote"] | 
 |  | 
 | # Default translation table | 
 | mastertable = { | 
 |     r'\<': r'\b', | 
 |     r'\>': r'\b', | 
 |     r'\`': r'\A', | 
 |     r'\'': r'\Z', | 
 |     r'\(': '(', | 
 |     r'\)': ')', | 
 |     r'\|': '|', | 
 |     '(': r'\(', | 
 |     ')': r'\)', | 
 |     '|': r'\|', | 
 |     '\t': r'\t', | 
 |     '\n': r'\n', | 
 |     '\r': r'\r', | 
 | } | 
 |  | 
 |  | 
 | def convert(s, syntax=None): | 
 |     """Convert a regex regular expression to re syntax. | 
 |  | 
 |     The first argument is the regular expression, as a string object, | 
 |     just like it would be passed to regex.compile().  (I.e., pass the | 
 |     actual string object -- string quotes must already have been | 
 |     removed and the standard escape processing has already been done, | 
 |     e.g. by eval().) | 
 |  | 
 |     The optional second argument is the regex syntax variant to be | 
 |     used.  This is an integer mask as passed to regex.set_syntax(); | 
 |     the flag bits are defined in regex_syntax.  When not specified, or | 
 |     when None is given, the current regex syntax mask (as retrieved by | 
 |     regex.get_syntax()) is used -- which is 0 by default. | 
 |  | 
 |     The return value is a regular expression, as a string object that | 
 |     could be passed to re.compile().  (I.e., no string quotes have | 
 |     been added -- use quote() below, or repr().) | 
 |  | 
 |     The conversion is not always guaranteed to be correct.  More | 
 |     syntactical analysis should be performed to detect borderline | 
 |     cases and decide what to do with them.  For example, 'x*?' is not | 
 |     translated correctly. | 
 |  | 
 |     """ | 
 |     table = mastertable.copy() | 
 |     if syntax is None: | 
 |         syntax = regex.get_syntax() | 
 |     if syntax & RE_NO_BK_PARENS: | 
 |         del table[r'\('], table[r'\)'] | 
 |         del table['('], table[')'] | 
 |     if syntax & RE_NO_BK_VBAR: | 
 |         del table[r'\|'] | 
 |         del table['|'] | 
 |     if syntax & RE_BK_PLUS_QM: | 
 |         table['+'] = r'\+' | 
 |         table['?'] = r'\?' | 
 |         table[r'\+'] = '+' | 
 |         table[r'\?'] = '?' | 
 |     if syntax & RE_NEWLINE_OR: | 
 |         table['\n'] = '|' | 
 |     res = "" | 
 |  | 
 |     i = 0 | 
 |     end = len(s) | 
 |     while i < end: | 
 |         c = s[i] | 
 |         i = i+1 | 
 |         if c == '\\': | 
 |             c = s[i] | 
 |             i = i+1 | 
 |             key = '\\' + c | 
 |             key = table.get(key, key) | 
 |             res = res + key | 
 |         else: | 
 |             c = table.get(c, c) | 
 |             res = res + c | 
 |     return res | 
 |  | 
 |  | 
 | def quote(s, quote=None): | 
 |     """Convert a string object to a quoted string literal. | 
 |  | 
 |     This is similar to repr() but will return a "raw" string (r'...' | 
 |     or r"...") when the string contains backslashes, instead of | 
 |     doubling all backslashes.  The resulting string does *not* always | 
 |     evaluate to the same string as the original; however it will do | 
 |     just the right thing when passed into re.compile(). | 
 |  | 
 |     The optional second argument forces the string quote; it must be | 
 |     a single character which is a valid Python string quote. | 
 |  | 
 |     """ | 
 |     if quote is None: | 
 |         q = "'" | 
 |         altq = "'" | 
 |         if q in s and altq not in s: | 
 |             q = altq | 
 |     else: | 
 |         assert quote in ('"', "'") | 
 |         q = quote | 
 |     res = q | 
 |     for c in s: | 
 |         if c == q: c = '\\' + c | 
 |         elif c < ' ' or c > '~': c = "\\%03o" % ord(c) | 
 |         res = res + c | 
 |     res = res + q | 
 |     if '\\' in res: | 
 |         res = 'r' + res | 
 |     return res | 
 |  | 
 |  | 
 | def main(): | 
 |     """Main program -- called when run as a script.""" | 
 |     import sys | 
 |     s = eval(sys.stdin.read()) | 
 |     sys.stdout.write(quote(convert(s))) | 
 |     if sys.stdout.isatty(): | 
 |         sys.stdout.write("\n") | 
 |  | 
 |  | 
 | if __name__ == '__main__': | 
 |     main() |