[2.7] bpo-30375: Correct the stacklevel of regex compiling warnings. (#1595) (#1648)
Warnings emitted when compile a regular expression now always point
to the line in the user code. Previously they could point into inners
of the re module if emitted from inside of groups or conditionals.
(cherry picked from commit c7ac7280c321b3c1679fe5f657a6be0f86adf173)
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index e0d003e..3f6ddd2 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -234,7 +234,7 @@
return False
return True
-def _class_escape(source, escape):
+def _class_escape(source, escape, nested):
# handle escape code inside character class
code = ESCAPES.get(escape)
if code:
@@ -266,16 +266,16 @@
if c in 'Uu':
warnings.warn('bad escape %s; Unicode escapes are '
'supported only since Python 3.3' % escape,
- FutureWarning, stacklevel=8)
+ FutureWarning, stacklevel=nested + 6)
else:
warnings.warnpy3k('bad escape %s' % escape,
- DeprecationWarning, stacklevel=8)
+ DeprecationWarning, stacklevel=nested + 6)
return LITERAL, ord(escape[1])
except ValueError:
pass
raise error, "bogus escape: %s" % repr(escape)
-def _escape(source, escape, state):
+def _escape(source, escape, state, nested):
# handle escape code in expression
code = CATEGORIES.get(escape)
if code:
@@ -315,7 +315,7 @@
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
- RuntimeWarning)
+ RuntimeWarning, stacklevel=nested + 6)
return GROUPREF, group
raise ValueError
if len(escape) == 2:
@@ -324,23 +324,23 @@
if c in 'Uu':
warnings.warn('bad escape %s; Unicode escapes are '
'supported only since Python 3.3' % escape,
- FutureWarning, stacklevel=8)
+ FutureWarning, stacklevel=nested + 6)
else:
warnings.warnpy3k('bad escape %s' % escape,
- DeprecationWarning, stacklevel=8)
+ DeprecationWarning, stacklevel=nested + 6)
return LITERAL, ord(escape[1])
except ValueError:
pass
raise error, "bogus escape: %s" % repr(escape)
-def _parse_sub(source, state, nested=1):
+def _parse_sub(source, state, nested):
# parse an alternation: a|b|c
items = []
itemsappend = items.append
sourcematch = source.match
while 1:
- itemsappend(_parse(source, state))
+ itemsappend(_parse(source, state, nested + 1))
if sourcematch("|"):
continue
if not nested:
@@ -392,10 +392,10 @@
subpattern.append((BRANCH, (None, items)))
return subpattern
-def _parse_sub_cond(source, state, condgroup):
- item_yes = _parse(source, state)
+def _parse_sub_cond(source, state, condgroup, nested):
+ item_yes = _parse(source, state, nested + 1)
if source.match("|"):
- item_no = _parse(source, state)
+ item_no = _parse(source, state, nested + 1)
if source.match("|"):
raise error, "conditional backref with more than two branches"
else:
@@ -411,7 +411,7 @@
_LOOKBEHINDASSERTCHARS = set("=!")
_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT])
-def _parse(source, state):
+def _parse(source, state, nested):
# parse a simple pattern
subpattern = SubPattern(state)
@@ -462,7 +462,7 @@
if this == "]" and set != start:
break
elif this and this[0] == "\\":
- code1 = _class_escape(source, this)
+ code1 = _class_escape(source, this, nested + 1)
elif this:
code1 = LITERAL, ord(this)
else:
@@ -478,7 +478,7 @@
break
elif this:
if this[0] == "\\":
- code2 = _class_escape(source, this)
+ code2 = _class_escape(source, this, nested + 1)
else:
code2 = LITERAL, ord(this)
if code1[0] != LITERAL or code2[0] != LITERAL:
@@ -608,7 +608,7 @@
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
- RuntimeWarning)
+ RuntimeWarning, stacklevel=nested + 6)
subpatternappend((GROUPREF, gid))
continue
else:
@@ -638,7 +638,7 @@
dir = -1 # lookbehind
char = sourceget()
state.lookbehind += 1
- p = _parse_sub(source, state)
+ p = _parse_sub(source, state, nested + 1)
if dir < 0:
state.lookbehind -= 1
if not sourcematch(")"):
@@ -675,7 +675,7 @@
import warnings
warnings.warn('group references in lookbehind '
'assertions are not supported',
- RuntimeWarning)
+ RuntimeWarning, stacklevel=nested + 6)
else:
# flags
if not source.next in FLAGS:
@@ -690,9 +690,9 @@
else:
group = state.opengroup(name)
if condgroup:
- p = _parse_sub_cond(source, state, condgroup)
+ p = _parse_sub_cond(source, state, condgroup, nested + 1)
else:
- p = _parse_sub(source, state)
+ p = _parse_sub(source, state, nested + 1)
if not sourcematch(")"):
raise error, "unbalanced parenthesis"
if group is not None:
@@ -714,7 +714,7 @@
subpattern.append((AT, AT_END))
elif this and this[0] == "\\":
- code = _escape(source, this, state)
+ code = _escape(source, this, state, nested + 1)
subpatternappend(code)
else:
diff --git a/Misc/NEWS b/Misc/NEWS
index 3f34c6c..967e861 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -42,6 +42,10 @@
Library
-------
+- bpo-30375: Warnings emitted when compile a regular expression now always
+ point to the line in the user code. Previously they could point into inners
+ of the re module if emitted from inside of groups or conditionals.
+
- bpo-30363: Running Python with the -3 option now warns about regular
expression syntax that is invalid or has different semantic in Python 3
or will change the behavior in future Python versions.