bpo-31648: Improve ElementPath (#3835)
* Allow whitespace inside of ElementPath predicates.
* Add ElementPath predicate support for text comparison of the current node, like "[.='text']".
diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst
index 7d814ad..6180859 100644
--- a/Doc/library/xml.etree.elementtree.rst
+++ b/Doc/library/xml.etree.elementtree.rst
@@ -437,6 +437,11 @@
| ``[tag]`` | Selects all elements that have a child named |
| | ``tag``. Only immediate children are supported. |
+-----------------------+------------------------------------------------------+
+| ``[.='text']`` | Selects all elements whose complete text content, |
+| | including descendants, equals the given ``text``. |
+| | |
+| | .. versionadded:: 3.7 |
++-----------------------+------------------------------------------------------+
| ``[tag='text']`` | Selects all elements that have a child named |
| | ``tag`` whose complete text content, including |
| | descendants, equals the given ``text``. |
diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index a474e76..845ed64 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -281,6 +281,14 @@
keyword argument. When it's true, zeros are represented by ``'`'``
instead of spaces. (Contributed by Xiang Zhang in :issue:`30103`.)
+xml.etree
+---------
+
+:ref:`ElementPath <elementtree-xpath>` predicates in the :meth:`find`
+methods can now compare text of the current node with ``[. = "text"]``,
+not only text in children. Predicates also allow adding spaces for
+better readability. (Contributed by Stefan Behnel in :issue:`31648`.)
+
zipapp
------
diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py
index 661ad8b..02812f3 100644
--- a/Lib/test/test_xml_etree.py
+++ b/Lib/test/test_xml_etree.py
@@ -2237,6 +2237,39 @@
['tag'] * 2)
self.assertEqual(e.findall('section//'), e.findall('section//*'))
+ self.assertEqual(summarize_list(e.findall(".//section[tag='subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag ='subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag= 'subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+ ['section'])
+ self.assertEqual(summarize_list(e.findall(".//section[ tag = 'subtext' ]")),
+ ['section'])
+
+ self.assertEqual(summarize_list(e.findall(".//tag[.='subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. ='subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall('.//tag[.= "subtext"]')),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall('.//tag[ . = "subtext" ]')),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+ ['tag'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext ']")),
+ [])
+ self.assertEqual(summarize_list(e.findall(".//tag[.= ' subtext']")),
+ [])
+
+ # duplicate section => 2x tag matches
+ e[1] = e[2]
+ self.assertEqual(summarize_list(e.findall(".//section[tag = 'subtext']")),
+ ['section', 'section'])
+ self.assertEqual(summarize_list(e.findall(".//tag[. = 'subtext']")),
+ ['tag', 'tag'])
+
def test_test_find_with_ns(self):
e = ET.XML(SAMPLE_XML_NS)
self.assertEqual(summarize_list(e.findall('tag')), [])
diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py
index 361f6d5..c9d6ef3 100644
--- a/Lib/xml/etree/ElementPath.py
+++ b/Lib/xml/etree/ElementPath.py
@@ -157,6 +157,9 @@
return
if token[0] == "]":
break
+ if token == ('', ''):
+ # ignore whitespace
+ continue
if token[0] and token[0][:1] in "'\"":
token = "'", token[0][1:-1]
signature.append(token[0] or "-")
@@ -188,16 +191,22 @@
if elem.find(tag) is not None:
yield elem
return select
- if signature == "-='" and not re.match(r"\-?\d+$", predicate[0]):
- # [tag='value']
+ if signature == ".='" or (signature == "-='" and not re.match(r"\-?\d+$", predicate[0])):
+ # [.='value'] or [tag='value']
tag = predicate[0]
value = predicate[-1]
- def select(context, result):
- for elem in result:
- for e in elem.findall(tag):
- if "".join(e.itertext()) == value:
+ if tag:
+ def select(context, result):
+ for elem in result:
+ for e in elem.findall(tag):
+ if "".join(e.itertext()) == value:
+ yield elem
+ break
+ else:
+ def select(context, result):
+ for elem in result:
+ if "".join(elem.itertext()) == value:
yield elem
- break
return select
if signature == "-" or signature == "-()" or signature == "-()-":
# [index] or [last()] or [last()-index]
diff --git a/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
new file mode 100644
index 0000000..8b39ce9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-09-30-10-45-12.bpo-31648.Cai7ji.rst
@@ -0,0 +1,6 @@
+Improvements to path predicates in ElementTree:
+
+* Allow whitespace around predicate parts, e.g. "[a = 'text']" instead of requiring the less readable "[a='text']".
+* Add support for text comparison of the current node, like "[.='text']".
+
+Patch by Stefan Behnel.