blob: 00dbe9d546b3dc5b50e5ef50ca87b7c9f2860007 [file] [log] [blame]
Armin Rigo9ed73062005-12-14 18:10:45 +00001#
2# ElementTree
3# $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
4#
5# limited xpath support for element trees
6#
7# history:
8# 2003-05-23 fl created
9# 2003-05-28 fl added support for // etc
10# 2003-08-27 fl fixed parsing of periods in element names
11#
12# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
13#
14# fredrik@pythonware.com
15# http://www.pythonware.com
16#
17# --------------------------------------------------------------------
18# The ElementTree toolkit is
19#
20# Copyright (c) 1999-2004 by Fredrik Lundh
21#
22# By obtaining, using, and/or copying this software and/or its
23# associated documentation, you agree that you have read, understood,
24# and will comply with the following terms and conditions:
25#
26# Permission to use, copy, modify, and distribute this software and
27# its associated documentation for any purpose and without fee is
28# hereby granted, provided that the above copyright notice appears in
29# all copies, and that both that copyright notice and this permission
30# notice appear in supporting documentation, and that the name of
31# Secret Labs AB or the author not be used in advertising or publicity
32# pertaining to distribution of the software without specific, written
33# prior permission.
34#
35# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
36# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
37# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
38# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
39# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
40# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
41# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
42# OF THIS SOFTWARE.
43# --------------------------------------------------------------------
44
Fredrik Lundh63168a52005-12-14 22:29:34 +000045# Licensed to PSF under a Contributor Agreement.
46# See http://www.python.org/2.4/license for licensing details.
47
Armin Rigo9ed73062005-12-14 18:10:45 +000048##
49# Implementation module for XPath support. There's usually no reason
50# to import this module directly; the <b>ElementTree</b> does this for
51# you, if needed.
52##
53
54import re
55
##
# Tokenizer for path expressions.  Calling it on a string returns a
# list of (op, tag) 2-tuples, one per match: an operator match fills
# the first item, a (possibly "{uri}"-qualified) name fills the second,
# and a run of whitespace yields a tuple of two empty strings.
#
# The pattern is a raw string so the regex escapes reach the re module
# untouched instead of relying on Python passing unknown string escapes
# through.

xpath_tokenizer = re.compile(
    r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)|\s+"
    ).findall

##
# Marker placed in a compiled path where the expression contained "//"
# (descendant-or-self step).

class xpath_descendant_or_self:
    pass

##
# Wrapper for a compiled XPath.
#
# Instances expose two attributes:
#   path -- list of steps; each step is a tag string, "*", or an
#           xpath_descendant_or_self marker.
#   tag  -- the single step if the path consists of exactly one string
#           step, otherwise None (enables a fast path in find()).

class Path:

    ##
    # Create a Path instance from an XPath expression.
    #
    # @param path Path expression string ('path/path'-style only).
    # @exception SyntaxError If the expression is absolute, uses an
    #     unsupported construct, lacks a separator between steps, or
    #     ends with "//".

    def __init__(self, path):
        tokens = xpath_tokenizer(path)
        # the current version supports 'path/path'-style expressions only
        self.path = []
        self.tag = None
        if tokens and tokens[0][0] == "/":
            raise SyntaxError("cannot use absolute path on element")
        # Walk the token list by index; popping from the front of a
        # list is linear per pop.
        pos = 0
        count = len(tokens)
        while pos < count:
            op, tag = tokens[pos]
            pos = pos + 1
            if tag or op == "*":
                self.path.append(tag or op)
            elif op == ".":
                pass
            elif op == "/":
                # a "/" in step position is the second half of "//"
                self.path.append(xpath_descendant_or_self())
                continue
            else:
                raise SyntaxError("unsupported path syntax (%s)" % op)
            # a step must be followed by a separator (or end of input)
            if pos < count:
                op, tag = tokens[pos]
                pos = pos + 1
                if op != "/":
                    raise SyntaxError(
                        "expected path separator (%s)" % (op or tag)
                        )
        if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
            raise SyntaxError("path cannot end with //")
        if len(self.path) == 1 and isinstance(self.path[0], type("")):
            self.tag = self.path[0]

    ##
    # Find first matching object.
    #
    # @param element Object to search; iterating it must yield its
    #     children, each having a "tag" attribute.
    # @return The first match, or None if nothing matched.

    def find(self, element):
        tag = self.tag
        if tag is None:
            nodeset = self.findall(element)
            if not nodeset:
                return None
            return nodeset[0]
        # Fast path for a single-step expression: scan direct children.
        # "*" must match any child here, exactly as it does in findall().
        for elem in element:
            if tag == "*" or elem.tag == tag:
                return elem
        return None

    ##
    # Find text for first matching object.
    #
    # @param element Object to search (see find()).
    # @param default Value to return if nothing matched.
    # @return The "text" attribute of the first match, with a false
    #     value (such as None) replaced by an empty string, or the
    #     default value if nothing matched.

    def findtext(self, element, default=None):
        elem = self.find(element)
        if elem is None:
            return default
        return elem.text or ""

    ##
    # Find all matching objects.
    #
    # @param element Object to search (see find()).
    # @return A list of matches in the order they were encountered;
    #     an empty list if any step produced no matches.

    def findall(self, element):
        nodeset = [element]
        steps = self.path
        count = len(steps)
        index = 0
        while index < count:
            step = steps[index]
            index = index + 1
            matches = []
            if isinstance(step, xpath_descendant_or_self):
                # "//" consumes the following tag step, if there is one,
                # and uses it to filter the descendant walk.
                tag = None
                if index < count and isinstance(steps[index], type("")):
                    tag = steps[index]
                    index = index + 1
                for node in nodeset:
                    # Element.getiterator() was removed in Python 3.9;
                    # prefer iter() and fall back for older trees.
                    walk = getattr(node, "iter", None)
                    if walk is None:
                        walk = node.getiterator
                    found = list(walk(tag))
                    # the walk may report the node itself first; only
                    # its descendants are wanted
                    if found and found[0] is node:
                        matches.extend(found[1:])
                    else:
                        matches.extend(found)
            else:
                for node in nodeset:
                    for child in node:
                        if step == "*" or child.tag == step:
                            matches.append(child)
            if not matches:
                return []
            nodeset = matches
        return nodeset
166
# Compiled Path objects, keyed by the expression string they were
# built from.
_cache = {}

##
# (Internal) Compile path.
#
# @param path Path expression string.
# @return The Path instance for the expression, reused from the cache
#     when one is already stored there.

def _compile(path):
    compiled = _cache.get(path)
    if compiled is None:
        # Build first, so a failing expression leaves the cache untouched.
        compiled = Path(path)
        # Crude size bound: drop everything once 100 entries are held.
        if not len(_cache) < 100:
            _cache.clear()
        _cache[path] = compiled
    return compiled
181
##
# Find first matching object.
#
# @param element Object to search.
# @param path Path expression string.
# @return Whatever the compiled path's find() method returns for
#     the given element.

def find(element, path):
    compiled = _compile(path)
    return compiled.find(element)
187
##
# Find text for first matching object.
#
# @param element Object to search.
# @param path Path expression string.
# @param default Value handed to the compiled path's findtext()
#     method as its default.
# @return Whatever the compiled path's findtext() method returns.

def findtext(element, path, default=None):
    compiled = _compile(path)
    return compiled.findtext(element, default)
193
##
# Find all matching objects.
#
# @param element Object to search.
# @param path Path expression string.
# @return Whatever the compiled path's findall() method returns for
#     the given element.

def findall(element, path):
    compiled = _compile(path)
    return compiled.findall(element)