blob: 117de8247c8590d1b121576bd2a468a7e7feef35 [file] [log] [blame]
Daniel Veillard5e5c2d02002-02-09 18:03:01 +00001import libxml2mod
Daniel Veillard85bb5b02003-12-04 14:12:05 +00002import types
Daniel Veillard3b6acc92006-12-14 15:49:41 +00003import sys
Daniel Veillardd2897fd2002-01-30 16:37:32 +00004
Daniel Veillarda81355e2004-09-28 11:08:27 +00005# The root of all libxml2 errors.
class libxmlError(Exception):
    """Root of all libxml2 errors raised by this module."""
7
Daniel Veillard6f184652013-03-29 15:17:40 +08008# Type of the wrapper class for the C objects wrappers
def checkWrapper(obj):
    """Check that *obj* has the type used to wrap C objects.

    Returns 0 when *obj* is acceptable and 1 when it is not.  An object is
    acceptable when its type is named ``PyCObject`` or ``PyCapsule``.
    ``None`` is accepted as well, so that wrappers built without an
    underlying object keep working.

    The previous implementation inspected an undefined name (``_obj``); the
    resulting NameError was swallowed by a bare ``except`` and the function
    returned 0 for every input, i.e. it never rejected anything.
    """
    if obj is None:
        return 0
    if type(obj).__name__ in ('PyCObject', 'PyCapsule'):
        return 0
    return 1
17
Daniel Veillard1971ee22002-01-31 20:29:19 +000018#
Daniel Veillard3b6acc92006-12-14 15:49:41 +000019# id() is sometimes negative ...
20#
def pos_id(o):
    """Return id(o), remapped to a non-negative value when it is negative.

    A negative identifier ``i`` is returned as ``sys.maxsize - i``.
    """
    ident = id(o)
    return ident if ident >= 0 else (sys.maxsize - ident)
26
27#
Daniel Veillard8d24cc12002-03-05 15:41:29 +000028# Errors raised by the wrappers when some tree handling failed.
29#
class treeError(libxmlError):
    """Error raised by the wrappers when some tree handling failed.

    The message is available as ``msg`` and is what ``str()`` returns.
    """

    def __init__(self, msg):
        # Forward the message to the base class so that ``args`` and
        # ``repr()`` carry it like for any other exception.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
35
class parserError(libxmlError):
    """Error raised for parser failures (see SAXCallback.error/fatalError).

    The message is available as ``msg`` and is what ``str()`` returns.
    """

    def __init__(self, msg):
        # Forward the message to the base class so that ``args`` and
        # ``repr()`` carry it like for any other exception.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
41
class uriError(libxmlError):
    """libxml2 error type for URI handling.

    NOTE(review): nothing in this part of the file raises it; presumably the
    generated wrappers do -- confirm against the generated code.
    The message is available as ``msg`` and is what ``str()`` returns.
    """

    def __init__(self, msg):
        # Forward the message to the base class so that ``args`` and
        # ``repr()`` carry it like for any other exception.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
47
class xpathError(libxmlError):
    """libxml2 error type for XPath handling.

    NOTE(review): nothing in this part of the file raises it; presumably the
    generated wrappers do -- confirm against the generated code.
    The message is available as ``msg`` and is what ``str()`` returns.
    """

    def __init__(self, msg):
        # Forward the message to the base class so that ``args`` and
        # ``repr()`` carry it like for any other exception.
        libxmlError.__init__(self, msg)
        self.msg = msg

    def __str__(self):
        return self.msg
53
class ioWrapper:
    """Adapter exposing a Python file-like object through io_* methods.

    The wrapped object is kept privately; once it has been released (after
    io_close() or a failed io_read()) every io_* method returns -1.
    ``_o`` is initialised to None here and set by the subclasses.
    """

    def __init__(self, _obj):
        self.__io = _obj
        self._o = None

    def io_close(self):
        """Close and release the wrapped object.

        Returns 0 on success, -1 when there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        self.__io.close()
        self.__io = None
        return 0

    def io_flush(self):
        """Flush the wrapped object.

        Returns 0 on success, -1 when there is no wrapped object anymore.
        """
        if self.__io is None:
            return -1
        self.__io.flush()
        return 0

    def io_read(self, len = -1):
        """Read from the wrapped object.

        Reads everything when len is negative, at most len units otherwise.
        Returns the data read, or -1 when there is no wrapped object or when
        the read raised an exception.
        """
        if self.__io is None:
            return -1
        try:
            if len < 0:
                ret = self.__io.read()
            else:
                ret = self.__io.read(len)
        except Exception as e:
            print("failed to read from Python:", type(e))
            print("on IO:", self.__io)
            # Release the failing object so later calls report -1.  The
            # original code used "==" here, which was a no-op comparison.
            self.__io = None
            return -1

        return ret

    def io_write(self, str, len = -1):
        """Write str to the wrapped object and return what write() returns.

        Returns -1 when there is no wrapped object anymore.  When len is not
        negative it is forwarded as a second argument to write().
        NOTE(review): standard file objects accept a single argument to
        write(); confirm which objects are expected on that path.
        """
        if self.__io is None:
            return -1
        if len < 0:
            return self.__io.write(str)
        return self.__io.write(str, len)
Daniel Veillardc6d4a932002-09-12 15:00:57 +000094
class ioReadWrapper(ioWrapper):
    """ioWrapper bound to an input buffer created by libxml2mod.

    ``_o`` holds the value returned by libxml2mod.xmlCreateInputBuffer().
    """

    def __init__(self, _obj, enc = ""):
        ioWrapper.__init__(self, _obj)
        self._o = libxml2mod.xmlCreateInputBuffer(self, enc)

    def __del__(self):
        # Same cleanup as an explicit close(); the stray debugging print that
        # used to be emitted here on every destruction has been removed.
        self.close()

    def close(self):
        """Close the wrapped Python object and free the input buffer."""
        self.io_close()
        if self._o is not None:
            libxml2mod.xmlFreeParserInputBuffer(self._o)
        self._o = None
112
class ioWriteWrapper(ioWrapper):
    """ioWrapper bound to a libxml2mod output buffer.

    ``_o`` holds the output buffer object handed to the constructor, or None
    when the wrapper was built from a string.
    """

    def __init__(self, _obj, enc = ""):
        # enc is accepted for interface compatibility; it is not used here.
        if isinstance(_obj, str):
            print("write io from a string")
            # Initialise the base state so that __del__(), flush() and
            # close() work.  The original code only set a misspelled
            # attribute ("o"), leaving "_o" and the wrapped object undefined.
            ioWrapper.__init__(self, None)
        else:
            # Wrap the Python file attached to the buffer when there is one,
            # the buffer object itself otherwise.
            file = libxml2mod.outputBufferGetPythonFile(_obj)
            if file is not None:
                ioWrapper.__init__(self, file)
            else:
                ioWrapper.__init__(self, _obj)
            self._o = _obj

    def _closeOutputBuffer(self):
        # Hand the buffer to xmlOutputBufferClose() once, then forget it.
        if self._o is not None:
            libxml2mod.xmlOutputBufferClose(self._o)
        self._o = None

    def __del__(self):
        self.io_close()
        self._closeOutputBuffer()

    def flush(self):
        """Flush the wrapped object, then close the output buffer.

        NOTE(review): this closes the buffer exactly like close() does;
        confirm that this is intended rather than a plain flush.
        """
        self.io_flush()
        self._closeOutputBuffer()

    def close(self):
        """Flush the wrapped object, then close the output buffer."""
        self.io_flush()
        self._closeOutputBuffer()
156
Daniel Veillard8d24cc12002-03-05 15:41:29 +0000157#
158# Example of a class to handle SAX events
159#
class SAXCallback:
    """Base class for SAX handlers.

    Every callback is a no-op except error() and fatalError(), which raise
    parserError.  Subclass it and override the events of interest.
    """

    def startDocument(self):
        """called at the start of the document"""
        pass

    def endDocument(self):
        """called at the end of the document"""
        pass

    def startElement(self, tag, attrs):
        """called at the start of every element, tag is the name of
           the element, attrs is a dictionary of the element's attributes"""
        pass

    def endElement(self, tag):
        """called at the end of every element, tag is the name of
           the element"""
        pass

    def characters(self, data):
        """called when character data have been read, data is the string
           containing the data, multiple consecutive characters() callback
           are possible."""
        pass

    def cdataBlock(self, data):
        """called when CDATA section have been read, data is the string
           containing the data, multiple consecutive cdataBlock() callback
           are possible."""
        pass

    def reference(self, name):
        """called when an entity reference has been found"""
        pass

    def ignorableWhitespace(self, data):
        """called when potentially ignorable white spaces have been found"""
        pass

    def processingInstruction(self, target, data):
        """called when a PI has been found, target contains the PI name and
           data is the associated data in the PI"""
        pass

    def comment(self, content):
        """called when a comment has been found, content contains the comment"""
        pass

    def externalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def internalSubset(self, name, externalID, systemID):
        """called when a DOCTYPE declaration has been found, name is the
           DTD name and externalID, systemID are the DTD public and system
           identifier for that DTD if available"""
        pass

    def entityDecl(self, name, type, externalID, systemID, content):
        """called when an ENTITY declaration has been found, name is the
           entity name and externalID, systemID are the entity public and
           system identifier for that entity if available, type indicates
           the entity type, and content reports its string content"""
        pass

    def notationDecl(self, name, externalID, systemID):
        """called when a NOTATION declaration has been found, name is the
           notation name and externalID, systemID are the notation public and
           system identifier for that notation if available"""
        pass

    def attributeDecl(self, elem, name, type, defi, defaultValue, nameList):
        """called when an ATTRIBUTE definition has been found"""
        pass

    def elementDecl(self, name, type, content):
        """called when an ELEMENT definition has been found"""
        pass

    # This handler used to be declared as a second "entityDecl", which
    # silently replaced the five-argument entityDecl() defined above.
    def unparsedEntityDecl(self, name, publicId, systemID, notationName):
        """called when an unparsed ENTITY declaration has been found,
           name is the entity name and publicId, systemID are the entity
           public and system identifier for that entity if available,
           and notationName indicates the associated NOTATION"""
        pass

    def warning(self, msg):
        """called with a warning message; ignored by default"""
        pass

    def error(self, msg):
        """called with an error message; raises parserError(msg)"""
        raise parserError(msg)

    def fatalError(self, msg):
        """called with a fatal error message; raises parserError(msg)"""
        raise parserError(msg)
258
259#
# This class is the ancestor of all the Node classes. It provides
# the basic functionalities shared by all nodes (and handles
# exceptions gracefully), like name, navigation in the tree,
# doc reference, content access and serializing to a string or URI
Daniel Veillard1971ee22002-01-31 20:29:19 +0000264#
class xmlCore:
    """Common ancestor of all the node classes.

    Wraps a low-level libxml2mod object, kept in ``_o``, and provides what is
    shared by all nodes: name, navigation in the tree, owning document,
    content access, serialization, canonicalization and XPath evaluation.
    """

    def __init__(self, _obj=None):
        # _o is the low-level object, or None when nothing is wrapped.
        self._o = _obj

    def __eq__(self, other):
        """Compare the wrapped objects with libxml2mod.compareNodesEqual().

        None and objects that do not wrap anything (no ``_o`` attribute)
        compare unequal instead of raising AttributeError.
        """
        if other is None or not hasattr(other, "_o"):
            return False
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        if ret is None:
            return False
        return ret == True

    def __ne__(self, other):
        """Negation of the comparison done by __eq__()."""
        if other is None or not hasattr(other, "_o"):
            return True
        ret = libxml2mod.compareNodesEqual(self._o, other._o)
        return not ret

    def __hash__(self):
        return libxml2mod.nodeHash(self._o)

    def __str__(self):
        return self.serialize()

    def get_parent(self):
        ret = libxml2mod.parent(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_children(self):
        ret = libxml2mod.children(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_last(self):
        ret = libxml2mod.last(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_next(self):
        ret = libxml2mod.next(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_properties(self):
        ret = libxml2mod.properties(self._o)
        if ret is None:
            return None
        return xmlAttr(_obj=ret)

    def get_prev(self):
        ret = libxml2mod.prev(self._o)
        if ret is None:
            return None
        return nodeWrap(ret)

    def get_content(self):
        return libxml2mod.xmlNodeGetContent(self._o)
    getContent = get_content  # alternate spelling of get_content

    def get_name(self):
        return libxml2mod.name(self._o)

    def get_type(self):
        return libxml2mod.type(self._o)

    def get_doc(self):
        """Return the owning document wrapped in an xmlDoc, or None.

        When libxml2mod.doc() returns None and this node is itself a
        document ("document_xml" or "document_html"), the node is returned
        wrapped as an xmlDoc.
        """
        ret = libxml2mod.doc(self._o)
        if ret is None:
            if self.type in ["document_xml", "document_html"]:
                return xmlDoc(_obj=self._o)
            else:
                return None
        return xmlDoc(_obj=ret)

    #
    # Those are common attributes to nearly all type of nodes, defined as
    # properties.  The __getattr__ based fallback for interpreters older
    # than Python 2.2 was removed: it could never be selected, since this
    # file uses syntax ("except ... as ...") those interpreters reject.
    #
    parent = property(get_parent, None, None, "Parent node")
    children = property(get_children, None, None, "First child node")
    last = property(get_last, None, None, "Last sibling node")
    next = property(get_next, None, None, "Next sibling node")
    prev = property(get_prev, None, None, "Previous sibling node")
    properties = property(get_properties, None, None, "List of properies")
    content = property(get_content, None, None, "Content of this node")
    name = property(get_name, None, None, "Node name")
    type = property(get_type, None, None, "Node type")
    doc = property(get_doc, None, None, "The document this node belongs to")

    #
    # Serialization routines, the optional arguments have the following
    # meaning:
    #     encoding: string to ask saving in a specific encoding
    #     format: if 1 the serializer is asked to indent the output
    #
    def serialize(self, encoding = None, format = 0):
        return libxml2mod.serializeNode(self._o, encoding, format)

    def saveTo(self, file, encoding = None, format = 0):
        return libxml2mod.saveNodeTo(self._o, file, encoding, format)

    #
    # Canonicalization routines:
    #
    #   nodes: the node set (tuple or list) to be included in the
    #     canonized image or None if all document nodes should be
    #     included.
    #   exclusive: the exclusive flag (0 - non-exclusive
    #     canonicalization; otherwise - exclusive canonicalization)
    #   prefixes: the list of inclusive namespace prefixes (strings),
    #     or None if there is no inclusive namespaces (only for
    #     exclusive canonicalization, ignored otherwise)
    #   with_comments: include comments in the result (!=0) or not
    #     (==0)
    def c14nMemory(self,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # the low-level call expects the wrapped objects
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocDumpMemory(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0)

    def c14nSaveTo(self,
                   file,
                   nodes=None,
                   exclusive=0,
                   prefixes=None,
                   with_comments=0):
        if nodes:
            # the low-level call expects the wrapped objects
            nodes = [n._o for n in nodes]
        return libxml2mod.xmlC14NDocSaveTo(
            self.get_doc()._o,
            nodes,
            exclusive != 0,
            prefixes,
            with_comments != 0,
            file)

    #
    # Selecting nodes using XPath, a bit slow because the context
    # is allocated/freed every time but convenient.
    #
    def xpathEval(self, expr):
        """Evaluate the XPath expression expr with this node as context.

        Returns None when the node has no document.  The temporary context
        is freed even when the evaluation raises.
        """
        doc = self.doc
        if doc is None:
            return None
        ctxt = doc.xpathNewContext()
        try:
            ctxt.setContextNode(self)
            return ctxt.xpathEval(expr)
        finally:
            ctxt.xpathFreeContext()

    # A variant caching one context per document used to live here; it was
    # removed because of memory leaks (c.f. #126735) and is now an alias.
    def xpathEval2(self, expr):
        return self.xpathEval(expr)

    # Remove namespaces
    def removeNsDef(self, href):
        """
        Remove a namespace definition from a node.  If href is None,
        remove all of the ns definitions on that node.  The removed
        namespaces are returned as a linked list.

        Note: If any child nodes referred to the removed namespaces,
        they will be left with dangling links.  You should call
        reconciliateNs() to fix those pointers.

        Note: This method does not free memory taken by the ns
        definitions.  You will need to free it manually with the
        freeNsList() method on the returned xmlNs object.
        """
        ret = libxml2mod.xmlNodeRemoveNsDef(self._o, href)
        if ret is None:
            return None
        return xmlNs(_obj=ret)

    # support for python iterators
    def walk_depth_first(self):
        return xmlCoreDepthFirstItertor(self)

    def walk_breadth_first(self):
        return xmlCoreBreadthFirstItertor(self)

    __iter__ = walk_depth_first

    def free(self):
        """Pass the wrapped object to libxml2mod.xmlFreeDoc()."""
        # Best effort: release an XPath context cached on the document, if
        # there is one; failures here are deliberately ignored.
        try:
            self.doc._ctxt.xpathFreeContext()
        except Exception:
            pass
        libxml2mod.xmlFreeDoc(self._o)
Daniel Veillard51a447a2003-01-04 19:42:46 +0000520
521
522#
523# implements the depth-first iterator for libxml2 DOM tree
524#
class xmlCoreDepthFirstItertor:
    """Depth-first iterator over a libxml2 DOM tree.

    Starting from node, it yields each node, then walks down through
    ``children`` and, once a branch is exhausted, across through ``next``.
    """

    def __init__(self, node):
        self.node = node
        # nodes already returned whose next sibling is still to be visited
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            if self.node:
                ret = self.node
                self.parents.append(ret)
                self.node = ret.children
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.next

    # Python 2 spelling of the iterator protocol; only this name used to be
    # defined, which made the object unusable in a Python 3 "for" loop.
    next = __next__
Daniel Veillard51a447a2003-01-04 19:42:46 +0000543
544#
545# implements the breadth-first iterator for libxml2 DOM tree
546#
class xmlCoreBreadthFirstItertor:
    """Sibling-first iterator over a libxml2 DOM tree.

    Starting from node, it yields each node, then walks across through
    ``next`` and, once the siblings are exhausted, down through the
    ``children`` of the most recently returned nodes.
    """

    def __init__(self, node):
        self.node = node
        # nodes already returned whose children are still to be visited
        self.parents = []

    def __iter__(self):
        return self

    def __next__(self):
        while True:
            if self.node:
                ret = self.node
                self.parents.append(ret)
                self.node = ret.next
                return ret
            try:
                parent = self.parents.pop()
            except IndexError:
                raise StopIteration
            self.node = parent.children

    # Python 2 spelling of the iterator protocol; only this name used to be
    # defined, which made the object unusable in a Python 3 "for" loop.
    next = __next__
565
Daniel Veillard36ed5292002-01-30 23:49:06 +0000566#
Daniel Veillard1971ee22002-01-31 20:29:19 +0000567# converters to present a nicer view of the XPath returns
568#
def nodeWrap(o):
    """Wrap the low-level object o in the class matching its node type.

    The type name comes from libxml2mod.type(o); types without a dedicated
    class are wrapped as xmlNode.
    """
    # TODO try to cast to the most appropriate node class
    kind = libxml2mod.type(o)
    if kind in ("element", "text"):
        return xmlNode(_obj=o)
    elif kind == "attribute":
        return xmlAttr(_obj=o)
    elif kind[:8] == "document":
        return xmlDoc(_obj=o)
    elif kind == "namespace":
        return xmlNs(_obj=o)
    elif kind == "elem_decl":
        return xmlElement(_obj=o)
    elif kind == "attribute_decl":
        return xmlAttribute(_obj=o)
    elif kind == "entity_decl":
        return xmlEntity(_obj=o)
    elif kind == "dtd":
        return xmlDtd(_obj=o)
    else:
        return xmlNode(_obj=o)
589
def xpathObjectRet(o):
    """Convert an XPath return value to a nicer Python view.

    Lists and tuples are converted element by element (keeping the container
    type), strings, ints and floats are returned unchanged and anything else
    goes through nodeWrap().
    """
    kind = type(o)
    if kind is list:
        return [xpathObjectRet(item) for item in o]
    if kind is tuple:
        return tuple([xpathObjectRet(item) for item in o])
    if kind is str or kind is int or kind is float:
        return o
    return nodeWrap(o)
Daniel Veillard1971ee22002-01-31 20:29:19 +0000602
603#
Daniel Veillarda7340c82002-02-01 17:56:45 +0000604# register an XPath function
605#
def registerXPathFunction(ctxt, name, ns_uri, f):
    """Register the Python function f as the XPath function name.

    The arguments are forwarded unchanged to
    libxml2mod.xmlRegisterXPathFunction().  Its result is now returned to
    the caller; it used to be stored in an unused local and discarded.
    """
    return libxml2mod.xmlRegisterXPathFunction(ctxt, name, ns_uri, f)
Daniel Veillarda7340c82002-02-01 17:56:45 +0000608
609#
Daniel Veillardf25b4ca2002-12-27 15:18:35 +0000610# For the xmlTextReader parser configuration
611#
# Property identifiers for the xmlTextReader parser configuration.
PARSER_LOADDTD = 1
PARSER_DEFAULTATTRS = 2
PARSER_VALIDATE = 3
PARSER_SUBST_ENTITIES = 4
Daniel Veillardf25b4ca2002-12-27 15:18:35 +0000616
617#
Daniel Veillard417be3a2003-01-20 21:26:34 +0000618# For the error callback severities
Daniel Veillard26f70262003-01-16 22:45:08 +0000619#
# Severity values passed to the error callbacks.
PARSER_SEVERITY_VALIDITY_WARNING = 1
PARSER_SEVERITY_VALIDITY_ERROR = 2
PARSER_SEVERITY_WARNING = 3
PARSER_SEVERITY_ERROR = 4
Daniel Veillard26f70262003-01-16 22:45:08 +0000624
625#
Daniel Veillard3e20a292003-01-10 13:14:40 +0000626# register the libxml2 error handler
Daniel Veillard36ed5292002-01-30 23:49:06 +0000627#
def registerErrorHandler(f, ctx):
    """Register a Python function for error reporting.

    The function is called back as f(ctx, error).  Returns the result of
    the underlying registration call.
    """
    import sys
    if 'libxslt' in sys.modules:
        # libxslt is already imported: its error handler must be used
        # instead of the libxml2 one
        import libxslt
        return libxslt.registerErrorHandler(f, ctx)
    # normal behaviour when libxslt is not imported
    return libxml2mod.xmlRegisterErrorHandler(f, ctx)
641
class parserCtxtCore:
    """Hand-written base of the parser context wrapper.

    ``_o`` holds the low-level parser context object, or None.
    """

    def __init__(self, _obj=None):
        self._o = _obj

    def __del__(self):
        # hand the context to xmlFreeParserCtxt() once, then forget it
        if self._o is not None:
            libxml2mod.xmlFreeParserCtxt(self._o)
        self._o = None

    def setErrorHandler(self, f, arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,reserved).

           @reserved is currently always None."""
        libxml2mod.xmlParserCtxtSetErrorHandler(self._o, f, arg)

    def getErrorHandler(self):
        """Return (f,arg) as previously registered with setErrorHandler
           or (None,None)."""
        return libxml2mod.xmlParserCtxtGetErrorHandler(self._o)

    def addLocalCatalog(self, uri):
        """Register a local catalog with the parser"""
        return libxml2mod.addLocalCatalog(self._o, uri)
670
671
class ValidCtxtCore:
    """Hand-written base of the DTD validation context wrapper.

    It relies on ``_o`` being set by whoever builds the instance.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for DTD validation.
        These will be called back as f(msg,arg)
        """
        handlers = (err_func, warn_func, arg)
        libxml2mod.xmlSetValidErrors(self._o, *handlers)
683
684
class SchemaValidCtxtCore:
    """Hand-written base of the Schema validation context wrapper.

    It relies on ``_o`` being set by whoever builds the instance.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for Schema validation.
        These will be called back as f(msg,arg)
        """
        handlers = (err_func, warn_func, arg)
        libxml2mod.xmlSchemaSetValidErrors(self._o, *handlers)
696
697
class relaxNgValidCtxtCore:
    """Hand-written base of the RelaxNG validation context wrapper.

    It relies on ``_o`` being set by whoever builds the instance.
    """

    def __init__(self, *args, **kw):
        """Accept and ignore any arguments."""

    def setValidityErrorHandler(self, err_func, warn_func, arg=None):
        """
        Register error and warning handlers for RelaxNG validation.
        These will be called back as f(msg,arg)
        """
        handlers = (err_func, warn_func, arg)
        libxml2mod.xmlRelaxNGSetValidErrors(self._o, *handlers)
709
710
def _xmlTextReaderErrorFunc(xxx_todo_changeme,msg,severity,locator):
    """Intermediate callback to wrap the locator.

    The first argument is the (f, arg) pair stored by
    xmlTextReaderCore.SetErrorHandler(); f is called with the locator
    wrapped in an xmlTextReaderLocator and its result is returned.
    """
    handler, user_arg = xxx_todo_changeme
    wrapped_locator = xmlTextReaderLocator(locator)
    return handler(user_arg, msg, severity, wrapped_locator)
Daniel Veillarde6227e02003-01-14 11:42:39 +0000715
class xmlTextReaderCore:
    """Hand-written base of the xmlTextReader wrapper.

    ``_o`` holds the low-level reader object, or None; ``input`` starts as
    None.
    """

    def __init__(self, _obj=None):
        self.input = None
        self._o = _obj

    def __del__(self):
        # hand the reader to xmlFreeTextReader() once, then forget it
        if self._o is not None:
            libxml2mod.xmlFreeTextReader(self._o)
        self._o = None

    def SetErrorHandler(self,f,arg):
        """Register an error handler that will be called back as
           f(arg,msg,severity,locator)."""
        if f is None:
            callback, payload = None, None
        else:
            # go through the intermediate callback, which wraps the locator
            callback, payload = _xmlTextReaderErrorFunc, (f, arg)
        libxml2mod.xmlTextReaderSetErrorHandler(self._o, callback, payload)

    def GetErrorHandler(self):
        """Return (f,arg) as previously registered with SetErrorHandler
           or (None,None)."""
        registered, payload = libxml2mod.xmlTextReaderGetErrorHandler(self._o)
        if registered is None:
            return None, None
        # registered is the intermediate callback; payload is the (f, arg)
        # pair that SetErrorHandler() stored alongside it
        return payload
Daniel Veillard26f70262003-01-16 22:45:08 +0000747
Daniel Veillardf93a8662004-07-01 12:56:30 +0000748#
# The cleanup now goes through a wrapper in libxml.c
Daniel Veillardf93a8662004-07-01 12:56:30 +0000750#
def cleanupParser():
    """Run the parser cleanup through libxml2mod.xmlPythonCleanupParser()."""
    libxml2mod.xmlPythonCleanupParser()
Daniel Veillard87ab1c12003-12-21 13:01:56 +0000753
Alexey Neyman48da90b2013-02-25 15:54:25 +0800754#
755# The interface to xmlRegisterInputCallbacks.
756# Since this API does not allow to pass a data object along with
757# match/open callbacks, it is necessary to maintain a list of all
758# Python callbacks.
759#
# Python-level input callbacks, in registration order.
__input_callbacks = []

def registerInputCallback(func):
    """Register func as an input callback.

    func is called with a URI and returns an object to use for it, or None
    to let the other callbacks try.
    """
    def findOpenCallback(URI):
        # the most recently registered callback gets the first chance
        for opener in reversed(__input_callbacks):
            found = opener(URI)
            if found is not None:
                return found
        return None

    libxml2mod.xmlRegisterInputCallback(findOpenCallback)
    __input_callbacks.append(func)
769
def popInputCallbacks():
    """Remove the most recently registered input callback.

    Python-level callbacks are popped first; when none are left (or none
    remain after the pop), libxml2mod.xmlUnregisterInputCallback() is
    called.
    """
    if __input_callbacks:
        __input_callbacks.pop()
    if not __input_callbacks:
        libxml2mod.xmlUnregisterInputCallback()
777
Daniel Veillard3e20a292003-01-10 13:14:40 +0000778# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
779#
780# Everything before this line comes from libxml.py
781# Everything after this line is automatically generated
782#
783# WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
Daniel Veillard1971ee22002-01-31 20:29:19 +0000784