"""\
minidom.py -- a lightweight DOM implementation based on SAX.

parse( "foo.xml" )

parseString( "<foo><bar/></foo>" )

Todo:
=====
 * convenience methods for getting elements and text.
 * more testing
 * bring some of the writer and linearizer code into conformance with this
   interface
 * SAX 2 namespaces
"""

import copy
import string
import types
from StringIO import StringIO

import pulldom

class Node:
    """Base class shared by every node type in this module.

    Holds the node-type constants, the ``childNodes`` list and the generic
    tree-manipulation methods.  Attribute access is extended by
    ``__getattr__``: ``node.foo`` falls back to calling ``node._get_foo()``
    and ``node._get_foo`` falls back to a callable returning ``node.foo``.

    ``toxml`` relies on a ``writexml(writer)`` method, which this class does
    not define itself.

    Class attributes:
      allnodes         -- debug registry, filled only while ``_debug`` is true
      _debug           -- when true, node creation and unlinking are logged
      _makeParentNodes -- when true, children receive a ``parentNode`` link
      debug            -- log stream, created lazily by ``__init__``
    """
    ELEMENT_NODE = 1
    ATTRIBUTE_NODE = 2
    TEXT_NODE = 3
    CDATA_SECTION_NODE = 4
    ENTITY_REFERENCE_NODE = 5
    ENTITY_NODE = 6
    PROCESSING_INSTRUCTION_NODE = 7
    COMMENT_NODE = 8
    DOCUMENT_NODE = 9
    DOCUMENT_TYPE_NODE = 10
    DOCUMENT_FRAGMENT_NODE = 11
    NOTATION_NODE = 12

    allnodes = {}
    _debug = 0
    _makeParentNodes = 1
    debug = None

    def __init__(self):
        """Create a node with an empty child list (and log it in debug mode)."""
        self.childNodes = []
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            Node.allnodes[index] = repr(self.__dict__)
            if Node.debug is None:
                Node.debug = StringIO()
            Node.debug.write("create %s\n" % index)

    def __getattr__(self, key):
        """Resolve ``foo`` through ``_get_foo()`` and ``_get_foo`` through ``foo``.

        Raises AttributeError naming ``key`` when neither form exists.
        """
        if key[0:2] == "__":
            raise AttributeError(key)
        state = self.__dict__
        # getattr should never call getattr!  The "inGetAttr" marker is set
        # while one fallback lookup is in flight; seeing it here means that
        # lookup failed too, so stop instead of recursing forever.
        if "inGetAttr" in state:
            del state["inGetAttr"]
            raise AttributeError(key)

        prefix, attrname = key[:5], key[5:]
        if prefix == "_get_":
            state["inGetAttr"] = 1
            try:
                found = hasattr(self, attrname)
            finally:
                # The nested lookup removes the marker itself when it fails,
                # so only delete it if it is still there.
                if "inGetAttr" in state:
                    del state["inGetAttr"]
            if not found:
                raise AttributeError(key)
            return (lambda self=self, attrname=attrname:
                    getattr(self, attrname))

        state["inGetAttr"] = 1
        try:
            try:
                func = getattr(self, "_get_" + key)
            except AttributeError:
                raise AttributeError(key)
        finally:
            if "inGetAttr" in state:
                del state["inGetAttr"]
        return func()

    def __nonzero__(self):
        """A node is always true, even when it has no children."""
        return 1

    def toxml(self):
        """Return the text that ``self.writexml`` writes, as one string."""
        writer = StringIO()
        self.writexml(writer)
        return writer.getvalue()

    def hasChildNodes(self):
        """Return 1 if the node has at least one child, else 0."""
        if self.childNodes:
            return 1
        else:
            return 0

    def _get_firstChild(self):
        """Return the first child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[0]
        return None

    def _get_lastChild(self):
        """Return the last child, or None when there are no children."""
        if self.childNodes:
            return self.childNodes[-1]
        return None

    def insertBefore(self, newChild, refChild):
        """Insert ``newChild`` in front of ``refChild`` and return ``newChild``.

        A ``refChild`` of None appends at the end.  Raises ValueError when
        ``refChild`` is not one of this node's children.
        """
        if refChild is None:
            return self.appendChild(newChild)
        index = self.childNodes.index(refChild)
        self.childNodes.insert(index, newChild)
        if self._makeParentNodes:
            newChild.parentNode = self
        return newChild

    def appendChild(self, node):
        """Append ``node`` to the child list and return it."""
        self.childNodes.append(node)
        if self._makeParentNodes:
            node.parentNode = self
        return node

    def replaceChild(self, newChild, oldChild):
        """Put ``newChild`` in the slot held by ``oldChild``; return ``oldChild``.

        Raises ValueError when ``oldChild`` is not one of this node's children.
        """
        index = self.childNodes.index(oldChild)
        self.childNodes[index] = newChild
        if self._makeParentNodes:
            # Clear the old link first so replacing a node with itself still
            # leaves it attached to this parent.
            oldChild.parentNode = None
            newChild.parentNode = self
        return oldChild

    def removeChild(self, oldChild):
        """Remove ``oldChild`` from the child list and return it.

        Raises ValueError when ``oldChild`` is not one of this node's children.
        """
        index = self.childNodes.index(oldChild)
        del self.childNodes[index]
        if self._makeParentNodes:
            oldChild.parentNode = None
        return oldChild

    def cloneNode(self, deep):
        """Return a copy of this node that shares no containers with it.

        The copy has no parent.  When ``deep`` is true the children are
        cloned recursively; otherwise the copy has no children.  Attribute
        nodes stored in ``_attrs`` are always cloned along with their owner.
        """
        clone = copy.copy(self)
        clone.childNodes = []
        if self._makeParentNodes:
            clone.parentNode = None
        # The shallow copy still points at the source's root element; drop
        # the instance value so appendChild below can set a fresh one.
        if "documentElement" in clone.__dict__:
            del clone.__dict__["documentElement"]
        own_attrs = self.__dict__.get("_attrs")
        if own_attrs is not None:
            # Give the clone its own attribute indexes instead of aliasing
            # the dictionaries of the node being cloned.
            clone._attrs = {}
            clone._attrsNS = {}
            for attr in own_attrs.values():
                clone.setAttributeNode(attr.cloneNode(1))
        if deep:
            for child in self.childNodes:
                clone.appendChild(child.cloneNode(deep))
        return clone

    def unlink(self):
        """Break the links from this subtree so it can be reclaimed.

        Clears ``parentNode``, unlinks and drops every child, removes any
        attribute nodes, and leaves ``childNodes`` set to None.
        """
        self.parentNode = None
        while self.childNodes:
            self.childNodes[-1].unlink()
            del self.childNodes[-1] # probably not most efficient!
        self.childNodes = None
        if getattr(self, "attributes", None):
            # Iterate over a copy: removeAttributeNode deletes from _attrs.
            for attr in list(self._attrs.values()):
                self.removeAttributeNode(attr)
            assert not len(self._attrs)
            assert not len(self._attrsNS)
        if Node._debug:
            index = repr(id(self)) + repr(self.__class__)
            self.debug.write("Deleting: %s\n" % index)
            del Node.allnodes[index]
| def _write_data(writer, data): |
| "Writes datachars to writer." |
| data = string.replace(data, "&", "&") |
| data = string.replace(data, "<", "<") |
| data = string.replace(data, "\"", """) |
| data = string.replace(data, ">", ">") |
| writer.write(data) |
| |
| def _getElementsByTagNameHelper(parent, name, rc): |
| for node in parent.childNodes: |
| if node.nodeType == Node.ELEMENT_NODE and \ |
| (name == "*" or node.tagName == name): |
| rc.append(node) |
| _getElementsByTagNameHelper(node, name, rc) |
| return rc |
| |
| def _getElementsByTagNameNSHelper(parent, nsURI, localName, rc): |
| for node in parent.childNodes: |
| if node.nodeType == Node.ELEMENT_NODE: |
| if ((localName == "*" or node.tagName == localName) and |
| (nsURI == "*" or node.namespaceURI == nsURI)): |
| rc.append(node) |
| _getElementsByTagNameNSHelper(node, name, rc) |
| |
class Attr(Node):
    """An attribute node; ``value`` and ``nodeValue`` always hold the same value."""
    nodeType = Node.ATTRIBUTE_NODE

    def __init__(self, qName, namespaceURI="", localName=None, prefix=None):
        """Record the attribute's names; its value is assigned afterwards.

        ``localName`` defaults to ``qName`` when it is empty or None.
        """
        # Fill the instance dictionary directly: this skips __setattr__,
        # which the original author avoided here for performance.
        fields = self.__dict__
        fields["localName"] = localName or qName
        fields["nodeName"] = qName
        fields["name"] = qName
        fields["namespaceURI"] = namespaceURI
        fields["prefix"] = prefix
        self.attributes = None
        Node.__init__(self)
        # nodeValue and value are set elsewhere

    def __setattr__(self, name, value):
        """Store ``value``, mirroring writes to ``value`` and ``nodeValue``."""
        fields = self.__dict__
        if name not in ("value", "nodeValue"):
            fields[name] = value
            return
        fields["value"] = value
        fields["nodeValue"] = value
class AttributeList:
    """the attribute list is a transient interface to the underlying
    dictionaries.  mutations here will change the underlying element's
    dictionary

    ``attrs`` maps attribute names to attribute nodes and ``attrsNS`` maps
    ``(namespaceURI, localName)`` tuples to the same nodes.  ``length`` and
    ``len()`` are computed from ``attrs`` on every access.
    """
    def __init__(self, attrs, attrsNS):
        self._attrs = attrs
        self._attrsNS = attrsNS

    def __getattr__(self, name):
        """Provide ``length`` as a live count of the attributes."""
        if name == "length":
            return len(self._attrs)
        raise AttributeError(name)

    def item(self, index):
        """Return the attribute node at position ``index``, or None."""
        try:
            return self[list(self._attrs.keys())[index]]
        except IndexError:
            return None

    def items(self):
        """Return a list of ``(nodeName, value)`` pairs."""
        return [(node.nodeName, node.value)
                for node in self._attrs.values()]

    def itemsNS(self):
        """Return a list of ``((namespaceURI, localName), value)`` pairs."""
        return [((node.namespaceURI, node.localName), node.value)
                for node in self._attrs.values()]

    def keys(self):
        """Return the attribute names as a list."""
        return list(self._attrs.keys())

    def keysNS(self):
        """Return the ``(namespaceURI, localName)`` keys as a list."""
        return list(self._attrsNS.keys())

    def values(self):
        """Return the attribute nodes as a list."""
        return list(self._attrs.values())

    def __len__(self):
        return len(self._attrs)

    def __cmp__(self, other):
        # Two lists are equal when they are views of the same dictionary.
        if self._attrs is getattr(other, "_attrs", None):
            return 0
        else:
            return cmp(id(self), id(other))

    #FIXME: is it appropriate to return .value?
    def __getitem__(self, attname_or_tuple):
        """Look up a node by name, or by a ``(namespaceURI, localName)`` tuple.

        Raises KeyError when no such attribute exists.
        """
        if isinstance(attname_or_tuple, tuple):
            return self._attrsNS[attname_or_tuple]
        else:
            return self._attrs[attname_or_tuple]

    # same as set
    def __setitem__(self, attname, value):
        """Store ``value`` (a string or an Attr node) under ``attname``.

        A string is wrapped in a new Attr named ``attname``.  An attribute
        already stored under ``attname`` is removed from both dictionaries
        and unlinked before the new node is registered.
        """
        if isinstance(value, types.StringType):
            node = Attr(attname)
            node.value = value
        else:
            assert isinstance(value, Attr)
            node = value
        old = self._attrs.get(attname, None)
        if old is not None and old is not node:
            del self._attrs[attname]
            # Drop the replaced node's namespace entry as well; its key may
            # differ from the new node's and would otherwise be left behind.
            old_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(old_key) is old:
                del self._attrsNS[old_key]
            old.unlink()
        self._attrs[node.name] = node
        self._attrsNS[(node.namespaceURI, node.localName)] = node

    def __delitem__(self, attname_or_tuple):
        """Unlink the addressed attribute and remove it from both dictionaries."""
        node = self[attname_or_tuple]
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]
class Element(Node):
    """An element node with a tag name, namespace data and attributes.

    Attribute nodes are kept in two dictionaries that must stay in step:
    ``_attrs`` keyed by attribute name and ``_attrsNS`` keyed by
    ``(namespaceURI, localName)``.
    """
    nodeType = Node.ELEMENT_NODE

    def __init__(self, tagName, namespaceURI="", prefix="",
                 localName=None):
        """Create an element; ``localName`` defaults to ``tagName``."""
        Node.__init__(self)
        self.tagName = self.nodeName = tagName
        self.localName = localName or tagName
        self.prefix = prefix
        self.namespaceURI = namespaceURI
        self.nodeValue = None

        self._attrs = {}    # attributes are double-indexed:
        self._attrsNS = {}  #    tagName -> Attribute
                            #    URI,localName -> Attribute
                            # in the future: consider lazy generation
                            # of attribute objects; this is too tricky
                            # for now because of headaches with namespaces.

    def getAttribute(self, attname):
        """Return the value of attribute ``attname``; KeyError if absent."""
        return self._attrs[attname].value

    def getAttributeNS(self, namespaceURI, localName):
        """Return the value of the namespaced attribute; KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)].value

    def setAttribute(self, attname, value):
        """Create an attribute named ``attname`` holding ``value``."""
        attr = Attr(attname)
        # for performance
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def setAttributeNS(self, namespaceURI, qualifiedName, value):
        """Create a namespaced attribute from a ``prefix:local`` name."""
        prefix, localname = _nssplit(qualifiedName)
        # for performance
        attr = Attr(qualifiedName, namespaceURI, localname, prefix)
        attr.__dict__["value"] = attr.__dict__["nodeValue"] = value
        self.setAttributeNode(attr)

    def getAttributeNode(self, attrname):
        """Return the attribute node named ``attrname``, or None."""
        return self._attrs.get(attrname)

    def getAttributeNodeNS(self, namespaceURI, localName):
        """Return the namespaced attribute node; KeyError if absent."""
        return self._attrsNS[(namespaceURI, localName)]

    def setAttributeNode(self, attr):
        """Register ``attr``, replacing any attribute with the same name.

        Returns the attribute node that was replaced, or None.
        """
        old = self._attrs.get(attr.name, None)
        if old is attr:
            # Re-setting the same node must not unlink it.
            old = None
        if old is not None:
            # The replaced node may sit under a different namespace key
            # than the new one, so remove its entry explicitly.
            stale_key = (old.namespaceURI, old.localName)
            if self._attrsNS.get(stale_key) is old:
                del self._attrsNS[stale_key]
            old.unlink()
        self._attrs[attr.name] = attr
        self._attrsNS[(attr.namespaceURI, attr.localName)] = attr
        return old

    def removeAttribute(self, name):
        """Remove the attribute called ``name``; KeyError if absent."""
        attr = self._attrs[name]
        self.removeAttributeNode(attr)

    def removeAttributeNS(self, namespaceURI, localName):
        """Remove the namespaced attribute; KeyError if absent."""
        attr = self._attrsNS[(namespaceURI, localName)]
        self.removeAttributeNode(attr)

    def removeAttributeNode(self, node):
        """Unlink ``node`` and delete it from both attribute dictionaries."""
        node.unlink()
        del self._attrs[node.name]
        del self._attrsNS[(node.namespaceURI, node.localName)]

    def getElementsByTagName(self, name):
        """Return a list of descendant elements whose tag name matches."""
        return _getElementsByTagNameHelper(self, name, [])

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of descendant elements matching namespace and name."""
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def __repr__(self):
        return "<DOM Element: %s at %s>" % (self.tagName, id(self))

    def writexml(self, writer):
        """Write this element, its attributes and its children to ``writer``.

        Attributes are written in sorted name order; an element without
        children is written in the empty-element form.
        """
        writer.write("<" + self.tagName)

        attrs = self._get_attributes()
        a_names = list(attrs.keys())
        a_names.sort()

        for a_name in a_names:
            writer.write(" %s=\"" % a_name)
            # Escape the attribute's text, not the Attr node object.
            _write_data(writer, attrs[a_name].value)
            writer.write("\"")
        if self.childNodes:
            writer.write(">")
            for node in self.childNodes:
                node.writexml(writer)
            writer.write("</%s>" % self.tagName)
        else:
            writer.write("/>")

    def _get_attributes(self):
        """Return a fresh AttributeList over this element's dictionaries."""
        return AttributeList(self._attrs, self._attrsNS)
class Comment(Node):
    """A comment node; its text is held in both ``data`` and ``nodeValue``."""
    nodeType = Node.COMMENT_NODE

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.nodeName = "#comment"
        self.attributes = None

    def writexml(self, writer):
        """Write the comment, wrapped in its delimiters, to ``writer``."""
        markup = "<!--%s-->" % self.data
        writer.write(markup)
class ProcessingInstruction(Node):
    """A processing instruction node made of a ``target`` and its ``data``."""
    nodeType = Node.PROCESSING_INSTRUCTION_NODE

    def __init__(self, target, data):
        Node.__init__(self)
        # The target doubles as the node name, the data as the node value.
        self.target = target
        self.nodeName = target
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def writexml(self, writer):
        """Write the instruction as ``<?target data?>`` to ``writer``."""
        markup = "<?%s %s?>" % (self.target, self.data)
        writer.write(markup)
class Text(Node):
    """A text node; its characters are held in ``data`` and ``nodeValue``."""
    nodeType = Node.TEXT_NODE
    nodeName = "#text"

    def __init__(self, data):
        Node.__init__(self)
        self.data = data
        self.nodeValue = data
        self.attributes = None

    def __repr__(self):
        # Show at most the first ten characters, marking any truncation.
        suffix = ""
        if len(self.data) > 10:
            suffix = "..."
        preview = self.data[0:10]
        return "<DOM Text node \"%s%s\">" % (preview, suffix)

    def writexml(self, writer):
        """Write the text to ``writer`` through ``_write_data``."""
        _write_data(writer, self.data)
| def _nssplit(qualifiedName): |
| fields = qualifiedName.split(':', 1) |
| if len(fields) == 2: |
| return fields |
| elif len(fields) == 1: |
| return ('', fields[0]) |
| |
class Document(Node):
    """The root of a DOM tree; it accepts at most one element child.

    ``documentElement`` is that element, or None while there is none.  The
    ``create*`` attributes are the node classes themselves, so calling them
    builds the corresponding node.
    """
    nodeType = Node.DOCUMENT_NODE
    documentElement = None

    def __init__(self):
        Node.__init__(self)
        self.attributes = None
        self.nodeName = "#document"
        self.nodeValue = None

    def appendChild(self, node):
        """Append ``node`` and return it.

        Raises TypeError when ``node`` is an element and the document
        already has a document element.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if self.documentElement:
                raise TypeError("Two document elements disallowed")
            else:
                self.documentElement = node
        Node.appendChild(self, node)
        return node

    createElement = Element

    createTextNode = Text

    createComment = Comment

    createProcessingInstruction = ProcessingInstruction

    createAttribute = Attr

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create an element from a namespace URI and a ``prefix:local`` name."""
        prefix, localName = _nssplit(qualifiedName)
        return Element(qualifiedName, namespaceURI, prefix, localName)

    def createAttributeNS(self, namespaceURI, qualifiedName):
        """Create an attribute from a namespace URI and a ``prefix:local`` name."""
        prefix, localName = _nssplit(qualifiedName)
        # Attr takes the qualified name first and the namespace URI second.
        return Attr(qualifiedName, namespaceURI, localName, prefix)

    def getElementsByTagNameNS(self, namespaceURI, localName):
        """Return a list of elements matching the namespace and local name."""
        rc = []
        _getElementsByTagNameNSHelper(self, namespaceURI, localName, rc)
        return rc

    def unlink(self):
        """Forget the document element, then unlink the whole tree."""
        self.documentElement = None
        Node.unlink(self)

    def getElementsByTagName(self, name):
        """Return a list of elements in the document whose tag name matches."""
        rc = []
        _getElementsByTagNameHelper(self, name, rc)
        return rc

    def writexml(self, writer):
        """Write every child of the document to ``writer`` in order."""
        for node in self.childNodes:
            node.writexml(writer)
| def _doparse(func, args, kwargs): |
| events = apply(func, args, kwargs) |
| toktype, rootNode = events.getEvent() |
| events.expandNode(rootNode) |
| return rootNode |
| |
def parse(*args, **kwargs):
    """Build a DOM from a file, given by filename or as a file object.

    All arguments are handed on unchanged to ``pulldom.parse``.
    """
    tree = _doparse(pulldom.parse, args, kwargs)
    return tree
def parseString(*args, **kwargs):
    """Build a DOM from XML text held in a string.

    All arguments are handed on unchanged to ``pulldom.parseString``.
    """
    tree = _doparse(pulldom.parseString, args, kwargs)
    return tree