| """ |
| SAX driver for the Pyexpat C module. This driver works with |
| pyexpat.__version__ == '1.5'. |
| |
| $Id$ |
| """ |
| |
| # Todo on driver: |
| # - make it support external entities (wait for pyexpat.c) |
| # - enable configuration between reset() and feed() calls |
| # - support lexical events? |
| # - proper inputsource handling |
| # - properties and features |
| |
| # Todo on pyexpat.c: |
| # - support XML_ExternalEntityParserCreate |
| # - exceptions in callouts from pyexpat to python code lose position info |
| |
| version = "0.20" |
| |
| from string import split |
| |
| from xml.sax import xmlreader |
| import pyexpat |
| import xml.sax |
| |
| # --- ExpatParser |
| |
| class ExpatParser( xmlreader.IncrementalParser, xmlreader.Locator ): |
| "SAX driver for the Pyexpat C module." |
| |
| def __init__(self, namespaceHandling=0, bufsize=2**16-20): |
| xmlreader.IncrementalParser.__init__(self, bufsize) |
| self._source = None |
| self._parser = None |
| self._namespaces = namespaceHandling |
| self._parsing = 0 |
| |
| # XMLReader methods |
| |
| def parse(self, stream_or_string ): |
| "Parse an XML document from a URL." |
| if type( stream_or_string ) == type( "" ): |
| stream=open( stream_or_string ) |
| else: |
| stream=stream_or_string |
| |
| self.reset() |
| self._cont_handler.setDocumentLocator(self) |
| try: |
| xmlreader.IncrementalParser.parse(self, stream) |
| except pyexpat.error: |
| error_code = self._parser.ErrorCode |
| raise xml.sax.SAXParseException(pyexpat.ErrorString(error_code), |
| None, self) |
| |
| self._cont_handler.endDocument() |
| |
| def prepareParser(self, filename=None): |
| self._source = filename |
| |
| if self._source != None: |
| self._parser.SetBase(self._source) |
| |
| def getFeature(self, name): |
| "Looks up and returns the state of a SAX2 feature." |
| raise SAXNotRecognizedException("Feature '%s' not recognized" % name) |
| |
| def setFeature(self, name, state): |
| "Sets the state of a SAX2 feature." |
| raise SAXNotRecognizedException("Feature '%s' not recognized" % name) |
| |
| def getProperty(self, name): |
| "Looks up and returns the value of a SAX2 property." |
| raise SAXNotRecognizedException("Property '%s' not recognized" % name) |
| |
| def setProperty(self, name, value): |
| "Sets the value of a SAX2 property." |
| raise SAXNotRecognizedException("Property '%s' not recognized" % name) |
| |
| # IncrementalParser methods |
| |
| def feed(self, data): |
| if not self._parsing: |
| self._parsing=1 |
| self.reset() |
| self._cont_handler.startDocument() |
| # FIXME: error checking and endDocument() |
| self._parser.Parse(data, 0) |
| |
| def close(self): |
| if self._parsing: |
| self._cont_handler.endDocument() |
| self._parsing=0 |
| self._parser.Parse("", 1) |
| |
| def reset(self): |
| if self._namespaces: |
| self._parser = pyexpat.ParserCreate(None, " ") |
| self._parser.StartElementHandler = self.start_element_ns |
| self._parser.EndElementHandler = self.end_element_ns |
| else: |
| self._parser = pyexpat.ParserCreate() |
| self._parser.StartElementHandler = self._cont_handler.startElement |
| self._parser.EndElementHandler = self._cont_handler.endElement |
| |
| self._parser.ProcessingInstructionHandler = \ |
| self._cont_handler.processingInstruction |
| self._parser.CharacterDataHandler = self._cont_handler.characters |
| self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl |
| self._parser.NotationDeclHandler = self.notation_decl |
| self._parser.StartNamespaceDeclHandler = self.start_namespace_decl |
| self._parser.EndNamespaceDeclHandler = self.end_namespace_decl |
| # self._parser.CommentHandler = |
| # self._parser.StartCdataSectionHandler = |
| # self._parser.EndCdataSectionHandler = |
| # self._parser.DefaultHandler = |
| # self._parser.DefaultHandlerExpand = |
| # self._parser.NotStandaloneHandler = |
| self._parser.ExternalEntityRefHandler = self.external_entity_ref |
| |
| # Locator methods |
| |
| def getColumnNumber(self): |
| return self._parser.ErrorColumnNumber |
| |
| def getLineNumber(self): |
| return self._parser.ErrorLineNumber |
| |
| def getPublicId(self): |
| return self._source.getPublicId() |
| |
| def getSystemId(self): |
| return self._parser.GetBase() |
| |
| # internal methods |
| |
| # event handlers |
| |
| def start_element(self, name, attrs): |
| self._cont_handler.startElement(name, |
| xmlreader.AttributesImpl(attrs, attrs)) |
| |
| def end_element(self, name): |
| self._cont_handler.endElement(name) |
| |
| def start_element_ns(self, name, attrs): |
| pair = split(name) |
| if len(pair) == 1: |
| tup = (None, name, None) |
| else: |
| tup = pair+[None] # prefix is not implemented yet! |
| |
| self._cont_handler.startElement(tup, |
| xmlreader.AttributesImpl(attrs, None)) |
| |
| def end_element_ns(self, name): |
| pair = split(name) |
| if len(pair) == 1: |
| name = (None, name, None) |
| else: |
| name = pair+[None] # prefix is not implemented yet! |
| |
| self._cont_handler.endElement(name) |
| |
| def processing_instruction(self, target, data): |
| self._cont_handler.processingInstruction(target, data) |
| |
| def character_data(self, data): |
| self._cont_handler.characters(data) |
| |
| def start_namespace_decl(self, prefix, uri): |
| self._cont_handler.startPrefixMapping(prefix, uri) |
| |
| def end_namespace_decl(self, prefix): |
| self._cont_handler.endPrefixMapping(prefix) |
| |
| def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name): |
| self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name) |
| |
| def notation_decl(self, name, base, sysid, pubid): |
| self._dtd_handler.notationDecl(name, pubid, sysid) |
| |
| def external_entity_ref(self, context, base, sysid, pubid): |
| assert 0 # not implemented |
| source = self._ent_handler.resolveEntity(pubid, sysid) |
| source = saxutils.prepare_input_source(source) |
| # FIXME: create new parser, stack self._source and self._parser |
| # FIXME: reuse code from self.parse(...) |
| return 1 |
| |
| # --- |
| |
| def create_parser(*args, **kwargs): |
| return apply( ExpatParser, args, kwargs ) |
| |
| # --- |
| |
| if __name__ == "__main__": |
| import xml.sax |
| p = create_parser() |
| p.setContentHandler(xml.sax.XMLGenerator()) |
| p.setErrorHandler(xml.sax.ErrorHandler()) |
| p.parse("../../../hamlet.xml") |