Paul Prescod <paul@prescod.net>:
SAX interfaces for Python.
diff --git a/Lib/xml/sax/expatreader.py b/Lib/xml/sax/expatreader.py
new file mode 100644
index 0000000..b6816a4
--- /dev/null
+++ b/Lib/xml/sax/expatreader.py
@@ -0,0 +1,229 @@
+"""
+SAX driver for the Pyexpat C module.  This driver works with
+pyexpat.__version__ == '1.5'.
+
+$Id$
+"""
+
+# Todo on driver:
+#  - make it support external entities (wait for pyexpat.c)
+#  - enable configuration between reset() and feed() calls
+#  - support lexical events?
+#  - proper inputsource handling
+#  - properties and features
+
+# Todo on pyexpat.c:
+#  - support XML_ExternalEntityParserCreate
+#  - exceptions in callouts from pyexpat to python code lose position info
+
+version = "0.20"
+
+from string import split
+
+from xml.sax import xmlreader
+import pyexpat
+import xml.sax
+
+# --- ExpatParser
+
+class ExpatParser( xmlreader.IncrementalParser, xmlreader.Locator ):
+    """SAX driver for the Pyexpat C module.
+
+    The reader serves as its own Locator and forwards expat callbacks to
+    the registered SAX content and DTD handlers.
+    """
+
+    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
+        "A true namespaceHandling makes expat do namespace processing."
+        xmlreader.IncrementalParser.__init__(self, bufsize)
+        self._source = None
+        self._parser = None
+        self._namespaces = namespaceHandling
+        self._parsing = 0
+
+    # XMLReader methods
+
+    def parse(self, stream_or_string ):
+        """Parse an XML document from a file name or an open stream.
+
+        A string argument is opened as a file and closed again afterwards.
+        Expat errors are re-raised as xml.sax.SAXParseException.
+        """
+        opened_here = type( stream_or_string ) == type( "" )
+        if opened_here:
+            stream = open( stream_or_string )
+        else:
+            stream = stream_or_string
+
+        self.reset()
+        self._cont_handler.setDocumentLocator(self)
+        # NOTE: an endDocument() call used to follow the raise below, where it
+        # could never run; endDocument() is emitted by close().
+        try:
+            try:
+                xmlreader.IncrementalParser.parse(self, stream)
+            except pyexpat.error:
+                error_code = self._parser.ErrorCode
+                raise xml.sax.SAXParseException(
+                    pyexpat.ErrorString(error_code), None, self)
+        finally:
+            if opened_here:
+                stream.close()
+
+    def prepareParser(self, filename=None):
+        "Remember the document source and hand it to expat as the base."
+        self._source = filename
+        if self._source is not None:
+            self._parser.SetBase(self._source)
+
+    def getFeature(self, name):
+        "Looks up and returns the state of a SAX2 feature."
+        raise xml.sax.SAXNotRecognizedException(
+            "Feature '%s' not recognized" % name)
+
+    def setFeature(self, name, state):
+        "Sets the state of a SAX2 feature."
+        raise xml.sax.SAXNotRecognizedException(
+            "Feature '%s' not recognized" % name)
+
+    def getProperty(self, name):
+        "Looks up and returns the value of a SAX2 property."
+        raise xml.sax.SAXNotRecognizedException(
+            "Property '%s' not recognized" % name)
+
+    def setProperty(self, name, value):
+        "Sets the value of a SAX2 property."
+        raise xml.sax.SAXNotRecognizedException(
+            "Property '%s' not recognized" % name)
+
+    # IncrementalParser methods
+
+    def feed(self, data):
+        "Hand a chunk of the document to expat, starting a parse if needed."
+        if not self._parsing:
+            self._parsing=1
+            self.reset()
+            self._cont_handler.startDocument()
+        # FIXME: error checking and endDocument()
+        self._parser.Parse(data, 0)
+
+    def close(self):
+        "Tell expat the input is complete, then report endDocument()."
+        was_parsing = self._parsing
+        self._parsing = 0
+        # Flush expat first: events it still holds must reach the handler
+        # before endDocument(), not after it.
+        self._parser.Parse("", 1)
+        if was_parsing:
+            self._cont_handler.endDocument()
+
+    def reset(self):
+        "Create a fresh expat parser and install the event callbacks."
+        if self._namespaces:
+            self._parser = pyexpat.ParserCreate(None, " ")
+            self._parser.StartElementHandler = self.start_element_ns
+            self._parser.EndElementHandler = self.end_element_ns
+        else:
+            self._parser = pyexpat.ParserCreate()
+            self._parser.StartElementHandler = self.start_element
+            self._parser.EndElementHandler = self.end_element
+
+        # Route through our own methods so the content handler is looked
+        # up when the event fires, not frozen at reset() time.
+        self._parser.ProcessingInstructionHandler = self.processing_instruction
+        self._parser.CharacterDataHandler = self.character_data
+        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
+        self._parser.NotationDeclHandler = self.notation_decl
+        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
+        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
+        self._parser.ExternalEntityRefHandler = self.external_entity_ref
+        # Not hooked up yet: Comment, Start/EndCdataSection, Default,
+        # DefaultHandlerExpand and NotStandalone handlers.
+
+    # Locator methods
+
+    def getColumnNumber(self):
+        "Return expat's ErrorColumnNumber."
+        return self._parser.ErrorColumnNumber
+
+    def getLineNumber(self):
+        "Return expat's ErrorLineNumber."
+        return self._parser.ErrorLineNumber
+
+    def getPublicId(self):
+        "Return the source's public identifier, or None if it has none."
+        # _source is whatever prepareParser() received (a plain file name
+        # has no getPublicId method) or None before any parse.
+        getter = getattr(self._source, "getPublicId", None)
+        if getter is None:
+            return None
+        return getter()
+
+    def getSystemId(self):
+        "Return the base set on the expat parser."
+        return self._parser.GetBase()
+
+    # event handlers
+
+    def start_element(self, name, attrs):
+        "Report an element start with attrs wrapped in AttributesImpl."
+        self._cont_handler.startElement(name,
+                                 xmlreader.AttributesImpl(attrs, attrs))
+
+    def end_element(self, name):
+        self._cont_handler.endElement(name)
+
+    def _ns_name(self, name):
+        "Split expat's 'uri local' name into a (uri, local, prefix) tuple."
+        pair = name.split()
+        if len(pair) == 1:
+            return (None, name, None)
+        return tuple(pair) + (None,) # prefix is not implemented yet!
+
+    def start_element_ns(self, name, attrs):
+        self._cont_handler.startElement(self._ns_name(name),
+                                        xmlreader.AttributesImpl(attrs, None))
+
+    def end_element_ns(self, name):
+        self._cont_handler.endElement(self._ns_name(name))
+
+    def processing_instruction(self, target, data):
+        self._cont_handler.processingInstruction(target, data)
+
+    def character_data(self, data):
+        self._cont_handler.characters(data)
+
+    def start_namespace_decl(self, prefix, uri):
+        self._cont_handler.startPrefixMapping(prefix, uri)
+
+    def end_namespace_decl(self, prefix):
+        self._cont_handler.endPrefixMapping(prefix)
+
+    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
+        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
+
+    def notation_decl(self, name, base, sysid, pubid):
+        self._dtd_handler.notationDecl(name, pubid, sysid)
+
+    def external_entity_ref(self, context, base, sysid, pubid):
+        "Placeholder: external entities are not supported yet (asserts)."
+        assert 0 # not implemented
+        source = self._ent_handler.resolveEntity(pubid, sysid)
+        source = saxutils.prepare_input_source(source)
+        # FIXME: create new parser, stack self._source and self._parser
+        # FIXME: reuse code from self.parse(...)
+        return 1
+        
+# ---
+        
+def create_parser(*args, **kwargs):
+    return ExpatParser(*args, **kwargs)  # factory hook: forwards every argument
+        
+# ---
+
+if __name__ == "__main__":  # manual smoke test: run a sample file through XMLGenerator
+    import xml.sax
+    demo_parser = create_parser()
+    demo_parser.setContentHandler(xml.sax.XMLGenerator())
+    demo_parser.setErrorHandler(xml.sax.ErrorHandler())
+    demo_parser.parse("../../../hamlet.xml")