"""
SAX driver for the pyexpat C module.  This driver works with
pyexpat.__version__ == '2.22'.
"""

version = "0.20"

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# is checked explicitly before expat is imported.
import sys
if sys.platform.startswith("java"):
    raise SAXReaderNotAvailable("expat not available in Java", None)
del sys  # only needed for the check above; keep it out of the namespace

try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    # An importable expat module that lacks ParserCreate cannot be used by
    # this driver, so it is reported the same way as a missing module.
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

# Module-level aliases for the attribute containers passed to the
# content handler by ExpatParser.
AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    # Binding the callable directly leaves no helper module names behind
    # in this module's namespace.
    from weakref import proxy as _mkproxy
except ImportError:
    def _mkproxy(o):
        """Fallback when weak references are unavailable: return o itself."""
        return o

 | 46 | # --- ExpatLocator | 
 | 47 |  | 
 | 48 | class ExpatLocator(xmlreader.Locator): | 
 | 49 |     """Locator for use with the ExpatParser class. | 
 | 50 |  | 
 | 51 |     This uses a weak reference to the parser object to avoid creating | 
 | 52 |     a circular reference between the parser and the content handler. | 
 | 53 |     """ | 
 | 54 |     def __init__(self, parser): | 
| Martin v. Löwis | 18476a3 | 2002-06-30 07:21:24 +0000 | [diff] [blame] | 55 |         self._ref = _mkproxy(parser) | 
| Fred Drake | 012c81f | 2002-04-04 17:57:08 +0000 | [diff] [blame] | 56 |  | 
 | 57 |     def getColumnNumber(self): | 
| Martin v. Löwis | 18476a3 | 2002-06-30 07:21:24 +0000 | [diff] [blame] | 58 |         parser = self._ref | 
 | 59 |         if parser._parser is None: | 
| Fred Drake | 012c81f | 2002-04-04 17:57:08 +0000 | [diff] [blame] | 60 |             return None | 
 | 61 |         return parser._parser.ErrorColumnNumber | 
 | 62 |  | 
 | 63 |     def getLineNumber(self): | 
| Martin v. Löwis | 18476a3 | 2002-06-30 07:21:24 +0000 | [diff] [blame] | 64 |         parser = self._ref | 
 | 65 |         if parser._parser is None: | 
| Fred Drake | 012c81f | 2002-04-04 17:57:08 +0000 | [diff] [blame] | 66 |             return 1 | 
| Fred Drake | da204da | 2002-04-04 19:12:31 +0000 | [diff] [blame] | 67 |         return parser._parser.ErrorLineNumber | 
| Fred Drake | 012c81f | 2002-04-04 17:57:08 +0000 | [diff] [blame] | 68 |  | 
 | 69 |     def getPublicId(self): | 
| Martin v. Löwis | 18476a3 | 2002-06-30 07:21:24 +0000 | [diff] [blame] | 70 |         parser = self._ref | 
| Fred Drake | 012c81f | 2002-04-04 17:57:08 +0000 | [diff] [blame] | 71 |         if parser is None: | 
 | 72 |             return None | 
 | 73 |         return parser._source.getPublicId() | 
 | 74 |  | 
 | 75 |     def getSystemId(self): | 
| Martin v. Löwis | 18476a3 | 2002-06-30 07:21:24 +0000 | [diff] [blame] | 76 |         parser = self._ref | 
| Fred Drake | 012c81f | 2002-04-04 17:57:08 +0000 | [diff] [blame] | 77 |         if parser is None: | 
 | 78 |             return None | 
 | 79 |         return parser._source.getSystemId() | 
 | 80 |  | 
| Martin v. Löwis | 3f0969f | 2000-09-29 19:00:40 +0000 | [diff] [blame] | 81 |  | 
# --- ExpatParser

class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
    """SAX driver for the pyexpat C module.

    namespaceHandling is the initial state of the namespaces feature;
    bufsize is passed on to xmlreader.IncrementalParser.

    The object is its own Locator: the Locator methods report the
    position of the underlying expat parser and the ids of the current
    input source.
    """

    def __init__(self, namespaceHandling=0, bufsize=2**16-20):
        xmlreader.IncrementalParser.__init__(self, bufsize)
        self._source = xmlreader.InputSource()
        self._parser = None             # expat parser, created by reset()
        self._namespaces = namespaceHandling
        self._lex_handler_prop = None
        self._parsing = 0
        self._entity_stack = []         # (parser, source) of enclosing entities
        self._external_ges = 1
        self._interning = None          # dict while string interning is on

    # XMLReader methods

    def parse(self, source):
        "Parse an XML document from a URL or an InputSource."
        source = saxutils.prepare_input_source(source)

        self._source = source
        self.reset()
        self._cont_handler.setDocumentLocator(ExpatLocator(self))
        xmlreader.IncrementalParser.parse(self, source)

    def prepareParser(self, source):
        """Give expat the source's system id (if any) as its base."""
        system_id = source.getSystemId()
        if system_id is not None:
            self._parser.SetBase(system_id)

    # Redefined setContentHandler to allow changing handlers during parsing

    def setContentHandler(self, handler):
        xmlreader.IncrementalParser.setContentHandler(self, handler)
        if self._parsing:
            # expat holds bound methods of the old handler; rebind them.
            self._reset_cont_handler()

    def getFeature(self, name):
        """Return the state of feature *name*.

        Raises SAXNotRecognizedException for an unknown feature name.
        """
        if name == feature_namespaces:
            return self._namespaces
        elif name == feature_string_interning:
            return self._interning is not None
        elif name in (feature_validation, feature_external_pes,
                      feature_namespace_prefixes):
            return 0
        elif name == feature_external_ges:
            return self._external_ges
        raise SAXNotRecognizedException("Feature '%s' not recognized" % name)

    def setFeature(self, name, state):
        """Set feature *name* to *state*.

        Raises SAXNotSupportedException while parsing, or when a feature
        this driver cannot provide is switched on, and
        SAXNotRecognizedException for an unknown feature name.
        """
        if self._parsing:
            raise SAXNotSupportedException("Cannot set features while parsing")

        if name == feature_namespaces:
            self._namespaces = state
        elif name == feature_external_ges:
            self._external_ges = state
        elif name == feature_string_interning:
            if state:
                # Keep an existing dictionary so its contents are reused.
                if self._interning is None:
                    self._interning = {}
            else:
                self._interning = None
        elif name == feature_validation:
            if state:
                raise SAXNotSupportedException(
                    "expat does not support validation")
        elif name == feature_external_pes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not read external parameter entities")
        elif name == feature_namespace_prefixes:
            if state:
                raise SAXNotSupportedException(
                    "expat does not report namespace prefixes")
        else:
            raise SAXNotRecognizedException(
                "Feature '%s' not recognized" % name)

    def getProperty(self, name):
        """Return the value of property *name*.

        Raises SAXNotSupportedException when the XML string is requested
        while no expat parser exists, and SAXNotRecognizedException for
        an unknown property or when expat cannot supply the XML string.
        """
        if name == handler.property_lexical_handler:
            return self._lex_handler_prop
        elif name == property_interning_dict:
            return self._interning
        elif name == property_xml_string:
            if self._parser:
                if hasattr(self._parser, "GetInputContext"):
                    return self._parser.GetInputContext()
                else:
                    raise SAXNotRecognizedException(
                        "This version of expat does not support getting"
                        " the XML string")
            else:
                raise SAXNotSupportedException(
                    "XML string cannot be returned when not parsing")
        raise SAXNotRecognizedException("Property '%s' not recognized" % name)

    def setProperty(self, name, value):
        """Set property *name* to *value*.

        Raises SAXNotSupportedException for the read-only XML string
        property and SAXNotRecognizedException for an unknown property.
        """
        if name == handler.property_lexical_handler:
            self._lex_handler_prop = value
            if self._parsing:
                self._reset_lex_handler_prop()
        elif name == property_interning_dict:
            self._interning = value
        elif name == property_xml_string:
            raise SAXNotSupportedException("Property '%s' cannot be set" %
                                           name)
        else:
            raise SAXNotRecognizedException("Property '%s' not recognized" %
                                            name)

    # IncrementalParser methods

    def feed(self, data, isFinal=0):
        """Pass a chunk of the document to expat.

        The first chunk of a document creates a fresh parser and emits
        startDocument().  expat errors are wrapped in a SAXParseException
        and handed to the error handler's fatalError().
        """
        if not self._parsing:
            self.reset()
            self._parsing = 1
            self._cont_handler.startDocument()

        try:
            # The isFinal parameter is internal to the expat reader.
            # If it is set to true, expat will check validity of the entire
            # document. When feeding chunks, they are not normally final -
            # except when invoked from close.
            self._parser.Parse(data, isFinal)
        except expat.error as e:
            exc = SAXParseException(expat.ErrorString(e.code), e, self)
            # FIXME: when to invoke error()?
            self._err_handler.fatalError(exc)

    def close(self):
        """Finish the current document.

        Does nothing while an external entity is being completed, or when
        there is no expat parser (nothing parsed yet, or already closed).
        """
        if self._entity_stack or self._parser is None:
            # If we are completing an external entity, do nothing here.
            # Without a parser there is no document to finish; feeding
            # would only start a new, empty one and report it as an error.
            return
        try:
            self.feed("", isFinal=1)
            self._cont_handler.endDocument()
            # break cycle created by expat handlers pointing to our methods
            self._parser = None
        finally:
            # Even if the error handler raised, this document is over; the
            # next feed() has to start a new one.  On failure the expat
            # parser is kept so the Locator methods still report the
            # position of the error.
            self._parsing = 0

    def _reset_cont_handler(self):
        """Point expat's PI and character-data callbacks at the handler."""
        self._parser.ProcessingInstructionHandler = \
                                    self._cont_handler.processingInstruction
        self._parser.CharacterDataHandler = self._cont_handler.characters

    def _reset_lex_handler_prop(self):
        """Install (or remove) the lexical handler's expat callbacks."""
        lex = self._lex_handler_prop
        parser = self._parser
        if lex is None:
            parser.CommentHandler = None
            parser.StartCdataSectionHandler = None
            parser.EndCdataSectionHandler = None
            parser.StartDoctypeDeclHandler = None
            parser.EndDoctypeDeclHandler = None
        else:
            parser.CommentHandler = lex.comment
            parser.StartCdataSectionHandler = lex.startCDATA
            parser.EndCdataSectionHandler = lex.endCDATA
            # startDTD takes its arguments in a different order than expat
            # supplies them, hence the adapter method.
            parser.StartDoctypeDeclHandler = self.start_doctype_decl
            parser.EndDoctypeDeclHandler = lex.endDTD

    def reset(self):
        """Create a fresh expat parser and connect all callbacks to it."""
        if self._namespaces:
            # " " is the separator expat puts between the parts of a
            # namespaced name; the *_ns handlers split on it again.
            self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
                                              intern=self._interning)
            self._parser.namespace_prefixes = 1
            self._parser.StartElementHandler = self.start_element_ns
            self._parser.EndElementHandler = self.end_element_ns
        else:
            self._parser = expat.ParserCreate(self._source.getEncoding(),
                                              intern=self._interning)
            self._parser.StartElementHandler = self.start_element
            self._parser.EndElementHandler = self.end_element

        self._reset_cont_handler()
        self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
        self._parser.NotationDeclHandler = self.notation_decl
        self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
        self._parser.EndNamespaceDeclHandler = self.end_namespace_decl

        self._decl_handler_prop = None
        if self._lex_handler_prop:
            self._reset_lex_handler_prop()
#         self._parser.DefaultHandler =
#         self._parser.DefaultHandlerExpand =
#         self._parser.NotStandaloneHandler =
        self._parser.ExternalEntityRefHandler = self.external_entity_ref
        try:
            self._parser.SkippedEntityHandler = self.skipped_entity_handler
        except AttributeError:
            # This pyexpat does not support SkippedEntity
            pass
        self._parser.SetParamEntityParsing(
            expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)

        self._parsing = 0
        self._entity_stack = []

    # Locator methods

    def getColumnNumber(self):
        if self._parser is None:
            return None
        return self._parser.ErrorColumnNumber

    def getLineNumber(self):
        if self._parser is None:
            return 1
        return self._parser.ErrorLineNumber

    def getPublicId(self):
        return self._source.getPublicId()

    def getSystemId(self):
        return self._source.getSystemId()

    # event handlers
    def start_element(self, name, attrs):
        self._cont_handler.startElement(name, AttributesImpl(attrs))

    def end_element(self, name):
        self._cont_handler.endElement(name)

    def _split_element_name(self, name):
        """Turn an expat element name into a two-item SAX name.

        In namespace mode expat joins the parts of a name with the
        separator given to ParserCreate (a space, see reset()).
        """
        parts = name.split()
        if len(parts) == 1:
            # no namespace
            return (None, name)
        if len(parts) == 3:
            # a third part is present; only the first two form the name
            return parts[0], parts[1]
        # default namespace
        return tuple(parts)

    def start_element_ns(self, name, attrs):
        pair = self._split_element_name(name)

        newattrs = {}
        qnames = {}
        for (aname, value) in attrs.items():
            parts = aname.split()
            length = len(parts)
            if length == 1:
                # no namespace
                qname = aname
                apair = (None, aname)
            elif length == 3:
                # third part is used as the prefix of the qualified name
                qname = "%s:%s" % (parts[2], parts[1])
                apair = parts[0], parts[1]
            else:
                # default namespace
                qname = parts[1]
                apair = tuple(parts)

            newattrs[apair] = value
            qnames[apair] = qname

        self._cont_handler.startElementNS(pair, None,
                                          AttributesNSImpl(newattrs, qnames))

    def end_element_ns(self, name):
        self._cont_handler.endElementNS(self._split_element_name(name), None)

    # this is not used (call directly to ContentHandler)
    def processing_instruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    # this is not used (call directly to ContentHandler)
    def character_data(self, data):
        self._cont_handler.characters(data)

    def start_namespace_decl(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def end_namespace_decl(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
        # Reorder for startDTD, which takes the public id first.
        self._lex_handler_prop.startDTD(name, pubid, sysid)

    def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
        self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)

    def notation_decl(self, name, base, sysid, pubid):
        self._dtd_handler.notationDecl(name, pubid, sysid)

    def external_entity_ref(self, context, base, sysid, pubid):
        """Parse an external entity with a sub-parser.

        Returns 1 when the entity was processed (or external general
        entities are switched off) and 0 when processing it failed.
        """
        if not self._external_ges:
            return 1

        source = self._ent_handler.resolveEntity(pubid, sysid)
        source = saxutils.prepare_input_source(source,
                                               self._source.getSystemId() or
                                               "")

        # Make the entity's parser and source current while it is parsed,
        # remembering the enclosing ones so they can be restored.
        self._entity_stack.append((self._parser, self._source))
        self._parser = self._parser.ExternalEntityParserCreate(context)
        self._source = source

        try:
            xmlreader.IncrementalParser.parse(self, source)
        except:
            # NOTE: the entity stack is not unwound on failure, so the
            # entity's parser and source stay current until reset().
            return 0  # FIXME: save error info here?

        (self._parser, self._source) = self._entity_stack[-1]
        del self._entity_stack[-1]
        return 1

    def skipped_entity_handler(self, name, is_pe):
        if is_pe:
            # The SAX spec requires to report skipped PEs with a '%'
            name = '%'+name
        self._cont_handler.skippedEntity(name)

# ---

def create_parser(*args, **kwargs):
    """Return a new ExpatParser, forwarding all arguments to its constructor."""
    return ExpatParser(*args, **kwargs)

# ---

if __name__ == "__main__":
    # Ad-hoc smoke test: parse a document and echo it via XMLGenerator.
    import sys
    import xml.sax
    import xml.sax.saxutils
    # An optional command-line argument overrides the historical default.
    if len(sys.argv) > 1:
        path = sys.argv[1]
    else:
        path = "../../../hamlet.xml"
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils, not in xml.sax itself.
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse(path)