|  | """ | 
|  | This module contains the core classes of version 2.0 of SAX for Python. | 
|  | This file provides only default classes with absolutely minimum | 
|  | functionality, from which drivers and applications can be subclassed. | 
|  |  | 
|  | Many of these classes are empty and are included only as documentation | 
|  | of the interfaces. | 
|  |  | 
|  | $Id$ | 
|  | """ | 
|  |  | 
# Version string for this SAX 2.0 implementation.
version = "2.0beta"
|  |  | 
|  | #============================================================================ | 
|  | # | 
|  | # HANDLER INTERFACES | 
|  | # | 
|  | #============================================================================ | 
|  |  | 
|  | # ===== ERRORHANDLER ===== | 
|  |  | 
class ErrorHandler:
    """Base interface for SAX error handlers.

    An object implementing this interface can be registered with an
    XMLReader; the parser then calls the methods below to report every
    warning and error.  Three severity levels exist: warnings,
    (possibly) recoverable errors, and unrecoverable errors.  Each
    method receives a SAXParseException as its only argument.
    """

    def error(self, exception):
        """Handle a recoverable error.

        The default implementation raises the exception it was given.
        """
        raise exception

    def fatalError(self, exception):
        """Handle a non-recoverable error.

        The default implementation raises the exception it was given.
        """
        raise exception

    def warning(self, exception):
        """Handle a warning.

        The default implementation prints the exception.
        """
        print(exception)
|  |  | 
|  |  | 
|  | # ===== CONTENTHANDLER ===== | 
|  |  | 
class ContentHandler:
    """Callback interface for logical document content events.

    This is the central SAX callback interface and the one that
    matters most to applications.  Events are delivered in the same
    order in which the corresponding information appears in the
    document.
    """

    def __init__(self):
        # No locator is known until the parser supplies one.
        self._locator = None

    def setDocumentLocator(self, locator):
        """Store the locator the parser offers for locating events.

        SAX parsers are strongly encouraged (but not strictly
        required) to provide a locator.  A parser that does so must
        hand it to the application through this method before it
        invokes any other method of this interface.

        With the locator the application can find the end position of
        any document-related event, even when the parser is not
        reporting an error.  Applications typically use it to report
        their own errors (for instance character content that breaks
        an application's business rules).  The information it returns
        is probably not sufficient for use with a search engine.

        The locator only returns correct information while one of the
        events of this interface is being delivered; the application
        should not try to use it at any other time.
        """
        self._locator = locator

    def startDocument(self):
        """Be notified that a document begins.

        The SAX parser calls this method exactly once, ahead of every
        other method in this interface or in DTDHandler (with the
        exception of setDocumentLocator).
        """

    def endDocument(self):
        """Be notified that a document ends.

        The SAX parser calls this method exactly once, and it is the
        last method called during the parse.  The parser shall not
        call it until it has either given up parsing (because of an
        unrecoverable error) or reached the end of the input.
        """

    def startPrefixMapping(self, prefix, uri):
        """Open the scope of a prefix-URI Namespace mapping.

        Normal Namespace processing does not need the information
        from this event: the SAX XML reader replaces prefixes in
        element and attribute names automatically when the
        http://xml.org/sax/features/namespaces feature is true (the
        default).

        Some applications, however, use prefixes in character data or
        in attribute values, where they cannot safely be expanded
        automatically.  The start/endPrefixMapping events give such
        applications what they need to expand prefixes in those
        contexts themselves, if necessary.

        start/endPrefixMapping events are not guaranteed to be
        properly nested relative to each other: every
        startPrefixMapping event occurs before the corresponding
        startElement event and every endPrefixMapping event occurs
        after the corresponding endElement event, but their order is
        not guaranteed.
        """

    def endPrefixMapping(self, prefix):
        """Close the scope of a prefix-URI mapping.

        See startPrefixMapping for details.  This event always occurs
        after the corresponding endElement event, but the order of
        endPrefixMapping events is not otherwise guaranteed.
        """

    def startElement(self, name, attrs):
        """Signal the start of an element in non-namespace mode.

        name is the raw XML 1.0 name of the element type as a string;
        attrs is an instance of the Attributes class holding the
        attributes of the element.
        """

    def endElement(self, name):
        """Signal the end of an element in non-namespace mode.

        name is the name of the element type, exactly as in the
        startElement event.
        """

    def startElementNS(self, name, qname, attrs):
        """Signal the start of an element in namespace mode.

        name is the name of the element type as a (uri, localname)
        tuple, qname is the raw XML 1.0 name used in the source
        document, and attrs is an instance of the Attributes class
        holding the attributes of the element.

        The uri part of the name tuple is None for elements that have
        no namespace.
        """

    def endElementNS(self, name, qname):
        """Signal the end of an element in namespace mode.

        name is the name of the element type, exactly as in the
        startElementNS event.
        """

    def characters(self, content):
        """Be notified of character data.

        The parser calls this method to report each chunk of
        character data.  SAX parsers may deliver all contiguous
        character data in one chunk or split it into several chunks;
        however, all characters in any single event must come from
        the same external entity so that the Locator provides useful
        information.
        """

    def ignorableWhitespace(self, whitespace):
        """Be notified of ignorable whitespace in element content.

        Validating parsers must use this method to report each chunk
        of ignorable whitespace (see the W3C XML 1.0 recommendation,
        section 2.10).  Non-validating parsers may also use it if
        they are capable of parsing and using content models.

        SAX parsers may deliver all contiguous whitespace in one
        chunk or split it into several chunks; however, all
        characters in any single event must come from the same
        external entity so that the Locator provides useful
        information.
        """

    def processingInstruction(self, target, data):
        """Be notified of a processing instruction.

        The parser calls this method once for each processing
        instruction found.  Processing instructions may occur before
        or after the main document element.

        A SAX parser should never use this method to report an XML
        declaration (XML 1.0, section 2.8) or a text declaration
        (XML 1.0, section 4.3.1).
        """

    def skippedEntity(self, name):
        """Be notified of a skipped entity.

        The parser calls this method once for each entity skipped.
        Non-validating processors may skip entities whose
        declarations they have not seen (because, for example, the
        entity was declared in an external DTD subset).  All
        processors may skip external entities, depending on the
        values of the
        http://xml.org/sax/features/external-general-entities and the
        http://xml.org/sax/features/external-parameter-entities
        properties.
        """
|  |  | 
|  |  | 
|  | # ===== DTDHandler ===== | 
|  |  | 
class DTDHandler:
    """Receive DTD events.

    Only the DTD events needed for basic parsing are covered by this
    interface (unparsed entities and attributes).
    """

    def notationDecl(self, name, publicId, systemId):
        """Handle a notation declaration event."""

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        """Handle an unparsed entity declaration event."""
|  |  | 
|  |  | 
|  | # ===== ENTITYRESOLVER ===== | 
|  |  | 
class EntityResolver:
    """Base interface for resolving entities.

    An object implementing this interface can be registered with a
    Parser; the parser then calls the method below to resolve all
    external entities.  Note that DefaultHandler implements this
    interface with the default behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity.

        Returns either the system identifier to read from, as a
        string, or an InputSource to read from.  This default
        implementation hands back systemId unchanged.
        """
        return systemId
|  |  | 
|  |  | 
|  | #============================================================================ | 
|  | # | 
|  | # CORE FEATURES | 
|  | # | 
|  | #============================================================================ | 
|  |  | 
feature_namespaces = "http://xml.org/sax/features/namespaces"
# true:   Namespace processing is performed (default).
# false:  Namespace processing may optionally be skipped
#         (implies namespace-prefixes).
# access: read-only while parsing; read/write when not parsing

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true:   The original prefixed names and the attributes used for
#         Namespace declarations are reported.
# false:  Attributes used for Namespace declarations are not reported,
#         and original prefixed names are optionally not reported
#         (default).
# access: read-only while parsing; read/write when not parsing

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true:   Every element name, prefix, attribute name, Namespace URI and
#         local name is interned using the built-in intern function.
# false:  Names are not necessarily interned, although they may be
#         (default).
# access: read-only while parsing; read/write when not parsing

feature_validation = "http://xml.org/sax/features/validation"
# true:   All validation errors are reported (implies
#         external-general-entities and external-parameter-entities).
# false:  Validation errors are not reported.
# access: read-only while parsing; read/write when not parsing

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true:   All external general (text) entities are included.
# false:  External general entities are not included.
# access: read-only while parsing; read/write when not parsing

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true:   All external parameter entities are included, including the
#         external DTD subset.
# false:  No external parameter entities are included, not even the
#         external DTD subset.
# access: read-only while parsing; read/write when not parsing

# Every core feature name defined above, in definition order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
|  |  | 
|  |  | 
|  | #============================================================================ | 
|  | # | 
|  | # CORE PROPERTIES | 
|  | # | 
|  | #============================================================================ | 
|  |  | 
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type:   xml.sax.sax2lib.LexicalHandler
# description: Optional extension handler for lexical events such as
#              comments.
# access:      read/write

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type:   xml.sax.sax2lib.DeclHandler
# description: Optional extension handler for DTD-related events other
#              than notations and unparsed entities.
# access:      read/write

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type:   org.w3c.dom.Node
# description: While parsing, the DOM node currently being visited if
#              this is a DOM iterator; when not parsing, the root DOM
#              node for iteration.
# access:      read-only while parsing; read/write when not parsing

property_xml_string = "http://xml.org/sax/properties/xml-string"
# data type:   String
# description: The literal string of characters that was the source
#              for the current event.
# access:      read-only

property_encoding = "http://www.python.org/sax/properties/encoding"
# data type:     String
# description:   Name of the encoding to assume for input data.
# access:        write: set the encoding, e.g. one established by a
#                       higher-level protocol.  May change during
#                       parsing (e.g. after processing a META tag).
#                read:  return the current encoding (possibly
#                       established through auto-detection).
# initial value: UTF-8

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type:   Dictionary
# description: The dictionary used to intern common strings in the
#              document.
# access:      write: request that the parser use a specific
#                     dictionary, to allow interning across different
#                     documents.
#              read:  return the current interning dictionary, or None.

# Every core property name defined above.  The order of this list is
# kept exactly as it was (dom-node precedes declaration-handler).
all_properties = [
    property_lexical_handler,
    property_dom_node,
    property_declaration_handler,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]