blob: 893af762ad4474da2e020fe9a4974bf58dfd1aa3 [file] [log] [blame]
"""Miscellaneous utility functions useful for dealing with ESIS streams."""
Fred Drake3843bae1998-12-01 19:00:58 +00002
3import re
4import string
Fred Drakef6c115f2001-03-23 16:42:08 +00005
6import xml.dom.pulldom
7
8import xml.sax
9import xml.sax.handler
10import xml.sax.xmlreader
Fred Drake3843bae1998-12-01 19:00:58 +000011
12
# Matches the longest run of characters that contains no backslash.
_data_match = re.compile(r"[^\\][^\\]*").match

try:
    _unichr = unichr        # Python 2: builds a unicode character
except NameError:
    _unichr = chr           # Python 3: chr() already covers all of Unicode


def decode(s):
    r"""Return the ESIS-escaped string `s` with its escapes expanded.

    Recognized escape sequences:

        \\      a single backslash
        \n      a newline
        \%N;    the character whose code point is the decimal number N

    Everything else is copied through unchanged.

    Raises ValueError for an unknown escape, for a backslash that ends
    the string, and for a malformed ``\%N;`` sequence.
    """
    parts = []
    while s:
        m = _data_match(s)
        if m:
            parts.append(m.group())
            s = s[m.end():]
            continue
        # No match means s starts with a backslash.  Slice (rather than
        # index) the next character so that a trailing lone backslash falls
        # through to the ValueError below instead of raising IndexError.
        escape = s[1:2]
        if escape == "\\":
            parts.append("\\")
            s = s[2:]
        elif escape == "n":
            parts.append("\n")
            s = s[2:]
        elif escape == "%":
            number, terminator, rest = s[2:].partition(";")
            if not terminator:
                raise ValueError("can't handle " + repr(s))
            # int() raises ValueError itself when the number is malformed.
            parts.append(_unichr(int(number)))
            s = rest
        else:
            raise ValueError("can't handle " + repr(s))
    return "".join(parts)
35
36
# Escape table for encode(): each of the 256 8-bit characters maps to
# itself, except newline and backslash, which map to their two-character
# escaped forms.
_charmap = {}
for c in map(chr, range(256)):
    _charmap[c] = c
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del c

_null_join = ''.join


def encode(s):
    r"""Return `s` with newlines and backslashes escaped for an ESIS stream.

    A newline becomes the two characters ``\n`` and a backslash becomes
    ``\\``.  All other characters are copied unchanged, including ones
    outside the 256-entry table, which are passed through so that
    decode(encode(s)) == s.
    """
    lookup = _charmap.get
    # Default to the character itself: a bare .get() would yield None for
    # characters missing from the table and make the join fail.
    return _null_join([lookup(ch, ch) for ch in s])
Fred Drake3843bae1998-12-01 19:00:58 +000047
48
class ESISReader(xml.sax.xmlreader.XMLReader):
    """SAX Reader which reads from an ESIS stream.

    No verification of the document structure is performed by the
    reader; a general verifier could be used as the target
    ContentHandler instance.

    Input is line oriented: the first character of every line is a token
    type mark and the rest of the line is that token's data.  Data can be
    supplied incrementally with feed() and close(), or read from an input
    source in one go with parse().

    """
    _decl_handler = None
    _lexical_handler = None

    # Identifiers collected from 'p' and 's' tokens; they are handed to the
    # DTD handler by the next 'N' token and then reset.
    _public_id = None
    _system_id = None

    _buffer = ""        # unterminated tail of the data passed to feed()
    _is_empty = 0       # set by an 'e' token; applies to the next '(' token
    _lineno = 0         # number of input lines processed by feed()/close()
    _started = 0        # true once feed() has called startDocument()

    def __init__(self, contentHandler=None, errorHandler=None):
        xml.sax.xmlreader.XMLReader.__init__(self)
        # The dict is shared with self._attributes: 'A' tokens fill it and
        # it is cleared again after every '(' token.
        self._attrs = {}
        self._attributes = Attributes(self._attrs)
        self._locator = Locator()
        self._empties = {}
        if contentHandler:
            self.setContentHandler(contentHandler)
        if errorHandler:
            self.setErrorHandler(errorHandler)

    def get_empties(self):
        """Return a list of the element names that were flagged as empty."""
        return list(self._empties)

    #
    #  XMLReader interface
    #

    def parse(self, source):
        """Read a complete ESIS stream from `source` and report its events.

        `source` must provide getPublicId(), getSystemId() and
        getByteStream(); the stream is read one line at a time until it is
        exhausted or an I/O error has been reported.
        """
        self._locator._public_id = source.getPublicId()
        self._locator._system_id = source.getSystemId()
        fp = source.getByteStream()
        handler = self.getContentHandler()
        if handler:
            handler.startDocument()
        lineno = 0
        while 1:
            # Point the locator at the line about to be read so that errors
            # reported while reading it carry the right line number.
            self._locator._lineno = lineno + 1
            token, data = self._get_token(fp)
            if token is None:
                break
            lineno = lineno + 1
            if token:
                # An empty token marks a blank line that _get_token() has
                # already reported; there is nothing to dispatch for it.
                self._handle_token(token, data)
        self._locator._lineno = lineno
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()

    def feed(self, data):
        """Process the next chunk of ESIS text.

        Complete lines are handled immediately; an unterminated trailing
        line is kept until the next feed() or close() call.
        """
        if not self._started:
            handler = self.getContentHandler()
            if handler:
                handler.startDocument()
            self._started = 1
        lines = (self._buffer + self._as_text(data)).split("\n")
        # split() always returns at least one item, the last being the
        # (possibly empty) unterminated tail.  Store it before dispatching
        # so the reader stays usable if a handler raises part-way through.
        self._buffer = lines.pop()
        for line in lines:
            self._process_line(line)

    def close(self):
        """Finish the document started with feed().

        A final line that was not terminated by a newline is processed
        before endDocument() is reported.
        """
        tail = self._buffer
        self._buffer = ""
        if tail:
            self._process_line(tail)
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()

    def _as_text(self, data):
        # On Python 3 a byte stream yields bytes, which cannot be compared
        # with the str token marks.  NOTE(review): ISO-8859-1 is assumed
        # because encode() in this module works from a 256-entry character
        # table -- confirm against the producers of the ESIS data.
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode("iso-8859-1")
        return data

    def _process_line(self, line):
        # Account for one complete input line and dispatch its token.
        self._lineno = self._lineno + 1
        self._locator._lineno = self._lineno
        if not line:
            e = xml.sax.SAXParseException(
                "ESIS input line contains no token type mark",
                None, self._locator)
            self.getErrorHandler().error(e)
        else:
            self._handle_token(line[0], line[1:])

    def _get_token(self, fp):
        """Read one line from `fp` and return it as a (token, data) pair.

        Returns (None, None) at end of input or after an I/O error has been
        reported to the error handler, and ("", "") for a blank line (which
        is reported to the error handler as well).
        """
        try:
            line = fp.readline()
        except IOError as err:
            e = xml.sax.SAXException("I/O error reading input stream", err)
            self.getErrorHandler().fatalError(e)
            return None, None
        if not line:
            return None, None
        line = self._as_text(line)
        if line[-1] == "\n":
            line = line[:-1]
        if not line:
            e = xml.sax.SAXParseException(
                "ESIS input line contains no token type mark",
                None, self._locator)
            self.getErrorHandler().error(e)
            return "", ""
        return line[0], line[1:]

    def _handle_token(self, token, data):
        """Translate a single ESIS token into the matching SAX call."""
        handler = self.getContentHandler()
        if token == '-':
            # character data
            if data and handler:
                handler.characters(decode(data))
        elif token == ')':
            # end tag
            if handler:
                handler.endElement(decode(data))
        elif token == '(':
            # start tag; the attributes were collected from preceding 'A'
            # tokens and are discarded once the handler has seen them
            if self._is_empty:
                self._empties[data] = 1
            if handler:
                handler.startElement(data, self._attributes)
            self._attrs.clear()
            self._is_empty = 0
        elif token == 'A':
            # attribute for the next start tag: "name IMPLIED" or
            # "name type value"; implied attributes are not recorded
            name, value = data.split(' ', 1)
            if value != "IMPLIED":
                attr_type, value = value.split(' ', 1)
                self._attrs[name] = (decode(value), attr_type)
        elif token == '&':
            # entity reference in SAX?
            pass
        elif token == '?':
            # processing instruction: target, optionally followed by data
            if handler:
                if ' ' in data:
                    fields = data.split(None, 1)
                else:
                    fields = [data]
                # Pad so that data consisting of a target plus trailing
                # whitespace (or of whitespace only) still unpacks.
                fields = fields + ["", ""]
                target, text = fields[0], fields[1]
                handler.processingInstruction(target, decode(text))
        elif token == 'N':
            # notation declaration, using the ids from earlier 'p'/'s' tokens
            handler = self.getDTDHandler()
            if handler:
                handler.notationDecl(data, self._public_id, self._system_id)
            self._public_id = None
            self._system_id = None
        elif token == 'p':
            self._public_id = decode(data)
        elif token == 's':
            self._system_id = decode(data)
        elif token == 'e':
            # the next start tag belongs to an empty element
            self._is_empty = 1
        elif token == 'C':
            pass
        else:
            e = xml.sax.SAXParseException(
                "unknown ESIS token in event stream",
                None, self._locator)
            self.getErrorHandler().error(e)

    def setContentHandler(self, handler):
        """Install `handler`, moving the document locator over to it."""
        old = self.getContentHandler()
        if old:
            old.setDocumentLocator(None)
        if handler:
            handler.setDocumentLocator(self._locator)
        xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)

    def getProperty(self, property):
        """Return the lexical or declaration handler property.

        Raises xml.sax.SAXNotRecognizedException for any other property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            return self._lexical_handler

        elif property == xml.sax.handler.property_declaration_handler:
            return self._decl_handler

        else:
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def setProperty(self, property, value):
        """Set the lexical or declaration handler property.

        The document locator is withdrawn from the handler being replaced
        and given to the new one.  Raises
        xml.sax.SAXNotRecognizedException for any other property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            if self._lexical_handler:
                self._lexical_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._lexical_handler = value

        elif property == xml.sax.handler.property_declaration_handler:
            if self._decl_handler:
                self._decl_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._decl_handler = value

        else:
            # The exception class requires a message argument.
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def getFeature(self, feature):
        """Report the namespaces feature as on; defer to the base class otherwise."""
        if feature == xml.sax.handler.feature_namespaces:
            return 1
        else:
            return xml.sax.xmlreader.XMLReader.getFeature(self, feature)

    def setFeature(self, feature, enabled):
        """Accept and ignore the namespaces feature; defer to the base class otherwise."""
        if feature == xml.sax.handler.feature_namespaces:
            pass
        else:
            xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
252
253
class Attributes(xml.sax.xmlreader.AttributesImpl):
    """Attribute collection backed by a {name: (value, type)} dictionary.

    The accessors below unpack the (value, type) pairs, so callers see
    plain attribute values and can ask for the declared type separately.
    """
    # self._attrs has the form {name: (value, type)}

    def getType(self, name):
        """Return the declared type of attribute `name`; KeyError if absent."""
        return self._attrs[name][1]

    def getValue(self, name):
        """Return the value of attribute `name`; KeyError if absent."""
        return self._attrs[name][0]

    def getValueByQName(self, name):
        """Return the value of attribute `name`; KeyError if absent."""
        return self._attrs[name][0]

    def __getitem__(self, name):
        return self._attrs[name][0]

    def get(self, name, default=None):
        """Return the value of attribute `name`, or `default` if absent."""
        if name in self._attrs:
            return self._attrs[name][0]
        return default

    def copy(self):
        """Return an independent snapshot of the attributes.

        ESISReader clears the shared dictionary after every start tag, so
        a copy is only useful if it owns its own dictionary; CPython's
        inherited implementation hands the same dictionary to the new object.
        """
        return self.__class__(self._attrs.copy())

    def items(self):
        """Return a list of (name, value) pairs."""
        return [(name, value)
                for name, (value, _type) in self._attrs.items()]

    def values(self):
        """Return a list of the attribute values."""
        return [value for value, _type in self._attrs.values()]
285
286
class Locator(xml.sax.xmlreader.Locator):
    """Document locator whose position is stored in plain attributes.

    ESISReader assigns _lineno, _public_id and _system_id directly; the
    methods below only report the current values.
    """
    _lineno = -1            # -1 until the reader has assigned a line number
    _public_id = None
    _system_id = None

    def getLineNumber(self):
        """Return the most recently recorded line number."""
        return self._lineno

    def getPublicId(self):
        """Return the recorded public identifier, or None."""
        return self._public_id

    def getSystemId(self):
        """Return the recorded system identifier, or None."""
        return self._system_id
300
301
def parse(stream_or_string, parser=None):
    """Return a pulldom event stream reading ESIS data.

    `stream_or_string` is either the name of a file to open or an already
    open file-like object.  `parser` is the SAX reader to drive; a new
    ESISReader is created when none is given.

    When a file name is passed, the file opened here is owned by the
    returned event stream (available as its `stream` attribute); this
    function does not close it.
    """
    # isinstance() also accepts subclasses of the string types; the tuple
    # covers both str and unicode on Python 2 and collapses to str on 3.
    if isinstance(stream_or_string, (type(""), type(u""))):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = ESISReader()
    return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)