blob: 40c9121d2bddae84142d0f8ea65023f80d84b314 [file] [log] [blame]
Fred Drake3843bae1998-12-01 19:00:58 +00001"""Miscellaneous utility functions useful for dealing with ESIS streams."""
2__version__ = '$Revision$'
3
4import re
5import string
Fred Drakef6c115f2001-03-23 16:42:08 +00006
7import xml.dom.pulldom
8
9import xml.sax
10import xml.sax.handler
11import xml.sax.xmlreader
Fred Drake3843bae1998-12-01 19:00:58 +000012
13
# Matches a non-empty run of characters that contains no backslash.
_data_match = re.compile(r"[^\\]+").match

# unichr() only exists on Python 2; on Python 3 chr() covers the same range.
try:
    _unichr = unichr
except NameError:
    _unichr = chr


def decode(s):
    r"""Return `s` with its backslash escape sequences expanded.

    Recognized escapes:

        \\      a single backslash
        \n      a newline
        \%N;    the character whose ordinal is the decimal number N

    Raises ValueError for any other escape, for a backslash that ends the
    string, and for a ``\%`` escape without a terminating ';' or with a
    non-numeric N.
    """
    parts = []
    while s:
        m = _data_match(s)
        if m:
            # Plain data up to the next backslash (or the end of the string).
            parts.append(m.group())
            s = s[m.end():]
            continue
        # `s` starts with a backslash here.  Slicing instead of indexing
        # yields "" for a lone trailing backslash, so that case ends up in
        # the ValueError branch rather than raising IndexError.
        escape = s[1:2]
        if escape == "\\":
            parts.append("\\")
            s = s[2:]
        elif escape == "n":
            parts.append("\n")
            s = s[2:]
        elif escape == "%":
            n, s = s[2:].split(";", 1)
            parts.append(_unichr(int(n)))
        else:
            raise ValueError("can't handle " + repr(s))
    return "".join(parts)
36
37
# Translation table for the 256 single-byte characters: every character maps
# to itself except newline and backslash, which are written as escapes.
_charmap = {}
for _code in range(256):
    _charmap[chr(_code)] = chr(_code)
_charmap["\n"] = r"\n"
_charmap["\\"] = r"\\"
del _code

_null_join = ''.join


def _encode_char(c):
    """Return the encoded form of the single character `c`."""
    try:
        return _charmap[c]
    except KeyError:
        # Not in the table: write a numeric escape, the form decode()
        # turns back into the character with that ordinal.
        return "\\%" + str(ord(c)) + ";"


def encode(s):
    r"""Return `s` with newlines and backslashes escaped.

    Newline becomes ``\n`` and backslash becomes ``\\``.  Characters that
    are not in the 256-entry table are written as ``\%N;`` where N is the
    character's ordinal, so that decode(encode(s)) gives back `s`.
    """
    try:
        # Fast path: every character is in the table.
        return _null_join(map(_charmap.get, s))
    except TypeError:
        # _charmap.get returned None for at least one character, which
        # join() rejects; redo the work with the per-character fallback.
        return _null_join(map(_encode_char, s))
Fred Drake3843bae1998-12-01 19:00:58 +000048
49
class ESISReader(xml.sax.xmlreader.XMLReader):
    """SAX Reader which reads from an ESIS stream.

    No verification of the document structure is performed by the
    reader; a general verifier could be used as the target
    ContentHandler instance.

    parse() currently raises RuntimeError unconditionally, so input has
    to be pushed in with feed() and finished with close().  Every input
    line consists of a one-character token type followed by its data.

    """
    _decl_handler = None
    _lexical_handler = None

    # Set by 'p' / 's' lines; consumed and reset by the next 'N' line.
    _public_id = None
    _system_id = None

    _buffer = ""        # unterminated tail of the data given to feed()
    _is_empty = 0       # set by an 'e' line; applies to the next '(' line
    _lineno = 0         # number of complete lines processed by feed()
    _started = 0        # true once feed() has reported startDocument()

    def __init__(self, contentHandler=None, errorHandler=None):
        """Create a reader, optionally installing the given handlers."""
        xml.sax.xmlreader.XMLReader.__init__(self)
        # `_attributes` wraps `_attrs`; the dict is filled by 'A' lines and
        # cleared again after every startElement() call.
        self._attrs = {}
        self._attributes = Attributes(self._attrs)
        self._locator = Locator()
        self._empties = {}
        if contentHandler:
            self.setContentHandler(contentHandler)
        if errorHandler:
            self.setErrorHandler(errorHandler)

    def get_empties(self):
        """Return a list of the element names that were preceded by 'e'."""
        return list(self._empties)

    #
    #  XMLReader interface
    #

    def parse(self, source):
        """Disabled entry point: always raises RuntimeError.

        NOTE(review): the unconditional raise is kept from the original so
        callers see the same exception type; the statements after it are
        unreachable until it is removed.  Confirm whether parse() is meant
        to be enabled.
        """
        raise RuntimeError(
            "ESISReader.parse() is disabled; use feed() and close()")
        self._locator._public_id = source.getPublicId()
        self._locator._system_id = source.getSystemId()
        fp = source.getByteStream()
        handler = self.getContentHandler()
        if handler:
            handler.startDocument()
        lineno = 0
        while 1:
            token, data = self._get_token(fp)
            if token is None:
                break
            lineno = lineno + 1
            self._locator._lineno = lineno
            if token:
                # An empty token marks a blank line that _get_token() has
                # already reported; there is nothing to dispatch for it.
                self._handle_token(token, data)
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()

    def feed(self, data):
        """Process all complete lines in `data`; keep any partial last line.

        startDocument() is reported on the first call.  A line with no
        characters is reported through the error handler's error().
        """
        if not self._started:
            handler = self.getContentHandler()
            if handler:
                handler.startDocument()
            self._started = 1
        lines = (self._buffer + data).split("\n")
        # split() always returns at least one item; the last one is the
        # text after the final newline.  Store it before dispatching so the
        # buffer is still a string if a handler raises.
        self._buffer = lines.pop()
        for line in lines:
            self._lineno = self._lineno + 1
            self._locator._lineno = self._lineno
            if not line:
                self._report_missing_token()
            else:
                self._handle_token(line[0], line[1:])

    def close(self):
        """Report endDocument() and discard any buffered partial line."""
        handler = self.getContentHandler()
        if handler:
            handler.endDocument()
        self._buffer = ""

    def _report_missing_token(self):
        """Report an input line that has no token type character."""
        e = xml.sax.SAXParseException(
            "ESIS input line contains no token type mark",
            None, self._locator)
        self.getErrorHandler().error(e)

    def _get_token(self, fp):
        """Read one line from `fp` and return it as a (token, data) pair.

        Returns (None, None) at end of input or after an I/O error has been
        reported through fatalError(), and ("", "") for a blank line that
        has been reported through error().
        """
        try:
            line = fp.readline()
        except IOError as err:
            e = xml.sax.SAXException("I/O error reading input stream", err)
            self.getErrorHandler().fatalError(e)
            return None, None
        if not line:
            return None, None
        if line[-1] == "\n":
            line = line[:-1]
        if not line:
            self._report_missing_token()
            return "", ""
        return line[0], line[1:]

    def _handle_token(self, token, data):
        """Dispatch one input line, given as its type character and data."""
        handler = self.getContentHandler()
        if token == '-':
            # Character data.
            if data and handler:
                handler.characters(decode(data))
        elif token == ')':
            if handler:
                handler.endElement(decode(data))
        elif token == '(':
            if self._is_empty:
                self._empties[data] = 1
            if handler:
                # NOTE(review): the name is passed undecoded here while
                # endElement() receives decode(data) -- confirm intent.
                handler.startElement(data, self._attributes)
            self._attrs.clear()
            self._is_empty = 0
        elif token == 'A':
            # Either "name IMPLIED" (not recorded) or "name type value".
            name, value = data.split(' ', 1)
            if value != "IMPLIED":
                attr_type, value = value.split(' ', 1)
                self._attrs[name] = (decode(value), attr_type)
        elif token == '&':
            # entity reference in SAX?
            pass
        elif token == '?':
            if handler:
                if ' ' in data:
                    # Pad so that data made of a target followed only by
                    # whitespace still unpacks into two fields.
                    target, data = (data.split(None, 1) + ["", ""])[:2]
                else:
                    target, data = data, ""
                handler.processingInstruction(target, decode(data))
        elif token == 'N':
            handler = self.getDTDHandler()
            if handler:
                handler.notationDecl(data, self._public_id, self._system_id)
            self._public_id = None
            self._system_id = None
        elif token == 'p':
            self._public_id = decode(data)
        elif token == 's':
            self._system_id = decode(data)
        elif token == 'e':
            self._is_empty = 1
        elif token == 'C':
            # Accepted and ignored.
            pass
        else:
            e = xml.sax.SAXParseException(
                "unknown ESIS token in event stream",
                None, self._locator)
            self.getErrorHandler().error(e)

    def setContentHandler(self, handler):
        """Install `handler`, moving the document locator over to it."""
        old = self.getContentHandler()
        if old:
            old.setDocumentLocator(None)
        if handler:
            handler.setDocumentLocator(self._locator)
        xml.sax.xmlreader.XMLReader.setContentHandler(self, handler)

    def getProperty(self, property):
        """Return the lexical or declaration handler property.

        Raises xml.sax.SAXNotRecognizedException for any other property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            return self._lexical_handler

        elif property == xml.sax.handler.property_declaration_handler:
            return self._decl_handler

        else:
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def setProperty(self, property, value):
        """Set the lexical or declaration handler property.

        The locator is detached from the handler being replaced and handed
        to the new one.  Raises xml.sax.SAXNotRecognizedException for any
        other property.
        """
        if property == xml.sax.handler.property_lexical_handler:
            if self._lexical_handler:
                self._lexical_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._lexical_handler = value

        elif property == xml.sax.handler.property_declaration_handler:
            if self._decl_handler:
                self._decl_handler.setDocumentLocator(None)
            if value:
                value.setDocumentLocator(self._locator)
            self._decl_handler = value

        else:
            # The exception constructor requires a message argument.
            raise xml.sax.SAXNotRecognizedException("unknown property %s"
                                                    % repr(property))

    def getFeature(self, feature):
        """Return 1 for the namespaces feature; defer to the base otherwise."""
        if feature == xml.sax.handler.feature_namespaces:
            return 1
        else:
            return xml.sax.xmlreader.XMLReader.getFeature(self, feature)

    def setFeature(self, feature, enabled):
        """Accept and ignore the namespaces feature; defer otherwise."""
        if feature == xml.sax.handler.feature_namespaces:
            pass
        else:
            xml.sax.xmlreader.XMLReader.setFeature(self, feature, enabled)
253
254
class Attributes(xml.sax.xmlreader.AttributesImpl):
    """Attribute collection whose backing dictionary stores value and type.

    The lookups below replace the inherited ones so that they pick the
    right element out of each stored pair.
    """
    # self._attrs has the form {name: (value, type)}

    def getType(self, name):
        """Return the type of attribute `name`; KeyError if absent."""
        return self._attrs[name][1]

    def getValue(self, name):
        """Return the value of attribute `name`; KeyError if absent."""
        return self._attrs[name][0]

    def getValueByQName(self, name):
        """Return the value of attribute `name`; KeyError if absent."""
        return self._attrs[name][0]

    def __getitem__(self, name):
        return self._attrs[name][0]

    def get(self, name, default=None):
        """Return the value of attribute `name`, or `default` if absent."""
        try:
            return self._attrs[name][0]
        except KeyError:
            return default

    def items(self):
        """Return a list of (name, value) pairs."""
        return [(name, value)
                for name, (value, attr_type) in self._attrs.items()]

    def values(self):
        """Return a list of the attribute values."""
        return [value for value, attr_type in self._attrs.values()]
286
287
class Locator(xml.sax.xmlreader.Locator):
    """Locator that reports whatever is stored in its private fields.

    ESISReader assigns `_lineno`, `_public_id` and `_system_id` directly;
    the accessors below simply hand those values back.
    """
    _system_id = None
    _public_id = None
    _lineno = -1

    def getSystemId(self):
        """Return the stored system identifier (None until assigned)."""
        return self._system_id

    def getPublicId(self):
        """Return the stored public identifier (None until assigned)."""
        return self._public_id

    def getLineNumber(self):
        """Return the stored line number (-1 until assigned)."""
        return self._lineno
301
302
def parse(stream_or_string, parser=None):
    """Return a pulldom event stream for an ESIS document.

    `stream_or_string` is either a file name, which is opened for reading,
    or an already open file-like object, which is used as is.  `parser`
    defaults to a new ESISReader.

    The returned xml.dom.pulldom.DOMEventStream keeps a reference to the
    stream; a file opened here is not closed by this function.
    """
    # isinstance() also accepts subclasses of the two string types, which
    # an exact type comparison would wrongly treat as open streams.
    if isinstance(stream_or_string, (type(""), type(u""))):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = ESISReader()
    return xml.dom.pulldom.DOMEventStream(stream, parser, (2 ** 14) - 20)