blob: 7e24662e83e19e2cdb1fa1188ffec00799d9f9a5 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "compile.h"
5#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
/* Fallback docstring macros, used only when the Python headers being
 * compiled against do not already define PyDoc_STRVAR.
 */
#ifndef PyDoc_STRVAR

/*
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)          str
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)

#endif /* !PyDoc_STRVAR */

#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE
/* NOFIX_TRACE compiles out trace_frame() and its callers below. */
#define NOFIX_TRACE
#endif
27
/* Handler slot numbers.  The values are used as indices into
 * handler_info[] and into each parser object's handlers[] array, so the
 * order of the enumerators must not change.
 */
enum HandlerTypes {
    StartElement = 0,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    SkippedEntity,
    _DummyDecl          /* trailing entry; not a real handler slot */
};
53
54static PyObject *ErrorObject;
55
56/* ----------------------------------------------------- */
57
58/* Declarations for objects of type xmlparser */
59
60typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000061 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000062
Fred Drake0582df92000-07-12 04:49:00 +000063 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000064 int returns_unicode; /* True if Unicode strings are returned;
65 if false, UTF-8 strings are returned */
66 int ordered_attributes; /* Return attributes as a list. */
67 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000068 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000069 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000070 XML_Char *buffer; /* Buffer used when accumulating characters */
71 /* NULL if not enabled */
72 int buffer_size; /* Size of buffer, in XML_Char units */
73 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000074 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000075 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076} xmlparseobject;
77
Fred Drake2a3d7db2002-06-28 22:56:48 +000078#define CHARACTER_DATA_BUFFER_SIZE 8192
79
Jeremy Hylton938ace62002-07-17 16:30:39 +000080static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000081
Fred Drake117ac852002-09-24 16:24:54 +000082typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000083typedef void* xmlhandler;
84
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000085struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000086 const char *name;
87 xmlhandlersetter setter;
88 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000089 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000090 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000091};
92
Jeremy Hylton938ace62002-07-17 16:30:39 +000093static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000094
Fred Drakebd6101c2001-02-14 18:29:45 +000095/* Set an integer attribute on the error object; return true on success,
96 * false on an exception.
97 */
98static int
99set_error_attr(PyObject *err, char *name, int value)
100{
101 PyObject *v = PyInt_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000102
Fred Drakebd6101c2001-02-14 18:29:45 +0000103 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
104 Py_DECREF(v);
105 return 0;
106 }
107 return 1;
108}
109
110/* Build and set an Expat exception, including positioning
111 * information. Always returns NULL.
112 */
Fred Drake85d835f2001-02-08 15:39:08 +0000113static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000114set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000115{
116 PyObject *err;
117 char buffer[256];
118 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000119 int lineno = XML_GetErrorLineNumber(parser);
120 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000121
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000122 /* There is no risk of overflowing this buffer, since
123 even for 64-bit integers, there is sufficient space. */
124 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000125 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000126 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000127 if ( err != NULL
128 && set_error_attr(err, "code", code)
129 && set_error_attr(err, "offset", column)
130 && set_error_attr(err, "lineno", lineno)) {
131 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000132 }
133 return NULL;
134}
135
Fred Drake71b63ff2002-06-28 22:29:01 +0000136static int
137have_handler(xmlparseobject *self, int type)
138{
139 PyObject *handler = self->handlers[type];
140 return handler != NULL;
141}
142
143static PyObject *
144get_handler_name(struct HandlerInfo *hinfo)
145{
146 PyObject *name = hinfo->nameobj;
147 if (name == NULL) {
148 name = PyString_FromString(hinfo->name);
149 hinfo->nameobj = name;
150 }
151 Py_XINCREF(name);
152 return name;
153}
154
Fred Drake85d835f2001-02-08 15:39:08 +0000155
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000156#ifdef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000157/* Convert a string of XML_Chars into a Unicode string.
158 Returns None if str is a null pointer. */
159
Fred Drake0582df92000-07-12 04:49:00 +0000160static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000161conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000162{
Fred Drake71b63ff2002-06-28 22:29:01 +0000163 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000164 and hence in UTF-8. */
165 /* UTF-8 from Expat, Unicode desired */
166 if (str == NULL) {
167 Py_INCREF(Py_None);
168 return Py_None;
169 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000170 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171}
172
Fred Drake0582df92000-07-12 04:49:00 +0000173static PyObject *
174conv_string_len_to_unicode(const XML_Char *str, int len)
175{
Fred Drake71b63ff2002-06-28 22:29:01 +0000176 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000177 and hence in UTF-8. */
178 /* UTF-8 from Expat, Unicode desired */
179 if (str == NULL) {
180 Py_INCREF(Py_None);
181 return Py_None;
182 }
Fred Drake6f987622000-08-25 18:03:30 +0000183 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000184}
185#endif
186
187/* Convert a string of XML_Chars into an 8-bit Python string.
188 Returns None if str is a null pointer. */
189
Fred Drake6f987622000-08-25 18:03:30 +0000190static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000191conv_string_to_utf8(const XML_Char *str)
Fred Drake6f987622000-08-25 18:03:30 +0000192{
Fred Drake71b63ff2002-06-28 22:29:01 +0000193 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000194 and hence in UTF-8. */
195 /* UTF-8 from Expat, UTF-8 desired */
196 if (str == NULL) {
197 Py_INCREF(Py_None);
198 return Py_None;
199 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000200 return PyString_FromString(str);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000201}
202
Fred Drake6f987622000-08-25 18:03:30 +0000203static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +0000204conv_string_len_to_utf8(const XML_Char *str, int len)
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000205{
Fred Drake71b63ff2002-06-28 22:29:01 +0000206 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000207 and hence in UTF-8. */
208 /* UTF-8 from Expat, UTF-8 desired */
209 if (str == NULL) {
210 Py_INCREF(Py_None);
211 return Py_None;
212 }
213 return PyString_FromStringAndSize((const char *)str, len);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000214}
215
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000216/* Callback routines */
217
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000218static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000219
Martin v. Löwis069dde22003-01-21 10:58:18 +0000220/* This handler is used when an error has been detected, in the hope
221 that actual parsing can be terminated early. This will only help
222 if an external entity reference is encountered. */
223static int
224error_external_entity_ref_handler(XML_Parser parser,
225 const XML_Char *context,
226 const XML_Char *base,
227 const XML_Char *systemId,
228 const XML_Char *publicId)
229{
230 return 0;
231}
232
Fred Drake6f987622000-08-25 18:03:30 +0000233static void
234flag_error(xmlparseobject *self)
235{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000236 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000237 XML_SetExternalEntityRefHandler(self->itself,
238 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000239}
240
241static PyCodeObject*
242getcode(enum HandlerTypes slot, char* func_name, int lineno)
243{
Fred Drakebd6101c2001-02-14 18:29:45 +0000244 PyObject *code = NULL;
245 PyObject *name = NULL;
246 PyObject *nulltuple = NULL;
247 PyObject *filename = NULL;
248
249 if (handler_info[slot].tb_code == NULL) {
250 code = PyString_FromString("");
251 if (code == NULL)
252 goto failed;
253 name = PyString_FromString(func_name);
254 if (name == NULL)
255 goto failed;
256 nulltuple = PyTuple_New(0);
257 if (nulltuple == NULL)
258 goto failed;
259 filename = PyString_FromString(__FILE__);
260 handler_info[slot].tb_code =
261 PyCode_New(0, /* argcount */
262 0, /* nlocals */
263 0, /* stacksize */
264 0, /* flags */
265 code, /* code */
266 nulltuple, /* consts */
267 nulltuple, /* names */
268 nulltuple, /* varnames */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000269#if PYTHON_API_VERSION >= 1010
Fred Drakebd6101c2001-02-14 18:29:45 +0000270 nulltuple, /* freevars */
271 nulltuple, /* cellvars */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000272#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000273 filename, /* filename */
274 name, /* name */
275 lineno, /* firstlineno */
276 code /* lnotab */
277 );
278 if (handler_info[slot].tb_code == NULL)
279 goto failed;
280 Py_DECREF(code);
281 Py_DECREF(nulltuple);
282 Py_DECREF(filename);
283 Py_DECREF(name);
284 }
285 return handler_info[slot].tb_code;
286 failed:
287 Py_XDECREF(code);
288 Py_XDECREF(name);
289 return NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000290}
291
Martin v. Löwis069dde22003-01-21 10:58:18 +0000292#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000293static int
294trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
295{
296 int result = 0;
297 if (!tstate->use_tracing || tstate->tracing)
298 return 0;
299 if (tstate->c_profilefunc != NULL) {
300 tstate->tracing++;
301 result = tstate->c_profilefunc(tstate->c_profileobj,
302 f, code , val);
303 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
304 || (tstate->c_profilefunc != NULL));
305 tstate->tracing--;
306 if (result)
307 return result;
308 }
309 if (tstate->c_tracefunc != NULL) {
310 tstate->tracing++;
311 result = tstate->c_tracefunc(tstate->c_traceobj,
312 f, code , val);
313 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
314 || (tstate->c_profilefunc != NULL));
315 tstate->tracing--;
316 }
317 return result;
318}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000319#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000320
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000321static PyObject*
322call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
323{
Fred Drakebd6101c2001-02-14 18:29:45 +0000324 PyThreadState *tstate = PyThreadState_GET();
325 PyFrameObject *f;
326 PyObject *res;
327
328 if (c == NULL)
329 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000330
Fred Drakebd6101c2001-02-14 18:29:45 +0000331 f = PyFrame_New(
332 tstate, /*back*/
333 c, /*code*/
Michael W. Hudson019a78e2002-11-08 12:53:11 +0000334 PyEval_GetGlobals(), /*globals*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000335 NULL /*locals*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000336 );
337 if (f == NULL)
338 return NULL;
339 tstate->frame = f;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000340#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000341 if (trace_frame(tstate, f, PyTrace_CALL, Py_None)) {
342 Py_DECREF(f);
343 return NULL;
344 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000345#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000346 res = PyEval_CallObject(func, args);
347 if (res == NULL && tstate->curexc_traceback == NULL)
348 PyTraceBack_Here(f);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000349#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000350 else {
351 if (trace_frame(tstate, f, PyTrace_RETURN, res)) {
352 Py_XDECREF(res);
353 res = NULL;
354 }
355 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000356#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000357 tstate->frame = f->f_back;
358 Py_DECREF(f);
359 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000360}
361
/* STRING_CONV_FUNC evaluates to the XML_Char* -> Python string
 * converter to use.  In Unicode-enabled builds it expands to an
 * expression that reads a variable named `self` at the point of use.
 */
#ifdef Py_USING_UNICODE
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#else
#define STRING_CONV_FUNC conv_string_to_utf8
#endif
Guido van Rossum5961f5a2000-03-31 16:18:11 +0000369
Fred Drakeb91a36b2002-06-27 19:40:48 +0000370static PyObject*
371string_intern(xmlparseobject *self, const char* str)
372{
373 PyObject *result = STRING_CONV_FUNC(str);
374 PyObject *value;
375 if (!self->intern)
376 return result;
377 value = PyDict_GetItem(self->intern, result);
378 if (!value) {
379 if (PyDict_SetItem(self->intern, result, result) == 0)
380 return result;
381 else
382 return NULL;
383 }
384 Py_INCREF(value);
385 Py_DECREF(result);
386 return value;
387}
388
Fred Drake2a3d7db2002-06-28 22:56:48 +0000389/* Return 0 on success, -1 on exception.
390 * flag_error() will be called before return if needed.
391 */
392static int
393call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
394{
395 PyObject *args;
396 PyObject *temp;
397
398 args = PyTuple_New(1);
399 if (args == NULL)
400 return -1;
401#ifdef Py_USING_UNICODE
402 temp = (self->returns_unicode
403 ? conv_string_len_to_unicode(buffer, len)
404 : conv_string_len_to_utf8(buffer, len));
405#else
406 temp = conv_string_len_to_utf8(buffer, len);
407#endif
408 if (temp == NULL) {
409 Py_DECREF(args);
410 flag_error(self);
411 return -1;
412 }
413 PyTuple_SET_ITEM(args, 0, temp);
414 /* temp is now a borrowed reference; consider it unused. */
415 self->in_callback = 1;
416 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
417 self->handlers[CharacterData], args);
418 /* temp is an owned reference again, or NULL */
419 self->in_callback = 0;
420 Py_DECREF(args);
421 if (temp == NULL) {
422 flag_error(self);
423 return -1;
424 }
425 Py_DECREF(temp);
426 return 0;
427}
428
429static int
430flush_character_buffer(xmlparseobject *self)
431{
432 int rc;
433 if (self->buffer == NULL || self->buffer_used == 0)
434 return 0;
435 rc = call_character_handler(self, self->buffer, self->buffer_used);
436 self->buffer_used = 0;
437 return rc;
438}
439
440static void
441my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
442{
443 xmlparseobject *self = (xmlparseobject *) userData;
444 if (self->buffer == NULL)
445 call_character_handler(self, data, len);
446 else {
447 if ((self->buffer_used + len) > self->buffer_size) {
448 if (flush_character_buffer(self) < 0)
449 return;
450 /* handler might have changed; drop the rest on the floor
451 * if there isn't a handler anymore
452 */
453 if (!have_handler(self, CharacterData))
454 return;
455 }
456 if (len > self->buffer_size) {
457 call_character_handler(self, data, len);
458 self->buffer_used = 0;
459 }
460 else {
461 memcpy(self->buffer + self->buffer_used,
462 data, len * sizeof(XML_Char));
463 self->buffer_used += len;
464 }
465 }
466}
467
Fred Drake85d835f2001-02-08 15:39:08 +0000468static void
469my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000470 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000471{
472 xmlparseobject *self = (xmlparseobject *)userData;
473
Fred Drake71b63ff2002-06-28 22:29:01 +0000474 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000475 PyObject *container, *rv, *args;
476 int i, max;
477
Fred Drake2a3d7db2002-06-28 22:56:48 +0000478 if (flush_character_buffer(self) < 0)
479 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000480 /* Set max to the number of slots filled in atts[]; max/2 is
481 * the number of attributes we need to process.
482 */
483 if (self->specified_attributes) {
484 max = XML_GetSpecifiedAttributeCount(self->itself);
485 }
486 else {
487 max = 0;
488 while (atts[max] != NULL)
489 max += 2;
490 }
491 /* Build the container. */
492 if (self->ordered_attributes)
493 container = PyList_New(max);
494 else
495 container = PyDict_New();
496 if (container == NULL) {
497 flag_error(self);
498 return;
499 }
500 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000501 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000502 PyObject *v;
503 if (n == NULL) {
504 flag_error(self);
505 Py_DECREF(container);
506 return;
507 }
508 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
509 if (v == NULL) {
510 flag_error(self);
511 Py_DECREF(container);
512 Py_DECREF(n);
513 return;
514 }
515 if (self->ordered_attributes) {
516 PyList_SET_ITEM(container, i, n);
517 PyList_SET_ITEM(container, i+1, v);
518 }
519 else if (PyDict_SetItem(container, n, v)) {
520 flag_error(self);
521 Py_DECREF(n);
522 Py_DECREF(v);
523 return;
524 }
525 else {
526 Py_DECREF(n);
527 Py_DECREF(v);
528 }
529 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000530 args = Py_BuildValue("(NN)", string_intern(self, name), container);
Fred Drake85d835f2001-02-08 15:39:08 +0000531 if (args == NULL) {
532 Py_DECREF(container);
533 return;
534 }
535 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000536 self->in_callback = 1;
537 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake85d835f2001-02-08 15:39:08 +0000538 self->handlers[StartElement], args);
Fred Drakebd6101c2001-02-14 18:29:45 +0000539 self->in_callback = 0;
540 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000541 if (rv == NULL) {
542 flag_error(self);
543 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000544 }
Fred Drake85d835f2001-02-08 15:39:08 +0000545 Py_DECREF(rv);
546 }
547}
548
/* RC_HANDLER defines a static Expat callback named my_<NAME>Handler.
 *
 *   RC            C return type of the callback
 *   NAME          enum HandlerTypes member; also selects the Python handler
 *   PARAMS        parenthesized C parameter list of the callback
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on success while `rv` is still alive
 *   RETURN        expression returned on every path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing unless a Python handler is set.
 * Otherwise it flushes buffered character data, builds the argument
 * tuple, calls the handler through call_with_frame(), and calls
 * flag_error() if building the arguments or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ; \
        if (!args) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
577
/* Callback returning void; the parser object is taken from a parameter
 * that must be named `userData`.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* Callback returning int: the Python handler's result converted with
 * PyInt_AsLong(), or 0 when no handler ran or it failed.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyInt_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000586
Fred Drake71b63ff2002-06-28 22:29:01 +0000587VOID_HANDLER(EndElement,
588 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000589 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000590
Fred Drake6f987622000-08-25 18:03:30 +0000591VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000592 (void *userData,
593 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000594 const XML_Char *data),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000595 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000596
Fred Drake6f987622000-08-25 18:03:30 +0000597VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000598 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000599 const XML_Char *entityName,
600 const XML_Char *base,
601 const XML_Char *systemId,
602 const XML_Char *publicId,
603 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000604 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000605 string_intern(self, entityName), string_intern(self, base),
606 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000607 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000608
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000609#ifndef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000610VOID_HANDLER(EntityDecl,
611 (void *userData,
612 const XML_Char *entityName,
613 int is_parameter_entity,
614 const XML_Char *value,
615 int value_length,
616 const XML_Char *base,
617 const XML_Char *systemId,
618 const XML_Char *publicId,
619 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000620 ("NiNNNNN",
621 string_intern(self, entityName), is_parameter_entity,
Fred Drake85d835f2001-02-08 15:39:08 +0000622 conv_string_len_to_utf8(value, value_length),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000623 string_intern(self, base), string_intern(self, systemId),
624 string_intern(self, publicId),
625 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000626#else
627VOID_HANDLER(EntityDecl,
628 (void *userData,
629 const XML_Char *entityName,
630 int is_parameter_entity,
631 const XML_Char *value,
632 int value_length,
633 const XML_Char *base,
634 const XML_Char *systemId,
635 const XML_Char *publicId,
636 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000637 ("NiNNNNN",
638 string_intern(self, entityName), is_parameter_entity,
Fred Drake71b63ff2002-06-28 22:29:01 +0000639 (self->returns_unicode
640 ? conv_string_len_to_unicode(value, value_length)
Fred Drake85d835f2001-02-08 15:39:08 +0000641 : conv_string_len_to_utf8(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000642 string_intern(self, base), string_intern(self, systemId),
643 string_intern(self, publicId),
644 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000645#endif
646
647VOID_HANDLER(XmlDecl,
648 (void *userData,
649 const XML_Char *version,
650 const XML_Char *encoding,
651 int standalone),
652 ("(O&O&i)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000653 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000654 standalone))
655
656static PyObject *
657conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000658 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000659{
660 PyObject *result = NULL;
661 PyObject *children = PyTuple_New(model->numchildren);
662 int i;
663
664 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000665 assert(model->numchildren < INT_MAX);
666 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000667 PyObject *child = conv_content_model(&model->children[i],
668 conv_string);
669 if (child == NULL) {
670 Py_XDECREF(children);
671 return NULL;
672 }
673 PyTuple_SET_ITEM(children, i, child);
674 }
675 result = Py_BuildValue("(iiO&N)",
676 model->type, model->quant,
677 conv_string,model->name, children);
678 }
679 return result;
680}
681
682static PyObject *
683conv_content_model_utf8(XML_Content * const model)
684{
685 return conv_content_model(model, conv_string_to_utf8);
686}
687
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000688#ifdef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000689static PyObject *
690conv_content_model_unicode(XML_Content * const model)
691{
692 return conv_content_model(model, conv_string_to_unicode);
693}
694
695VOID_HANDLER(ElementDecl,
696 (void *userData,
697 const XML_Char *name,
698 XML_Content *model),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000699 ("NO&",
700 string_intern(self, name),
Fred Drake85d835f2001-02-08 15:39:08 +0000701 (self->returns_unicode ? conv_content_model_unicode
702 : conv_content_model_utf8),model))
703#else
704VOID_HANDLER(ElementDecl,
705 (void *userData,
706 const XML_Char *name,
707 XML_Content *model),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000708 ("NO&",
709 string_intern(self, name), conv_content_model_utf8,model))
Fred Drake85d835f2001-02-08 15:39:08 +0000710#endif
711
712VOID_HANDLER(AttlistDecl,
713 (void *userData,
714 const XML_Char *elname,
715 const XML_Char *attname,
716 const XML_Char *att_type,
717 const XML_Char *dflt,
718 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000719 ("(NNO&O&i)",
720 string_intern(self, elname), string_intern(self, attname),
Fred Drake85d835f2001-02-08 15:39:08 +0000721 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
722 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000723
Martin v. Löwis069dde22003-01-21 10:58:18 +0000724VOID_HANDLER(SkippedEntity,
725 (void *userData,
726 const XML_Char *entityName,
727 int is_parameter_entity),
728 ("Ni",
729 string_intern(self, entityName), is_parameter_entity))
730
Fred Drake71b63ff2002-06-28 22:29:01 +0000731VOID_HANDLER(NotationDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000732 (void *userData,
733 const XML_Char *notationName,
734 const XML_Char *base,
735 const XML_Char *systemId,
736 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000737 ("(NNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000738 string_intern(self, notationName), string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000739 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000740
Fred Drake6f987622000-08-25 18:03:30 +0000741VOID_HANDLER(StartNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000742 (void *userData,
743 const XML_Char *prefix,
744 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000745 ("(NN)",
746 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000747
Fred Drake6f987622000-08-25 18:03:30 +0000748VOID_HANDLER(EndNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000749 (void *userData,
750 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000751 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000752
Fred Drake6f987622000-08-25 18:03:30 +0000753VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000754 (void *userData, const XML_Char *data),
755 ("(O&)", STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000756
Fred Drake6f987622000-08-25 18:03:30 +0000757VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000758 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000759 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000760
Fred Drake6f987622000-08-25 18:03:30 +0000761VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000762 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000763 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000764
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000765#ifndef Py_USING_UNICODE
Fred Drake6f987622000-08-25 18:03:30 +0000766VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000767 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000768 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000769
Fred Drake6f987622000-08-25 18:03:30 +0000770VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000771 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000772 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000773#else
Fred Drake6f987622000-08-25 18:03:30 +0000774VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000775 (void *userData, const XML_Char *s, int len),
776 ("(N)", (self->returns_unicode
777 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000778 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000779
Fred Drake6f987622000-08-25 18:03:30 +0000780VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000781 (void *userData, const XML_Char *s, int len),
782 ("(N)", (self->returns_unicode
783 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000784 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000785#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000786
Fred Drake71b63ff2002-06-28 22:29:01 +0000787INT_HANDLER(NotStandalone,
788 (void *userData),
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000789 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000790
Fred Drake6f987622000-08-25 18:03:30 +0000791RC_HANDLER(int, ExternalEntityRef,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000792 (XML_Parser parser,
793 const XML_Char *context,
794 const XML_Char *base,
795 const XML_Char *systemId,
796 const XML_Char *publicId),
797 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000798 ("(O&NNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000799 STRING_CONV_FUNC,context, string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000800 string_intern(self, systemId), string_intern(self, publicId)),
Fred Drake6f987622000-08-25 18:03:30 +0000801 rc = PyInt_AsLong(rv);, rc,
802 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000803
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000804/* XXX UnknownEncodingHandler */
805
Fred Drake85d835f2001-02-08 15:39:08 +0000806VOID_HANDLER(StartDoctypeDecl,
807 (void *userData, const XML_Char *doctypeName,
808 const XML_Char *sysid, const XML_Char *pubid,
809 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000810 ("(NNNi)", string_intern(self, doctypeName),
811 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000812 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000813
814VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000815
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000816/* ---------------------------------------------------------------- */
817
Fred Drake71b63ff2002-06-28 22:29:01 +0000818static PyObject *
819get_parse_result(xmlparseobject *self, int rv)
820{
821 if (PyErr_Occurred()) {
822 return NULL;
823 }
824 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000825 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000826 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000827 if (flush_character_buffer(self) < 0) {
828 return NULL;
829 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000830 return PyInt_FromLong(rv);
831}
832
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000833PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000834"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000835Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000836
837static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000838xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000839{
Fred Drake0582df92000-07-12 04:49:00 +0000840 char *s;
841 int slen;
842 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000843
Fred Drake0582df92000-07-12 04:49:00 +0000844 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
845 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000846
847 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000848}
849
Fred Drakeca1f4262000-09-21 20:10:23 +0000850/* File reading copied from cPickle */
851
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000852#define BUF_SIZE 2048
853
Fred Drake0582df92000-07-12 04:49:00 +0000854static int
855readinst(char *buf, int buf_size, PyObject *meth)
856{
857 PyObject *arg = NULL;
858 PyObject *bytes = NULL;
859 PyObject *str = NULL;
860 int len = -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000861
Fred Drake676940b2000-09-22 15:21:31 +0000862 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000863 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000864
Fred Drakeca1f4262000-09-21 20:10:23 +0000865 if ((arg = PyTuple_New(1)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000866 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000867
Tim Peters954eef72000-09-22 06:01:11 +0000868 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000869
Guido van Rossum84b2bed2002-08-16 17:01:09 +0000870 if ((str = PyObject_Call(meth, arg, NULL)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000871 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000872
Fred Drake0582df92000-07-12 04:49:00 +0000873 /* XXX what to do if it returns a Unicode string? */
Fred Drakeca1f4262000-09-21 20:10:23 +0000874 if (!PyString_Check(str)) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000875 PyErr_Format(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000876 "read() did not return a string object (type=%.400s)",
877 str->ob_type->tp_name);
878 goto finally;
879 }
880 len = PyString_GET_SIZE(str);
881 if (len > buf_size) {
882 PyErr_Format(PyExc_ValueError,
883 "read() returned too much data: "
884 "%i bytes requested, %i returned",
885 buf_size, len);
886 Py_DECREF(str);
887 goto finally;
888 }
889 memcpy(buf, PyString_AsString(str), len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000890finally:
Fred Drake0582df92000-07-12 04:49:00 +0000891 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000892 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000893 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000894}
895
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000896PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000897"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000898Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000899
900static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000901xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000902{
Fred Drake0582df92000-07-12 04:49:00 +0000903 int rv = 1;
904 PyObject *f;
905 FILE *fp;
906 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000907
Fred Drake0582df92000-07-12 04:49:00 +0000908 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
909 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000910
Fred Drake0582df92000-07-12 04:49:00 +0000911 if (PyFile_Check(f)) {
912 fp = PyFile_AsFile(f);
913 }
914 else{
915 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000916 readmethod = PyObject_GetAttrString(f, "read");
917 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000918 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000919 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000920 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000921 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000922 }
923 }
924 for (;;) {
925 int bytes_read;
926 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
927 if (buf == NULL)
928 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000929
Fred Drake0582df92000-07-12 04:49:00 +0000930 if (fp) {
931 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
932 if (bytes_read < 0) {
933 PyErr_SetFromErrno(PyExc_IOError);
934 return NULL;
935 }
936 }
937 else {
938 bytes_read = readinst(buf, BUF_SIZE, readmethod);
939 if (bytes_read < 0)
940 return NULL;
941 }
942 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
943 if (PyErr_Occurred())
944 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000945
Fred Drake0582df92000-07-12 04:49:00 +0000946 if (!rv || bytes_read == 0)
947 break;
948 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000949 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000950}
951
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000952PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000953"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000954Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000955
956static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000957xmlparse_SetBase(xmlparseobject *self, PyObject *args)
958{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000959 char *base;
960
Fred Drake0582df92000-07-12 04:49:00 +0000961 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000962 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000963 if (!XML_SetBase(self->itself, base)) {
964 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000965 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000966 Py_INCREF(Py_None);
967 return Py_None;
968}
969
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000970PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000971"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000972Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000973
974static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000975xmlparse_GetBase(xmlparseobject *self, PyObject *args)
976{
977 if (!PyArg_ParseTuple(args, ":GetBase"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000978 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000979
Fred Drake0582df92000-07-12 04:49:00 +0000980 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000981}
982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000983PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000984"GetInputContext() -> string\n\
985Return the untranslated text of the input that caused the current event.\n\
986If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000987for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000988
989static PyObject *
990xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
991{
992 PyObject *result = NULL;
993
994 if (PyArg_ParseTuple(args, ":GetInputContext")) {
995 if (self->in_callback) {
996 int offset, size;
997 const char *buffer
998 = XML_GetInputContext(self->itself, &offset, &size);
999
1000 if (buffer != NULL)
1001 result = PyString_FromStringAndSize(buffer + offset, size);
1002 else {
1003 result = Py_None;
1004 Py_INCREF(result);
1005 }
1006 }
1007 else {
1008 result = Py_None;
1009 Py_INCREF(result);
1010 }
1011 }
1012 return result;
1013}
Fred Drakebd6101c2001-02-14 18:29:45 +00001014
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001015PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001016"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001017Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001018information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001019
1020static PyObject *
1021xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1022{
1023 char *context;
1024 char *encoding = NULL;
1025 xmlparseobject *new_parser;
1026 int i;
1027
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001028 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001029 &context, &encoding)) {
1030 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001031 }
1032
Martin v. Löwis894258c2001-09-23 10:20:10 +00001033#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001034 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001035 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001036#else
1037 /* Python versions 2.2 and later */
1038 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1039#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001040
1041 if (new_parser == NULL)
1042 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001043 new_parser->buffer_size = self->buffer_size;
1044 new_parser->buffer_used = 0;
1045 if (self->buffer != NULL) {
1046 new_parser->buffer = malloc(new_parser->buffer_size);
1047 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001048#ifndef Py_TPFLAGS_HAVE_GC
1049 /* Code for versions 2.0 and 2.1 */
1050 PyObject_Del(new_parser);
1051#else
1052 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001053 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001054#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001055 return PyErr_NoMemory();
1056 }
1057 }
1058 else
1059 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001060 new_parser->returns_unicode = self->returns_unicode;
1061 new_parser->ordered_attributes = self->ordered_attributes;
1062 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001063 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001064 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001065 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001066 encoding);
1067 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001068 new_parser->intern = self->intern;
1069 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001070#ifdef Py_TPFLAGS_HAVE_GC
1071 PyObject_GC_Track(new_parser);
1072#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001073 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001074#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001075
1076 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001077 Py_DECREF(new_parser);
1078 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001079 }
1080
1081 XML_SetUserData(new_parser->itself, (void *)new_parser);
1082
1083 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001084 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001085 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001086
Fred Drake2a3d7db2002-06-28 22:56:48 +00001087 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001088 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001089 Py_DECREF(new_parser);
1090 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001091 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001092 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001093
1094 /* then copy handlers from self */
1095 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001096 PyObject *handler = self->handlers[i];
1097 if (handler != NULL) {
1098 Py_INCREF(handler);
1099 new_parser->handlers[i] = handler;
1100 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001101 handler_info[i].handler);
1102 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001103 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001104 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001105}
1106
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001107PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001108"SetParamEntityParsing(flag) -> success\n\
1109Controls parsing of parameter entities (including the external DTD\n\
1110subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1111XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1112XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001113was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001114
1115static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001116xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001117{
Fred Drake85d835f2001-02-08 15:39:08 +00001118 int flag;
1119 if (!PyArg_ParseTuple(args, "i", &flag))
1120 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001121 flag = XML_SetParamEntityParsing(p->itself, flag);
Fred Drake85d835f2001-02-08 15:39:08 +00001122 return PyInt_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001123}
1124
Martin v. Löwis069dde22003-01-21 10:58:18 +00001125PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1126"UseForeignDTD([flag])\n\
1127Allows the application to provide an artificial external subset if one is\n\
1128not specified as part of the document instance. This readily allows the\n\
1129use of a 'default' document type controlled by the application, while still\n\
1130getting the advantage of providing document type information to the parser.\n\
1131'flag' defaults to True if not provided.");
1132
1133static PyObject *
1134xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1135{
1136 PyObject *flagobj = NULL;
1137 XML_Bool flag = XML_TRUE;
1138 enum XML_Error rc;
1139 if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
1140 return NULL;
1141 if (flagobj != NULL)
1142 flag = PyObject_IsTrue(flagobj) ? XML_TRUE : XML_FALSE;
1143 rc = XML_UseForeignDTD(self->itself, flag);
1144 if (rc != XML_ERROR_NONE) {
1145 return set_error(self, rc);
1146 }
1147 Py_INCREF(Py_None);
1148 return Py_None;
1149}
1150
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001151static struct PyMethodDef xmlparse_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001152 {"Parse", (PyCFunction)xmlparse_Parse,
Fred Drakebd6101c2001-02-14 18:29:45 +00001153 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001154 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Fred Drakebd6101c2001-02-14 18:29:45 +00001155 METH_VARARGS, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001156 {"SetBase", (PyCFunction)xmlparse_SetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001157 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001158 {"GetBase", (PyCFunction)xmlparse_GetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001159 METH_VARARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001160 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001161 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001162 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1163 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001164 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1165 METH_VARARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwis069dde22003-01-21 10:58:18 +00001166 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1167 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
1168 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001169};
1170
1171/* ---------- */
1172
1173
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001174#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001175
Fred Drake71b63ff2002-06-28 22:29:01 +00001176/* pyexpat international encoding support.
1177 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001178*/
1179
Martin v. Löwis3af7cc02001-01-22 08:19:10 +00001180static char template_buffer[257];
Fred Drakebb66a202001-03-01 20:48:17 +00001181PyObject *template_string = NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182
Fred Drake71b63ff2002-06-28 22:29:01 +00001183static void
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001184init_template_buffer(void)
1185{
1186 int i;
Fred Drakebb66a202001-03-01 20:48:17 +00001187 for (i = 0; i < 256; i++) {
1188 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001189 }
Fred Drakebb66a202001-03-01 20:48:17 +00001190 template_buffer[256] = 0;
Tim Peters63cb99e2001-02-17 18:12:50 +00001191}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001192
Fred Drake71b63ff2002-06-28 22:29:01 +00001193static int
1194PyUnknownEncodingHandler(void *encodingHandlerData,
1195 const XML_Char *name,
1196 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001197{
Fred Drakebb66a202001-03-01 20:48:17 +00001198 PyUnicodeObject *_u_string = NULL;
1199 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001200 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001201
Fred Drakebb66a202001-03-01 20:48:17 +00001202 /* Yes, supports only 8bit encodings */
1203 _u_string = (PyUnicodeObject *)
1204 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001205
Fred Drakebb66a202001-03-01 20:48:17 +00001206 if (_u_string == NULL)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001207 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001208
Fred Drakebb66a202001-03-01 20:48:17 +00001209 for (i = 0; i < 256; i++) {
1210 /* Stupid to access directly, but fast */
1211 Py_UNICODE c = _u_string->str[i];
1212 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001213 info->map[i] = -1;
Fred Drakebb66a202001-03-01 20:48:17 +00001214 else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001215 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001216 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001217 info->data = NULL;
1218 info->convert = NULL;
1219 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001220 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001221 Py_DECREF(_u_string);
1222 return result;
1223}
1224
1225#endif
1226
1227static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001228newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001229{
1230 int i;
1231 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001232
Martin v. Löwis894258c2001-09-23 10:20:10 +00001233#ifdef Py_TPFLAGS_HAVE_GC
1234 /* Code for versions 2.2 and later */
1235 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1236#else
Fred Drake0582df92000-07-12 04:49:00 +00001237 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001238#endif
Fred Drake0582df92000-07-12 04:49:00 +00001239 if (self == NULL)
1240 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001241
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001242#ifdef Py_USING_UNICODE
Fred Drake0582df92000-07-12 04:49:00 +00001243 self->returns_unicode = 1;
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001244#else
1245 self->returns_unicode = 0;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001246#endif
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001247
Fred Drake2a3d7db2002-06-28 22:56:48 +00001248 self->buffer = NULL;
1249 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1250 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001251 self->ordered_attributes = 0;
1252 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001253 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001254 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001255 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001256 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001257 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1258 }
Fred Drake85d835f2001-02-08 15:39:08 +00001259 else {
Fred Drake0582df92000-07-12 04:49:00 +00001260 self->itself = XML_ParserCreate(encoding);
1261 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001262 self->intern = intern;
1263 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001264#ifdef Py_TPFLAGS_HAVE_GC
1265 PyObject_GC_Track(self);
1266#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001267 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001268#endif
Fred Drake0582df92000-07-12 04:49:00 +00001269 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001270 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001271 "XML_ParserCreate failed");
1272 Py_DECREF(self);
1273 return NULL;
1274 }
1275 XML_SetUserData(self->itself, (void *)self);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001276#ifdef Py_USING_UNICODE
Fred Drake7c75bf22002-07-01 14:02:31 +00001277 XML_SetUnknownEncodingHandler(self->itself,
1278 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001279#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001280
Fred Drake2a3d7db2002-06-28 22:56:48 +00001281 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001282 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001283
Fred Drake7c75bf22002-07-01 14:02:31 +00001284 self->handlers = malloc(sizeof(PyObject *) * i);
1285 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001286 Py_DECREF(self);
1287 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001288 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001289 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001290
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001291 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001292}
1293
1294
1295static void
Fred Drake0582df92000-07-12 04:49:00 +00001296xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001297{
Fred Drake0582df92000-07-12 04:49:00 +00001298 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001299#ifdef Py_TPFLAGS_HAVE_GC
1300 PyObject_GC_UnTrack(self);
1301#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001302 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001303#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001304 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001305 XML_ParserFree(self->itself);
1306 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001307
Fred Drake85d835f2001-02-08 15:39:08 +00001308 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001309 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001310 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001311 temp = self->handlers[i];
1312 self->handlers[i] = NULL;
1313 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001314 }
1315 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001316 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001317 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001318 if (self->buffer != NULL) {
1319 free(self->buffer);
1320 self->buffer = NULL;
1321 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001322 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001323#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001324 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001325 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001326#else
1327 /* Code for versions 2.2 and later. */
1328 PyObject_GC_Del(self);
1329#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001330}
1331
Fred Drake0582df92000-07-12 04:49:00 +00001332static int
1333handlername2int(const char *name)
1334{
1335 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001336 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001337 if (strcmp(name, handler_info[i].name) == 0) {
1338 return i;
1339 }
1340 }
1341 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001342}
1343
1344static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001345get_pybool(int istrue)
1346{
1347 PyObject *result = istrue ? Py_True : Py_False;
1348 Py_INCREF(result);
1349 return result;
1350}
1351
1352static PyObject *
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001353xmlparse_getattr(xmlparseobject *self, char *name)
1354{
Fred Drake71b63ff2002-06-28 22:29:01 +00001355 int handlernum = handlername2int(name);
1356
1357 if (handlernum != -1) {
1358 PyObject *result = self->handlers[handlernum];
1359 if (result == NULL)
1360 result = Py_None;
1361 Py_INCREF(result);
1362 return result;
1363 }
1364 if (name[0] == 'E') {
1365 if (strcmp(name, "ErrorCode") == 0)
1366 return PyInt_FromLong((long)
1367 XML_GetErrorCode(self->itself));
1368 if (strcmp(name, "ErrorLineNumber") == 0)
1369 return PyInt_FromLong((long)
1370 XML_GetErrorLineNumber(self->itself));
1371 if (strcmp(name, "ErrorColumnNumber") == 0)
1372 return PyInt_FromLong((long)
1373 XML_GetErrorColumnNumber(self->itself));
1374 if (strcmp(name, "ErrorByteIndex") == 0)
1375 return PyInt_FromLong((long)
1376 XML_GetErrorByteIndex(self->itself));
1377 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001378 if (name[0] == 'b') {
1379 if (strcmp(name, "buffer_size") == 0)
1380 return PyInt_FromLong((long) self->buffer_size);
1381 if (strcmp(name, "buffer_text") == 0)
1382 return get_pybool(self->buffer != NULL);
1383 if (strcmp(name, "buffer_used") == 0)
1384 return PyInt_FromLong((long) self->buffer_used);
1385 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001386 if (strcmp(name, "namespace_prefixes") == 0)
1387 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001388 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001389 return get_pybool(self->ordered_attributes);
Fred Drake0582df92000-07-12 04:49:00 +00001390 if (strcmp(name, "returns_unicode") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001391 return get_pybool((long) self->returns_unicode);
Fred Drake85d835f2001-02-08 15:39:08 +00001392 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001393 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001394 if (strcmp(name, "intern") == 0) {
1395 if (self->intern == NULL) {
1396 Py_INCREF(Py_None);
1397 return Py_None;
1398 }
1399 else {
1400 Py_INCREF(self->intern);
1401 return self->intern;
1402 }
1403 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001404
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001405#define APPEND(list, str) \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001406 do { \
1407 PyObject *o = PyString_FromString(str); \
1408 if (o != NULL) \
1409 PyList_Append(list, o); \
1410 Py_XDECREF(o); \
1411 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001412
Fred Drake0582df92000-07-12 04:49:00 +00001413 if (strcmp(name, "__members__") == 0) {
1414 int i;
1415 PyObject *rc = PyList_New(0);
Fred Drake71b63ff2002-06-28 22:29:01 +00001416 for (i = 0; handler_info[i].name != NULL; i++) {
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001417 PyObject *o = get_handler_name(&handler_info[i]);
1418 if (o != NULL)
1419 PyList_Append(rc, o);
1420 Py_XDECREF(o);
Fred Drake0582df92000-07-12 04:49:00 +00001421 }
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001422 APPEND(rc, "ErrorCode");
1423 APPEND(rc, "ErrorLineNumber");
1424 APPEND(rc, "ErrorColumnNumber");
1425 APPEND(rc, "ErrorByteIndex");
1426 APPEND(rc, "buffer_size");
1427 APPEND(rc, "buffer_text");
1428 APPEND(rc, "buffer_used");
Martin v. Löwis069dde22003-01-21 10:58:18 +00001429 APPEND(rc, "namespace_prefixes");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001430 APPEND(rc, "ordered_attributes");
1431 APPEND(rc, "returns_unicode");
1432 APPEND(rc, "specified_attributes");
1433 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001434
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001435#undef APPEND
Fred Drake0582df92000-07-12 04:49:00 +00001436 return rc;
1437 }
1438 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001439}
1440
Fred Drake6f987622000-08-25 18:03:30 +00001441static int
1442sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001443{
1444 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001445 if (handlernum >= 0) {
1446 xmlhandler c_handler = NULL;
1447 PyObject *temp = self->handlers[handlernum];
1448
1449 if (v == Py_None)
1450 v = NULL;
1451 else if (v != NULL) {
1452 Py_INCREF(v);
1453 c_handler = handler_info[handlernum].handler;
1454 }
Fred Drake0582df92000-07-12 04:49:00 +00001455 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001456 Py_XDECREF(temp);
1457 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001458 return 1;
1459 }
1460 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001461}
1462
1463static int
Fred Drake6f987622000-08-25 18:03:30 +00001464xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001465{
Fred Drake6f987622000-08-25 18:03:30 +00001466 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001467 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001468 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1469 return -1;
1470 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001471 if (strcmp(name, "buffer_text") == 0) {
1472 if (PyObject_IsTrue(v)) {
1473 if (self->buffer == NULL) {
1474 self->buffer = malloc(self->buffer_size);
1475 if (self->buffer == NULL) {
1476 PyErr_NoMemory();
1477 return -1;
1478 }
1479 self->buffer_used = 0;
1480 }
1481 }
1482 else if (self->buffer != NULL) {
1483 if (flush_character_buffer(self) < 0)
1484 return -1;
1485 free(self->buffer);
1486 self->buffer = NULL;
1487 }
1488 return 0;
1489 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001490 if (strcmp(name, "namespace_prefixes") == 0) {
1491 if (PyObject_IsTrue(v))
1492 self->ns_prefixes = 1;
1493 else
1494 self->ns_prefixes = 0;
1495 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1496 return 0;
1497 }
Fred Drake85d835f2001-02-08 15:39:08 +00001498 if (strcmp(name, "ordered_attributes") == 0) {
1499 if (PyObject_IsTrue(v))
1500 self->ordered_attributes = 1;
1501 else
1502 self->ordered_attributes = 0;
1503 return 0;
1504 }
Fred Drake6f987622000-08-25 18:03:30 +00001505 if (strcmp(name, "returns_unicode") == 0) {
Fred Drake85d835f2001-02-08 15:39:08 +00001506 if (PyObject_IsTrue(v)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001507#ifndef Py_USING_UNICODE
Fred Drake71b63ff2002-06-28 22:29:01 +00001508 PyErr_SetString(PyExc_ValueError,
1509 "Unicode support not available");
Fred Drake6f987622000-08-25 18:03:30 +00001510 return -1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001511#else
Fred Drake6f987622000-08-25 18:03:30 +00001512 self->returns_unicode = 1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001513#endif
Fred Drake6f987622000-08-25 18:03:30 +00001514 }
1515 else
1516 self->returns_unicode = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001517 return 0;
1518 }
1519 if (strcmp(name, "specified_attributes") == 0) {
1520 if (PyObject_IsTrue(v))
1521 self->specified_attributes = 1;
1522 else
1523 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001524 return 0;
1525 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001526 if (strcmp(name, "CharacterDataHandler") == 0) {
1527 /* If we're changing the character data handler, flush all
1528 * cached data with the old handler. Not sure there's a
1529 * "right" thing to do, though, but this probably won't
1530 * happen.
1531 */
1532 if (flush_character_buffer(self) < 0)
1533 return -1;
1534 }
Fred Drake6f987622000-08-25 18:03:30 +00001535 if (sethandler(self, name, v)) {
1536 return 0;
1537 }
1538 PyErr_SetString(PyExc_AttributeError, name);
1539 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001540}
1541
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001542#ifdef WITH_CYCLE_GC
1543static int
1544xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1545{
Fred Drakecde79132001-04-25 16:01:30 +00001546 int i, err;
1547 for (i = 0; handler_info[i].name != NULL; i++) {
1548 if (!op->handlers[i])
1549 continue;
1550 err = visit(op->handlers[i], arg);
1551 if (err)
1552 return err;
1553 }
1554 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001555}
1556
1557static int
1558xmlparse_clear(xmlparseobject *op)
1559{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001560 clear_handlers(op, 0);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001561 Py_XDECREF(op->intern);
1562 op->intern = 0;
Fred Drakecde79132001-04-25 16:01:30 +00001563 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001564}
1565#endif
1566
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001567PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001568
1569static PyTypeObject Xmlparsetype = {
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001570 PyObject_HEAD_INIT(NULL)
1571 0, /*ob_size*/
Guido van Rossum14648392001-12-08 18:02:58 +00001572 "pyexpat.xmlparser", /*tp_name*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001573 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001574 0, /*tp_itemsize*/
1575 /* methods */
1576 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1577 (printfunc)0, /*tp_print*/
1578 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1579 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1580 (cmpfunc)0, /*tp_compare*/
1581 (reprfunc)0, /*tp_repr*/
1582 0, /*tp_as_number*/
1583 0, /*tp_as_sequence*/
1584 0, /*tp_as_mapping*/
1585 (hashfunc)0, /*tp_hash*/
1586 (ternaryfunc)0, /*tp_call*/
1587 (reprfunc)0, /*tp_str*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001588 0, /* tp_getattro */
1589 0, /* tp_setattro */
1590 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001591#ifdef Py_TPFLAGS_HAVE_GC
Fred Drake71b63ff2002-06-28 22:29:01 +00001592 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001593#else
Fred Drake71b63ff2002-06-28 22:29:01 +00001594 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001595#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001596 Xmlparsetype__doc__, /* tp_doc - Documentation string */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001597#ifdef WITH_CYCLE_GC
1598 (traverseproc)xmlparse_traverse, /* tp_traverse */
1599 (inquiry)xmlparse_clear /* tp_clear */
1600#else
1601 0, 0
1602#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001603};
1604
1605/* End of code for xmlparser objects */
1606/* -------------------------------------------------------- */
1607
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001608PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001609"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001611
1612static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001613pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1614{
Fred Drakecde79132001-04-25 16:01:30 +00001615 char *encoding = NULL;
1616 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001617 PyObject *intern = NULL;
1618 PyObject *result;
1619 int intern_decref = 0;
Fred Drake71b63ff2002-06-28 22:29:01 +00001620 static char *kwlist[] = {"encoding", "namespace_separator",
Fred Drakeb91a36b2002-06-27 19:40:48 +00001621 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001622
Fred Drakeb91a36b2002-06-27 19:40:48 +00001623 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1624 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001625 return NULL;
1626 if (namespace_separator != NULL
1627 && strlen(namespace_separator) > 1) {
1628 PyErr_SetString(PyExc_ValueError,
1629 "namespace_separator must be at most one"
1630 " character, omitted, or None");
1631 return NULL;
1632 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001633 /* Explicitly passing None means no interning is desired.
1634 Not passing anything means that a new dictionary is used. */
1635 if (intern == Py_None)
1636 intern = NULL;
1637 else if (intern == NULL) {
1638 intern = PyDict_New();
1639 if (!intern)
1640 return NULL;
1641 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001642 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001643 else if (!PyDict_Check(intern)) {
1644 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1645 return NULL;
1646 }
1647
1648 result = newxmlparseobject(encoding, namespace_separator, intern);
1649 if (intern_decref) {
1650 Py_DECREF(intern);
1651 }
1652 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001653}
1654
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001655PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001656"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001657Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001658
1659static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001660pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001661{
Fred Drake0582df92000-07-12 04:49:00 +00001662 long code = 0;
1663
1664 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1665 return NULL;
1666 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001667}
1668
1669/* List of methods defined in the module */
1670
1671static struct PyMethodDef pyexpat_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001672 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1673 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1674 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1675 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001676
Fred Drake0582df92000-07-12 04:49:00 +00001677 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001678};
1679
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001680/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001681
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001682PyDoc_STRVAR(pyexpat_module_documentation,
1683"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001684
Fred Drake4113b132001-03-24 19:58:26 +00001685/* Return a Python string that represents the version number without the
1686 * extra cruft added by revision control, even if the right options were
1687 * given to the "cvs export" command to make it not include the extra
1688 * cruft.
1689 */
1690static PyObject *
1691get_version_string(void)
1692{
1693 static char *rcsid = "$Revision$";
1694 char *rev = rcsid;
1695 int i = 0;
1696
Neal Norwitz3afb2d22002-03-20 21:32:07 +00001697 while (!isdigit((int)*rev))
Fred Drake4113b132001-03-24 19:58:26 +00001698 ++rev;
1699 while (rev[i] != ' ' && rev[i] != '\0')
1700 ++i;
1701
1702 return PyString_FromStringAndSize(rev, i);
1703}
1704
Fred Drakecde79132001-04-25 16:01:30 +00001705/* Initialization function for the module */
1706
/* Build-time overridable names; the defaults below apply only when the
 * build has not already defined them.
 */
#if !defined(MODULE_NAME)
#define MODULE_NAME "pyexpat"
#endif

#if !defined(MODULE_INITFUNC)
#define MODULE_INITFUNC initpyexpat
#endif

/* Fallback for Python headers that do not provide PyMODINIT_FUNC. */
#if !defined(PyMODINIT_FUNC)
#   ifdef MS_WINDOWS
#       define PyMODINIT_FUNC __declspec(dllexport) void
#   else
#       define PyMODINIT_FUNC void
#   endif
#endif

/* Prototype first, to avoid compiler warnings. */
PyMODINIT_FUNC MODULE_INITFUNC(void);
Fred Drakecde79132001-04-25 16:01:30 +00001724
Martin v. Löwis069dde22003-01-21 10:58:18 +00001725PyMODINIT_FUNC
1726MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001727{
1728 PyObject *m, *d;
Fred Drakecde79132001-04-25 16:01:30 +00001729 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001730 PyObject *errors_module;
1731 PyObject *modelmod_name;
1732 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001733 PyObject *sys_modules;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001734
Fred Drake6f987622000-08-25 18:03:30 +00001735 if (errmod_name == NULL)
1736 return;
Fred Drakecde79132001-04-25 16:01:30 +00001737 modelmod_name = PyString_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001738 if (modelmod_name == NULL)
1739 return;
Fred Drake6f987622000-08-25 18:03:30 +00001740
Fred Drake0582df92000-07-12 04:49:00 +00001741 Xmlparsetype.ob_type = &PyType_Type;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001742
Fred Drake0582df92000-07-12 04:49:00 +00001743 /* Create the module and add the functions */
Fred Drakecde79132001-04-25 16:01:30 +00001744 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
Fred Drake85d835f2001-02-08 15:39:08 +00001745 pyexpat_module_documentation);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001746
Fred Drake0582df92000-07-12 04:49:00 +00001747 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001748 if (ErrorObject == NULL) {
1749 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001750 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001751 if (ErrorObject == NULL)
1752 return;
1753 }
1754 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001755 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001756 Py_INCREF(ErrorObject);
1757 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001758 Py_INCREF(&Xmlparsetype);
1759 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001760
Fred Drake4113b132001-03-24 19:58:26 +00001761 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001762 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1763 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001764 {
1765 XML_Expat_Version info = XML_ExpatVersionInfo();
1766 PyModule_AddObject(m, "version_info",
1767 Py_BuildValue("(iii)", info.major,
1768 info.minor, info.micro));
1769 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001770#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001771 init_template_buffer();
1772#endif
Fred Drake0582df92000-07-12 04:49:00 +00001773 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001774 compiled, this should check and set native_encoding
1775 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001776 */
Fred Drake93adb692000-09-23 04:55:48 +00001777 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001778
Fred Drake85d835f2001-02-08 15:39:08 +00001779 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001780 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001781 errors_module = PyDict_GetItem(d, errmod_name);
1782 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001783 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001784 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001785 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001786 /* gives away the reference to errors_module */
1787 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001788 }
1789 }
Fred Drake6f987622000-08-25 18:03:30 +00001790 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001791 model_module = PyDict_GetItem(d, modelmod_name);
1792 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001793 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001794 if (model_module != NULL) {
1795 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1796 /* gives away the reference to model_module */
1797 PyModule_AddObject(m, "model", model_module);
1798 }
1799 }
1800 Py_DECREF(modelmod_name);
1801 if (errors_module == NULL || model_module == NULL)
1802 /* Don't core dump later! */
Fred Drake6f987622000-08-25 18:03:30 +00001803 return;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001804
1805 {
1806 const XML_Feature *features = XML_GetFeatureList();
1807 PyObject *list = PyList_New(0);
1808 if (list == NULL)
1809 /* just ignore it */
1810 PyErr_Clear();
1811 else {
1812 int i = 0;
1813 for (; features[i].feature != XML_FEATURE_END; ++i) {
1814 int ok;
1815 PyObject *item = Py_BuildValue("si", features[i].name,
1816 features[i].value);
1817 if (item == NULL) {
1818 Py_DECREF(list);
1819 list = NULL;
1820 break;
1821 }
1822 ok = PyList_Append(list, item);
1823 Py_DECREF(item);
1824 if (ok < 0) {
1825 PyErr_Clear();
1826 break;
1827 }
1828 }
1829 if (list != NULL)
1830 PyModule_AddObject(m, "features", list);
1831 }
1832 }
Fred Drake6f987622000-08-25 18:03:30 +00001833
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001834#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001835 PyModule_AddStringConstant(errors_module, #name, \
1836 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001837
Fred Drake0582df92000-07-12 04:49:00 +00001838 MYCONST(XML_ERROR_NO_MEMORY);
1839 MYCONST(XML_ERROR_SYNTAX);
1840 MYCONST(XML_ERROR_NO_ELEMENTS);
1841 MYCONST(XML_ERROR_INVALID_TOKEN);
1842 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1843 MYCONST(XML_ERROR_PARTIAL_CHAR);
1844 MYCONST(XML_ERROR_TAG_MISMATCH);
1845 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1846 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1847 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1848 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1849 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1850 MYCONST(XML_ERROR_ASYNC_ENTITY);
1851 MYCONST(XML_ERROR_BAD_CHAR_REF);
1852 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1853 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1854 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1855 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1856 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001857 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1858 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1859 MYCONST(XML_ERROR_NOT_STANDALONE);
1860
Fred Drake85d835f2001-02-08 15:39:08 +00001861 PyModule_AddStringConstant(errors_module, "__doc__",
1862 "Constants used to describe error conditions.");
1863
Fred Drake93adb692000-09-23 04:55:48 +00001864#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001865
Fred Drake85d835f2001-02-08 15:39:08 +00001866#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001867 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1868 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1869 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001870#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001871
Fred Drake85d835f2001-02-08 15:39:08 +00001872#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1873 PyModule_AddStringConstant(model_module, "__doc__",
1874 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001875
Fred Drake85d835f2001-02-08 15:39:08 +00001876 MYCONST(XML_CTYPE_EMPTY);
1877 MYCONST(XML_CTYPE_ANY);
1878 MYCONST(XML_CTYPE_MIXED);
1879 MYCONST(XML_CTYPE_NAME);
1880 MYCONST(XML_CTYPE_CHOICE);
1881 MYCONST(XML_CTYPE_SEQ);
1882
1883 MYCONST(XML_CQUANT_NONE);
1884 MYCONST(XML_CQUANT_OPT);
1885 MYCONST(XML_CQUANT_REP);
1886 MYCONST(XML_CQUANT_PLUS);
1887#undef MYCONST
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001888}
1889
Fred Drake6f987622000-08-25 18:03:30 +00001890static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001891clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001892{
Fred Drakecde79132001-04-25 16:01:30 +00001893 int i = 0;
1894 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001895
Fred Drake71b63ff2002-06-28 22:29:01 +00001896 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001897 if (initial)
Fred Drake71b63ff2002-06-28 22:29:01 +00001898 self->handlers[i] = NULL;
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001899 else {
Fred Drakecde79132001-04-25 16:01:30 +00001900 temp = self->handlers[i];
1901 self->handlers[i] = NULL;
1902 Py_XDECREF(temp);
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001903 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001904 }
Fred Drakecde79132001-04-25 16:01:30 +00001905 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001906}
1907
Tim Peters0c322792002-07-17 16:49:03 +00001908static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001909 {"StartElementHandler",
1910 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001911 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001912 {"EndElementHandler",
1913 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001914 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001915 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001916 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1917 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001918 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001919 (xmlhandlersetter)XML_SetCharacterDataHandler,
1920 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001921 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001922 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001923 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001924 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001925 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001926 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001927 {"StartNamespaceDeclHandler",
1928 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001929 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001930 {"EndNamespaceDeclHandler",
1931 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001932 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001933 {"CommentHandler",
1934 (xmlhandlersetter)XML_SetCommentHandler,
1935 (xmlhandler)my_CommentHandler},
1936 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001937 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001938 (xmlhandler)my_StartCdataSectionHandler},
1939 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001940 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001941 (xmlhandler)my_EndCdataSectionHandler},
1942 {"DefaultHandler",
1943 (xmlhandlersetter)XML_SetDefaultHandler,
1944 (xmlhandler)my_DefaultHandler},
1945 {"DefaultHandlerExpand",
1946 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
1947 (xmlhandler)my_DefaultHandlerExpandHandler},
1948 {"NotStandaloneHandler",
1949 (xmlhandlersetter)XML_SetNotStandaloneHandler,
1950 (xmlhandler)my_NotStandaloneHandler},
1951 {"ExternalEntityRefHandler",
1952 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001953 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001954 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001955 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001956 (xmlhandler)my_StartDoctypeDeclHandler},
1957 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001958 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001959 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00001960 {"EntityDeclHandler",
1961 (xmlhandlersetter)XML_SetEntityDeclHandler,
1962 (xmlhandler)my_EntityDeclHandler},
1963 {"XmlDeclHandler",
1964 (xmlhandlersetter)XML_SetXmlDeclHandler,
1965 (xmlhandler)my_XmlDeclHandler},
1966 {"ElementDeclHandler",
1967 (xmlhandlersetter)XML_SetElementDeclHandler,
1968 (xmlhandler)my_ElementDeclHandler},
1969 {"AttlistDeclHandler",
1970 (xmlhandlersetter)XML_SetAttlistDeclHandler,
1971 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwis069dde22003-01-21 10:58:18 +00001972 {"SkippedEntityHandler",
1973 (xmlhandlersetter)XML_SetSkippedEntityHandler,
1974 (xmlhandler)my_SkippedEntityHandler},
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001975
Fred Drake0582df92000-07-12 04:49:00 +00001976 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001977};