blob: 436f891adf59ff7426bf00e317c4727aa114b5c8 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "compile.h"
5#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
Martin v. Löwisc847f402003-01-21 11:09:21 +00008#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
9
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000010#ifndef PyDoc_STRVAR
Martin v. Löwis069dde22003-01-21 10:58:18 +000011
12/*
13 * fdrake says:
14 * Don't change the PyDoc_STR macro definition to (str), because
15 * '''the parentheses cause compile failures
16 * ("non-constant static initializer" or something like that)
17 * on some platforms (Irix?)'''
18 */
Fred Drakef57b22a2002-09-02 15:54:06 +000019#define PyDoc_STR(str) str
Fred Drake7c75bf22002-07-01 14:02:31 +000020#define PyDoc_VAR(name) static char name[]
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000021#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000022#endif
23
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000024#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
25/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
Martin v. Löwis339d0f72001-08-17 18:39:25 +000026#define Py_USING_UNICODE
Martin v. Löwis069dde22003-01-21 10:58:18 +000027#define NOFIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000028#endif
29
/* Identifies each Expat callback this module can dispatch.  The values
 * are used as indexes into handler_info[] and into the per-parser
 * `handlers` array.
 * NOTE(review): the handler_info table is presumably initialised in this
 * same order -- confirm before reordering or inserting entries.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when the Expat headers are new enough. */
    SkippedEntity,
#endif
    _DummyDecl
};
57
58static PyObject *ErrorObject;
59
60/* ----------------------------------------------------- */
61
62/* Declarations for objects of type xmlparser */
63
64typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000065 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake0582df92000-07-12 04:49:00 +000067 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000068 int returns_unicode; /* True if Unicode strings are returned;
69 if false, UTF-8 strings are returned */
70 int ordered_attributes; /* Return attributes as a list. */
71 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000072 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000073 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000074 XML_Char *buffer; /* Buffer used when accumulating characters */
75 /* NULL if not enabled */
76 int buffer_size; /* Size of buffer, in XML_Char units */
77 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000078 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000079 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000080} xmlparseobject;
81
Fred Drake2a3d7db2002-06-28 22:56:48 +000082#define CHARACTER_DATA_BUFFER_SIZE 8192
83
Jeremy Hylton938ace62002-07-17 16:30:39 +000084static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000085
Fred Drake117ac852002-09-24 16:24:54 +000086typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000087typedef void* xmlhandler;
88
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000089struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000090 const char *name;
91 xmlhandlersetter setter;
92 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000093 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000094 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000095};
96
Jeremy Hylton938ace62002-07-17 16:30:39 +000097static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000098
Fred Drakebd6101c2001-02-14 18:29:45 +000099/* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
101 */
102static int
103set_error_attr(PyObject *err, char *name, int value)
104{
105 PyObject *v = PyInt_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000106
Fred Drakebd6101c2001-02-14 18:29:45 +0000107 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
108 Py_DECREF(v);
109 return 0;
110 }
111 return 1;
112}
113
114/* Build and set an Expat exception, including positioning
115 * information. Always returns NULL.
116 */
Fred Drake85d835f2001-02-08 15:39:08 +0000117static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000118set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000119{
120 PyObject *err;
121 char buffer[256];
122 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000123 int lineno = XML_GetErrorLineNumber(parser);
124 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000125
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000126 /* There is no risk of overflowing this buffer, since
127 even for 64-bit integers, there is sufficient space. */
128 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000129 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000130 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000131 if ( err != NULL
132 && set_error_attr(err, "code", code)
133 && set_error_attr(err, "offset", column)
134 && set_error_attr(err, "lineno", lineno)) {
135 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000136 }
137 return NULL;
138}
139
Fred Drake71b63ff2002-06-28 22:29:01 +0000140static int
141have_handler(xmlparseobject *self, int type)
142{
143 PyObject *handler = self->handlers[type];
144 return handler != NULL;
145}
146
147static PyObject *
148get_handler_name(struct HandlerInfo *hinfo)
149{
150 PyObject *name = hinfo->nameobj;
151 if (name == NULL) {
152 name = PyString_FromString(hinfo->name);
153 hinfo->nameobj = name;
154 }
155 Py_XINCREF(name);
156 return name;
157}
158
Fred Drake85d835f2001-02-08 15:39:08 +0000159
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000160#ifdef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000161/* Convert a string of XML_Chars into a Unicode string.
162 Returns None if str is a null pointer. */
163
Fred Drake0582df92000-07-12 04:49:00 +0000164static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000165conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000166{
Fred Drake71b63ff2002-06-28 22:29:01 +0000167 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000168 and hence in UTF-8. */
169 /* UTF-8 from Expat, Unicode desired */
170 if (str == NULL) {
171 Py_INCREF(Py_None);
172 return Py_None;
173 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000174 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175}
176
Fred Drake0582df92000-07-12 04:49:00 +0000177static PyObject *
178conv_string_len_to_unicode(const XML_Char *str, int len)
179{
Fred Drake71b63ff2002-06-28 22:29:01 +0000180 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000181 and hence in UTF-8. */
182 /* UTF-8 from Expat, Unicode desired */
183 if (str == NULL) {
184 Py_INCREF(Py_None);
185 return Py_None;
186 }
Fred Drake6f987622000-08-25 18:03:30 +0000187 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000188}
189#endif
190
191/* Convert a string of XML_Chars into an 8-bit Python string.
192 Returns None if str is a null pointer. */
193
Fred Drake6f987622000-08-25 18:03:30 +0000194static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000195conv_string_to_utf8(const XML_Char *str)
Fred Drake6f987622000-08-25 18:03:30 +0000196{
Fred Drake71b63ff2002-06-28 22:29:01 +0000197 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000198 and hence in UTF-8. */
199 /* UTF-8 from Expat, UTF-8 desired */
200 if (str == NULL) {
201 Py_INCREF(Py_None);
202 return Py_None;
203 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000204 return PyString_FromString(str);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000205}
206
Fred Drake6f987622000-08-25 18:03:30 +0000207static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +0000208conv_string_len_to_utf8(const XML_Char *str, int len)
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000209{
Fred Drake71b63ff2002-06-28 22:29:01 +0000210 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000211 and hence in UTF-8. */
212 /* UTF-8 from Expat, UTF-8 desired */
213 if (str == NULL) {
214 Py_INCREF(Py_None);
215 return Py_None;
216 }
217 return PyString_FromStringAndSize((const char *)str, len);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000218}
219
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000220/* Callback routines */
221
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000222static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000223
Martin v. Löwis069dde22003-01-21 10:58:18 +0000224/* This handler is used when an error has been detected, in the hope
225 that actual parsing can be terminated early. This will only help
226 if an external entity reference is encountered. */
227static int
228error_external_entity_ref_handler(XML_Parser parser,
229 const XML_Char *context,
230 const XML_Char *base,
231 const XML_Char *systemId,
232 const XML_Char *publicId)
233{
234 return 0;
235}
236
Fred Drake6f987622000-08-25 18:03:30 +0000237static void
238flag_error(xmlparseobject *self)
239{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000240 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000241 XML_SetExternalEntityRefHandler(self->itself,
242 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000243}
244
245static PyCodeObject*
246getcode(enum HandlerTypes slot, char* func_name, int lineno)
247{
Fred Drakebd6101c2001-02-14 18:29:45 +0000248 PyObject *code = NULL;
249 PyObject *name = NULL;
250 PyObject *nulltuple = NULL;
251 PyObject *filename = NULL;
252
253 if (handler_info[slot].tb_code == NULL) {
254 code = PyString_FromString("");
255 if (code == NULL)
256 goto failed;
257 name = PyString_FromString(func_name);
258 if (name == NULL)
259 goto failed;
260 nulltuple = PyTuple_New(0);
261 if (nulltuple == NULL)
262 goto failed;
263 filename = PyString_FromString(__FILE__);
264 handler_info[slot].tb_code =
265 PyCode_New(0, /* argcount */
266 0, /* nlocals */
267 0, /* stacksize */
268 0, /* flags */
269 code, /* code */
270 nulltuple, /* consts */
271 nulltuple, /* names */
272 nulltuple, /* varnames */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000273#if PYTHON_API_VERSION >= 1010
Fred Drakebd6101c2001-02-14 18:29:45 +0000274 nulltuple, /* freevars */
275 nulltuple, /* cellvars */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000276#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000277 filename, /* filename */
278 name, /* name */
279 lineno, /* firstlineno */
280 code /* lnotab */
281 );
282 if (handler_info[slot].tb_code == NULL)
283 goto failed;
284 Py_DECREF(code);
285 Py_DECREF(nulltuple);
286 Py_DECREF(filename);
287 Py_DECREF(name);
288 }
289 return handler_info[slot].tb_code;
290 failed:
291 Py_XDECREF(code);
292 Py_XDECREF(name);
293 return NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000294}
295
Martin v. Löwis069dde22003-01-21 10:58:18 +0000296#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000297static int
298trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
299{
300 int result = 0;
301 if (!tstate->use_tracing || tstate->tracing)
302 return 0;
303 if (tstate->c_profilefunc != NULL) {
304 tstate->tracing++;
305 result = tstate->c_profilefunc(tstate->c_profileobj,
306 f, code , val);
307 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
308 || (tstate->c_profilefunc != NULL));
309 tstate->tracing--;
310 if (result)
311 return result;
312 }
313 if (tstate->c_tracefunc != NULL) {
314 tstate->tracing++;
315 result = tstate->c_tracefunc(tstate->c_traceobj,
316 f, code , val);
317 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
318 || (tstate->c_profilefunc != NULL));
319 tstate->tracing--;
320 }
321 return result;
322}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000323#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000324
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000325static PyObject*
326call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
327{
Fred Drakebd6101c2001-02-14 18:29:45 +0000328 PyThreadState *tstate = PyThreadState_GET();
329 PyFrameObject *f;
330 PyObject *res;
331
332 if (c == NULL)
333 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000334
Fred Drakebd6101c2001-02-14 18:29:45 +0000335 f = PyFrame_New(
336 tstate, /*back*/
337 c, /*code*/
Michael W. Hudson019a78e2002-11-08 12:53:11 +0000338 PyEval_GetGlobals(), /*globals*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000339 NULL /*locals*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000340 );
341 if (f == NULL)
342 return NULL;
343 tstate->frame = f;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000344#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000345 if (trace_frame(tstate, f, PyTrace_CALL, Py_None)) {
346 Py_DECREF(f);
347 return NULL;
348 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000349#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000350 res = PyEval_CallObject(func, args);
351 if (res == NULL && tstate->curexc_traceback == NULL)
352 PyTraceBack_Here(f);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000353#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000354 else {
355 if (trace_frame(tstate, f, PyTrace_RETURN, res)) {
356 Py_XDECREF(res);
357 res = NULL;
358 }
359 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000360#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000361 tstate->frame = f->f_back;
362 Py_DECREF(f);
363 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000364}
365
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#ifndef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000367#define STRING_CONV_FUNC conv_string_to_utf8
368#else
Martin v. Löwis069dde22003-01-21 10:58:18 +0000369/* Python 2.0 and later versions, when built with Unicode support */
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000370#define STRING_CONV_FUNC (self->returns_unicode \
371 ? conv_string_to_unicode : conv_string_to_utf8)
372#endif
Guido van Rossum5961f5a2000-03-31 16:18:11 +0000373
Fred Drakeb91a36b2002-06-27 19:40:48 +0000374static PyObject*
375string_intern(xmlparseobject *self, const char* str)
376{
377 PyObject *result = STRING_CONV_FUNC(str);
378 PyObject *value;
379 if (!self->intern)
380 return result;
381 value = PyDict_GetItem(self->intern, result);
382 if (!value) {
383 if (PyDict_SetItem(self->intern, result, result) == 0)
384 return result;
385 else
386 return NULL;
387 }
388 Py_INCREF(value);
389 Py_DECREF(result);
390 return value;
391}
392
Fred Drake2a3d7db2002-06-28 22:56:48 +0000393/* Return 0 on success, -1 on exception.
394 * flag_error() will be called before return if needed.
395 */
396static int
397call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
398{
399 PyObject *args;
400 PyObject *temp;
401
402 args = PyTuple_New(1);
403 if (args == NULL)
404 return -1;
405#ifdef Py_USING_UNICODE
406 temp = (self->returns_unicode
407 ? conv_string_len_to_unicode(buffer, len)
408 : conv_string_len_to_utf8(buffer, len));
409#else
410 temp = conv_string_len_to_utf8(buffer, len);
411#endif
412 if (temp == NULL) {
413 Py_DECREF(args);
414 flag_error(self);
415 return -1;
416 }
417 PyTuple_SET_ITEM(args, 0, temp);
418 /* temp is now a borrowed reference; consider it unused. */
419 self->in_callback = 1;
420 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
421 self->handlers[CharacterData], args);
422 /* temp is an owned reference again, or NULL */
423 self->in_callback = 0;
424 Py_DECREF(args);
425 if (temp == NULL) {
426 flag_error(self);
427 return -1;
428 }
429 Py_DECREF(temp);
430 return 0;
431}
432
433static int
434flush_character_buffer(xmlparseobject *self)
435{
436 int rc;
437 if (self->buffer == NULL || self->buffer_used == 0)
438 return 0;
439 rc = call_character_handler(self, self->buffer, self->buffer_used);
440 self->buffer_used = 0;
441 return rc;
442}
443
444static void
445my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
446{
447 xmlparseobject *self = (xmlparseobject *) userData;
448 if (self->buffer == NULL)
449 call_character_handler(self, data, len);
450 else {
451 if ((self->buffer_used + len) > self->buffer_size) {
452 if (flush_character_buffer(self) < 0)
453 return;
454 /* handler might have changed; drop the rest on the floor
455 * if there isn't a handler anymore
456 */
457 if (!have_handler(self, CharacterData))
458 return;
459 }
460 if (len > self->buffer_size) {
461 call_character_handler(self, data, len);
462 self->buffer_used = 0;
463 }
464 else {
465 memcpy(self->buffer + self->buffer_used,
466 data, len * sizeof(XML_Char));
467 self->buffer_used += len;
468 }
469 }
470}
471
Fred Drake85d835f2001-02-08 15:39:08 +0000472static void
473my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000474 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000475{
476 xmlparseobject *self = (xmlparseobject *)userData;
477
Fred Drake71b63ff2002-06-28 22:29:01 +0000478 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000479 PyObject *container, *rv, *args;
480 int i, max;
481
Fred Drake2a3d7db2002-06-28 22:56:48 +0000482 if (flush_character_buffer(self) < 0)
483 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000484 /* Set max to the number of slots filled in atts[]; max/2 is
485 * the number of attributes we need to process.
486 */
487 if (self->specified_attributes) {
488 max = XML_GetSpecifiedAttributeCount(self->itself);
489 }
490 else {
491 max = 0;
492 while (atts[max] != NULL)
493 max += 2;
494 }
495 /* Build the container. */
496 if (self->ordered_attributes)
497 container = PyList_New(max);
498 else
499 container = PyDict_New();
500 if (container == NULL) {
501 flag_error(self);
502 return;
503 }
504 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000505 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000506 PyObject *v;
507 if (n == NULL) {
508 flag_error(self);
509 Py_DECREF(container);
510 return;
511 }
512 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
513 if (v == NULL) {
514 flag_error(self);
515 Py_DECREF(container);
516 Py_DECREF(n);
517 return;
518 }
519 if (self->ordered_attributes) {
520 PyList_SET_ITEM(container, i, n);
521 PyList_SET_ITEM(container, i+1, v);
522 }
523 else if (PyDict_SetItem(container, n, v)) {
524 flag_error(self);
525 Py_DECREF(n);
526 Py_DECREF(v);
527 return;
528 }
529 else {
530 Py_DECREF(n);
531 Py_DECREF(v);
532 }
533 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000534 args = Py_BuildValue("(NN)", string_intern(self, name), container);
Fred Drake85d835f2001-02-08 15:39:08 +0000535 if (args == NULL) {
536 Py_DECREF(container);
537 return;
538 }
539 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000540 self->in_callback = 1;
541 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake85d835f2001-02-08 15:39:08 +0000542 self->handlers[StartElement], args);
Fred Drakebd6101c2001-02-14 18:29:45 +0000543 self->in_callback = 0;
544 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000545 if (rv == NULL) {
546 flag_error(self);
547 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000548 }
Fred Drake85d835f2001-02-08 15:39:08 +0000549 Py_DECREF(rv);
550 }
551}
552
/* Generate the C callback my_<NAME>Handler that forwards an Expat event
 * to the Python handler stored in slot NAME.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; also pasted into the name
 *   PARAMS        parenthesised C parameter list
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on the handler result `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing but `return RETURN` when no
 * handler is installed.  Otherwise it flushes buffered character data,
 * builds the argument tuple, calls the handler through
 * call_with_frame(), and calls flag_error() if building the arguments or
 * the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ; \
        if (!args) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME, #NAME, __LINE__), \
                             self->handlers[NAME], args); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
581
/* RC_HANDLER specialisation for callbacks returning void whose first
 * parameter is `void *userData`.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* RC_HANDLER specialisation for callbacks returning int: the Python
 * handler's result is converted with PyInt_AsLong(); 0 is returned when
 * no handler is installed or an error occurs before the conversion.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyInt_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000590
Fred Drake71b63ff2002-06-28 22:29:01 +0000591VOID_HANDLER(EndElement,
592 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000593 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000594
Fred Drake6f987622000-08-25 18:03:30 +0000595VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000596 (void *userData,
597 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000598 const XML_Char *data),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000599 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000600
Fred Drake6f987622000-08-25 18:03:30 +0000601VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000602 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000603 const XML_Char *entityName,
604 const XML_Char *base,
605 const XML_Char *systemId,
606 const XML_Char *publicId,
607 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000608 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000609 string_intern(self, entityName), string_intern(self, base),
610 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000611 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000612
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000613#ifndef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000614VOID_HANDLER(EntityDecl,
615 (void *userData,
616 const XML_Char *entityName,
617 int is_parameter_entity,
618 const XML_Char *value,
619 int value_length,
620 const XML_Char *base,
621 const XML_Char *systemId,
622 const XML_Char *publicId,
623 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000624 ("NiNNNNN",
625 string_intern(self, entityName), is_parameter_entity,
Fred Drake85d835f2001-02-08 15:39:08 +0000626 conv_string_len_to_utf8(value, value_length),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000627 string_intern(self, base), string_intern(self, systemId),
628 string_intern(self, publicId),
629 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000630#else
631VOID_HANDLER(EntityDecl,
632 (void *userData,
633 const XML_Char *entityName,
634 int is_parameter_entity,
635 const XML_Char *value,
636 int value_length,
637 const XML_Char *base,
638 const XML_Char *systemId,
639 const XML_Char *publicId,
640 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000641 ("NiNNNNN",
642 string_intern(self, entityName), is_parameter_entity,
Fred Drake71b63ff2002-06-28 22:29:01 +0000643 (self->returns_unicode
644 ? conv_string_len_to_unicode(value, value_length)
Fred Drake85d835f2001-02-08 15:39:08 +0000645 : conv_string_len_to_utf8(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000646 string_intern(self, base), string_intern(self, systemId),
647 string_intern(self, publicId),
648 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000649#endif
650
651VOID_HANDLER(XmlDecl,
652 (void *userData,
653 const XML_Char *version,
654 const XML_Char *encoding,
655 int standalone),
656 ("(O&O&i)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000657 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000658 standalone))
659
660static PyObject *
661conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000662 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000663{
664 PyObject *result = NULL;
665 PyObject *children = PyTuple_New(model->numchildren);
666 int i;
667
668 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000669 assert(model->numchildren < INT_MAX);
670 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000671 PyObject *child = conv_content_model(&model->children[i],
672 conv_string);
673 if (child == NULL) {
674 Py_XDECREF(children);
675 return NULL;
676 }
677 PyTuple_SET_ITEM(children, i, child);
678 }
679 result = Py_BuildValue("(iiO&N)",
680 model->type, model->quant,
681 conv_string,model->name, children);
682 }
683 return result;
684}
685
686static PyObject *
687conv_content_model_utf8(XML_Content * const model)
688{
689 return conv_content_model(model, conv_string_to_utf8);
690}
691
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000692#ifdef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000693static PyObject *
694conv_content_model_unicode(XML_Content * const model)
695{
696 return conv_content_model(model, conv_string_to_unicode);
697}
698
699VOID_HANDLER(ElementDecl,
700 (void *userData,
701 const XML_Char *name,
702 XML_Content *model),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000703 ("NO&",
704 string_intern(self, name),
Fred Drake85d835f2001-02-08 15:39:08 +0000705 (self->returns_unicode ? conv_content_model_unicode
706 : conv_content_model_utf8),model))
707#else
708VOID_HANDLER(ElementDecl,
709 (void *userData,
710 const XML_Char *name,
711 XML_Content *model),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000712 ("NO&",
713 string_intern(self, name), conv_content_model_utf8,model))
Fred Drake85d835f2001-02-08 15:39:08 +0000714#endif
715
716VOID_HANDLER(AttlistDecl,
717 (void *userData,
718 const XML_Char *elname,
719 const XML_Char *attname,
720 const XML_Char *att_type,
721 const XML_Char *dflt,
722 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000723 ("(NNO&O&i)",
724 string_intern(self, elname), string_intern(self, attname),
Fred Drake85d835f2001-02-08 15:39:08 +0000725 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
726 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000727
#if XML_COMBINED_VERSION >= 19504
/* SkippedEntity(entityName, is_parameter_entity); compiled only when
 * the Expat headers are new enough.
 */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName),
              is_parameter_entity))
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000736
Fred Drake71b63ff2002-06-28 22:29:01 +0000737VOID_HANDLER(NotationDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000738 (void *userData,
739 const XML_Char *notationName,
740 const XML_Char *base,
741 const XML_Char *systemId,
742 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000743 ("(NNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000744 string_intern(self, notationName), string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000745 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000746
Fred Drake6f987622000-08-25 18:03:30 +0000747VOID_HANDLER(StartNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000748 (void *userData,
749 const XML_Char *prefix,
750 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000751 ("(NN)",
752 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000753
Fred Drake6f987622000-08-25 18:03:30 +0000754VOID_HANDLER(EndNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000755 (void *userData,
756 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000757 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000758
Fred Drake6f987622000-08-25 18:03:30 +0000759VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000760 (void *userData, const XML_Char *data),
761 ("(O&)", STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000762
Fred Drake6f987622000-08-25 18:03:30 +0000763VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000764 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000765 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000766
Fred Drake6f987622000-08-25 18:03:30 +0000767VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000768 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000769 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000770
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000771#ifndef Py_USING_UNICODE
Fred Drake6f987622000-08-25 18:03:30 +0000772VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000773 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000774 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000775
Fred Drake6f987622000-08-25 18:03:30 +0000776VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000777 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000778 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000779#else
Fred Drake6f987622000-08-25 18:03:30 +0000780VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000781 (void *userData, const XML_Char *s, int len),
782 ("(N)", (self->returns_unicode
783 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000784 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000785
Fred Drake6f987622000-08-25 18:03:30 +0000786VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000787 (void *userData, const XML_Char *s, int len),
788 ("(N)", (self->returns_unicode
789 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000790 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000791#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000792
Fred Drake71b63ff2002-06-28 22:29:01 +0000793INT_HANDLER(NotStandalone,
794 (void *userData),
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000795 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000796
Fred Drake6f987622000-08-25 18:03:30 +0000797RC_HANDLER(int, ExternalEntityRef,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000798 (XML_Parser parser,
799 const XML_Char *context,
800 const XML_Char *base,
801 const XML_Char *systemId,
802 const XML_Char *publicId),
803 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000804 ("(O&NNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000805 STRING_CONV_FUNC,context, string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000806 string_intern(self, systemId), string_intern(self, publicId)),
Fred Drake6f987622000-08-25 18:03:30 +0000807 rc = PyInt_AsLong(rv);, rc,
808 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000809
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000810/* XXX UnknownEncodingHandler */
811
Fred Drake85d835f2001-02-08 15:39:08 +0000812VOID_HANDLER(StartDoctypeDecl,
813 (void *userData, const XML_Char *doctypeName,
814 const XML_Char *sysid, const XML_Char *pubid,
815 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000816 ("(NNNi)", string_intern(self, doctypeName),
817 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000818 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000819
820VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000821
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000822/* ---------------------------------------------------------------- */
823
Fred Drake71b63ff2002-06-28 22:29:01 +0000824static PyObject *
825get_parse_result(xmlparseobject *self, int rv)
826{
827 if (PyErr_Occurred()) {
828 return NULL;
829 }
830 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000831 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000832 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000833 if (flush_character_buffer(self) < 0) {
834 return NULL;
835 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000836 return PyInt_FromLong(rv);
837}
838
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000839PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000840"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000841Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000842
843static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000844xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845{
Fred Drake0582df92000-07-12 04:49:00 +0000846 char *s;
847 int slen;
848 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000849
Fred Drake0582df92000-07-12 04:49:00 +0000850 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
851 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000852
853 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000854}
855
Fred Drakeca1f4262000-09-21 20:10:23 +0000856/* File reading copied from cPickle */
857
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000858#define BUF_SIZE 2048
859
Fred Drake0582df92000-07-12 04:49:00 +0000860static int
861readinst(char *buf, int buf_size, PyObject *meth)
862{
863 PyObject *arg = NULL;
864 PyObject *bytes = NULL;
865 PyObject *str = NULL;
866 int len = -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000867
Fred Drake676940b2000-09-22 15:21:31 +0000868 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000869 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000870
Fred Drakeca1f4262000-09-21 20:10:23 +0000871 if ((arg = PyTuple_New(1)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000872 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000873
Tim Peters954eef72000-09-22 06:01:11 +0000874 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000875
Guido van Rossum84b2bed2002-08-16 17:01:09 +0000876 if ((str = PyObject_Call(meth, arg, NULL)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000877 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878
Fred Drake0582df92000-07-12 04:49:00 +0000879 /* XXX what to do if it returns a Unicode string? */
Fred Drakeca1f4262000-09-21 20:10:23 +0000880 if (!PyString_Check(str)) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000881 PyErr_Format(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000882 "read() did not return a string object (type=%.400s)",
883 str->ob_type->tp_name);
884 goto finally;
885 }
886 len = PyString_GET_SIZE(str);
887 if (len > buf_size) {
888 PyErr_Format(PyExc_ValueError,
889 "read() returned too much data: "
890 "%i bytes requested, %i returned",
891 buf_size, len);
892 Py_DECREF(str);
893 goto finally;
894 }
895 memcpy(buf, PyString_AsString(str), len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000896finally:
Fred Drake0582df92000-07-12 04:49:00 +0000897 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000898 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000899 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000900}
901
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000902PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000903"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000904Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000905
906static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000907xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000908{
Fred Drake0582df92000-07-12 04:49:00 +0000909 int rv = 1;
910 PyObject *f;
911 FILE *fp;
912 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000913
Fred Drake0582df92000-07-12 04:49:00 +0000914 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
915 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000916
Fred Drake0582df92000-07-12 04:49:00 +0000917 if (PyFile_Check(f)) {
918 fp = PyFile_AsFile(f);
919 }
920 else{
921 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000922 readmethod = PyObject_GetAttrString(f, "read");
923 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000924 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000925 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000926 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000927 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000928 }
929 }
930 for (;;) {
931 int bytes_read;
932 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
933 if (buf == NULL)
934 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000935
Fred Drake0582df92000-07-12 04:49:00 +0000936 if (fp) {
937 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
938 if (bytes_read < 0) {
939 PyErr_SetFromErrno(PyExc_IOError);
940 return NULL;
941 }
942 }
943 else {
944 bytes_read = readinst(buf, BUF_SIZE, readmethod);
945 if (bytes_read < 0)
946 return NULL;
947 }
948 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
949 if (PyErr_Occurred())
950 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000951
Fred Drake0582df92000-07-12 04:49:00 +0000952 if (!rv || bytes_read == 0)
953 break;
954 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000955 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000956}
957
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000958PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000959"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000960Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000961
962static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000963xmlparse_SetBase(xmlparseobject *self, PyObject *args)
964{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000965 char *base;
966
Fred Drake0582df92000-07-12 04:49:00 +0000967 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000968 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000969 if (!XML_SetBase(self->itself, base)) {
970 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000971 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000972 Py_INCREF(Py_None);
973 return Py_None;
974}
975
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000976PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000977"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000978Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000979
980static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000981xmlparse_GetBase(xmlparseobject *self, PyObject *args)
982{
983 if (!PyArg_ParseTuple(args, ":GetBase"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000984 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000985
Fred Drake0582df92000-07-12 04:49:00 +0000986 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000987}
988
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000989PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000990"GetInputContext() -> string\n\
991Return the untranslated text of the input that caused the current event.\n\
992If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000993for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000994
995static PyObject *
996xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
997{
998 PyObject *result = NULL;
999
1000 if (PyArg_ParseTuple(args, ":GetInputContext")) {
1001 if (self->in_callback) {
1002 int offset, size;
1003 const char *buffer
1004 = XML_GetInputContext(self->itself, &offset, &size);
1005
1006 if (buffer != NULL)
1007 result = PyString_FromStringAndSize(buffer + offset, size);
1008 else {
1009 result = Py_None;
1010 Py_INCREF(result);
1011 }
1012 }
1013 else {
1014 result = Py_None;
1015 Py_INCREF(result);
1016 }
1017 }
1018 return result;
1019}
Fred Drakebd6101c2001-02-14 18:29:45 +00001020
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001021PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001022"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001023Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001024information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001025
1026static PyObject *
1027xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1028{
1029 char *context;
1030 char *encoding = NULL;
1031 xmlparseobject *new_parser;
1032 int i;
1033
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001034 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001035 &context, &encoding)) {
1036 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001037 }
1038
Martin v. Löwis894258c2001-09-23 10:20:10 +00001039#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001040 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001041 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001042#else
1043 /* Python versions 2.2 and later */
1044 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1045#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001046
1047 if (new_parser == NULL)
1048 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001049 new_parser->buffer_size = self->buffer_size;
1050 new_parser->buffer_used = 0;
1051 if (self->buffer != NULL) {
1052 new_parser->buffer = malloc(new_parser->buffer_size);
1053 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001054#ifndef Py_TPFLAGS_HAVE_GC
1055 /* Code for versions 2.0 and 2.1 */
1056 PyObject_Del(new_parser);
1057#else
1058 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001059 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001060#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001061 return PyErr_NoMemory();
1062 }
1063 }
1064 else
1065 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001066 new_parser->returns_unicode = self->returns_unicode;
1067 new_parser->ordered_attributes = self->ordered_attributes;
1068 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001069 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001070 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001071 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001072 encoding);
1073 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001074 new_parser->intern = self->intern;
1075 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001076#ifdef Py_TPFLAGS_HAVE_GC
1077 PyObject_GC_Track(new_parser);
1078#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001079 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001080#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001081
1082 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001083 Py_DECREF(new_parser);
1084 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001085 }
1086
1087 XML_SetUserData(new_parser->itself, (void *)new_parser);
1088
1089 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001090 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001091 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001092
Fred Drake2a3d7db2002-06-28 22:56:48 +00001093 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001094 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001095 Py_DECREF(new_parser);
1096 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001097 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001098 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001099
1100 /* then copy handlers from self */
1101 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001102 PyObject *handler = self->handlers[i];
1103 if (handler != NULL) {
1104 Py_INCREF(handler);
1105 new_parser->handlers[i] = handler;
1106 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001107 handler_info[i].handler);
1108 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001109 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001110 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001111}
1112
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001113PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001114"SetParamEntityParsing(flag) -> success\n\
1115Controls parsing of parameter entities (including the external DTD\n\
1116subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1117XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1118XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001119was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001120
1121static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001122xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001123{
Fred Drake85d835f2001-02-08 15:39:08 +00001124 int flag;
1125 if (!PyArg_ParseTuple(args, "i", &flag))
1126 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001127 flag = XML_SetParamEntityParsing(p->itself, flag);
Fred Drake85d835f2001-02-08 15:39:08 +00001128 return PyInt_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001129}
1130
Martin v. Löwisc847f402003-01-21 11:09:21 +00001131
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): forward the truth value of flag (default true)
 * to XML_UseForeignDTD().
 *
 * Returns None on success, the value of set_error() when Expat rejects
 * the call, and NULL when the arguments are bad or evaluating the truth
 * value of flag raises an exception.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;
    if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 on error; that must not be taken
           for "true" with the exception left pending. */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001159
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001160static struct PyMethodDef xmlparse_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001161 {"Parse", (PyCFunction)xmlparse_Parse,
Fred Drakebd6101c2001-02-14 18:29:45 +00001162 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001163 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Fred Drakebd6101c2001-02-14 18:29:45 +00001164 METH_VARARGS, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001165 {"SetBase", (PyCFunction)xmlparse_SetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001166 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001167 {"GetBase", (PyCFunction)xmlparse_GetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001168 METH_VARARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001169 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001170 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001171 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1172 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001173 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1174 METH_VARARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001175#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001176 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1177 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001178#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001179 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001180};
1181
1182/* ---------- */
1183
1184
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001185#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001186
Fred Drake71b63ff2002-06-28 22:29:01 +00001187/* pyexpat international encoding support.
1188 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001189*/
1190
Martin v. Löwis3af7cc02001-01-22 08:19:10 +00001191static char template_buffer[257];
Fred Drakebb66a202001-03-01 20:48:17 +00001192PyObject *template_string = NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001193
Fred Drake71b63ff2002-06-28 22:29:01 +00001194static void
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001195init_template_buffer(void)
1196{
1197 int i;
Fred Drakebb66a202001-03-01 20:48:17 +00001198 for (i = 0; i < 256; i++) {
1199 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001200 }
Fred Drakebb66a202001-03-01 20:48:17 +00001201 template_buffer[256] = 0;
Tim Peters63cb99e2001-02-17 18:12:50 +00001202}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001203
Fred Drake71b63ff2002-06-28 22:29:01 +00001204static int
1205PyUnknownEncodingHandler(void *encodingHandlerData,
1206 const XML_Char *name,
1207 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001208{
Fred Drakebb66a202001-03-01 20:48:17 +00001209 PyUnicodeObject *_u_string = NULL;
1210 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001211 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001212
Fred Drakebb66a202001-03-01 20:48:17 +00001213 /* Yes, supports only 8bit encodings */
1214 _u_string = (PyUnicodeObject *)
1215 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001216
Fred Drakebb66a202001-03-01 20:48:17 +00001217 if (_u_string == NULL)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001218 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001219
Fred Drakebb66a202001-03-01 20:48:17 +00001220 for (i = 0; i < 256; i++) {
1221 /* Stupid to access directly, but fast */
1222 Py_UNICODE c = _u_string->str[i];
1223 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001224 info->map[i] = -1;
Fred Drakebb66a202001-03-01 20:48:17 +00001225 else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001226 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001227 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001228 info->data = NULL;
1229 info->convert = NULL;
1230 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001231 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001232 Py_DECREF(_u_string);
1233 return result;
1234}
1235
1236#endif
1237
1238static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001239newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001240{
1241 int i;
1242 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001243
Martin v. Löwis894258c2001-09-23 10:20:10 +00001244#ifdef Py_TPFLAGS_HAVE_GC
1245 /* Code for versions 2.2 and later */
1246 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1247#else
Fred Drake0582df92000-07-12 04:49:00 +00001248 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001249#endif
Fred Drake0582df92000-07-12 04:49:00 +00001250 if (self == NULL)
1251 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001252
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001253#ifdef Py_USING_UNICODE
Fred Drake0582df92000-07-12 04:49:00 +00001254 self->returns_unicode = 1;
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001255#else
1256 self->returns_unicode = 0;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001257#endif
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001258
Fred Drake2a3d7db2002-06-28 22:56:48 +00001259 self->buffer = NULL;
1260 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1261 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001262 self->ordered_attributes = 0;
1263 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001264 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001265 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001266 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001267 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001268 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1269 }
Fred Drake85d835f2001-02-08 15:39:08 +00001270 else {
Fred Drake0582df92000-07-12 04:49:00 +00001271 self->itself = XML_ParserCreate(encoding);
1272 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001273 self->intern = intern;
1274 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001275#ifdef Py_TPFLAGS_HAVE_GC
1276 PyObject_GC_Track(self);
1277#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001278 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001279#endif
Fred Drake0582df92000-07-12 04:49:00 +00001280 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001281 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001282 "XML_ParserCreate failed");
1283 Py_DECREF(self);
1284 return NULL;
1285 }
1286 XML_SetUserData(self->itself, (void *)self);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001287#ifdef Py_USING_UNICODE
Fred Drake7c75bf22002-07-01 14:02:31 +00001288 XML_SetUnknownEncodingHandler(self->itself,
1289 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001290#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001291
Fred Drake2a3d7db2002-06-28 22:56:48 +00001292 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001293 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001294
Fred Drake7c75bf22002-07-01 14:02:31 +00001295 self->handlers = malloc(sizeof(PyObject *) * i);
1296 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001297 Py_DECREF(self);
1298 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001299 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001300 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001301
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001302 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001303}
1304
1305
1306static void
Fred Drake0582df92000-07-12 04:49:00 +00001307xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001308{
Fred Drake0582df92000-07-12 04:49:00 +00001309 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001310#ifdef Py_TPFLAGS_HAVE_GC
1311 PyObject_GC_UnTrack(self);
1312#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001313 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001314#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001315 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001316 XML_ParserFree(self->itself);
1317 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001318
Fred Drake85d835f2001-02-08 15:39:08 +00001319 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001320 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001321 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001322 temp = self->handlers[i];
1323 self->handlers[i] = NULL;
1324 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001325 }
1326 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001327 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001328 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001329 if (self->buffer != NULL) {
1330 free(self->buffer);
1331 self->buffer = NULL;
1332 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001333 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001334#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001335 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001336 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001337#else
1338 /* Code for versions 2.2 and later. */
1339 PyObject_GC_Del(self);
1340#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001341}
1342
Fred Drake0582df92000-07-12 04:49:00 +00001343static int
1344handlername2int(const char *name)
1345{
1346 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001347 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001348 if (strcmp(name, handler_info[i].name) == 0) {
1349 return i;
1350 }
1351 }
1352 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001353}
1354
1355static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001356get_pybool(int istrue)
1357{
1358 PyObject *result = istrue ? Py_True : Py_False;
1359 Py_INCREF(result);
1360 return result;
1361}
1362
1363static PyObject *
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001364xmlparse_getattr(xmlparseobject *self, char *name)
1365{
Fred Drake71b63ff2002-06-28 22:29:01 +00001366 int handlernum = handlername2int(name);
1367
1368 if (handlernum != -1) {
1369 PyObject *result = self->handlers[handlernum];
1370 if (result == NULL)
1371 result = Py_None;
1372 Py_INCREF(result);
1373 return result;
1374 }
1375 if (name[0] == 'E') {
1376 if (strcmp(name, "ErrorCode") == 0)
1377 return PyInt_FromLong((long)
1378 XML_GetErrorCode(self->itself));
1379 if (strcmp(name, "ErrorLineNumber") == 0)
1380 return PyInt_FromLong((long)
1381 XML_GetErrorLineNumber(self->itself));
1382 if (strcmp(name, "ErrorColumnNumber") == 0)
1383 return PyInt_FromLong((long)
1384 XML_GetErrorColumnNumber(self->itself));
1385 if (strcmp(name, "ErrorByteIndex") == 0)
1386 return PyInt_FromLong((long)
1387 XML_GetErrorByteIndex(self->itself));
1388 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001389 if (name[0] == 'b') {
1390 if (strcmp(name, "buffer_size") == 0)
1391 return PyInt_FromLong((long) self->buffer_size);
1392 if (strcmp(name, "buffer_text") == 0)
1393 return get_pybool(self->buffer != NULL);
1394 if (strcmp(name, "buffer_used") == 0)
1395 return PyInt_FromLong((long) self->buffer_used);
1396 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001397 if (strcmp(name, "namespace_prefixes") == 0)
1398 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001399 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001400 return get_pybool(self->ordered_attributes);
Fred Drake0582df92000-07-12 04:49:00 +00001401 if (strcmp(name, "returns_unicode") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001402 return get_pybool((long) self->returns_unicode);
Fred Drake85d835f2001-02-08 15:39:08 +00001403 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001404 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001405 if (strcmp(name, "intern") == 0) {
1406 if (self->intern == NULL) {
1407 Py_INCREF(Py_None);
1408 return Py_None;
1409 }
1410 else {
1411 Py_INCREF(self->intern);
1412 return self->intern;
1413 }
1414 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001415
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001416#define APPEND(list, str) \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001417 do { \
1418 PyObject *o = PyString_FromString(str); \
1419 if (o != NULL) \
1420 PyList_Append(list, o); \
1421 Py_XDECREF(o); \
1422 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001423
Fred Drake0582df92000-07-12 04:49:00 +00001424 if (strcmp(name, "__members__") == 0) {
1425 int i;
1426 PyObject *rc = PyList_New(0);
Fred Drake71b63ff2002-06-28 22:29:01 +00001427 for (i = 0; handler_info[i].name != NULL; i++) {
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001428 PyObject *o = get_handler_name(&handler_info[i]);
1429 if (o != NULL)
1430 PyList_Append(rc, o);
1431 Py_XDECREF(o);
Fred Drake0582df92000-07-12 04:49:00 +00001432 }
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001433 APPEND(rc, "ErrorCode");
1434 APPEND(rc, "ErrorLineNumber");
1435 APPEND(rc, "ErrorColumnNumber");
1436 APPEND(rc, "ErrorByteIndex");
1437 APPEND(rc, "buffer_size");
1438 APPEND(rc, "buffer_text");
1439 APPEND(rc, "buffer_used");
Martin v. Löwis069dde22003-01-21 10:58:18 +00001440 APPEND(rc, "namespace_prefixes");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001441 APPEND(rc, "ordered_attributes");
1442 APPEND(rc, "returns_unicode");
1443 APPEND(rc, "specified_attributes");
1444 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001445
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001446#undef APPEND
Fred Drake0582df92000-07-12 04:49:00 +00001447 return rc;
1448 }
1449 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001450}
1451
Fred Drake6f987622000-08-25 18:03:30 +00001452static int
1453sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001454{
1455 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001456 if (handlernum >= 0) {
1457 xmlhandler c_handler = NULL;
1458 PyObject *temp = self->handlers[handlernum];
1459
1460 if (v == Py_None)
1461 v = NULL;
1462 else if (v != NULL) {
1463 Py_INCREF(v);
1464 c_handler = handler_info[handlernum].handler;
1465 }
Fred Drake0582df92000-07-12 04:49:00 +00001466 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001467 Py_XDECREF(temp);
1468 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001469 return 1;
1470 }
1471 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001472}
1473
1474static int
Fred Drake6f987622000-08-25 18:03:30 +00001475xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001476{
Fred Drake6f987622000-08-25 18:03:30 +00001477 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001478 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001479 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1480 return -1;
1481 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001482 if (strcmp(name, "buffer_text") == 0) {
1483 if (PyObject_IsTrue(v)) {
1484 if (self->buffer == NULL) {
1485 self->buffer = malloc(self->buffer_size);
1486 if (self->buffer == NULL) {
1487 PyErr_NoMemory();
1488 return -1;
1489 }
1490 self->buffer_used = 0;
1491 }
1492 }
1493 else if (self->buffer != NULL) {
1494 if (flush_character_buffer(self) < 0)
1495 return -1;
1496 free(self->buffer);
1497 self->buffer = NULL;
1498 }
1499 return 0;
1500 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001501 if (strcmp(name, "namespace_prefixes") == 0) {
1502 if (PyObject_IsTrue(v))
1503 self->ns_prefixes = 1;
1504 else
1505 self->ns_prefixes = 0;
1506 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1507 return 0;
1508 }
Fred Drake85d835f2001-02-08 15:39:08 +00001509 if (strcmp(name, "ordered_attributes") == 0) {
1510 if (PyObject_IsTrue(v))
1511 self->ordered_attributes = 1;
1512 else
1513 self->ordered_attributes = 0;
1514 return 0;
1515 }
Fred Drake6f987622000-08-25 18:03:30 +00001516 if (strcmp(name, "returns_unicode") == 0) {
Fred Drake85d835f2001-02-08 15:39:08 +00001517 if (PyObject_IsTrue(v)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001518#ifndef Py_USING_UNICODE
Fred Drake71b63ff2002-06-28 22:29:01 +00001519 PyErr_SetString(PyExc_ValueError,
1520 "Unicode support not available");
Fred Drake6f987622000-08-25 18:03:30 +00001521 return -1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001522#else
Fred Drake6f987622000-08-25 18:03:30 +00001523 self->returns_unicode = 1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001524#endif
Fred Drake6f987622000-08-25 18:03:30 +00001525 }
1526 else
1527 self->returns_unicode = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001528 return 0;
1529 }
1530 if (strcmp(name, "specified_attributes") == 0) {
1531 if (PyObject_IsTrue(v))
1532 self->specified_attributes = 1;
1533 else
1534 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001535 return 0;
1536 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001537 if (strcmp(name, "CharacterDataHandler") == 0) {
1538 /* If we're changing the character data handler, flush all
1539 * cached data with the old handler. Not sure there's a
1540 * "right" thing to do, though, but this probably won't
1541 * happen.
1542 */
1543 if (flush_character_buffer(self) < 0)
1544 return -1;
1545 }
Fred Drake6f987622000-08-25 18:03:30 +00001546 if (sethandler(self, name, v)) {
1547 return 0;
1548 }
1549 PyErr_SetString(PyExc_AttributeError, name);
1550 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001551}
1552
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001553#ifdef WITH_CYCLE_GC
1554static int
1555xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1556{
Fred Drakecde79132001-04-25 16:01:30 +00001557 int i, err;
1558 for (i = 0; handler_info[i].name != NULL; i++) {
1559 if (!op->handlers[i])
1560 continue;
1561 err = visit(op->handlers[i], arg);
1562 if (err)
1563 return err;
1564 }
1565 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001566}
1567
1568static int
1569xmlparse_clear(xmlparseobject *op)
1570{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001571 clear_handlers(op, 0);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001572 Py_XDECREF(op->intern);
1573 op->intern = 0;
Fred Drakecde79132001-04-25 16:01:30 +00001574 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001575}
1576#endif
1577
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001579
1580static PyTypeObject Xmlparsetype = {
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001581 PyObject_HEAD_INIT(NULL)
1582 0, /*ob_size*/
Guido van Rossum14648392001-12-08 18:02:58 +00001583 "pyexpat.xmlparser", /*tp_name*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001584 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001585 0, /*tp_itemsize*/
1586 /* methods */
1587 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1588 (printfunc)0, /*tp_print*/
1589 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1590 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1591 (cmpfunc)0, /*tp_compare*/
1592 (reprfunc)0, /*tp_repr*/
1593 0, /*tp_as_number*/
1594 0, /*tp_as_sequence*/
1595 0, /*tp_as_mapping*/
1596 (hashfunc)0, /*tp_hash*/
1597 (ternaryfunc)0, /*tp_call*/
1598 (reprfunc)0, /*tp_str*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001599 0, /* tp_getattro */
1600 0, /* tp_setattro */
1601 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001602#ifdef Py_TPFLAGS_HAVE_GC
Fred Drake71b63ff2002-06-28 22:29:01 +00001603 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001604#else
Fred Drake71b63ff2002-06-28 22:29:01 +00001605 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001606#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001607 Xmlparsetype__doc__, /* tp_doc - Documentation string */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001608#ifdef WITH_CYCLE_GC
1609 (traverseproc)xmlparse_traverse, /* tp_traverse */
1610 (inquiry)xmlparse_clear /* tp_clear */
1611#else
1612 0, 0
1613#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001614};
1615
1616/* End of code for xmlparser objects */
1617/* -------------------------------------------------------- */
1618
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001619PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001620"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001621Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001622
1623static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001624pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1625{
Fred Drakecde79132001-04-25 16:01:30 +00001626 char *encoding = NULL;
1627 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001628 PyObject *intern = NULL;
1629 PyObject *result;
1630 int intern_decref = 0;
Fred Drake71b63ff2002-06-28 22:29:01 +00001631 static char *kwlist[] = {"encoding", "namespace_separator",
Fred Drakeb91a36b2002-06-27 19:40:48 +00001632 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001633
Fred Drakeb91a36b2002-06-27 19:40:48 +00001634 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1635 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001636 return NULL;
1637 if (namespace_separator != NULL
1638 && strlen(namespace_separator) > 1) {
1639 PyErr_SetString(PyExc_ValueError,
1640 "namespace_separator must be at most one"
1641 " character, omitted, or None");
1642 return NULL;
1643 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001644 /* Explicitly passing None means no interning is desired.
1645 Not passing anything means that a new dictionary is used. */
1646 if (intern == Py_None)
1647 intern = NULL;
1648 else if (intern == NULL) {
1649 intern = PyDict_New();
1650 if (!intern)
1651 return NULL;
1652 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001653 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001654 else if (!PyDict_Check(intern)) {
1655 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1656 return NULL;
1657 }
1658
1659 result = newxmlparseobject(encoding, namespace_separator, intern);
1660 if (intern_decref) {
1661 Py_DECREF(intern);
1662 }
1663 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001664}
1665
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001666PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001667"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001668Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001669
1670static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001671pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001672{
Fred Drake0582df92000-07-12 04:49:00 +00001673 long code = 0;
1674
1675 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1676 return NULL;
1677 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001678}
1679
1680/* List of methods defined in the module */
1681
1682static struct PyMethodDef pyexpat_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001683 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1684 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1685 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1686 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001687
Fred Drake0582df92000-07-12 04:49:00 +00001688 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001689};
1690
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001691/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001692
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001693PyDoc_STRVAR(pyexpat_module_documentation,
1694"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001695
Fred Drake4113b132001-03-24 19:58:26 +00001696/* Return a Python string that represents the version number without the
1697 * extra cruft added by revision control, even if the right options were
1698 * given to the "cvs export" command to make it not include the extra
1699 * cruft.
1700 */
1701static PyObject *
1702get_version_string(void)
1703{
1704 static char *rcsid = "$Revision$";
1705 char *rev = rcsid;
1706 int i = 0;
1707
Neal Norwitz3afb2d22002-03-20 21:32:07 +00001708 while (!isdigit((int)*rev))
Fred Drake4113b132001-03-24 19:58:26 +00001709 ++rev;
1710 while (rev[i] != ' ' && rev[i] != '\0')
1711 ++i;
1712
1713 return PyString_FromStringAndSize(rev, i);
1714}
1715
/* Initialization function for the module */

/* Name under which the module is registered; may be predefined by the
 * build. */
#if !defined(MODULE_NAME)
#define MODULE_NAME "pyexpat"
#endif

/* Name of the init function; may be predefined by the build. */
#if !defined(MODULE_INITFUNC)
#define MODULE_INITFUNC initpyexpat
#endif

/* Fallback for Python headers that do not supply PyMODINIT_FUNC. */
#if !defined(PyMODINIT_FUNC)
#  if defined(MS_WINDOWS)
#    define PyMODINIT_FUNC __declspec(dllexport) void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif

PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001735
Martin v. Löwis069dde22003-01-21 10:58:18 +00001736PyMODINIT_FUNC
1737MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001738{
1739 PyObject *m, *d;
Fred Drakecde79132001-04-25 16:01:30 +00001740 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001741 PyObject *errors_module;
1742 PyObject *modelmod_name;
1743 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001744 PyObject *sys_modules;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001745
Fred Drake6f987622000-08-25 18:03:30 +00001746 if (errmod_name == NULL)
1747 return;
Fred Drakecde79132001-04-25 16:01:30 +00001748 modelmod_name = PyString_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001749 if (modelmod_name == NULL)
1750 return;
Fred Drake6f987622000-08-25 18:03:30 +00001751
Fred Drake0582df92000-07-12 04:49:00 +00001752 Xmlparsetype.ob_type = &PyType_Type;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001753
Fred Drake0582df92000-07-12 04:49:00 +00001754 /* Create the module and add the functions */
Fred Drakecde79132001-04-25 16:01:30 +00001755 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
Fred Drake85d835f2001-02-08 15:39:08 +00001756 pyexpat_module_documentation);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001757
Fred Drake0582df92000-07-12 04:49:00 +00001758 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001759 if (ErrorObject == NULL) {
1760 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001761 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001762 if (ErrorObject == NULL)
1763 return;
1764 }
1765 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001766 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001767 Py_INCREF(ErrorObject);
1768 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001769 Py_INCREF(&Xmlparsetype);
1770 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001771
Fred Drake4113b132001-03-24 19:58:26 +00001772 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001773 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1774 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001775 {
1776 XML_Expat_Version info = XML_ExpatVersionInfo();
1777 PyModule_AddObject(m, "version_info",
1778 Py_BuildValue("(iii)", info.major,
1779 info.minor, info.micro));
1780 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001781#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001782 init_template_buffer();
1783#endif
Fred Drake0582df92000-07-12 04:49:00 +00001784 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001785 compiled, this should check and set native_encoding
1786 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001787 */
Fred Drake93adb692000-09-23 04:55:48 +00001788 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001789
Fred Drake85d835f2001-02-08 15:39:08 +00001790 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001791 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001792 errors_module = PyDict_GetItem(d, errmod_name);
1793 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001794 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001795 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001796 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001797 /* gives away the reference to errors_module */
1798 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001799 }
1800 }
Fred Drake6f987622000-08-25 18:03:30 +00001801 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001802 model_module = PyDict_GetItem(d, modelmod_name);
1803 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001804 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001805 if (model_module != NULL) {
1806 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1807 /* gives away the reference to model_module */
1808 PyModule_AddObject(m, "model", model_module);
1809 }
1810 }
1811 Py_DECREF(modelmod_name);
1812 if (errors_module == NULL || model_module == NULL)
1813 /* Don't core dump later! */
Fred Drake6f987622000-08-25 18:03:30 +00001814 return;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001815
Martin v. Löwisc847f402003-01-21 11:09:21 +00001816#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001817 {
1818 const XML_Feature *features = XML_GetFeatureList();
1819 PyObject *list = PyList_New(0);
1820 if (list == NULL)
1821 /* just ignore it */
1822 PyErr_Clear();
1823 else {
1824 int i = 0;
1825 for (; features[i].feature != XML_FEATURE_END; ++i) {
1826 int ok;
1827 PyObject *item = Py_BuildValue("si", features[i].name,
1828 features[i].value);
1829 if (item == NULL) {
1830 Py_DECREF(list);
1831 list = NULL;
1832 break;
1833 }
1834 ok = PyList_Append(list, item);
1835 Py_DECREF(item);
1836 if (ok < 0) {
1837 PyErr_Clear();
1838 break;
1839 }
1840 }
1841 if (list != NULL)
1842 PyModule_AddObject(m, "features", list);
1843 }
1844 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001845#endif
Fred Drake6f987622000-08-25 18:03:30 +00001846
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001847#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001848 PyModule_AddStringConstant(errors_module, #name, \
1849 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001850
Fred Drake0582df92000-07-12 04:49:00 +00001851 MYCONST(XML_ERROR_NO_MEMORY);
1852 MYCONST(XML_ERROR_SYNTAX);
1853 MYCONST(XML_ERROR_NO_ELEMENTS);
1854 MYCONST(XML_ERROR_INVALID_TOKEN);
1855 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1856 MYCONST(XML_ERROR_PARTIAL_CHAR);
1857 MYCONST(XML_ERROR_TAG_MISMATCH);
1858 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1859 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1860 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1861 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1862 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1863 MYCONST(XML_ERROR_ASYNC_ENTITY);
1864 MYCONST(XML_ERROR_BAD_CHAR_REF);
1865 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1866 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1867 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1868 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1869 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001870 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1871 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1872 MYCONST(XML_ERROR_NOT_STANDALONE);
1873
Fred Drake85d835f2001-02-08 15:39:08 +00001874 PyModule_AddStringConstant(errors_module, "__doc__",
1875 "Constants used to describe error conditions.");
1876
Fred Drake93adb692000-09-23 04:55:48 +00001877#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001878
Fred Drake85d835f2001-02-08 15:39:08 +00001879#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001880 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1881 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1882 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001883#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001884
Fred Drake85d835f2001-02-08 15:39:08 +00001885#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1886 PyModule_AddStringConstant(model_module, "__doc__",
1887 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001888
Fred Drake85d835f2001-02-08 15:39:08 +00001889 MYCONST(XML_CTYPE_EMPTY);
1890 MYCONST(XML_CTYPE_ANY);
1891 MYCONST(XML_CTYPE_MIXED);
1892 MYCONST(XML_CTYPE_NAME);
1893 MYCONST(XML_CTYPE_CHOICE);
1894 MYCONST(XML_CTYPE_SEQ);
1895
1896 MYCONST(XML_CQUANT_NONE);
1897 MYCONST(XML_CQUANT_OPT);
1898 MYCONST(XML_CQUANT_REP);
1899 MYCONST(XML_CQUANT_PLUS);
1900#undef MYCONST
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001901}
1902
Fred Drake6f987622000-08-25 18:03:30 +00001903static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001904clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001905{
Fred Drakecde79132001-04-25 16:01:30 +00001906 int i = 0;
1907 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001908
Fred Drake71b63ff2002-06-28 22:29:01 +00001909 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001910 if (initial)
Fred Drake71b63ff2002-06-28 22:29:01 +00001911 self->handlers[i] = NULL;
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001912 else {
Fred Drakecde79132001-04-25 16:01:30 +00001913 temp = self->handlers[i];
1914 self->handlers[i] = NULL;
1915 Py_XDECREF(temp);
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001916 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001917 }
Fred Drakecde79132001-04-25 16:01:30 +00001918 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001919}
1920
Tim Peters0c322792002-07-17 16:49:03 +00001921static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001922 {"StartElementHandler",
1923 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001924 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001925 {"EndElementHandler",
1926 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001927 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001928 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001929 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1930 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001931 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001932 (xmlhandlersetter)XML_SetCharacterDataHandler,
1933 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001934 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001935 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001936 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001937 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001938 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001939 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001940 {"StartNamespaceDeclHandler",
1941 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001942 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001943 {"EndNamespaceDeclHandler",
1944 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001945 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001946 {"CommentHandler",
1947 (xmlhandlersetter)XML_SetCommentHandler,
1948 (xmlhandler)my_CommentHandler},
1949 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001950 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001951 (xmlhandler)my_StartCdataSectionHandler},
1952 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001953 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001954 (xmlhandler)my_EndCdataSectionHandler},
1955 {"DefaultHandler",
1956 (xmlhandlersetter)XML_SetDefaultHandler,
1957 (xmlhandler)my_DefaultHandler},
1958 {"DefaultHandlerExpand",
1959 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
1960 (xmlhandler)my_DefaultHandlerExpandHandler},
1961 {"NotStandaloneHandler",
1962 (xmlhandlersetter)XML_SetNotStandaloneHandler,
1963 (xmlhandler)my_NotStandaloneHandler},
1964 {"ExternalEntityRefHandler",
1965 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001966 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001967 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001968 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001969 (xmlhandler)my_StartDoctypeDeclHandler},
1970 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001971 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001972 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00001973 {"EntityDeclHandler",
1974 (xmlhandlersetter)XML_SetEntityDeclHandler,
1975 (xmlhandler)my_EntityDeclHandler},
1976 {"XmlDeclHandler",
1977 (xmlhandlersetter)XML_SetXmlDeclHandler,
1978 (xmlhandler)my_XmlDeclHandler},
1979 {"ElementDeclHandler",
1980 (xmlhandlersetter)XML_SetElementDeclHandler,
1981 (xmlhandler)my_ElementDeclHandler},
1982 {"AttlistDeclHandler",
1983 (xmlhandlersetter)XML_SetAttlistDeclHandler,
1984 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001985#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00001986 {"SkippedEntityHandler",
1987 (xmlhandlersetter)XML_SetSkippedEntityHandler,
1988 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001989#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001990
Fred Drake0582df92000-07-12 04:49:00 +00001991 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001992};