blob: 0f6608a6f96c2169848e180bd4e6a6b58ad99708 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "compile.h"
5#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
/* Expat version folded into a single integer
 * (major * 10000 + minor * 100 + micro) so that version tests can be
 * written as one comparison. */
#define XML_COMBINED_VERSION \
    (10000*XML_MAJOR_VERSION + 100*XML_MINOR_VERSION + XML_MICRO_VERSION)
9
#ifndef PyDoc_STRVAR

/*
 * Fallback definitions of the docstring macros, used only when the
 * Python headers do not already define PyDoc_STRVAR.
 *
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)         str
#define PyDoc_VAR(name)        static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif
23
#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* Python 2.0 and 2.1 could not be built without Unicode, so the macro
 * that later versions provide is defined here.  NOFIX_TRACE is set for
 * the same versions; the code guarded by "#ifndef NOFIX_TRACE" is
 * compiled out when it is defined. */
#define Py_USING_UNICODE
#define NOFIX_TRACE
#endif
29
/* Slot numbers for the supported callbacks.  The values are used as
 * indexes into the per-parser handlers array and into handler_info[],
 * so the order of the enumerators must not change. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when built against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    _DummyDecl
};
57
58static PyObject *ErrorObject;
59
60/* ----------------------------------------------------- */
61
62/* Declarations for objects of type xmlparser */
63
64typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000065 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake0582df92000-07-12 04:49:00 +000067 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000068 int returns_unicode; /* True if Unicode strings are returned;
69 if false, UTF-8 strings are returned */
70 int ordered_attributes; /* Return attributes as a list. */
71 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000072 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000073 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000074 XML_Char *buffer; /* Buffer used when accumulating characters */
75 /* NULL if not enabled */
76 int buffer_size; /* Size of buffer, in XML_Char units */
77 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000078 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000079 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000080} xmlparseobject;
81
Fred Drake2a3d7db2002-06-28 22:56:48 +000082#define CHARACTER_DATA_BUFFER_SIZE 8192
83
Jeremy Hylton938ace62002-07-17 16:30:39 +000084static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000085
Fred Drake117ac852002-09-24 16:24:54 +000086typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000087typedef void* xmlhandler;
88
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000089struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000090 const char *name;
91 xmlhandlersetter setter;
92 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000093 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000094 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000095};
96
Jeremy Hylton938ace62002-07-17 16:30:39 +000097static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000098
Fred Drakebd6101c2001-02-14 18:29:45 +000099/* Set an integer attribute on the error object; return true on success,
100 * false on an exception.
101 */
102static int
103set_error_attr(PyObject *err, char *name, int value)
104{
105 PyObject *v = PyInt_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000106
Fred Drakebd6101c2001-02-14 18:29:45 +0000107 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
108 Py_DECREF(v);
109 return 0;
110 }
111 return 1;
112}
113
114/* Build and set an Expat exception, including positioning
115 * information. Always returns NULL.
116 */
Fred Drake85d835f2001-02-08 15:39:08 +0000117static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000118set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000119{
120 PyObject *err;
121 char buffer[256];
122 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000123 int lineno = XML_GetErrorLineNumber(parser);
124 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000125
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000126 /* There is no risk of overflowing this buffer, since
127 even for 64-bit integers, there is sufficient space. */
128 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000129 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000130 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000131 if ( err != NULL
132 && set_error_attr(err, "code", code)
133 && set_error_attr(err, "offset", column)
134 && set_error_attr(err, "lineno", lineno)) {
135 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000136 }
137 return NULL;
138}
139
Fred Drake71b63ff2002-06-28 22:29:01 +0000140static int
141have_handler(xmlparseobject *self, int type)
142{
143 PyObject *handler = self->handlers[type];
144 return handler != NULL;
145}
146
147static PyObject *
148get_handler_name(struct HandlerInfo *hinfo)
149{
150 PyObject *name = hinfo->nameobj;
151 if (name == NULL) {
152 name = PyString_FromString(hinfo->name);
153 hinfo->nameobj = name;
154 }
155 Py_XINCREF(name);
156 return name;
157}
158
Fred Drake85d835f2001-02-08 15:39:08 +0000159
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000160#ifdef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000161/* Convert a string of XML_Chars into a Unicode string.
162 Returns None if str is a null pointer. */
163
Fred Drake0582df92000-07-12 04:49:00 +0000164static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000165conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000166{
Fred Drake71b63ff2002-06-28 22:29:01 +0000167 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000168 and hence in UTF-8. */
169 /* UTF-8 from Expat, Unicode desired */
170 if (str == NULL) {
171 Py_INCREF(Py_None);
172 return Py_None;
173 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000174 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175}
176
Fred Drake0582df92000-07-12 04:49:00 +0000177static PyObject *
178conv_string_len_to_unicode(const XML_Char *str, int len)
179{
Fred Drake71b63ff2002-06-28 22:29:01 +0000180 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000181 and hence in UTF-8. */
182 /* UTF-8 from Expat, Unicode desired */
183 if (str == NULL) {
184 Py_INCREF(Py_None);
185 return Py_None;
186 }
Fred Drake6f987622000-08-25 18:03:30 +0000187 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000188}
189#endif
190
191/* Convert a string of XML_Chars into an 8-bit Python string.
192 Returns None if str is a null pointer. */
193
Fred Drake6f987622000-08-25 18:03:30 +0000194static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000195conv_string_to_utf8(const XML_Char *str)
Fred Drake6f987622000-08-25 18:03:30 +0000196{
Fred Drake71b63ff2002-06-28 22:29:01 +0000197 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000198 and hence in UTF-8. */
199 /* UTF-8 from Expat, UTF-8 desired */
200 if (str == NULL) {
201 Py_INCREF(Py_None);
202 return Py_None;
203 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000204 return PyString_FromString(str);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000205}
206
Fred Drake6f987622000-08-25 18:03:30 +0000207static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +0000208conv_string_len_to_utf8(const XML_Char *str, int len)
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000209{
Fred Drake71b63ff2002-06-28 22:29:01 +0000210 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000211 and hence in UTF-8. */
212 /* UTF-8 from Expat, UTF-8 desired */
213 if (str == NULL) {
214 Py_INCREF(Py_None);
215 return Py_None;
216 }
217 return PyString_FromStringAndSize((const char *)str, len);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000218}
219
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000220/* Callback routines */
221
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000222static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000223
Martin v. Löwis069dde22003-01-21 10:58:18 +0000224/* This handler is used when an error has been detected, in the hope
225 that actual parsing can be terminated early. This will only help
226 if an external entity reference is encountered. */
227static int
228error_external_entity_ref_handler(XML_Parser parser,
229 const XML_Char *context,
230 const XML_Char *base,
231 const XML_Char *systemId,
232 const XML_Char *publicId)
233{
234 return 0;
235}
236
Fred Drake6f987622000-08-25 18:03:30 +0000237static void
238flag_error(xmlparseobject *self)
239{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000240 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000241 XML_SetExternalEntityRefHandler(self->itself,
242 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000243}
244
245static PyCodeObject*
246getcode(enum HandlerTypes slot, char* func_name, int lineno)
247{
Fred Drakebd6101c2001-02-14 18:29:45 +0000248 PyObject *code = NULL;
249 PyObject *name = NULL;
250 PyObject *nulltuple = NULL;
251 PyObject *filename = NULL;
252
253 if (handler_info[slot].tb_code == NULL) {
254 code = PyString_FromString("");
255 if (code == NULL)
256 goto failed;
257 name = PyString_FromString(func_name);
258 if (name == NULL)
259 goto failed;
260 nulltuple = PyTuple_New(0);
261 if (nulltuple == NULL)
262 goto failed;
263 filename = PyString_FromString(__FILE__);
264 handler_info[slot].tb_code =
265 PyCode_New(0, /* argcount */
266 0, /* nlocals */
267 0, /* stacksize */
268 0, /* flags */
269 code, /* code */
270 nulltuple, /* consts */
271 nulltuple, /* names */
272 nulltuple, /* varnames */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000273#if PYTHON_API_VERSION >= 1010
Fred Drakebd6101c2001-02-14 18:29:45 +0000274 nulltuple, /* freevars */
275 nulltuple, /* cellvars */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000276#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000277 filename, /* filename */
278 name, /* name */
279 lineno, /* firstlineno */
280 code /* lnotab */
281 );
282 if (handler_info[slot].tb_code == NULL)
283 goto failed;
284 Py_DECREF(code);
285 Py_DECREF(nulltuple);
286 Py_DECREF(filename);
287 Py_DECREF(name);
288 }
289 return handler_info[slot].tb_code;
290 failed:
291 Py_XDECREF(code);
292 Py_XDECREF(name);
293 return NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000294}
295
Martin v. Löwis069dde22003-01-21 10:58:18 +0000296#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000297static int
298trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
299{
300 int result = 0;
301 if (!tstate->use_tracing || tstate->tracing)
302 return 0;
303 if (tstate->c_profilefunc != NULL) {
304 tstate->tracing++;
305 result = tstate->c_profilefunc(tstate->c_profileobj,
306 f, code , val);
307 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
308 || (tstate->c_profilefunc != NULL));
309 tstate->tracing--;
310 if (result)
311 return result;
312 }
313 if (tstate->c_tracefunc != NULL) {
314 tstate->tracing++;
315 result = tstate->c_tracefunc(tstate->c_traceobj,
316 f, code , val);
317 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
318 || (tstate->c_profilefunc != NULL));
319 tstate->tracing--;
320 }
321 return result;
322}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000323#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000324
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000325static PyObject*
326call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
327{
Fred Drakebd6101c2001-02-14 18:29:45 +0000328 PyThreadState *tstate = PyThreadState_GET();
329 PyFrameObject *f;
330 PyObject *res;
331
332 if (c == NULL)
333 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000334
Fred Drakebd6101c2001-02-14 18:29:45 +0000335 f = PyFrame_New(
336 tstate, /*back*/
337 c, /*code*/
Michael W. Hudson019a78e2002-11-08 12:53:11 +0000338 PyEval_GetGlobals(), /*globals*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000339 NULL /*locals*/
Fred Drakebd6101c2001-02-14 18:29:45 +0000340 );
341 if (f == NULL)
342 return NULL;
343 tstate->frame = f;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000344#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000345 if (trace_frame(tstate, f, PyTrace_CALL, Py_None)) {
346 Py_DECREF(f);
347 return NULL;
348 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000349#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000350 res = PyEval_CallObject(func, args);
351 if (res == NULL && tstate->curexc_traceback == NULL)
352 PyTraceBack_Here(f);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000353#ifndef NOFIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000354 else {
355 if (trace_frame(tstate, f, PyTrace_RETURN, res)) {
356 Py_XDECREF(res);
357 res = NULL;
358 }
359 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000360#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000361 tstate->frame = f->f_back;
362 Py_DECREF(f);
363 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000364}
365
/* Conversion function for NUL-terminated Expat strings.  In Unicode
 * builds the expansion reads self->returns_unicode, so a variable named
 * `self` must be in scope where the macro is used. */
#ifdef Py_USING_UNICODE
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#else
#define STRING_CONV_FUNC conv_string_to_utf8
#endif
Guido van Rossum5961f5a2000-03-31 16:18:11 +0000373
Fred Drakeb91a36b2002-06-27 19:40:48 +0000374static PyObject*
375string_intern(xmlparseobject *self, const char* str)
376{
377 PyObject *result = STRING_CONV_FUNC(str);
378 PyObject *value;
379 if (!self->intern)
380 return result;
381 value = PyDict_GetItem(self->intern, result);
382 if (!value) {
383 if (PyDict_SetItem(self->intern, result, result) == 0)
384 return result;
385 else
386 return NULL;
387 }
388 Py_INCREF(value);
389 Py_DECREF(result);
390 return value;
391}
392
Fred Drake2a3d7db2002-06-28 22:56:48 +0000393/* Return 0 on success, -1 on exception.
394 * flag_error() will be called before return if needed.
395 */
396static int
397call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
398{
399 PyObject *args;
400 PyObject *temp;
401
402 args = PyTuple_New(1);
403 if (args == NULL)
404 return -1;
405#ifdef Py_USING_UNICODE
406 temp = (self->returns_unicode
407 ? conv_string_len_to_unicode(buffer, len)
408 : conv_string_len_to_utf8(buffer, len));
409#else
410 temp = conv_string_len_to_utf8(buffer, len);
411#endif
412 if (temp == NULL) {
413 Py_DECREF(args);
414 flag_error(self);
415 return -1;
416 }
417 PyTuple_SET_ITEM(args, 0, temp);
418 /* temp is now a borrowed reference; consider it unused. */
419 self->in_callback = 1;
420 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
421 self->handlers[CharacterData], args);
422 /* temp is an owned reference again, or NULL */
423 self->in_callback = 0;
424 Py_DECREF(args);
425 if (temp == NULL) {
426 flag_error(self);
427 return -1;
428 }
429 Py_DECREF(temp);
430 return 0;
431}
432
433static int
434flush_character_buffer(xmlparseobject *self)
435{
436 int rc;
437 if (self->buffer == NULL || self->buffer_used == 0)
438 return 0;
439 rc = call_character_handler(self, self->buffer, self->buffer_used);
440 self->buffer_used = 0;
441 return rc;
442}
443
444static void
445my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
446{
447 xmlparseobject *self = (xmlparseobject *) userData;
448 if (self->buffer == NULL)
449 call_character_handler(self, data, len);
450 else {
451 if ((self->buffer_used + len) > self->buffer_size) {
452 if (flush_character_buffer(self) < 0)
453 return;
454 /* handler might have changed; drop the rest on the floor
455 * if there isn't a handler anymore
456 */
457 if (!have_handler(self, CharacterData))
458 return;
459 }
460 if (len > self->buffer_size) {
461 call_character_handler(self, data, len);
462 self->buffer_used = 0;
463 }
464 else {
465 memcpy(self->buffer + self->buffer_used,
466 data, len * sizeof(XML_Char));
467 self->buffer_used += len;
468 }
469 }
470}
471
Fred Drake85d835f2001-02-08 15:39:08 +0000472static void
473my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000474 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000475{
476 xmlparseobject *self = (xmlparseobject *)userData;
477
Fred Drake71b63ff2002-06-28 22:29:01 +0000478 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000479 PyObject *container, *rv, *args;
480 int i, max;
481
Fred Drake2a3d7db2002-06-28 22:56:48 +0000482 if (flush_character_buffer(self) < 0)
483 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000484 /* Set max to the number of slots filled in atts[]; max/2 is
485 * the number of attributes we need to process.
486 */
487 if (self->specified_attributes) {
488 max = XML_GetSpecifiedAttributeCount(self->itself);
489 }
490 else {
491 max = 0;
492 while (atts[max] != NULL)
493 max += 2;
494 }
495 /* Build the container. */
496 if (self->ordered_attributes)
497 container = PyList_New(max);
498 else
499 container = PyDict_New();
500 if (container == NULL) {
501 flag_error(self);
502 return;
503 }
504 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000505 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000506 PyObject *v;
507 if (n == NULL) {
508 flag_error(self);
509 Py_DECREF(container);
510 return;
511 }
512 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
513 if (v == NULL) {
514 flag_error(self);
515 Py_DECREF(container);
516 Py_DECREF(n);
517 return;
518 }
519 if (self->ordered_attributes) {
520 PyList_SET_ITEM(container, i, n);
521 PyList_SET_ITEM(container, i+1, v);
522 }
523 else if (PyDict_SetItem(container, n, v)) {
524 flag_error(self);
525 Py_DECREF(n);
526 Py_DECREF(v);
527 return;
528 }
529 else {
530 Py_DECREF(n);
531 Py_DECREF(v);
532 }
533 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000534 args = Py_BuildValue("(NN)", string_intern(self, name), container);
Fred Drake85d835f2001-02-08 15:39:08 +0000535 if (args == NULL) {
536 Py_DECREF(container);
537 return;
538 }
539 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000540 self->in_callback = 1;
541 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake85d835f2001-02-08 15:39:08 +0000542 self->handlers[StartElement], args);
Fred Drakebd6101c2001-02-14 18:29:45 +0000543 self->in_callback = 0;
544 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000545 if (rv == NULL) {
546 flag_error(self);
547 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000548 }
Fred Drake85d835f2001-02-08 15:39:08 +0000549 Py_DECREF(rv);
550 }
551}
552
/* Generate the C callback my_<NAME>Handler for handler slot NAME.
 *
 *   RC            result type of the generated function
 *   PARAMS        parenthesised parameter list
 *   INIT          extra declarations placed after the common locals
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue()
 *   CONVERSION    statement(s) run on the handler result `rv`
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no Python handler is set.
 * Otherwise it flushes buffered character data, builds the argument
 * tuple and calls the handler; flag_error() is called if building the
 * arguments or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) { \
        return RETURN; \
    } \
    if (flush_character_buffer(self) < 0) { \
        return RETURN; \
    } \
    args = Py_BuildValue PARAM_FORMAT ;\
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
581
/* Callback returning void whose first parameter is named userData and
 * holds the xmlparseobject.  The handler result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
                   (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000585
/* Callback returning int whose first parameter is named userData and
 * holds the xmlparseobject.  The handler result is converted with
 * PyInt_AsLong(); 0 is returned when the handler is not called or
 * fails. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                   rc = PyInt_AsLong(rv);, rc, \
                   (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000590
Fred Drake71b63ff2002-06-28 22:29:01 +0000591VOID_HANDLER(EndElement,
592 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000593 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000594
Fred Drake6f987622000-08-25 18:03:30 +0000595VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000596 (void *userData,
597 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000598 const XML_Char *data),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000599 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000600
Fred Drake6f987622000-08-25 18:03:30 +0000601VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000602 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000603 const XML_Char *entityName,
604 const XML_Char *base,
605 const XML_Char *systemId,
606 const XML_Char *publicId,
607 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000608 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000609 string_intern(self, entityName), string_intern(self, base),
610 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000611 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000612
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000613#ifndef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000614VOID_HANDLER(EntityDecl,
615 (void *userData,
616 const XML_Char *entityName,
617 int is_parameter_entity,
618 const XML_Char *value,
619 int value_length,
620 const XML_Char *base,
621 const XML_Char *systemId,
622 const XML_Char *publicId,
623 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000624 ("NiNNNNN",
625 string_intern(self, entityName), is_parameter_entity,
Fred Drake85d835f2001-02-08 15:39:08 +0000626 conv_string_len_to_utf8(value, value_length),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000627 string_intern(self, base), string_intern(self, systemId),
628 string_intern(self, publicId),
629 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000630#else
631VOID_HANDLER(EntityDecl,
632 (void *userData,
633 const XML_Char *entityName,
634 int is_parameter_entity,
635 const XML_Char *value,
636 int value_length,
637 const XML_Char *base,
638 const XML_Char *systemId,
639 const XML_Char *publicId,
640 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000641 ("NiNNNNN",
642 string_intern(self, entityName), is_parameter_entity,
Fred Drake71b63ff2002-06-28 22:29:01 +0000643 (self->returns_unicode
644 ? conv_string_len_to_unicode(value, value_length)
Fred Drake85d835f2001-02-08 15:39:08 +0000645 : conv_string_len_to_utf8(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000646 string_intern(self, base), string_intern(self, systemId),
647 string_intern(self, publicId),
648 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000649#endif
650
651VOID_HANDLER(XmlDecl,
652 (void *userData,
653 const XML_Char *version,
654 const XML_Char *encoding,
655 int standalone),
656 ("(O&O&i)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000657 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000658 standalone))
659
660static PyObject *
661conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000662 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000663{
664 PyObject *result = NULL;
665 PyObject *children = PyTuple_New(model->numchildren);
666 int i;
667
668 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000669 assert(model->numchildren < INT_MAX);
670 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000671 PyObject *child = conv_content_model(&model->children[i],
672 conv_string);
673 if (child == NULL) {
674 Py_XDECREF(children);
675 return NULL;
676 }
677 PyTuple_SET_ITEM(children, i, child);
678 }
679 result = Py_BuildValue("(iiO&N)",
680 model->type, model->quant,
681 conv_string,model->name, children);
682 }
683 return result;
684}
685
Fred Drake06dd8cf2003-02-02 03:54:17 +0000686static void
687my_ElementDeclHandler(void *userData,
688 const XML_Char *name,
689 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000690{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000691 xmlparseobject *self = (xmlparseobject *)userData;
692 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000693
Fred Drake06dd8cf2003-02-02 03:54:17 +0000694 if (have_handler(self, ElementDecl)) {
695 PyObject *rv = NULL;
696 PyObject *modelobj, *nameobj;
697
698 if (flush_character_buffer(self) < 0)
699 goto finally;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000700#ifdef Py_USING_UNICODE
Fred Drake06dd8cf2003-02-02 03:54:17 +0000701 modelobj = conv_content_model(model,
702 (self->returns_unicode
703 ? conv_string_to_unicode
704 : conv_string_to_utf8));
Fred Drake85d835f2001-02-08 15:39:08 +0000705#else
Fred Drake06dd8cf2003-02-02 03:54:17 +0000706 modelobj = conv_content_model(model, conv_string_to_utf8);
Fred Drake85d835f2001-02-08 15:39:08 +0000707#endif
Fred Drake06dd8cf2003-02-02 03:54:17 +0000708 if (modelobj == NULL) {
709 flag_error(self);
710 goto finally;
711 }
712 nameobj = string_intern(self, name);
713 if (nameobj == NULL) {
714 Py_DECREF(modelobj);
715 flag_error(self);
716 goto finally;
717 }
718 args = Py_BuildValue("NN", string_intern(self, name), modelobj);
719 if (args == NULL) {
720 Py_DECREF(modelobj);
721 flag_error(self);
722 goto finally;
723 }
724 self->in_callback = 1;
725 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
726 self->handlers[ElementDecl], args);
727 self->in_callback = 0;
728 if (rv == NULL) {
729 flag_error(self);
730 goto finally;
731 }
732 Py_DECREF(rv);
733 }
734 finally:
735 Py_XDECREF(args);
736 XML_FreeContentModel(self->itself, model);
737 return;
738}
Fred Drake85d835f2001-02-08 15:39:08 +0000739
740VOID_HANDLER(AttlistDecl,
741 (void *userData,
742 const XML_Char *elname,
743 const XML_Char *attname,
744 const XML_Char *att_type,
745 const XML_Char *dflt,
746 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000747 ("(NNO&O&i)",
748 string_intern(self, elname), string_intern(self, attname),
Fred Drake85d835f2001-02-08 15:39:08 +0000749 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
750 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000751
#if XML_COMBINED_VERSION >= 19504
/* SkippedEntity(entityName, is_parameter_entity): the name is
 * interned.  Only built against Expat 1.95.4 or newer. */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName),
              is_parameter_entity))
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000760
Fred Drake71b63ff2002-06-28 22:29:01 +0000761VOID_HANDLER(NotationDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000762 (void *userData,
763 const XML_Char *notationName,
764 const XML_Char *base,
765 const XML_Char *systemId,
766 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000767 ("(NNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000768 string_intern(self, notationName), string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000769 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000770
Fred Drake6f987622000-08-25 18:03:30 +0000771VOID_HANDLER(StartNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000772 (void *userData,
773 const XML_Char *prefix,
774 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000775 ("(NN)",
776 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000777
Fred Drake6f987622000-08-25 18:03:30 +0000778VOID_HANDLER(EndNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000779 (void *userData,
780 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000781 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000782
Fred Drake6f987622000-08-25 18:03:30 +0000783VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000784 (void *userData, const XML_Char *data),
785 ("(O&)", STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000786
Fred Drake6f987622000-08-25 18:03:30 +0000787VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000788 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000789 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000790
Fred Drake6f987622000-08-25 18:03:30 +0000791VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000792 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000793 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000794
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000795#ifndef Py_USING_UNICODE
Fred Drake6f987622000-08-25 18:03:30 +0000796VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000797 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000798 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000799
Fred Drake6f987622000-08-25 18:03:30 +0000800VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000801 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000802 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000803#else
Fred Drake6f987622000-08-25 18:03:30 +0000804VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000805 (void *userData, const XML_Char *s, int len),
806 ("(N)", (self->returns_unicode
807 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000808 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000809
Fred Drake6f987622000-08-25 18:03:30 +0000810VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000811 (void *userData, const XML_Char *s, int len),
812 ("(N)", (self->returns_unicode
813 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000814 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000815#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000816
Fred Drake71b63ff2002-06-28 22:29:01 +0000817INT_HANDLER(NotStandalone,
818 (void *userData),
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000819 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000820
Fred Drake6f987622000-08-25 18:03:30 +0000821RC_HANDLER(int, ExternalEntityRef,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000822 (XML_Parser parser,
823 const XML_Char *context,
824 const XML_Char *base,
825 const XML_Char *systemId,
826 const XML_Char *publicId),
827 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000828 ("(O&NNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000829 STRING_CONV_FUNC,context, string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000830 string_intern(self, systemId), string_intern(self, publicId)),
Fred Drake6f987622000-08-25 18:03:30 +0000831 rc = PyInt_AsLong(rv);, rc,
832 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000833
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000834/* XXX UnknownEncodingHandler */
835
Fred Drake85d835f2001-02-08 15:39:08 +0000836VOID_HANDLER(StartDoctypeDecl,
837 (void *userData, const XML_Char *doctypeName,
838 const XML_Char *sysid, const XML_Char *pubid,
839 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000840 ("(NNNi)", string_intern(self, doctypeName),
841 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000842 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000843
844VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000846/* ---------------------------------------------------------------- */
847
Fred Drake71b63ff2002-06-28 22:29:01 +0000848static PyObject *
849get_parse_result(xmlparseobject *self, int rv)
850{
851 if (PyErr_Occurred()) {
852 return NULL;
853 }
854 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000855 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000856 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000857 if (flush_character_buffer(self) < 0) {
858 return NULL;
859 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000860 return PyInt_FromLong(rv);
861}
862
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000863PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000864"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000865Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000866
867static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000868xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000869{
Fred Drake0582df92000-07-12 04:49:00 +0000870 char *s;
871 int slen;
872 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000873
Fred Drake0582df92000-07-12 04:49:00 +0000874 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
875 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000876
877 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878}
879
Fred Drakeca1f4262000-09-21 20:10:23 +0000880/* File reading copied from cPickle */
881
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000882#define BUF_SIZE 2048
883
Fred Drake0582df92000-07-12 04:49:00 +0000884static int
885readinst(char *buf, int buf_size, PyObject *meth)
886{
887 PyObject *arg = NULL;
888 PyObject *bytes = NULL;
889 PyObject *str = NULL;
890 int len = -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000891
Fred Drake676940b2000-09-22 15:21:31 +0000892 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000893 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000894
Fred Drakeca1f4262000-09-21 20:10:23 +0000895 if ((arg = PyTuple_New(1)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000896 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000897
Tim Peters954eef72000-09-22 06:01:11 +0000898 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000899
Guido van Rossum84b2bed2002-08-16 17:01:09 +0000900 if ((str = PyObject_Call(meth, arg, NULL)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000901 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000902
Fred Drake0582df92000-07-12 04:49:00 +0000903 /* XXX what to do if it returns a Unicode string? */
Fred Drakeca1f4262000-09-21 20:10:23 +0000904 if (!PyString_Check(str)) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000905 PyErr_Format(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000906 "read() did not return a string object (type=%.400s)",
907 str->ob_type->tp_name);
908 goto finally;
909 }
910 len = PyString_GET_SIZE(str);
911 if (len > buf_size) {
912 PyErr_Format(PyExc_ValueError,
913 "read() returned too much data: "
914 "%i bytes requested, %i returned",
915 buf_size, len);
916 Py_DECREF(str);
917 goto finally;
918 }
919 memcpy(buf, PyString_AsString(str), len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000920finally:
Fred Drake0582df92000-07-12 04:49:00 +0000921 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000922 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000923 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000924}
925
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000926PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000927"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000928Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000929
930static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000931xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000932{
Fred Drake0582df92000-07-12 04:49:00 +0000933 int rv = 1;
934 PyObject *f;
935 FILE *fp;
936 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000937
Fred Drake0582df92000-07-12 04:49:00 +0000938 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
939 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000940
Fred Drake0582df92000-07-12 04:49:00 +0000941 if (PyFile_Check(f)) {
942 fp = PyFile_AsFile(f);
943 }
944 else{
945 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000946 readmethod = PyObject_GetAttrString(f, "read");
947 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000948 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000949 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000950 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000951 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000952 }
953 }
954 for (;;) {
955 int bytes_read;
956 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
957 if (buf == NULL)
958 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000959
Fred Drake0582df92000-07-12 04:49:00 +0000960 if (fp) {
961 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
962 if (bytes_read < 0) {
963 PyErr_SetFromErrno(PyExc_IOError);
964 return NULL;
965 }
966 }
967 else {
968 bytes_read = readinst(buf, BUF_SIZE, readmethod);
969 if (bytes_read < 0)
970 return NULL;
971 }
972 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
973 if (PyErr_Occurred())
974 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000975
Fred Drake0582df92000-07-12 04:49:00 +0000976 if (!rv || bytes_read == 0)
977 break;
978 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000979 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000980}
981
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000982PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000983"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000984Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000985
986static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000987xmlparse_SetBase(xmlparseobject *self, PyObject *args)
988{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000989 char *base;
990
Fred Drake0582df92000-07-12 04:49:00 +0000991 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000992 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000993 if (!XML_SetBase(self->itself, base)) {
994 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000995 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000996 Py_INCREF(Py_None);
997 return Py_None;
998}
999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001000PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001001"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001002Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001003
1004static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001005xmlparse_GetBase(xmlparseobject *self, PyObject *args)
1006{
1007 if (!PyArg_ParseTuple(args, ":GetBase"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001008 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001009
Fred Drake0582df92000-07-12 04:49:00 +00001010 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001011}
1012
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001013PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +00001014"GetInputContext() -> string\n\
1015Return the untranslated text of the input that caused the current event.\n\
1016If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001017for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +00001018
1019static PyObject *
1020xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
1021{
1022 PyObject *result = NULL;
1023
1024 if (PyArg_ParseTuple(args, ":GetInputContext")) {
1025 if (self->in_callback) {
1026 int offset, size;
1027 const char *buffer
1028 = XML_GetInputContext(self->itself, &offset, &size);
1029
1030 if (buffer != NULL)
1031 result = PyString_FromStringAndSize(buffer + offset, size);
1032 else {
1033 result = Py_None;
1034 Py_INCREF(result);
1035 }
1036 }
1037 else {
1038 result = Py_None;
1039 Py_INCREF(result);
1040 }
1041 }
1042 return result;
1043}
Fred Drakebd6101c2001-02-14 18:29:45 +00001044
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001045PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001046"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001047Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001048information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001049
1050static PyObject *
1051xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1052{
1053 char *context;
1054 char *encoding = NULL;
1055 xmlparseobject *new_parser;
1056 int i;
1057
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001058 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001059 &context, &encoding)) {
1060 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001061 }
1062
Martin v. Löwis894258c2001-09-23 10:20:10 +00001063#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001064 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001065 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001066#else
1067 /* Python versions 2.2 and later */
1068 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1069#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001070
1071 if (new_parser == NULL)
1072 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001073 new_parser->buffer_size = self->buffer_size;
1074 new_parser->buffer_used = 0;
1075 if (self->buffer != NULL) {
1076 new_parser->buffer = malloc(new_parser->buffer_size);
1077 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001078#ifndef Py_TPFLAGS_HAVE_GC
1079 /* Code for versions 2.0 and 2.1 */
1080 PyObject_Del(new_parser);
1081#else
1082 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001083 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001084#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001085 return PyErr_NoMemory();
1086 }
1087 }
1088 else
1089 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001090 new_parser->returns_unicode = self->returns_unicode;
1091 new_parser->ordered_attributes = self->ordered_attributes;
1092 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001093 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001094 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001095 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001096 encoding);
1097 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001098 new_parser->intern = self->intern;
1099 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001100#ifdef Py_TPFLAGS_HAVE_GC
1101 PyObject_GC_Track(new_parser);
1102#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001103 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001104#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001105
1106 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001107 Py_DECREF(new_parser);
1108 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001109 }
1110
1111 XML_SetUserData(new_parser->itself, (void *)new_parser);
1112
1113 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001114 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001115 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001116
Fred Drake2a3d7db2002-06-28 22:56:48 +00001117 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001118 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001119 Py_DECREF(new_parser);
1120 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001121 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001122 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001123
1124 /* then copy handlers from self */
1125 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001126 PyObject *handler = self->handlers[i];
1127 if (handler != NULL) {
1128 Py_INCREF(handler);
1129 new_parser->handlers[i] = handler;
1130 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001131 handler_info[i].handler);
1132 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001133 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001134 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001135}
1136
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001137PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001138"SetParamEntityParsing(flag) -> success\n\
1139Controls parsing of parameter entities (including the external DTD\n\
1140subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1141XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1142XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001143was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001144
1145static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001146xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001147{
Fred Drake85d835f2001-02-08 15:39:08 +00001148 int flag;
1149 if (!PyArg_ParseTuple(args, "i", &flag))
1150 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001151 flag = XML_SetParamEntityParsing(p->itself, flag);
Fred Drake85d835f2001-02-08 15:39:08 +00001152 return PyInt_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001153}
1154
Martin v. Löwisc847f402003-01-21 11:09:21 +00001155
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): pass the truth value of flag (XML_TRUE when
   omitted) to XML_UseForeignDTD().  Returns None on success, the value
   of set_error() when Expat reports an error, or NULL when evaluating
   the truth value of flag raises. */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;
    if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 with an exception set on
           failure; that must not be mistaken for "true". */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001183
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001184static struct PyMethodDef xmlparse_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001185 {"Parse", (PyCFunction)xmlparse_Parse,
Fred Drakebd6101c2001-02-14 18:29:45 +00001186 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001187 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Fred Drakebd6101c2001-02-14 18:29:45 +00001188 METH_VARARGS, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001189 {"SetBase", (PyCFunction)xmlparse_SetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001190 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001191 {"GetBase", (PyCFunction)xmlparse_GetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001192 METH_VARARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001193 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001194 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001195 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1196 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001197 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1198 METH_VARARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001199#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001200 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1201 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001202#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001203 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001204};
1205
1206/* ---------- */
1207
1208
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001209#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001210
Fred Drake71b63ff2002-06-28 22:29:01 +00001211/* pyexpat international encoding support.
1212 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001213*/
1214
Martin v. Löwis3af7cc02001-01-22 08:19:10 +00001215static char template_buffer[257];
Fred Drakebb66a202001-03-01 20:48:17 +00001216PyObject *template_string = NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001217
Fred Drake71b63ff2002-06-28 22:29:01 +00001218static void
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001219init_template_buffer(void)
1220{
1221 int i;
Fred Drakebb66a202001-03-01 20:48:17 +00001222 for (i = 0; i < 256; i++) {
1223 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001224 }
Fred Drakebb66a202001-03-01 20:48:17 +00001225 template_buffer[256] = 0;
Tim Peters63cb99e2001-02-17 18:12:50 +00001226}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001227
Fred Drake71b63ff2002-06-28 22:29:01 +00001228static int
1229PyUnknownEncodingHandler(void *encodingHandlerData,
1230 const XML_Char *name,
1231 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001232{
Fred Drakebb66a202001-03-01 20:48:17 +00001233 PyUnicodeObject *_u_string = NULL;
1234 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001235 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001236
Fred Drakebb66a202001-03-01 20:48:17 +00001237 /* Yes, supports only 8bit encodings */
1238 _u_string = (PyUnicodeObject *)
1239 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001240
Fred Drakebb66a202001-03-01 20:48:17 +00001241 if (_u_string == NULL)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001242 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001243
Fred Drakebb66a202001-03-01 20:48:17 +00001244 for (i = 0; i < 256; i++) {
1245 /* Stupid to access directly, but fast */
1246 Py_UNICODE c = _u_string->str[i];
1247 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001248 info->map[i] = -1;
Fred Drakebb66a202001-03-01 20:48:17 +00001249 else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001250 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001251 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001252 info->data = NULL;
1253 info->convert = NULL;
1254 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001255 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001256 Py_DECREF(_u_string);
1257 return result;
1258}
1259
1260#endif
1261
1262static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001263newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001264{
1265 int i;
1266 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001267
Martin v. Löwis894258c2001-09-23 10:20:10 +00001268#ifdef Py_TPFLAGS_HAVE_GC
1269 /* Code for versions 2.2 and later */
1270 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1271#else
Fred Drake0582df92000-07-12 04:49:00 +00001272 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001273#endif
Fred Drake0582df92000-07-12 04:49:00 +00001274 if (self == NULL)
1275 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001276
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001277#ifdef Py_USING_UNICODE
Fred Drake0582df92000-07-12 04:49:00 +00001278 self->returns_unicode = 1;
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001279#else
1280 self->returns_unicode = 0;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001281#endif
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001282
Fred Drake2a3d7db2002-06-28 22:56:48 +00001283 self->buffer = NULL;
1284 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1285 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001286 self->ordered_attributes = 0;
1287 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001288 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001289 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001290 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001291 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001292 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1293 }
Fred Drake85d835f2001-02-08 15:39:08 +00001294 else {
Fred Drake0582df92000-07-12 04:49:00 +00001295 self->itself = XML_ParserCreate(encoding);
1296 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001297 self->intern = intern;
1298 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001299#ifdef Py_TPFLAGS_HAVE_GC
1300 PyObject_GC_Track(self);
1301#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001302 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001303#endif
Fred Drake0582df92000-07-12 04:49:00 +00001304 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001305 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001306 "XML_ParserCreate failed");
1307 Py_DECREF(self);
1308 return NULL;
1309 }
1310 XML_SetUserData(self->itself, (void *)self);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001311#ifdef Py_USING_UNICODE
Fred Drake7c75bf22002-07-01 14:02:31 +00001312 XML_SetUnknownEncodingHandler(self->itself,
1313 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001314#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001315
Fred Drake2a3d7db2002-06-28 22:56:48 +00001316 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001317 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001318
Fred Drake7c75bf22002-07-01 14:02:31 +00001319 self->handlers = malloc(sizeof(PyObject *) * i);
1320 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001321 Py_DECREF(self);
1322 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001323 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001324 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001325
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001326 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001327}
1328
1329
1330static void
Fred Drake0582df92000-07-12 04:49:00 +00001331xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001332{
Fred Drake0582df92000-07-12 04:49:00 +00001333 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001334#ifdef Py_TPFLAGS_HAVE_GC
1335 PyObject_GC_UnTrack(self);
1336#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001337 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001338#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001339 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001340 XML_ParserFree(self->itself);
1341 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001342
Fred Drake85d835f2001-02-08 15:39:08 +00001343 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001344 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001345 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001346 temp = self->handlers[i];
1347 self->handlers[i] = NULL;
1348 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001349 }
1350 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001351 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001352 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001353 if (self->buffer != NULL) {
1354 free(self->buffer);
1355 self->buffer = NULL;
1356 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001357 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001358#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001359 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001360 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001361#else
1362 /* Code for versions 2.2 and later. */
1363 PyObject_GC_Del(self);
1364#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001365}
1366
Fred Drake0582df92000-07-12 04:49:00 +00001367static int
1368handlername2int(const char *name)
1369{
1370 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001371 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001372 if (strcmp(name, handler_info[i].name) == 0) {
1373 return i;
1374 }
1375 }
1376 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001377}
1378
1379static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001380get_pybool(int istrue)
1381{
1382 PyObject *result = istrue ? Py_True : Py_False;
1383 Py_INCREF(result);
1384 return result;
1385}
1386
1387static PyObject *
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001388xmlparse_getattr(xmlparseobject *self, char *name)
1389{
Fred Drake71b63ff2002-06-28 22:29:01 +00001390 int handlernum = handlername2int(name);
1391
1392 if (handlernum != -1) {
1393 PyObject *result = self->handlers[handlernum];
1394 if (result == NULL)
1395 result = Py_None;
1396 Py_INCREF(result);
1397 return result;
1398 }
1399 if (name[0] == 'E') {
1400 if (strcmp(name, "ErrorCode") == 0)
1401 return PyInt_FromLong((long)
1402 XML_GetErrorCode(self->itself));
1403 if (strcmp(name, "ErrorLineNumber") == 0)
1404 return PyInt_FromLong((long)
1405 XML_GetErrorLineNumber(self->itself));
1406 if (strcmp(name, "ErrorColumnNumber") == 0)
1407 return PyInt_FromLong((long)
1408 XML_GetErrorColumnNumber(self->itself));
1409 if (strcmp(name, "ErrorByteIndex") == 0)
1410 return PyInt_FromLong((long)
1411 XML_GetErrorByteIndex(self->itself));
1412 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001413 if (name[0] == 'b') {
1414 if (strcmp(name, "buffer_size") == 0)
1415 return PyInt_FromLong((long) self->buffer_size);
1416 if (strcmp(name, "buffer_text") == 0)
1417 return get_pybool(self->buffer != NULL);
1418 if (strcmp(name, "buffer_used") == 0)
1419 return PyInt_FromLong((long) self->buffer_used);
1420 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001421 if (strcmp(name, "namespace_prefixes") == 0)
1422 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001423 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001424 return get_pybool(self->ordered_attributes);
Fred Drake0582df92000-07-12 04:49:00 +00001425 if (strcmp(name, "returns_unicode") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001426 return get_pybool((long) self->returns_unicode);
Fred Drake85d835f2001-02-08 15:39:08 +00001427 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001428 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001429 if (strcmp(name, "intern") == 0) {
1430 if (self->intern == NULL) {
1431 Py_INCREF(Py_None);
1432 return Py_None;
1433 }
1434 else {
1435 Py_INCREF(self->intern);
1436 return self->intern;
1437 }
1438 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001439
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001440#define APPEND(list, str) \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001441 do { \
1442 PyObject *o = PyString_FromString(str); \
1443 if (o != NULL) \
1444 PyList_Append(list, o); \
1445 Py_XDECREF(o); \
1446 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001447
Fred Drake0582df92000-07-12 04:49:00 +00001448 if (strcmp(name, "__members__") == 0) {
1449 int i;
1450 PyObject *rc = PyList_New(0);
Fred Drake71b63ff2002-06-28 22:29:01 +00001451 for (i = 0; handler_info[i].name != NULL; i++) {
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001452 PyObject *o = get_handler_name(&handler_info[i]);
1453 if (o != NULL)
1454 PyList_Append(rc, o);
1455 Py_XDECREF(o);
Fred Drake0582df92000-07-12 04:49:00 +00001456 }
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001457 APPEND(rc, "ErrorCode");
1458 APPEND(rc, "ErrorLineNumber");
1459 APPEND(rc, "ErrorColumnNumber");
1460 APPEND(rc, "ErrorByteIndex");
1461 APPEND(rc, "buffer_size");
1462 APPEND(rc, "buffer_text");
1463 APPEND(rc, "buffer_used");
Martin v. Löwis069dde22003-01-21 10:58:18 +00001464 APPEND(rc, "namespace_prefixes");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001465 APPEND(rc, "ordered_attributes");
1466 APPEND(rc, "returns_unicode");
1467 APPEND(rc, "specified_attributes");
1468 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001469
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001470#undef APPEND
Fred Drake0582df92000-07-12 04:49:00 +00001471 return rc;
1472 }
1473 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001474}
1475
Fred Drake6f987622000-08-25 18:03:30 +00001476static int
1477sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001478{
1479 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001480 if (handlernum >= 0) {
1481 xmlhandler c_handler = NULL;
1482 PyObject *temp = self->handlers[handlernum];
1483
1484 if (v == Py_None)
1485 v = NULL;
1486 else if (v != NULL) {
1487 Py_INCREF(v);
1488 c_handler = handler_info[handlernum].handler;
1489 }
Fred Drake0582df92000-07-12 04:49:00 +00001490 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001491 Py_XDECREF(temp);
1492 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001493 return 1;
1494 }
1495 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001496}
1497
1498static int
Fred Drake6f987622000-08-25 18:03:30 +00001499xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001500{
Fred Drake6f987622000-08-25 18:03:30 +00001501 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001502 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001503 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1504 return -1;
1505 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001506 if (strcmp(name, "buffer_text") == 0) {
1507 if (PyObject_IsTrue(v)) {
1508 if (self->buffer == NULL) {
1509 self->buffer = malloc(self->buffer_size);
1510 if (self->buffer == NULL) {
1511 PyErr_NoMemory();
1512 return -1;
1513 }
1514 self->buffer_used = 0;
1515 }
1516 }
1517 else if (self->buffer != NULL) {
1518 if (flush_character_buffer(self) < 0)
1519 return -1;
1520 free(self->buffer);
1521 self->buffer = NULL;
1522 }
1523 return 0;
1524 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001525 if (strcmp(name, "namespace_prefixes") == 0) {
1526 if (PyObject_IsTrue(v))
1527 self->ns_prefixes = 1;
1528 else
1529 self->ns_prefixes = 0;
1530 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1531 return 0;
1532 }
Fred Drake85d835f2001-02-08 15:39:08 +00001533 if (strcmp(name, "ordered_attributes") == 0) {
1534 if (PyObject_IsTrue(v))
1535 self->ordered_attributes = 1;
1536 else
1537 self->ordered_attributes = 0;
1538 return 0;
1539 }
Fred Drake6f987622000-08-25 18:03:30 +00001540 if (strcmp(name, "returns_unicode") == 0) {
Fred Drake85d835f2001-02-08 15:39:08 +00001541 if (PyObject_IsTrue(v)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001542#ifndef Py_USING_UNICODE
Fred Drake71b63ff2002-06-28 22:29:01 +00001543 PyErr_SetString(PyExc_ValueError,
1544 "Unicode support not available");
Fred Drake6f987622000-08-25 18:03:30 +00001545 return -1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001546#else
Fred Drake6f987622000-08-25 18:03:30 +00001547 self->returns_unicode = 1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001548#endif
Fred Drake6f987622000-08-25 18:03:30 +00001549 }
1550 else
1551 self->returns_unicode = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001552 return 0;
1553 }
1554 if (strcmp(name, "specified_attributes") == 0) {
1555 if (PyObject_IsTrue(v))
1556 self->specified_attributes = 1;
1557 else
1558 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001559 return 0;
1560 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001561 if (strcmp(name, "CharacterDataHandler") == 0) {
1562 /* If we're changing the character data handler, flush all
1563 * cached data with the old handler. Not sure there's a
1564 * "right" thing to do, though, but this probably won't
1565 * happen.
1566 */
1567 if (flush_character_buffer(self) < 0)
1568 return -1;
1569 }
Fred Drake6f987622000-08-25 18:03:30 +00001570 if (sethandler(self, name, v)) {
1571 return 0;
1572 }
1573 PyErr_SetString(PyExc_AttributeError, name);
1574 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001575}
1576
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001577#ifdef WITH_CYCLE_GC
1578static int
1579xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1580{
Fred Drakecde79132001-04-25 16:01:30 +00001581 int i, err;
1582 for (i = 0; handler_info[i].name != NULL; i++) {
1583 if (!op->handlers[i])
1584 continue;
1585 err = visit(op->handlers[i], arg);
1586 if (err)
1587 return err;
1588 }
1589 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001590}
1591
1592static int
1593xmlparse_clear(xmlparseobject *op)
1594{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001595 clear_handlers(op, 0);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001596 Py_XDECREF(op->intern);
1597 op->intern = 0;
Fred Drakecde79132001-04-25 16:01:30 +00001598 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001599}
1600#endif
1601
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001602PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001603
1604static PyTypeObject Xmlparsetype = {
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001605 PyObject_HEAD_INIT(NULL)
1606 0, /*ob_size*/
Guido van Rossum14648392001-12-08 18:02:58 +00001607 "pyexpat.xmlparser", /*tp_name*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001608 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001609 0, /*tp_itemsize*/
1610 /* methods */
1611 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1612 (printfunc)0, /*tp_print*/
1613 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1614 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1615 (cmpfunc)0, /*tp_compare*/
1616 (reprfunc)0, /*tp_repr*/
1617 0, /*tp_as_number*/
1618 0, /*tp_as_sequence*/
1619 0, /*tp_as_mapping*/
1620 (hashfunc)0, /*tp_hash*/
1621 (ternaryfunc)0, /*tp_call*/
1622 (reprfunc)0, /*tp_str*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001623 0, /* tp_getattro */
1624 0, /* tp_setattro */
1625 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001626#ifdef Py_TPFLAGS_HAVE_GC
Fred Drake71b63ff2002-06-28 22:29:01 +00001627 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001628#else
Fred Drake71b63ff2002-06-28 22:29:01 +00001629 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001630#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001631 Xmlparsetype__doc__, /* tp_doc - Documentation string */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001632#ifdef WITH_CYCLE_GC
1633 (traverseproc)xmlparse_traverse, /* tp_traverse */
1634 (inquiry)xmlparse_clear /* tp_clear */
1635#else
1636 0, 0
1637#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001638};
1639
1640/* End of code for xmlparser objects */
1641/* -------------------------------------------------------- */
1642
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001643PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001644"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001645Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001646
1647static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001648pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1649{
Fred Drakecde79132001-04-25 16:01:30 +00001650 char *encoding = NULL;
1651 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001652 PyObject *intern = NULL;
1653 PyObject *result;
1654 int intern_decref = 0;
Fred Drake71b63ff2002-06-28 22:29:01 +00001655 static char *kwlist[] = {"encoding", "namespace_separator",
Fred Drakeb91a36b2002-06-27 19:40:48 +00001656 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001657
Fred Drakeb91a36b2002-06-27 19:40:48 +00001658 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1659 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001660 return NULL;
1661 if (namespace_separator != NULL
1662 && strlen(namespace_separator) > 1) {
1663 PyErr_SetString(PyExc_ValueError,
1664 "namespace_separator must be at most one"
1665 " character, omitted, or None");
1666 return NULL;
1667 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001668 /* Explicitly passing None means no interning is desired.
1669 Not passing anything means that a new dictionary is used. */
1670 if (intern == Py_None)
1671 intern = NULL;
1672 else if (intern == NULL) {
1673 intern = PyDict_New();
1674 if (!intern)
1675 return NULL;
1676 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001677 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001678 else if (!PyDict_Check(intern)) {
1679 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1680 return NULL;
1681 }
1682
1683 result = newxmlparseobject(encoding, namespace_separator, intern);
1684 if (intern_decref) {
1685 Py_DECREF(intern);
1686 }
1687 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001688}
1689
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001690PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001691"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001692Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001693
1694static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001695pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001696{
Fred Drake0582df92000-07-12 04:49:00 +00001697 long code = 0;
1698
1699 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1700 return NULL;
1701 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001702}
1703
1704/* List of methods defined in the module */
1705
1706static struct PyMethodDef pyexpat_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001707 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1708 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1709 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1710 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001711
Fred Drake0582df92000-07-12 04:49:00 +00001712 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001713};
1714
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001715/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001716
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001717PyDoc_STRVAR(pyexpat_module_documentation,
1718"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001719
Fred Drake4113b132001-03-24 19:58:26 +00001720/* Return a Python string that represents the version number without the
1721 * extra cruft added by revision control, even if the right options were
1722 * given to the "cvs export" command to make it not include the extra
1723 * cruft.
1724 */
1725static PyObject *
1726get_version_string(void)
1727{
1728 static char *rcsid = "$Revision$";
1729 char *rev = rcsid;
1730 int i = 0;
1731
Neal Norwitz3afb2d22002-03-20 21:32:07 +00001732 while (!isdigit((int)*rev))
Fred Drake4113b132001-03-24 19:58:26 +00001733 ++rev;
1734 while (rev[i] != ' ' && rev[i] != '\0')
1735 ++i;
1736
1737 return PyString_FromStringAndSize(rev, i);
1738}
1739
Fred Drakecde79132001-04-25 16:01:30 +00001740/* Initialization function for the module */
1741
/* Name of the module; used as given unless already defined. */
#if !defined(MODULE_NAME)
#define MODULE_NAME "pyexpat"
#endif

/* Name of the module initialization function. */
#if !defined(MODULE_INITFUNC)
#define MODULE_INITFUNC initpyexpat
#endif

/* Fallback declaration specifier for the initialization function, used
 * when the Python headers do not provide PyMODINIT_FUNC.
 */
#if !defined(PyMODINIT_FUNC)
#   if defined(MS_WINDOWS)
#       define PyMODINIT_FUNC __declspec(dllexport) void
#   else
#       define PyMODINIT_FUNC void
#   endif
#endif

PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001759
Martin v. Löwis069dde22003-01-21 10:58:18 +00001760PyMODINIT_FUNC
1761MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001762{
1763 PyObject *m, *d;
Fred Drakecde79132001-04-25 16:01:30 +00001764 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001765 PyObject *errors_module;
1766 PyObject *modelmod_name;
1767 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001768 PyObject *sys_modules;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001769
Fred Drake6f987622000-08-25 18:03:30 +00001770 if (errmod_name == NULL)
1771 return;
Fred Drakecde79132001-04-25 16:01:30 +00001772 modelmod_name = PyString_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001773 if (modelmod_name == NULL)
1774 return;
Fred Drake6f987622000-08-25 18:03:30 +00001775
Fred Drake0582df92000-07-12 04:49:00 +00001776 Xmlparsetype.ob_type = &PyType_Type;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001777
Fred Drake0582df92000-07-12 04:49:00 +00001778 /* Create the module and add the functions */
Fred Drakecde79132001-04-25 16:01:30 +00001779 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
Fred Drake85d835f2001-02-08 15:39:08 +00001780 pyexpat_module_documentation);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001781
Fred Drake0582df92000-07-12 04:49:00 +00001782 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001783 if (ErrorObject == NULL) {
1784 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001785 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001786 if (ErrorObject == NULL)
1787 return;
1788 }
1789 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001790 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001791 Py_INCREF(ErrorObject);
1792 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001793 Py_INCREF(&Xmlparsetype);
1794 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001795
Fred Drake4113b132001-03-24 19:58:26 +00001796 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001797 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1798 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001799 {
1800 XML_Expat_Version info = XML_ExpatVersionInfo();
1801 PyModule_AddObject(m, "version_info",
1802 Py_BuildValue("(iii)", info.major,
1803 info.minor, info.micro));
1804 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001805#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001806 init_template_buffer();
1807#endif
Fred Drake0582df92000-07-12 04:49:00 +00001808 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001809 compiled, this should check and set native_encoding
1810 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001811 */
Fred Drake93adb692000-09-23 04:55:48 +00001812 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001813
Fred Drake85d835f2001-02-08 15:39:08 +00001814 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001815 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001816 errors_module = PyDict_GetItem(d, errmod_name);
1817 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001818 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001819 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001820 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001821 /* gives away the reference to errors_module */
1822 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001823 }
1824 }
Fred Drake6f987622000-08-25 18:03:30 +00001825 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001826 model_module = PyDict_GetItem(d, modelmod_name);
1827 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001828 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001829 if (model_module != NULL) {
1830 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1831 /* gives away the reference to model_module */
1832 PyModule_AddObject(m, "model", model_module);
1833 }
1834 }
1835 Py_DECREF(modelmod_name);
1836 if (errors_module == NULL || model_module == NULL)
1837 /* Don't core dump later! */
Fred Drake6f987622000-08-25 18:03:30 +00001838 return;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001839
Martin v. Löwisc847f402003-01-21 11:09:21 +00001840#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001841 {
1842 const XML_Feature *features = XML_GetFeatureList();
1843 PyObject *list = PyList_New(0);
1844 if (list == NULL)
1845 /* just ignore it */
1846 PyErr_Clear();
1847 else {
1848 int i = 0;
1849 for (; features[i].feature != XML_FEATURE_END; ++i) {
1850 int ok;
1851 PyObject *item = Py_BuildValue("si", features[i].name,
1852 features[i].value);
1853 if (item == NULL) {
1854 Py_DECREF(list);
1855 list = NULL;
1856 break;
1857 }
1858 ok = PyList_Append(list, item);
1859 Py_DECREF(item);
1860 if (ok < 0) {
1861 PyErr_Clear();
1862 break;
1863 }
1864 }
1865 if (list != NULL)
1866 PyModule_AddObject(m, "features", list);
1867 }
1868 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001869#endif
Fred Drake6f987622000-08-25 18:03:30 +00001870
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001871#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001872 PyModule_AddStringConstant(errors_module, #name, \
1873 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001874
Fred Drake0582df92000-07-12 04:49:00 +00001875 MYCONST(XML_ERROR_NO_MEMORY);
1876 MYCONST(XML_ERROR_SYNTAX);
1877 MYCONST(XML_ERROR_NO_ELEMENTS);
1878 MYCONST(XML_ERROR_INVALID_TOKEN);
1879 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1880 MYCONST(XML_ERROR_PARTIAL_CHAR);
1881 MYCONST(XML_ERROR_TAG_MISMATCH);
1882 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1883 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1884 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1885 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1886 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1887 MYCONST(XML_ERROR_ASYNC_ENTITY);
1888 MYCONST(XML_ERROR_BAD_CHAR_REF);
1889 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1890 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1891 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1892 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1893 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001894 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1895 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1896 MYCONST(XML_ERROR_NOT_STANDALONE);
1897
Fred Drake85d835f2001-02-08 15:39:08 +00001898 PyModule_AddStringConstant(errors_module, "__doc__",
1899 "Constants used to describe error conditions.");
1900
Fred Drake93adb692000-09-23 04:55:48 +00001901#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001902
Fred Drake85d835f2001-02-08 15:39:08 +00001903#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001904 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1905 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1906 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001907#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001908
Fred Drake85d835f2001-02-08 15:39:08 +00001909#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1910 PyModule_AddStringConstant(model_module, "__doc__",
1911 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001912
Fred Drake85d835f2001-02-08 15:39:08 +00001913 MYCONST(XML_CTYPE_EMPTY);
1914 MYCONST(XML_CTYPE_ANY);
1915 MYCONST(XML_CTYPE_MIXED);
1916 MYCONST(XML_CTYPE_NAME);
1917 MYCONST(XML_CTYPE_CHOICE);
1918 MYCONST(XML_CTYPE_SEQ);
1919
1920 MYCONST(XML_CQUANT_NONE);
1921 MYCONST(XML_CQUANT_OPT);
1922 MYCONST(XML_CQUANT_REP);
1923 MYCONST(XML_CQUANT_PLUS);
1924#undef MYCONST
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001925}
1926
Fred Drake6f987622000-08-25 18:03:30 +00001927static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001928clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001929{
Fred Drakecde79132001-04-25 16:01:30 +00001930 int i = 0;
1931 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001932
Fred Drake71b63ff2002-06-28 22:29:01 +00001933 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001934 if (initial)
Fred Drake71b63ff2002-06-28 22:29:01 +00001935 self->handlers[i] = NULL;
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001936 else {
Fred Drakecde79132001-04-25 16:01:30 +00001937 temp = self->handlers[i];
1938 self->handlers[i] = NULL;
1939 Py_XDECREF(temp);
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001940 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001941 }
Fred Drakecde79132001-04-25 16:01:30 +00001942 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001943}
1944
Tim Peters0c322792002-07-17 16:49:03 +00001945static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001946 {"StartElementHandler",
1947 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001948 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001949 {"EndElementHandler",
1950 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001951 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001952 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001953 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1954 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001955 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001956 (xmlhandlersetter)XML_SetCharacterDataHandler,
1957 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001958 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001959 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001960 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001961 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001962 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001963 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001964 {"StartNamespaceDeclHandler",
1965 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001966 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001967 {"EndNamespaceDeclHandler",
1968 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001969 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001970 {"CommentHandler",
1971 (xmlhandlersetter)XML_SetCommentHandler,
1972 (xmlhandler)my_CommentHandler},
1973 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001974 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001975 (xmlhandler)my_StartCdataSectionHandler},
1976 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001977 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001978 (xmlhandler)my_EndCdataSectionHandler},
1979 {"DefaultHandler",
1980 (xmlhandlersetter)XML_SetDefaultHandler,
1981 (xmlhandler)my_DefaultHandler},
1982 {"DefaultHandlerExpand",
1983 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
1984 (xmlhandler)my_DefaultHandlerExpandHandler},
1985 {"NotStandaloneHandler",
1986 (xmlhandlersetter)XML_SetNotStandaloneHandler,
1987 (xmlhandler)my_NotStandaloneHandler},
1988 {"ExternalEntityRefHandler",
1989 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001990 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001991 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001992 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001993 (xmlhandler)my_StartDoctypeDeclHandler},
1994 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001995 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001996 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00001997 {"EntityDeclHandler",
1998 (xmlhandlersetter)XML_SetEntityDeclHandler,
1999 (xmlhandler)my_EntityDeclHandler},
2000 {"XmlDeclHandler",
2001 (xmlhandlersetter)XML_SetXmlDeclHandler,
2002 (xmlhandler)my_XmlDeclHandler},
2003 {"ElementDeclHandler",
2004 (xmlhandlersetter)XML_SetElementDeclHandler,
2005 (xmlhandler)my_ElementDeclHandler},
2006 {"AttlistDeclHandler",
2007 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2008 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002009#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002010 {"SkippedEntityHandler",
2011 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2012 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002013#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002014
Fred Drake0582df92000-07-12 04:49:00 +00002015 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002016};