blob: 5a8423e0248af581feaf4503ee8a0c828bef8500 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "compile.h"
5#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00006#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00007
/* Expat's version folded into a single integer
   (major*10000 + minor*100 + micro) so that it can be compared in
   preprocessor conditionals. */
#define XML_COMBINED_VERSION \
    (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

#ifndef PyDoc_STRVAR

/*
 * Fallback docstring macros, used only when Python.h did not already
 * provide PyDoc_STRVAR.
 *
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)         str
#define PyDoc_VAR(name)        static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif

#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE
#else
/* Compiles in the trace_frame() helpers and their use in
   call_with_frame(). */
#define FIX_TRACE
#endif
30
/* One enumerator per callback kind.  The values are used as indices
   into xmlparseobject.handlers and into handler_info[], so the order
   of the enumerators must not be changed casually. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    _DummyDecl
};
58
59static PyObject *ErrorObject;
60
61/* ----------------------------------------------------- */
62
63/* Declarations for objects of type xmlparser */
64
65typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000066 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000067
Fred Drake0582df92000-07-12 04:49:00 +000068 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000069 int returns_unicode; /* True if Unicode strings are returned;
70 if false, UTF-8 strings are returned */
71 int ordered_attributes; /* Return attributes as a list. */
72 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000073 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000074 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000075 XML_Char *buffer; /* Buffer used when accumulating characters */
76 /* NULL if not enabled */
77 int buffer_size; /* Size of buffer, in XML_Char units */
78 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000079 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000080 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000081} xmlparseobject;
82
Fred Drake2a3d7db2002-06-28 22:56:48 +000083#define CHARACTER_DATA_BUFFER_SIZE 8192
84
Jeremy Hylton938ace62002-07-17 16:30:39 +000085static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000086
Fred Drake117ac852002-09-24 16:24:54 +000087typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000088typedef void* xmlhandler;
89
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000090struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000091 const char *name;
92 xmlhandlersetter setter;
93 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000094 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000095 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000096};
97
Jeremy Hylton938ace62002-07-17 16:30:39 +000098static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000099
Fred Drakebd6101c2001-02-14 18:29:45 +0000100/* Set an integer attribute on the error object; return true on success,
101 * false on an exception.
102 */
103static int
104set_error_attr(PyObject *err, char *name, int value)
105{
106 PyObject *v = PyInt_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Fred Drakebd6101c2001-02-14 18:29:45 +0000108 if (v != NULL && PyObject_SetAttrString(err, name, v) == -1) {
109 Py_DECREF(v);
110 return 0;
111 }
112 return 1;
113}
114
115/* Build and set an Expat exception, including positioning
116 * information. Always returns NULL.
117 */
Fred Drake85d835f2001-02-08 15:39:08 +0000118static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000119set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000120{
121 PyObject *err;
122 char buffer[256];
123 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000124 int lineno = XML_GetErrorLineNumber(parser);
125 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000126
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000127 /* There is no risk of overflowing this buffer, since
128 even for 64-bit integers, there is sufficient space. */
129 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000130 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000131 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000132 if ( err != NULL
133 && set_error_attr(err, "code", code)
134 && set_error_attr(err, "offset", column)
135 && set_error_attr(err, "lineno", lineno)) {
136 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000137 }
138 return NULL;
139}
140
Fred Drake71b63ff2002-06-28 22:29:01 +0000141static int
142have_handler(xmlparseobject *self, int type)
143{
144 PyObject *handler = self->handlers[type];
145 return handler != NULL;
146}
147
148static PyObject *
149get_handler_name(struct HandlerInfo *hinfo)
150{
151 PyObject *name = hinfo->nameobj;
152 if (name == NULL) {
153 name = PyString_FromString(hinfo->name);
154 hinfo->nameobj = name;
155 }
156 Py_XINCREF(name);
157 return name;
158}
159
Fred Drake85d835f2001-02-08 15:39:08 +0000160
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000161#ifdef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000162/* Convert a string of XML_Chars into a Unicode string.
163 Returns None if str is a null pointer. */
164
Fred Drake0582df92000-07-12 04:49:00 +0000165static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000166conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000167{
Fred Drake71b63ff2002-06-28 22:29:01 +0000168 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000169 and hence in UTF-8. */
170 /* UTF-8 from Expat, Unicode desired */
171 if (str == NULL) {
172 Py_INCREF(Py_None);
173 return Py_None;
174 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000175 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000176}
177
Fred Drake0582df92000-07-12 04:49:00 +0000178static PyObject *
179conv_string_len_to_unicode(const XML_Char *str, int len)
180{
Fred Drake71b63ff2002-06-28 22:29:01 +0000181 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000182 and hence in UTF-8. */
183 /* UTF-8 from Expat, Unicode desired */
184 if (str == NULL) {
185 Py_INCREF(Py_None);
186 return Py_None;
187 }
Fred Drake6f987622000-08-25 18:03:30 +0000188 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000189}
190#endif
191
192/* Convert a string of XML_Chars into an 8-bit Python string.
193 Returns None if str is a null pointer. */
194
Fred Drake6f987622000-08-25 18:03:30 +0000195static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000196conv_string_to_utf8(const XML_Char *str)
Fred Drake6f987622000-08-25 18:03:30 +0000197{
Fred Drake71b63ff2002-06-28 22:29:01 +0000198 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000199 and hence in UTF-8. */
200 /* UTF-8 from Expat, UTF-8 desired */
201 if (str == NULL) {
202 Py_INCREF(Py_None);
203 return Py_None;
204 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000205 return PyString_FromString(str);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000206}
207
Fred Drake6f987622000-08-25 18:03:30 +0000208static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +0000209conv_string_len_to_utf8(const XML_Char *str, int len)
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000210{
Fred Drake71b63ff2002-06-28 22:29:01 +0000211 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000212 and hence in UTF-8. */
213 /* UTF-8 from Expat, UTF-8 desired */
214 if (str == NULL) {
215 Py_INCREF(Py_None);
216 return Py_None;
217 }
218 return PyString_FromStringAndSize((const char *)str, len);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000219}
220
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000221/* Callback routines */
222
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000223static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000224
Martin v. Löwis069dde22003-01-21 10:58:18 +0000225/* This handler is used when an error has been detected, in the hope
226 that actual parsing can be terminated early. This will only help
227 if an external entity reference is encountered. */
228static int
229error_external_entity_ref_handler(XML_Parser parser,
230 const XML_Char *context,
231 const XML_Char *base,
232 const XML_Char *systemId,
233 const XML_Char *publicId)
234{
235 return 0;
236}
237
Fred Drake6f987622000-08-25 18:03:30 +0000238static void
239flag_error(xmlparseobject *self)
240{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000241 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000242 XML_SetExternalEntityRefHandler(self->itself,
243 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000244}
245
246static PyCodeObject*
247getcode(enum HandlerTypes slot, char* func_name, int lineno)
248{
Fred Drakebd6101c2001-02-14 18:29:45 +0000249 PyObject *code = NULL;
250 PyObject *name = NULL;
251 PyObject *nulltuple = NULL;
252 PyObject *filename = NULL;
253
254 if (handler_info[slot].tb_code == NULL) {
255 code = PyString_FromString("");
256 if (code == NULL)
257 goto failed;
258 name = PyString_FromString(func_name);
259 if (name == NULL)
260 goto failed;
261 nulltuple = PyTuple_New(0);
262 if (nulltuple == NULL)
263 goto failed;
264 filename = PyString_FromString(__FILE__);
265 handler_info[slot].tb_code =
266 PyCode_New(0, /* argcount */
267 0, /* nlocals */
268 0, /* stacksize */
269 0, /* flags */
270 code, /* code */
271 nulltuple, /* consts */
272 nulltuple, /* names */
273 nulltuple, /* varnames */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000274#if PYTHON_API_VERSION >= 1010
Fred Drakebd6101c2001-02-14 18:29:45 +0000275 nulltuple, /* freevars */
276 nulltuple, /* cellvars */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000277#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000278 filename, /* filename */
279 name, /* name */
280 lineno, /* firstlineno */
281 code /* lnotab */
282 );
283 if (handler_info[slot].tb_code == NULL)
284 goto failed;
285 Py_DECREF(code);
286 Py_DECREF(nulltuple);
287 Py_DECREF(filename);
288 Py_DECREF(name);
289 }
290 return handler_info[slot].tb_code;
291 failed:
292 Py_XDECREF(code);
293 Py_XDECREF(name);
294 return NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000295}
296
Jeremy Hylton9263f572003-06-27 16:13:17 +0000297#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000298static int
299trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
300{
301 int result = 0;
302 if (!tstate->use_tracing || tstate->tracing)
303 return 0;
304 if (tstate->c_profilefunc != NULL) {
305 tstate->tracing++;
306 result = tstate->c_profilefunc(tstate->c_profileobj,
307 f, code , val);
308 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
309 || (tstate->c_profilefunc != NULL));
310 tstate->tracing--;
311 if (result)
312 return result;
313 }
314 if (tstate->c_tracefunc != NULL) {
315 tstate->tracing++;
316 result = tstate->c_tracefunc(tstate->c_traceobj,
317 f, code , val);
318 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
319 || (tstate->c_profilefunc != NULL));
320 tstate->tracing--;
321 }
322 return result;
323}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000324
325static int
326trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
327{
328 PyObject *type, *value, *traceback, *arg;
329 int err;
330
331 if (tstate->c_tracefunc == NULL)
332 return 0;
333
334 PyErr_Fetch(&type, &value, &traceback);
335 if (value == NULL) {
336 value = Py_None;
337 Py_INCREF(value);
338 }
339 arg = Py_BuildValue("(OOO)", type, value, traceback);
340 if (arg == NULL) {
341 PyErr_Restore(type, value, traceback);
342 return 0;
343 }
344 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
345 Py_DECREF(arg);
346 if (err == 0)
347 PyErr_Restore(type, value, traceback);
348 else {
349 Py_XDECREF(type);
350 Py_XDECREF(value);
351 Py_XDECREF(traceback);
352 }
353 return err;
354}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000355#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000356
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000357static PyObject*
358call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args)
359{
Fred Drakebd6101c2001-02-14 18:29:45 +0000360 PyThreadState *tstate = PyThreadState_GET();
361 PyFrameObject *f;
362 PyObject *res;
363
364 if (c == NULL)
365 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000366
Jeremy Hylton9263f572003-06-27 16:13:17 +0000367 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000368 if (f == NULL)
369 return NULL;
370 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000371#ifdef FIX_TRACE
372 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000373 return NULL;
374 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000375#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000376 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000377 if (res == NULL) {
378 if (tstate->curexc_traceback == NULL)
379 PyTraceBack_Here(f);
380#ifdef FIX_TRACE
381 if (trace_frame_exc(tstate, f) < 0) {
382 return NULL;
383 }
384 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000385 else {
Jeremy Hylton9263f572003-06-27 16:13:17 +0000386 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000387 Py_XDECREF(res);
388 res = NULL;
389 }
390 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000391#else
392 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000393#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000394 tstate->frame = f->f_back;
395 Py_DECREF(f);
396 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000397}
398
/* Expands to the converter used for NUL-terminated Expat strings.
   In Unicode builds the expansion reads self->returns_unicode, so a
   variable named `self` must be in scope wherever it is used. */
#ifdef Py_USING_UNICODE
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#else
#define STRING_CONV_FUNC conv_string_to_utf8
#endif
Guido van Rossum5961f5a2000-03-31 16:18:11 +0000406
Fred Drakeb91a36b2002-06-27 19:40:48 +0000407static PyObject*
408string_intern(xmlparseobject *self, const char* str)
409{
410 PyObject *result = STRING_CONV_FUNC(str);
411 PyObject *value;
412 if (!self->intern)
413 return result;
414 value = PyDict_GetItem(self->intern, result);
415 if (!value) {
416 if (PyDict_SetItem(self->intern, result, result) == 0)
417 return result;
418 else
419 return NULL;
420 }
421 Py_INCREF(value);
422 Py_DECREF(result);
423 return value;
424}
425
Fred Drake2a3d7db2002-06-28 22:56:48 +0000426/* Return 0 on success, -1 on exception.
427 * flag_error() will be called before return if needed.
428 */
429static int
430call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
431{
432 PyObject *args;
433 PyObject *temp;
434
435 args = PyTuple_New(1);
436 if (args == NULL)
437 return -1;
438#ifdef Py_USING_UNICODE
439 temp = (self->returns_unicode
440 ? conv_string_len_to_unicode(buffer, len)
441 : conv_string_len_to_utf8(buffer, len));
442#else
443 temp = conv_string_len_to_utf8(buffer, len);
444#endif
445 if (temp == NULL) {
446 Py_DECREF(args);
447 flag_error(self);
448 return -1;
449 }
450 PyTuple_SET_ITEM(args, 0, temp);
451 /* temp is now a borrowed reference; consider it unused. */
452 self->in_callback = 1;
453 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
454 self->handlers[CharacterData], args);
455 /* temp is an owned reference again, or NULL */
456 self->in_callback = 0;
457 Py_DECREF(args);
458 if (temp == NULL) {
459 flag_error(self);
460 return -1;
461 }
462 Py_DECREF(temp);
463 return 0;
464}
465
466static int
467flush_character_buffer(xmlparseobject *self)
468{
469 int rc;
470 if (self->buffer == NULL || self->buffer_used == 0)
471 return 0;
472 rc = call_character_handler(self, self->buffer, self->buffer_used);
473 self->buffer_used = 0;
474 return rc;
475}
476
477static void
478my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
479{
480 xmlparseobject *self = (xmlparseobject *) userData;
481 if (self->buffer == NULL)
482 call_character_handler(self, data, len);
483 else {
484 if ((self->buffer_used + len) > self->buffer_size) {
485 if (flush_character_buffer(self) < 0)
486 return;
487 /* handler might have changed; drop the rest on the floor
488 * if there isn't a handler anymore
489 */
490 if (!have_handler(self, CharacterData))
491 return;
492 }
493 if (len > self->buffer_size) {
494 call_character_handler(self, data, len);
495 self->buffer_used = 0;
496 }
497 else {
498 memcpy(self->buffer + self->buffer_used,
499 data, len * sizeof(XML_Char));
500 self->buffer_used += len;
501 }
502 }
503}
504
Fred Drake85d835f2001-02-08 15:39:08 +0000505static void
506my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000507 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000508{
509 xmlparseobject *self = (xmlparseobject *)userData;
510
Fred Drake71b63ff2002-06-28 22:29:01 +0000511 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000512 PyObject *container, *rv, *args;
513 int i, max;
514
Fred Drake2a3d7db2002-06-28 22:56:48 +0000515 if (flush_character_buffer(self) < 0)
516 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000517 /* Set max to the number of slots filled in atts[]; max/2 is
518 * the number of attributes we need to process.
519 */
520 if (self->specified_attributes) {
521 max = XML_GetSpecifiedAttributeCount(self->itself);
522 }
523 else {
524 max = 0;
525 while (atts[max] != NULL)
526 max += 2;
527 }
528 /* Build the container. */
529 if (self->ordered_attributes)
530 container = PyList_New(max);
531 else
532 container = PyDict_New();
533 if (container == NULL) {
534 flag_error(self);
535 return;
536 }
537 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000538 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000539 PyObject *v;
540 if (n == NULL) {
541 flag_error(self);
542 Py_DECREF(container);
543 return;
544 }
545 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
546 if (v == NULL) {
547 flag_error(self);
548 Py_DECREF(container);
549 Py_DECREF(n);
550 return;
551 }
552 if (self->ordered_attributes) {
553 PyList_SET_ITEM(container, i, n);
554 PyList_SET_ITEM(container, i+1, v);
555 }
556 else if (PyDict_SetItem(container, n, v)) {
557 flag_error(self);
558 Py_DECREF(n);
559 Py_DECREF(v);
560 return;
561 }
562 else {
563 Py_DECREF(n);
564 Py_DECREF(v);
565 }
566 }
Fred Drakeb91a36b2002-06-27 19:40:48 +0000567 args = Py_BuildValue("(NN)", string_intern(self, name), container);
Fred Drake85d835f2001-02-08 15:39:08 +0000568 if (args == NULL) {
569 Py_DECREF(container);
570 return;
571 }
572 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000573 self->in_callback = 1;
574 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake85d835f2001-02-08 15:39:08 +0000575 self->handlers[StartElement], args);
Fred Drakebd6101c2001-02-14 18:29:45 +0000576 self->in_callback = 0;
577 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000578 if (rv == NULL) {
579 flag_error(self);
580 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000581 }
Fred Drake85d835f2001-02-08 15:39:08 +0000582 Py_DECREF(rv);
583 }
584}
585
/* Generates the callback my_<NAME>Handler.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; also forms the function name
 *   PARAMS        parenthesized parameter list
 *   INIT          extra declarations placed at the top of the body
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on a successful result `rv`
 *   RETURN        expression following every `return`
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * When a handler is installed, the generated function flushes the
 * character buffer, builds the argument tuple, calls the handler via
 * call_with_frame(), and calls flag_error() on failure.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS { \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ; \
        if (args == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME, #NAME, __LINE__), \
                             self->handlers[NAME], args); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
614
/* RC_HANDLER specialization for callbacks returning void: no extra
   declarations, no result conversion, and `userData` cast to the
   parser object. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000618
/* RC_HANDLER specialization for callbacks returning int: the
   handler's result is converted with PyInt_AsLong() into `rc`, which
   starts at 0 and is the value returned on every path. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyInt_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000623
Fred Drake71b63ff2002-06-28 22:29:01 +0000624VOID_HANDLER(EndElement,
625 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000626 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000627
Fred Drake6f987622000-08-25 18:03:30 +0000628VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000629 (void *userData,
630 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000631 const XML_Char *data),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000632 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000633
Fred Drake6f987622000-08-25 18:03:30 +0000634VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000635 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000636 const XML_Char *entityName,
637 const XML_Char *base,
638 const XML_Char *systemId,
639 const XML_Char *publicId,
640 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000641 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000642 string_intern(self, entityName), string_intern(self, base),
643 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000644 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000645
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000646#ifndef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000647VOID_HANDLER(EntityDecl,
648 (void *userData,
649 const XML_Char *entityName,
650 int is_parameter_entity,
651 const XML_Char *value,
652 int value_length,
653 const XML_Char *base,
654 const XML_Char *systemId,
655 const XML_Char *publicId,
656 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000657 ("NiNNNNN",
658 string_intern(self, entityName), is_parameter_entity,
Fred Drake85d835f2001-02-08 15:39:08 +0000659 conv_string_len_to_utf8(value, value_length),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000660 string_intern(self, base), string_intern(self, systemId),
661 string_intern(self, publicId),
662 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000663#else
664VOID_HANDLER(EntityDecl,
665 (void *userData,
666 const XML_Char *entityName,
667 int is_parameter_entity,
668 const XML_Char *value,
669 int value_length,
670 const XML_Char *base,
671 const XML_Char *systemId,
672 const XML_Char *publicId,
673 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000674 ("NiNNNNN",
675 string_intern(self, entityName), is_parameter_entity,
Fred Drake71b63ff2002-06-28 22:29:01 +0000676 (self->returns_unicode
677 ? conv_string_len_to_unicode(value, value_length)
Fred Drake85d835f2001-02-08 15:39:08 +0000678 : conv_string_len_to_utf8(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000679 string_intern(self, base), string_intern(self, systemId),
680 string_intern(self, publicId),
681 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000682#endif
683
684VOID_HANDLER(XmlDecl,
685 (void *userData,
686 const XML_Char *version,
687 const XML_Char *encoding,
688 int standalone),
689 ("(O&O&i)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000690 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000691 standalone))
692
693static PyObject *
694conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000695 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000696{
697 PyObject *result = NULL;
698 PyObject *children = PyTuple_New(model->numchildren);
699 int i;
700
701 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000702 assert(model->numchildren < INT_MAX);
703 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000704 PyObject *child = conv_content_model(&model->children[i],
705 conv_string);
706 if (child == NULL) {
707 Py_XDECREF(children);
708 return NULL;
709 }
710 PyTuple_SET_ITEM(children, i, child);
711 }
712 result = Py_BuildValue("(iiO&N)",
713 model->type, model->quant,
714 conv_string,model->name, children);
715 }
716 return result;
717}
718
Fred Drake06dd8cf2003-02-02 03:54:17 +0000719static void
720my_ElementDeclHandler(void *userData,
721 const XML_Char *name,
722 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000723{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000724 xmlparseobject *self = (xmlparseobject *)userData;
725 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000726
Fred Drake06dd8cf2003-02-02 03:54:17 +0000727 if (have_handler(self, ElementDecl)) {
728 PyObject *rv = NULL;
729 PyObject *modelobj, *nameobj;
730
731 if (flush_character_buffer(self) < 0)
732 goto finally;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000733#ifdef Py_USING_UNICODE
Fred Drake06dd8cf2003-02-02 03:54:17 +0000734 modelobj = conv_content_model(model,
735 (self->returns_unicode
736 ? conv_string_to_unicode
737 : conv_string_to_utf8));
Fred Drake85d835f2001-02-08 15:39:08 +0000738#else
Fred Drake06dd8cf2003-02-02 03:54:17 +0000739 modelobj = conv_content_model(model, conv_string_to_utf8);
Fred Drake85d835f2001-02-08 15:39:08 +0000740#endif
Fred Drake06dd8cf2003-02-02 03:54:17 +0000741 if (modelobj == NULL) {
742 flag_error(self);
743 goto finally;
744 }
745 nameobj = string_intern(self, name);
746 if (nameobj == NULL) {
747 Py_DECREF(modelobj);
748 flag_error(self);
749 goto finally;
750 }
751 args = Py_BuildValue("NN", string_intern(self, name), modelobj);
752 if (args == NULL) {
753 Py_DECREF(modelobj);
754 flag_error(self);
755 goto finally;
756 }
757 self->in_callback = 1;
758 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
759 self->handlers[ElementDecl], args);
760 self->in_callback = 0;
761 if (rv == NULL) {
762 flag_error(self);
763 goto finally;
764 }
765 Py_DECREF(rv);
766 }
767 finally:
768 Py_XDECREF(args);
769 XML_FreeContentModel(self->itself, model);
770 return;
771}
Fred Drake85d835f2001-02-08 15:39:08 +0000772
773VOID_HANDLER(AttlistDecl,
774 (void *userData,
775 const XML_Char *elname,
776 const XML_Char *attname,
777 const XML_Char *att_type,
778 const XML_Char *dflt,
779 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000780 ("(NNO&O&i)",
781 string_intern(self, elname), string_intern(self, attname),
Fred Drake85d835f2001-02-08 15:39:08 +0000782 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
783 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000784
#if XML_COMBINED_VERSION >= 19504
/* my_SkippedEntityHandler: passes the interned entity name and the
   parameter-entity flag.  Only built against Expat 1.95.4 or newer. */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName),
              is_parameter_entity))
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000793
Fred Drake71b63ff2002-06-28 22:29:01 +0000794VOID_HANDLER(NotationDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000795 (void *userData,
796 const XML_Char *notationName,
797 const XML_Char *base,
798 const XML_Char *systemId,
799 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000800 ("(NNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000801 string_intern(self, notationName), string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000802 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000803
Fred Drake6f987622000-08-25 18:03:30 +0000804VOID_HANDLER(StartNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000805 (void *userData,
806 const XML_Char *prefix,
807 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000808 ("(NN)",
809 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000810
Fred Drake6f987622000-08-25 18:03:30 +0000811VOID_HANDLER(EndNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000812 (void *userData,
813 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000814 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000815
/* Comment callback: the comment text is converted with STRING_CONV_FUNC
 * (an "O&" converter argument) rather than string_intern().
 * NOTE(review): STRING_CONV_FUNC is defined outside this section. */
Fred Drake6f987622000-08-25 18:03:30 +0000816VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000817 (void *userData, const XML_Char *data),
818 ("(O&)", STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000819
/* StartCdataSection callback: no data beyond userData, empty "()" spec. */
Fred Drake6f987622000-08-25 18:03:30 +0000820VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000821 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000822 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000823
/* EndCdataSection callback: no data beyond userData, empty "()" spec. */
Fred Drake6f987622000-08-25 18:03:30 +0000824VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000825 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000826 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000827
/* Default and DefaultHandlerExpand callbacks.  Both receive a counted
 * string (s, len).  Without Py_USING_UNICODE the data is always converted
 * with conv_string_len_to_utf8(); with it, self->returns_unicode selects
 * between conv_string_len_to_unicode() and conv_string_len_to_utf8().
 * The two handlers are declared identically apart from their names. */
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000828#ifndef Py_USING_UNICODE
Fred Drake6f987622000-08-25 18:03:30 +0000829VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000830 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000831 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000832
Fred Drake6f987622000-08-25 18:03:30 +0000833VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000834 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000835 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000836#else
Fred Drake6f987622000-08-25 18:03:30 +0000837VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000838 (void *userData, const XML_Char *s, int len),
839 ("(N)", (self->returns_unicode
840 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000841 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000842
Fred Drake6f987622000-08-25 18:03:30 +0000843VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000844 (void *userData, const XML_Char *s, int len),
845 ("(N)", (self->returns_unicode
846 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000847 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000848#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000849
/* NotStandalone callback, generated with INT_HANDLER: no data beyond
 * userData, empty "()" spec.
 * NOTE(review): INT_HANDLER is defined outside this section -- presumably
 * it returns the handler's result to Expat as an int; confirm. */
Fred Drake71b63ff2002-06-28 22:29:01 +0000850INT_HANDLER(NotStandalone,
851 (void *userData),
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000852 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000853
/* ExternalEntityRef callback, generated with RC_HANDLER.  Unlike the other
 * handlers its first parameter is the XML_Parser itself, so the parser
 * object is recovered with XML_GetUserData(parser) (last macro argument).
 * `context` goes through STRING_CONV_FUNC; base, systemId and publicId go
 * through string_intern().  The macro is also handed the statements
 * `int rc=0;` and `rc = PyInt_AsLong(rv);` and the expression `rc`.
 * NOTE(review): RC_HANDLER's definition is not visible here -- presumably
 * rc becomes the callback's return value; confirm, and note that a failing
 * PyInt_AsLong() is not checked in the supplied statement. */
Fred Drake6f987622000-08-25 18:03:30 +0000854RC_HANDLER(int, ExternalEntityRef,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000855 (XML_Parser parser,
856 const XML_Char *context,
857 const XML_Char *base,
858 const XML_Char *systemId,
859 const XML_Char *publicId),
860 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000861 ("(O&NNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000862 STRING_CONV_FUNC,context, string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000863 string_intern(self, systemId), string_intern(self, publicId)),
Fred Drake6f987622000-08-25 18:03:30 +0000864 rc = PyInt_AsLong(rv);, rc,
865 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000866
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000867/* XXX UnknownEncodingHandler */
868
/* StartDoctypeDecl callback: supplies doctypeName, sysid and pubid (each
 * passed through string_intern()) followed by the has_internal_subset
 * integer, under the "(NNNi)" spec. */
Fred Drake85d835f2001-02-08 15:39:08 +0000869VOID_HANDLER(StartDoctypeDecl,
870 (void *userData, const XML_Char *doctypeName,
871 const XML_Char *sysid, const XML_Char *pubid,
872 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000873 ("(NNNi)", string_intern(self, doctypeName),
874 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000875 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000876
/* EndDoctypeDecl callback: no data beyond userData, empty "()" spec. */
877VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000879/* ---------------------------------------------------------------- */
880
Fred Drake71b63ff2002-06-28 22:29:01 +0000881static PyObject *
882get_parse_result(xmlparseobject *self, int rv)
883{
884 if (PyErr_Occurred()) {
885 return NULL;
886 }
887 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000888 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000889 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000890 if (flush_character_buffer(self) < 0) {
891 return NULL;
892 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000893 return PyInt_FromLong(rv);
894}
895
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000896PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000897"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000898Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000899
900static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000901xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000902{
Fred Drake0582df92000-07-12 04:49:00 +0000903 char *s;
904 int slen;
905 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000906
Fred Drake0582df92000-07-12 04:49:00 +0000907 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
908 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000909
910 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000911}
912
Fred Drakeca1f4262000-09-21 20:10:23 +0000913/* File reading copied from cPickle */
914
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000915#define BUF_SIZE 2048
916
Fred Drake0582df92000-07-12 04:49:00 +0000917static int
918readinst(char *buf, int buf_size, PyObject *meth)
919{
920 PyObject *arg = NULL;
921 PyObject *bytes = NULL;
922 PyObject *str = NULL;
923 int len = -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000924
Fred Drake676940b2000-09-22 15:21:31 +0000925 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000926 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000927
Fred Drakeca1f4262000-09-21 20:10:23 +0000928 if ((arg = PyTuple_New(1)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000929 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000930
Tim Peters954eef72000-09-22 06:01:11 +0000931 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000932
Guido van Rossum84b2bed2002-08-16 17:01:09 +0000933 if ((str = PyObject_Call(meth, arg, NULL)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000934 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000935
Fred Drake0582df92000-07-12 04:49:00 +0000936 /* XXX what to do if it returns a Unicode string? */
Fred Drakeca1f4262000-09-21 20:10:23 +0000937 if (!PyString_Check(str)) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000938 PyErr_Format(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000939 "read() did not return a string object (type=%.400s)",
940 str->ob_type->tp_name);
941 goto finally;
942 }
943 len = PyString_GET_SIZE(str);
944 if (len > buf_size) {
945 PyErr_Format(PyExc_ValueError,
946 "read() returned too much data: "
947 "%i bytes requested, %i returned",
948 buf_size, len);
949 Py_DECREF(str);
950 goto finally;
951 }
952 memcpy(buf, PyString_AsString(str), len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000953finally:
Fred Drake0582df92000-07-12 04:49:00 +0000954 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000955 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000956 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000957}
958
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000959PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000960"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000961Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000962
963static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000964xmlparse_ParseFile(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000965{
Fred Drake0582df92000-07-12 04:49:00 +0000966 int rv = 1;
967 PyObject *f;
968 FILE *fp;
969 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000970
Fred Drake0582df92000-07-12 04:49:00 +0000971 if (!PyArg_ParseTuple(args, "O:ParseFile", &f))
972 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000973
Fred Drake0582df92000-07-12 04:49:00 +0000974 if (PyFile_Check(f)) {
975 fp = PyFile_AsFile(f);
976 }
977 else{
978 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000979 readmethod = PyObject_GetAttrString(f, "read");
980 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000981 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000982 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000983 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000984 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000985 }
986 }
987 for (;;) {
988 int bytes_read;
989 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
990 if (buf == NULL)
991 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000992
Fred Drake0582df92000-07-12 04:49:00 +0000993 if (fp) {
994 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
995 if (bytes_read < 0) {
996 PyErr_SetFromErrno(PyExc_IOError);
997 return NULL;
998 }
999 }
1000 else {
1001 bytes_read = readinst(buf, BUF_SIZE, readmethod);
1002 if (bytes_read < 0)
1003 return NULL;
1004 }
1005 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
1006 if (PyErr_Occurred())
1007 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001008
Fred Drake0582df92000-07-12 04:49:00 +00001009 if (!rv || bytes_read == 0)
1010 break;
1011 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001012 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001013}
1014
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001015PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001016"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001017Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001018
1019static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001020xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1021{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001022 char *base;
1023
Fred Drake0582df92000-07-12 04:49:00 +00001024 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001025 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001026 if (!XML_SetBase(self->itself, base)) {
1027 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001028 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001029 Py_INCREF(Py_None);
1030 return Py_None;
1031}
1032
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001033PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001034"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001035Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001036
1037static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001038xmlparse_GetBase(xmlparseobject *self, PyObject *args)
1039{
1040 if (!PyArg_ParseTuple(args, ":GetBase"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001041 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001042
Fred Drake0582df92000-07-12 04:49:00 +00001043 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001044}
1045
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001046PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +00001047"GetInputContext() -> string\n\
1048Return the untranslated text of the input that caused the current event.\n\
1049If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001050for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +00001051
1052static PyObject *
1053xmlparse_GetInputContext(xmlparseobject *self, PyObject *args)
1054{
1055 PyObject *result = NULL;
1056
1057 if (PyArg_ParseTuple(args, ":GetInputContext")) {
1058 if (self->in_callback) {
1059 int offset, size;
1060 const char *buffer
1061 = XML_GetInputContext(self->itself, &offset, &size);
1062
1063 if (buffer != NULL)
1064 result = PyString_FromStringAndSize(buffer + offset, size);
1065 else {
1066 result = Py_None;
1067 Py_INCREF(result);
1068 }
1069 }
1070 else {
1071 result = Py_None;
1072 Py_INCREF(result);
1073 }
1074 }
1075 return result;
1076}
Fred Drakebd6101c2001-02-14 18:29:45 +00001077
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001078PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001079"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001080Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001081information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001082
1083static PyObject *
1084xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1085{
1086 char *context;
1087 char *encoding = NULL;
1088 xmlparseobject *new_parser;
1089 int i;
1090
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001091 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001092 &context, &encoding)) {
1093 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001094 }
1095
Martin v. Löwis894258c2001-09-23 10:20:10 +00001096#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001097 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001098 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001099#else
1100 /* Python versions 2.2 and later */
1101 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1102#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001103
1104 if (new_parser == NULL)
1105 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001106 new_parser->buffer_size = self->buffer_size;
1107 new_parser->buffer_used = 0;
1108 if (self->buffer != NULL) {
1109 new_parser->buffer = malloc(new_parser->buffer_size);
1110 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001111#ifndef Py_TPFLAGS_HAVE_GC
1112 /* Code for versions 2.0 and 2.1 */
1113 PyObject_Del(new_parser);
1114#else
1115 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001116 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001117#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001118 return PyErr_NoMemory();
1119 }
1120 }
1121 else
1122 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001123 new_parser->returns_unicode = self->returns_unicode;
1124 new_parser->ordered_attributes = self->ordered_attributes;
1125 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001126 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001127 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001128 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001129 encoding);
1130 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001131 new_parser->intern = self->intern;
1132 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001133#ifdef Py_TPFLAGS_HAVE_GC
1134 PyObject_GC_Track(new_parser);
1135#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001136 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001137#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001138
1139 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001140 Py_DECREF(new_parser);
1141 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001142 }
1143
1144 XML_SetUserData(new_parser->itself, (void *)new_parser);
1145
1146 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001147 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001148 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001149
Fred Drake2a3d7db2002-06-28 22:56:48 +00001150 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001151 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001152 Py_DECREF(new_parser);
1153 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001154 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001155 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001156
1157 /* then copy handlers from self */
1158 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001159 PyObject *handler = self->handlers[i];
1160 if (handler != NULL) {
1161 Py_INCREF(handler);
1162 new_parser->handlers[i] = handler;
1163 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001164 handler_info[i].handler);
1165 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001166 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001167 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001168}
1169
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001170PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001171"SetParamEntityParsing(flag) -> success\n\
1172Controls parsing of parameter entities (including the external DTD\n\
1173subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1174XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1175XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001176was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001177
1178static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001179xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001180{
Fred Drake85d835f2001-02-08 15:39:08 +00001181 int flag;
1182 if (!PyArg_ParseTuple(args, "i", &flag))
1183 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001184 flag = XML_SetParamEntityParsing(p->itself, flag);
Fred Drake85d835f2001-02-08 15:39:08 +00001185 return PyInt_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001186}
1187
Martin v. Löwisc847f402003-01-21 11:09:21 +00001188
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): forward the truth value of `flag` (true when
 * omitted) to XML_UseForeignDTD().
 *
 * Returns None on success.  Returns NULL with an exception set when the
 * arguments are invalid or evaluating the truth of `flag` fails, and the
 * value of set_error() when Expat rejects the request.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;
    if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 on error; do not mistake that
           for "true" and carry on with an exception pending. */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001216
/* Method table for parser objects.  Each entry maps the Python-visible
 * method name to its C implementation and docstring; every method uses
 * the METH_VARARGS calling convention.  UseForeignDTD is listed only when
 * XML_COMBINED_VERSION >= 19505, matching the guard around its
 * definition.  The table ends with a NULL sentinel entry. */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001217static struct PyMethodDef xmlparse_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001218 {"Parse", (PyCFunction)xmlparse_Parse,
Fred Drakebd6101c2001-02-14 18:29:45 +00001219 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001220 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Fred Drakebd6101c2001-02-14 18:29:45 +00001221 METH_VARARGS, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001222 {"SetBase", (PyCFunction)xmlparse_SetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001223 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001224 {"GetBase", (PyCFunction)xmlparse_GetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001225 METH_VARARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001226 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001227 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001228 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1229 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001230 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
1231 METH_VARARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001232#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001233 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1234 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001235#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001236 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001237};
1238
1239/* ---------- */
1240
1241
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001242#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001243
Fred Drake71b63ff2002-06-28 22:29:01 +00001244/* pyexpat international encoding support.
1245 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001246*/
1247
Martin v. Löwis3af7cc02001-01-22 08:19:10 +00001248static char template_buffer[257];
Fred Drakebb66a202001-03-01 20:48:17 +00001249PyObject *template_string = NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001250
Fred Drake71b63ff2002-06-28 22:29:01 +00001251static void
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001252init_template_buffer(void)
1253{
1254 int i;
Fred Drakebb66a202001-03-01 20:48:17 +00001255 for (i = 0; i < 256; i++) {
1256 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001257 }
Fred Drakebb66a202001-03-01 20:48:17 +00001258 template_buffer[256] = 0;
Tim Peters63cb99e2001-02-17 18:12:50 +00001259}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001260
Fred Drake71b63ff2002-06-28 22:29:01 +00001261static int
1262PyUnknownEncodingHandler(void *encodingHandlerData,
1263 const XML_Char *name,
1264 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001265{
Fred Drakebb66a202001-03-01 20:48:17 +00001266 PyUnicodeObject *_u_string = NULL;
1267 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001268 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001269
Fred Drakebb66a202001-03-01 20:48:17 +00001270 /* Yes, supports only 8bit encodings */
1271 _u_string = (PyUnicodeObject *)
1272 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001273
Fred Drakebb66a202001-03-01 20:48:17 +00001274 if (_u_string == NULL)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001275 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001276
Fred Drakebb66a202001-03-01 20:48:17 +00001277 for (i = 0; i < 256; i++) {
1278 /* Stupid to access directly, but fast */
1279 Py_UNICODE c = _u_string->str[i];
1280 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001281 info->map[i] = -1;
Fred Drakebb66a202001-03-01 20:48:17 +00001282 else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001283 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001284 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001285 info->data = NULL;
1286 info->convert = NULL;
1287 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001288 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001289 Py_DECREF(_u_string);
1290 return result;
1291}
1292
1293#endif
1294
1295static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001296newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001297{
1298 int i;
1299 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001300
Martin v. Löwis894258c2001-09-23 10:20:10 +00001301#ifdef Py_TPFLAGS_HAVE_GC
1302 /* Code for versions 2.2 and later */
1303 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1304#else
Fred Drake0582df92000-07-12 04:49:00 +00001305 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001306#endif
Fred Drake0582df92000-07-12 04:49:00 +00001307 if (self == NULL)
1308 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001309
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001310#ifdef Py_USING_UNICODE
Fred Drake0582df92000-07-12 04:49:00 +00001311 self->returns_unicode = 1;
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001312#else
1313 self->returns_unicode = 0;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001314#endif
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001315
Fred Drake2a3d7db2002-06-28 22:56:48 +00001316 self->buffer = NULL;
1317 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1318 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001319 self->ordered_attributes = 0;
1320 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001321 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001322 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001323 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001324 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001325 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1326 }
Fred Drake85d835f2001-02-08 15:39:08 +00001327 else {
Fred Drake0582df92000-07-12 04:49:00 +00001328 self->itself = XML_ParserCreate(encoding);
1329 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001330 self->intern = intern;
1331 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001332#ifdef Py_TPFLAGS_HAVE_GC
1333 PyObject_GC_Track(self);
1334#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001335 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001336#endif
Fred Drake0582df92000-07-12 04:49:00 +00001337 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001338 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001339 "XML_ParserCreate failed");
1340 Py_DECREF(self);
1341 return NULL;
1342 }
1343 XML_SetUserData(self->itself, (void *)self);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001344#ifdef Py_USING_UNICODE
Fred Drake7c75bf22002-07-01 14:02:31 +00001345 XML_SetUnknownEncodingHandler(self->itself,
1346 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001347#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001348
Fred Drake2a3d7db2002-06-28 22:56:48 +00001349 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001350 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001351
Fred Drake7c75bf22002-07-01 14:02:31 +00001352 self->handlers = malloc(sizeof(PyObject *) * i);
1353 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001354 Py_DECREF(self);
1355 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001356 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001357 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001358
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001359 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001360}
1361
1362
1363static void
Fred Drake0582df92000-07-12 04:49:00 +00001364xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001365{
Fred Drake0582df92000-07-12 04:49:00 +00001366 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001367#ifdef Py_TPFLAGS_HAVE_GC
1368 PyObject_GC_UnTrack(self);
1369#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001370 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001371#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001372 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001373 XML_ParserFree(self->itself);
1374 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001375
Fred Drake85d835f2001-02-08 15:39:08 +00001376 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001377 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001378 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001379 temp = self->handlers[i];
1380 self->handlers[i] = NULL;
1381 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001382 }
1383 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001384 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001385 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001386 if (self->buffer != NULL) {
1387 free(self->buffer);
1388 self->buffer = NULL;
1389 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001390 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001391#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001392 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001393 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001394#else
1395 /* Code for versions 2.2 and later. */
1396 PyObject_GC_Del(self);
1397#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001398}
1399
Fred Drake0582df92000-07-12 04:49:00 +00001400static int
1401handlername2int(const char *name)
1402{
1403 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001404 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001405 if (strcmp(name, handler_info[i].name) == 0) {
1406 return i;
1407 }
1408 }
1409 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001410}
1411
1412static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001413get_pybool(int istrue)
1414{
1415 PyObject *result = istrue ? Py_True : Py_False;
1416 Py_INCREF(result);
1417 return result;
1418}
1419
1420static PyObject *
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001421xmlparse_getattr(xmlparseobject *self, char *name)
1422{
Fred Drake71b63ff2002-06-28 22:29:01 +00001423 int handlernum = handlername2int(name);
1424
1425 if (handlernum != -1) {
1426 PyObject *result = self->handlers[handlernum];
1427 if (result == NULL)
1428 result = Py_None;
1429 Py_INCREF(result);
1430 return result;
1431 }
1432 if (name[0] == 'E') {
1433 if (strcmp(name, "ErrorCode") == 0)
1434 return PyInt_FromLong((long)
1435 XML_GetErrorCode(self->itself));
1436 if (strcmp(name, "ErrorLineNumber") == 0)
1437 return PyInt_FromLong((long)
1438 XML_GetErrorLineNumber(self->itself));
1439 if (strcmp(name, "ErrorColumnNumber") == 0)
1440 return PyInt_FromLong((long)
1441 XML_GetErrorColumnNumber(self->itself));
1442 if (strcmp(name, "ErrorByteIndex") == 0)
1443 return PyInt_FromLong((long)
1444 XML_GetErrorByteIndex(self->itself));
1445 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001446 if (name[0] == 'b') {
1447 if (strcmp(name, "buffer_size") == 0)
1448 return PyInt_FromLong((long) self->buffer_size);
1449 if (strcmp(name, "buffer_text") == 0)
1450 return get_pybool(self->buffer != NULL);
1451 if (strcmp(name, "buffer_used") == 0)
1452 return PyInt_FromLong((long) self->buffer_used);
1453 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001454 if (strcmp(name, "namespace_prefixes") == 0)
1455 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001456 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001457 return get_pybool(self->ordered_attributes);
Fred Drake0582df92000-07-12 04:49:00 +00001458 if (strcmp(name, "returns_unicode") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001459 return get_pybool((long) self->returns_unicode);
Fred Drake85d835f2001-02-08 15:39:08 +00001460 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001461 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001462 if (strcmp(name, "intern") == 0) {
1463 if (self->intern == NULL) {
1464 Py_INCREF(Py_None);
1465 return Py_None;
1466 }
1467 else {
1468 Py_INCREF(self->intern);
1469 return self->intern;
1470 }
1471 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001472
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001473#define APPEND(list, str) \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001474 do { \
1475 PyObject *o = PyString_FromString(str); \
1476 if (o != NULL) \
1477 PyList_Append(list, o); \
1478 Py_XDECREF(o); \
1479 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001480
Fred Drake0582df92000-07-12 04:49:00 +00001481 if (strcmp(name, "__members__") == 0) {
1482 int i;
1483 PyObject *rc = PyList_New(0);
Fred Drake71b63ff2002-06-28 22:29:01 +00001484 for (i = 0; handler_info[i].name != NULL; i++) {
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001485 PyObject *o = get_handler_name(&handler_info[i]);
1486 if (o != NULL)
1487 PyList_Append(rc, o);
1488 Py_XDECREF(o);
Fred Drake0582df92000-07-12 04:49:00 +00001489 }
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001490 APPEND(rc, "ErrorCode");
1491 APPEND(rc, "ErrorLineNumber");
1492 APPEND(rc, "ErrorColumnNumber");
1493 APPEND(rc, "ErrorByteIndex");
1494 APPEND(rc, "buffer_size");
1495 APPEND(rc, "buffer_text");
1496 APPEND(rc, "buffer_used");
Martin v. Löwis069dde22003-01-21 10:58:18 +00001497 APPEND(rc, "namespace_prefixes");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001498 APPEND(rc, "ordered_attributes");
1499 APPEND(rc, "returns_unicode");
1500 APPEND(rc, "specified_attributes");
1501 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001502
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001503#undef APPEND
Fred Drake0582df92000-07-12 04:49:00 +00001504 return rc;
1505 }
1506 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001507}
1508
Fred Drake6f987622000-08-25 18:03:30 +00001509static int
1510sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001511{
1512 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001513 if (handlernum >= 0) {
1514 xmlhandler c_handler = NULL;
1515 PyObject *temp = self->handlers[handlernum];
1516
1517 if (v == Py_None)
1518 v = NULL;
1519 else if (v != NULL) {
1520 Py_INCREF(v);
1521 c_handler = handler_info[handlernum].handler;
1522 }
Fred Drake0582df92000-07-12 04:49:00 +00001523 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001524 Py_XDECREF(temp);
1525 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001526 return 1;
1527 }
1528 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001529}
1530
1531static int
Fred Drake6f987622000-08-25 18:03:30 +00001532xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001533{
Fred Drake6f987622000-08-25 18:03:30 +00001534 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001535 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001536 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1537 return -1;
1538 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001539 if (strcmp(name, "buffer_text") == 0) {
1540 if (PyObject_IsTrue(v)) {
1541 if (self->buffer == NULL) {
1542 self->buffer = malloc(self->buffer_size);
1543 if (self->buffer == NULL) {
1544 PyErr_NoMemory();
1545 return -1;
1546 }
1547 self->buffer_used = 0;
1548 }
1549 }
1550 else if (self->buffer != NULL) {
1551 if (flush_character_buffer(self) < 0)
1552 return -1;
1553 free(self->buffer);
1554 self->buffer = NULL;
1555 }
1556 return 0;
1557 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001558 if (strcmp(name, "namespace_prefixes") == 0) {
1559 if (PyObject_IsTrue(v))
1560 self->ns_prefixes = 1;
1561 else
1562 self->ns_prefixes = 0;
1563 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1564 return 0;
1565 }
Fred Drake85d835f2001-02-08 15:39:08 +00001566 if (strcmp(name, "ordered_attributes") == 0) {
1567 if (PyObject_IsTrue(v))
1568 self->ordered_attributes = 1;
1569 else
1570 self->ordered_attributes = 0;
1571 return 0;
1572 }
Fred Drake6f987622000-08-25 18:03:30 +00001573 if (strcmp(name, "returns_unicode") == 0) {
Fred Drake85d835f2001-02-08 15:39:08 +00001574 if (PyObject_IsTrue(v)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001575#ifndef Py_USING_UNICODE
Fred Drake71b63ff2002-06-28 22:29:01 +00001576 PyErr_SetString(PyExc_ValueError,
1577 "Unicode support not available");
Fred Drake6f987622000-08-25 18:03:30 +00001578 return -1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001579#else
Fred Drake6f987622000-08-25 18:03:30 +00001580 self->returns_unicode = 1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001581#endif
Fred Drake6f987622000-08-25 18:03:30 +00001582 }
1583 else
1584 self->returns_unicode = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001585 return 0;
1586 }
1587 if (strcmp(name, "specified_attributes") == 0) {
1588 if (PyObject_IsTrue(v))
1589 self->specified_attributes = 1;
1590 else
1591 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001592 return 0;
1593 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001594 if (strcmp(name, "CharacterDataHandler") == 0) {
1595 /* If we're changing the character data handler, flush all
1596 * cached data with the old handler. Not sure there's a
1597 * "right" thing to do, though, but this probably won't
1598 * happen.
1599 */
1600 if (flush_character_buffer(self) < 0)
1601 return -1;
1602 }
Fred Drake6f987622000-08-25 18:03:30 +00001603 if (sethandler(self, name, v)) {
1604 return 0;
1605 }
1606 PyErr_SetString(PyExc_AttributeError, name);
1607 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001608}
1609
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001610#ifdef WITH_CYCLE_GC
1611static int
1612xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1613{
Fred Drakecde79132001-04-25 16:01:30 +00001614 int i, err;
1615 for (i = 0; handler_info[i].name != NULL; i++) {
1616 if (!op->handlers[i])
1617 continue;
1618 err = visit(op->handlers[i], arg);
1619 if (err)
1620 return err;
1621 }
1622 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001623}
1624
1625static int
1626xmlparse_clear(xmlparseobject *op)
1627{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001628 clear_handlers(op, 0);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001629 Py_XDECREF(op->intern);
1630 op->intern = 0;
Fred Drakecde79132001-04-25 16:01:30 +00001631 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001632}
1633#endif
1634
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001635PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001636
1637static PyTypeObject Xmlparsetype = {
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001638 PyObject_HEAD_INIT(NULL)
1639 0, /*ob_size*/
Guido van Rossum14648392001-12-08 18:02:58 +00001640 "pyexpat.xmlparser", /*tp_name*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001641 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001642 0, /*tp_itemsize*/
1643 /* methods */
1644 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1645 (printfunc)0, /*tp_print*/
1646 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1647 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1648 (cmpfunc)0, /*tp_compare*/
1649 (reprfunc)0, /*tp_repr*/
1650 0, /*tp_as_number*/
1651 0, /*tp_as_sequence*/
1652 0, /*tp_as_mapping*/
1653 (hashfunc)0, /*tp_hash*/
1654 (ternaryfunc)0, /*tp_call*/
1655 (reprfunc)0, /*tp_str*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001656 0, /* tp_getattro */
1657 0, /* tp_setattro */
1658 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001659#ifdef Py_TPFLAGS_HAVE_GC
Fred Drake71b63ff2002-06-28 22:29:01 +00001660 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001661#else
Fred Drake71b63ff2002-06-28 22:29:01 +00001662 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001663#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001664 Xmlparsetype__doc__, /* tp_doc - Documentation string */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001665#ifdef WITH_CYCLE_GC
1666 (traverseproc)xmlparse_traverse, /* tp_traverse */
1667 (inquiry)xmlparse_clear /* tp_clear */
1668#else
1669 0, 0
1670#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001671};
1672
1673/* End of code for xmlparser objects */
1674/* -------------------------------------------------------- */
1675
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001676PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001677"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001678Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001679
1680static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001681pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1682{
Fred Drakecde79132001-04-25 16:01:30 +00001683 char *encoding = NULL;
1684 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001685 PyObject *intern = NULL;
1686 PyObject *result;
1687 int intern_decref = 0;
Fred Drake71b63ff2002-06-28 22:29:01 +00001688 static char *kwlist[] = {"encoding", "namespace_separator",
Fred Drakeb91a36b2002-06-27 19:40:48 +00001689 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001690
Fred Drakeb91a36b2002-06-27 19:40:48 +00001691 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1692 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001693 return NULL;
1694 if (namespace_separator != NULL
1695 && strlen(namespace_separator) > 1) {
1696 PyErr_SetString(PyExc_ValueError,
1697 "namespace_separator must be at most one"
1698 " character, omitted, or None");
1699 return NULL;
1700 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001701 /* Explicitly passing None means no interning is desired.
1702 Not passing anything means that a new dictionary is used. */
1703 if (intern == Py_None)
1704 intern = NULL;
1705 else if (intern == NULL) {
1706 intern = PyDict_New();
1707 if (!intern)
1708 return NULL;
1709 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001710 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001711 else if (!PyDict_Check(intern)) {
1712 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1713 return NULL;
1714 }
1715
1716 result = newxmlparseobject(encoding, namespace_separator, intern);
1717 if (intern_decref) {
1718 Py_DECREF(intern);
1719 }
1720 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001721}
1722
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001723PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001724"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001725Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001726
1727static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001728pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001729{
Fred Drake0582df92000-07-12 04:49:00 +00001730 long code = 0;
1731
1732 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1733 return NULL;
1734 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001735}
1736
1737/* List of methods defined in the module */
1738
1739static struct PyMethodDef pyexpat_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001740 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1741 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1742 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1743 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001744
Fred Drake0582df92000-07-12 04:49:00 +00001745 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001746};
1747
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001748/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001749
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001750PyDoc_STRVAR(pyexpat_module_documentation,
1751"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001752
Fred Drake4113b132001-03-24 19:58:26 +00001753/* Return a Python string that represents the version number without the
1754 * extra cruft added by revision control, even if the right options were
1755 * given to the "cvs export" command to make it not include the extra
1756 * cruft.
1757 */
1758static PyObject *
1759get_version_string(void)
1760{
1761 static char *rcsid = "$Revision$";
1762 char *rev = rcsid;
1763 int i = 0;
1764
Neal Norwitz3afb2d22002-03-20 21:32:07 +00001765 while (!isdigit((int)*rev))
Fred Drake4113b132001-03-24 19:58:26 +00001766 ++rev;
1767 while (rev[i] != ' ' && rev[i] != '\0')
1768 ++i;
1769
1770 return PyString_FromStringAndSize(rev, i);
1771}
1772
/* Initialization function for the module.
 *
 * MODULE_NAME and MODULE_INITFUNC may be predefined by the build; the
 * defaults below are used otherwise.
 */

#if !defined(MODULE_NAME)
#define MODULE_NAME "pyexpat"
#endif

#if !defined(MODULE_INITFUNC)
#define MODULE_INITFUNC initpyexpat
#endif

/* Fallback for headers that do not provide PyMODINIT_FUNC. */
#if !defined(PyMODINIT_FUNC)
#   ifdef MS_WINDOWS
#       define PyMODINIT_FUNC __declspec(dllexport) void
#   else
#       define PyMODINIT_FUNC void
#   endif
#endif

PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001792
Martin v. Löwis069dde22003-01-21 10:58:18 +00001793PyMODINIT_FUNC
1794MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001795{
1796 PyObject *m, *d;
Fred Drakecde79132001-04-25 16:01:30 +00001797 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001798 PyObject *errors_module;
1799 PyObject *modelmod_name;
1800 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001801 PyObject *sys_modules;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001802
Fred Drake6f987622000-08-25 18:03:30 +00001803 if (errmod_name == NULL)
1804 return;
Fred Drakecde79132001-04-25 16:01:30 +00001805 modelmod_name = PyString_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001806 if (modelmod_name == NULL)
1807 return;
Fred Drake6f987622000-08-25 18:03:30 +00001808
Fred Drake0582df92000-07-12 04:49:00 +00001809 Xmlparsetype.ob_type = &PyType_Type;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001810
Fred Drake0582df92000-07-12 04:49:00 +00001811 /* Create the module and add the functions */
Fred Drakecde79132001-04-25 16:01:30 +00001812 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
Fred Drake85d835f2001-02-08 15:39:08 +00001813 pyexpat_module_documentation);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001814
Fred Drake0582df92000-07-12 04:49:00 +00001815 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001816 if (ErrorObject == NULL) {
1817 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001818 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001819 if (ErrorObject == NULL)
1820 return;
1821 }
1822 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001823 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001824 Py_INCREF(ErrorObject);
1825 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001826 Py_INCREF(&Xmlparsetype);
1827 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001828
Fred Drake4113b132001-03-24 19:58:26 +00001829 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001830 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1831 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001832 {
1833 XML_Expat_Version info = XML_ExpatVersionInfo();
1834 PyModule_AddObject(m, "version_info",
1835 Py_BuildValue("(iii)", info.major,
1836 info.minor, info.micro));
1837 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001838#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001839 init_template_buffer();
1840#endif
Fred Drake0582df92000-07-12 04:49:00 +00001841 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001842 compiled, this should check and set native_encoding
1843 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001844 */
Fred Drake93adb692000-09-23 04:55:48 +00001845 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001846
Fred Drake85d835f2001-02-08 15:39:08 +00001847 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001848 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001849 errors_module = PyDict_GetItem(d, errmod_name);
1850 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001851 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001852 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001853 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001854 /* gives away the reference to errors_module */
1855 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001856 }
1857 }
Fred Drake6f987622000-08-25 18:03:30 +00001858 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001859 model_module = PyDict_GetItem(d, modelmod_name);
1860 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001861 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001862 if (model_module != NULL) {
1863 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1864 /* gives away the reference to model_module */
1865 PyModule_AddObject(m, "model", model_module);
1866 }
1867 }
1868 Py_DECREF(modelmod_name);
1869 if (errors_module == NULL || model_module == NULL)
1870 /* Don't core dump later! */
Fred Drake6f987622000-08-25 18:03:30 +00001871 return;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001872
Martin v. Löwisc847f402003-01-21 11:09:21 +00001873#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001874 {
1875 const XML_Feature *features = XML_GetFeatureList();
1876 PyObject *list = PyList_New(0);
1877 if (list == NULL)
1878 /* just ignore it */
1879 PyErr_Clear();
1880 else {
1881 int i = 0;
1882 for (; features[i].feature != XML_FEATURE_END; ++i) {
1883 int ok;
1884 PyObject *item = Py_BuildValue("si", features[i].name,
1885 features[i].value);
1886 if (item == NULL) {
1887 Py_DECREF(list);
1888 list = NULL;
1889 break;
1890 }
1891 ok = PyList_Append(list, item);
1892 Py_DECREF(item);
1893 if (ok < 0) {
1894 PyErr_Clear();
1895 break;
1896 }
1897 }
1898 if (list != NULL)
1899 PyModule_AddObject(m, "features", list);
1900 }
1901 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001902#endif
Fred Drake6f987622000-08-25 18:03:30 +00001903
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001904#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001905 PyModule_AddStringConstant(errors_module, #name, \
1906 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001907
Fred Drake0582df92000-07-12 04:49:00 +00001908 MYCONST(XML_ERROR_NO_MEMORY);
1909 MYCONST(XML_ERROR_SYNTAX);
1910 MYCONST(XML_ERROR_NO_ELEMENTS);
1911 MYCONST(XML_ERROR_INVALID_TOKEN);
1912 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1913 MYCONST(XML_ERROR_PARTIAL_CHAR);
1914 MYCONST(XML_ERROR_TAG_MISMATCH);
1915 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1916 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1917 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1918 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1919 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1920 MYCONST(XML_ERROR_ASYNC_ENTITY);
1921 MYCONST(XML_ERROR_BAD_CHAR_REF);
1922 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1923 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1924 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1925 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1926 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001927 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1928 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1929 MYCONST(XML_ERROR_NOT_STANDALONE);
1930
Fred Drake85d835f2001-02-08 15:39:08 +00001931 PyModule_AddStringConstant(errors_module, "__doc__",
1932 "Constants used to describe error conditions.");
1933
Fred Drake93adb692000-09-23 04:55:48 +00001934#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001935
Fred Drake85d835f2001-02-08 15:39:08 +00001936#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001937 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1938 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1939 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001940#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001941
Fred Drake85d835f2001-02-08 15:39:08 +00001942#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1943 PyModule_AddStringConstant(model_module, "__doc__",
1944 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001945
Fred Drake85d835f2001-02-08 15:39:08 +00001946 MYCONST(XML_CTYPE_EMPTY);
1947 MYCONST(XML_CTYPE_ANY);
1948 MYCONST(XML_CTYPE_MIXED);
1949 MYCONST(XML_CTYPE_NAME);
1950 MYCONST(XML_CTYPE_CHOICE);
1951 MYCONST(XML_CTYPE_SEQ);
1952
1953 MYCONST(XML_CQUANT_NONE);
1954 MYCONST(XML_CQUANT_OPT);
1955 MYCONST(XML_CQUANT_REP);
1956 MYCONST(XML_CQUANT_PLUS);
1957#undef MYCONST
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001958}
1959
Fred Drake6f987622000-08-25 18:03:30 +00001960static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001961clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001962{
Fred Drakecde79132001-04-25 16:01:30 +00001963 int i = 0;
1964 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001965
Fred Drake71b63ff2002-06-28 22:29:01 +00001966 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001967 if (initial)
Fred Drake71b63ff2002-06-28 22:29:01 +00001968 self->handlers[i] = NULL;
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001969 else {
Fred Drakecde79132001-04-25 16:01:30 +00001970 temp = self->handlers[i];
1971 self->handlers[i] = NULL;
1972 Py_XDECREF(temp);
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001973 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001974 }
Fred Drakecde79132001-04-25 16:01:30 +00001975 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001976}
1977
Tim Peters0c322792002-07-17 16:49:03 +00001978static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001979 {"StartElementHandler",
1980 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001981 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001982 {"EndElementHandler",
1983 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001984 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001985 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001986 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1987 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001988 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001989 (xmlhandlersetter)XML_SetCharacterDataHandler,
1990 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001991 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001992 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001993 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001994 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001995 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001996 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001997 {"StartNamespaceDeclHandler",
1998 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001999 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002000 {"EndNamespaceDeclHandler",
2001 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002002 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002003 {"CommentHandler",
2004 (xmlhandlersetter)XML_SetCommentHandler,
2005 (xmlhandler)my_CommentHandler},
2006 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002007 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002008 (xmlhandler)my_StartCdataSectionHandler},
2009 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002010 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002011 (xmlhandler)my_EndCdataSectionHandler},
2012 {"DefaultHandler",
2013 (xmlhandlersetter)XML_SetDefaultHandler,
2014 (xmlhandler)my_DefaultHandler},
2015 {"DefaultHandlerExpand",
2016 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2017 (xmlhandler)my_DefaultHandlerExpandHandler},
2018 {"NotStandaloneHandler",
2019 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2020 (xmlhandler)my_NotStandaloneHandler},
2021 {"ExternalEntityRefHandler",
2022 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002023 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002024 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002025 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002026 (xmlhandler)my_StartDoctypeDeclHandler},
2027 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002028 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002029 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002030 {"EntityDeclHandler",
2031 (xmlhandlersetter)XML_SetEntityDeclHandler,
2032 (xmlhandler)my_EntityDeclHandler},
2033 {"XmlDeclHandler",
2034 (xmlhandlersetter)XML_SetXmlDeclHandler,
2035 (xmlhandler)my_XmlDeclHandler},
2036 {"ElementDeclHandler",
2037 (xmlhandlersetter)XML_SetElementDeclHandler,
2038 (xmlhandler)my_ElementDeclHandler},
2039 {"AttlistDeclHandler",
2040 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2041 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002042#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002043 {"SkippedEntityHandler",
2044 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2045 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002046#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002047
Fred Drake0582df92000-07-12 04:49:00 +00002048 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002049};