blob: 01971b78ed91de65f4fb66dcaf613a721a9ad801 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000011#ifndef PyDoc_STRVAR
Martin v. Löwis069dde22003-01-21 10:58:18 +000012
13/*
14 * fdrake says:
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
19 */
Fred Drakef57b22a2002-09-02 15:54:06 +000020#define PyDoc_STR(str) str
Fred Drake7c75bf22002-07-01 14:02:31 +000021#define PyDoc_VAR(name) static char name[]
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000022#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000023#endif
24
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000025#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
Martin v. Löwis339d0f72001-08-17 18:39:25 +000027#define Py_USING_UNICODE
Jeremy Hylton9263f572003-06-27 16:13:17 +000028#else
29#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000030#endif
31
/* Slot numbers for the supported Expat callbacks.  A value of this type
 * indexes both the per-parser handler table (self->handlers[]) and the
 * file-level handler_info[] table.
 */
enum HandlerTypes {
    StartElement = 0,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when XML_COMBINED_VERSION is at least 19504. */
    SkippedEntity,
#endif
    _DummyDecl
};
59
60static PyObject *ErrorObject;
61
62/* ----------------------------------------------------- */
63
64/* Declarations for objects of type xmlparser */
65
66typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000067 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068
Fred Drake0582df92000-07-12 04:49:00 +000069 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000070 int returns_unicode; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes; /* Return attributes as a list. */
73 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000074 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000075 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000076 XML_Char *buffer; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size; /* Size of buffer, in XML_Char units */
79 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000080 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000081 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000082} xmlparseobject;
83
Fred Drake2a3d7db2002-06-28 22:56:48 +000084#define CHARACTER_DATA_BUFFER_SIZE 8192
85
Jeremy Hylton938ace62002-07-17 16:30:39 +000086static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000087
Fred Drake117ac852002-09-24 16:24:54 +000088typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089typedef void* xmlhandler;
90
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000091struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000092 const char *name;
93 xmlhandlersetter setter;
94 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000095 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000096 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000097};
98
Jeremy Hylton938ace62002-07-17 16:30:39 +000099static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000100
Fred Drakebd6101c2001-02-14 18:29:45 +0000101/* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
103 */
104static int
105set_error_attr(PyObject *err, char *name, int value)
106{
107 PyObject *v = PyInt_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000108
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000109 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000111 return 0;
112 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000113 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000114 return 1;
115}
116
117/* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
119 */
Fred Drake85d835f2001-02-08 15:39:08 +0000120static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000121set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000122{
123 PyObject *err;
124 char buffer[256];
125 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000126 int lineno = XML_GetErrorLineNumber(parser);
127 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000128
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000132 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000133 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000134 if ( err != NULL
135 && set_error_attr(err, "code", code)
136 && set_error_attr(err, "offset", column)
137 && set_error_attr(err, "lineno", lineno)) {
138 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000139 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000140 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000141 return NULL;
142}
143
Fred Drake71b63ff2002-06-28 22:29:01 +0000144static int
145have_handler(xmlparseobject *self, int type)
146{
147 PyObject *handler = self->handlers[type];
148 return handler != NULL;
149}
150
151static PyObject *
152get_handler_name(struct HandlerInfo *hinfo)
153{
154 PyObject *name = hinfo->nameobj;
155 if (name == NULL) {
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000156 name = PyString_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000157 hinfo->nameobj = name;
158 }
159 Py_XINCREF(name);
160 return name;
161}
162
Fred Drake85d835f2001-02-08 15:39:08 +0000163
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000164#ifdef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000165/* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
167
Fred Drake0582df92000-07-12 04:49:00 +0000168static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000169conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000170{
Fred Drake71b63ff2002-06-28 22:29:01 +0000171 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
174 if (str == NULL) {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000178 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000179}
180
Fred Drake0582df92000-07-12 04:49:00 +0000181static PyObject *
182conv_string_len_to_unicode(const XML_Char *str, int len)
183{
Fred Drake71b63ff2002-06-28 22:29:01 +0000184 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
187 if (str == NULL) {
188 Py_INCREF(Py_None);
189 return Py_None;
190 }
Fred Drake6f987622000-08-25 18:03:30 +0000191 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000192}
193#endif
194
195/* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
197
Fred Drake6f987622000-08-25 18:03:30 +0000198static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000199conv_string_to_utf8(const XML_Char *str)
Fred Drake6f987622000-08-25 18:03:30 +0000200{
Fred Drake71b63ff2002-06-28 22:29:01 +0000201 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
204 if (str == NULL) {
205 Py_INCREF(Py_None);
206 return Py_None;
207 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000208 return PyString_FromString(str);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000209}
210
Fred Drake6f987622000-08-25 18:03:30 +0000211static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +0000212conv_string_len_to_utf8(const XML_Char *str, int len)
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000213{
Fred Drake71b63ff2002-06-28 22:29:01 +0000214 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
217 if (str == NULL) {
218 Py_INCREF(Py_None);
219 return Py_None;
220 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000221 return PyString_FromStringAndSize((const char *)str, len);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000222}
223
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000224/* Callback routines */
225
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000226static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000227
Martin v. Löwis069dde22003-01-21 10:58:18 +0000228/* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
231static int
232error_external_entity_ref_handler(XML_Parser parser,
233 const XML_Char *context,
234 const XML_Char *base,
235 const XML_Char *systemId,
236 const XML_Char *publicId)
237{
238 return 0;
239}
240
Fred Drake24a0f412006-07-06 05:13:22 +0000241/* Dummy character data handler used when an error (exception) has
242 been detected, and the actual parsing can be terminated early.
243 This is needed since character data handler can't be safely removed
244 from within the character data handler, but can be replaced. It is
245 used only from the character data handler trampoline, and must be
246 used right after `flag_error()` is called. */
247static void
248noop_character_data_handler(void *userData, const XML_Char *data, int len)
249{
250 /* Do nothing. */
251}
252
Fred Drake6f987622000-08-25 18:03:30 +0000253static void
254flag_error(xmlparseobject *self)
255{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000256 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000257 XML_SetExternalEntityRefHandler(self->itself,
258 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000259}
260
261static PyCodeObject*
262getcode(enum HandlerTypes slot, char* func_name, int lineno)
263{
Fred Drakebd6101c2001-02-14 18:29:45 +0000264 PyObject *code = NULL;
265 PyObject *name = NULL;
266 PyObject *nulltuple = NULL;
267 PyObject *filename = NULL;
268
269 if (handler_info[slot].tb_code == NULL) {
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000270 code = PyString_FromString("");
Fred Drakebd6101c2001-02-14 18:29:45 +0000271 if (code == NULL)
272 goto failed;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000273 name = PyString_FromString(func_name);
Fred Drakebd6101c2001-02-14 18:29:45 +0000274 if (name == NULL)
275 goto failed;
276 nulltuple = PyTuple_New(0);
277 if (nulltuple == NULL)
278 goto failed;
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000279 filename = PyString_FromString(__FILE__);
Fred Drakebd6101c2001-02-14 18:29:45 +0000280 handler_info[slot].tb_code =
281 PyCode_New(0, /* argcount */
282 0, /* nlocals */
283 0, /* stacksize */
284 0, /* flags */
285 code, /* code */
286 nulltuple, /* consts */
287 nulltuple, /* names */
288 nulltuple, /* varnames */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000289#if PYTHON_API_VERSION >= 1010
Fred Drakebd6101c2001-02-14 18:29:45 +0000290 nulltuple, /* freevars */
291 nulltuple, /* cellvars */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000292#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000293 filename, /* filename */
294 name, /* name */
295 lineno, /* firstlineno */
296 code /* lnotab */
297 );
298 if (handler_info[slot].tb_code == NULL)
299 goto failed;
300 Py_DECREF(code);
301 Py_DECREF(nulltuple);
302 Py_DECREF(filename);
303 Py_DECREF(name);
304 }
305 return handler_info[slot].tb_code;
306 failed:
307 Py_XDECREF(code);
308 Py_XDECREF(name);
309 return NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000310}
311
Jeremy Hylton9263f572003-06-27 16:13:17 +0000312#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000313static int
314trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
315{
316 int result = 0;
317 if (!tstate->use_tracing || tstate->tracing)
318 return 0;
319 if (tstate->c_profilefunc != NULL) {
320 tstate->tracing++;
321 result = tstate->c_profilefunc(tstate->c_profileobj,
322 f, code , val);
323 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
324 || (tstate->c_profilefunc != NULL));
325 tstate->tracing--;
326 if (result)
327 return result;
328 }
329 if (tstate->c_tracefunc != NULL) {
330 tstate->tracing++;
331 result = tstate->c_tracefunc(tstate->c_traceobj,
332 f, code , val);
333 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
334 || (tstate->c_profilefunc != NULL));
335 tstate->tracing--;
336 }
337 return result;
338}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000339
340static int
341trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
342{
343 PyObject *type, *value, *traceback, *arg;
344 int err;
345
346 if (tstate->c_tracefunc == NULL)
347 return 0;
348
349 PyErr_Fetch(&type, &value, &traceback);
350 if (value == NULL) {
351 value = Py_None;
352 Py_INCREF(value);
353 }
Martin v. Löwis9171f022004-10-13 19:50:11 +0000354#if PY_VERSION_HEX < 0x02040000
355 arg = Py_BuildValue("(OOO)", type, value, traceback);
356#else
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000357 arg = PyTuple_Pack(3, type, value, traceback);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000358#endif
Jeremy Hylton9263f572003-06-27 16:13:17 +0000359 if (arg == NULL) {
360 PyErr_Restore(type, value, traceback);
361 return 0;
362 }
363 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
364 Py_DECREF(arg);
365 if (err == 0)
366 PyErr_Restore(type, value, traceback);
367 else {
368 Py_XDECREF(type);
369 Py_XDECREF(value);
370 Py_XDECREF(traceback);
371 }
372 return err;
373}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000374#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000375
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000376static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000377call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
378 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000379{
Fred Drakebd6101c2001-02-14 18:29:45 +0000380 PyThreadState *tstate = PyThreadState_GET();
381 PyFrameObject *f;
382 PyObject *res;
383
384 if (c == NULL)
385 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000386
Jeremy Hylton9263f572003-06-27 16:13:17 +0000387 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000388 if (f == NULL)
389 return NULL;
390 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000391#ifdef FIX_TRACE
392 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000393 return NULL;
394 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000395#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000396 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000397 if (res == NULL) {
398 if (tstate->curexc_traceback == NULL)
399 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000400 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000401#ifdef FIX_TRACE
402 if (trace_frame_exc(tstate, f) < 0) {
403 return NULL;
404 }
405 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000406 else {
Jeremy Hylton9263f572003-06-27 16:13:17 +0000407 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000408 Py_XDECREF(res);
409 res = NULL;
410 }
411 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000412#else
413 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000414#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000415 tstate->frame = f->f_back;
416 Py_DECREF(f);
417 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000418}
419
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#ifndef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000421#define STRING_CONV_FUNC conv_string_to_utf8
422#else
Martin v. Löwis069dde22003-01-21 10:58:18 +0000423/* Python 2.0 and later versions, when built with Unicode support */
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000424#define STRING_CONV_FUNC (self->returns_unicode \
425 ? conv_string_to_unicode : conv_string_to_utf8)
426#endif
Guido van Rossum5961f5a2000-03-31 16:18:11 +0000427
Fred Drakeb91a36b2002-06-27 19:40:48 +0000428static PyObject*
429string_intern(xmlparseobject *self, const char* str)
430{
431 PyObject *result = STRING_CONV_FUNC(str);
432 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000433 /* result can be NULL if the unicode conversion failed. */
434 if (!result)
435 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000436 if (!self->intern)
437 return result;
438 value = PyDict_GetItem(self->intern, result);
439 if (!value) {
440 if (PyDict_SetItem(self->intern, result, result) == 0)
441 return result;
442 else
443 return NULL;
444 }
445 Py_INCREF(value);
446 Py_DECREF(result);
447 return value;
448}
449
Fred Drake2a3d7db2002-06-28 22:56:48 +0000450/* Return 0 on success, -1 on exception.
451 * flag_error() will be called before return if needed.
452 */
453static int
454call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
455{
456 PyObject *args;
457 PyObject *temp;
458
459 args = PyTuple_New(1);
460 if (args == NULL)
461 return -1;
462#ifdef Py_USING_UNICODE
463 temp = (self->returns_unicode
464 ? conv_string_len_to_unicode(buffer, len)
465 : conv_string_len_to_utf8(buffer, len));
466#else
467 temp = conv_string_len_to_utf8(buffer, len);
468#endif
469 if (temp == NULL) {
470 Py_DECREF(args);
471 flag_error(self);
Fred Drake24a0f412006-07-06 05:13:22 +0000472 XML_SetCharacterDataHandler(self->itself,
473 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000474 return -1;
475 }
476 PyTuple_SET_ITEM(args, 0, temp);
477 /* temp is now a borrowed reference; consider it unused. */
478 self->in_callback = 1;
479 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000480 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000481 /* temp is an owned reference again, or NULL */
482 self->in_callback = 0;
483 Py_DECREF(args);
484 if (temp == NULL) {
485 flag_error(self);
Fred Drake24a0f412006-07-06 05:13:22 +0000486 XML_SetCharacterDataHandler(self->itself,
487 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000488 return -1;
489 }
490 Py_DECREF(temp);
491 return 0;
492}
493
494static int
495flush_character_buffer(xmlparseobject *self)
496{
497 int rc;
498 if (self->buffer == NULL || self->buffer_used == 0)
499 return 0;
500 rc = call_character_handler(self, self->buffer, self->buffer_used);
501 self->buffer_used = 0;
502 return rc;
503}
504
505static void
506my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
507{
508 xmlparseobject *self = (xmlparseobject *) userData;
509 if (self->buffer == NULL)
510 call_character_handler(self, data, len);
511 else {
512 if ((self->buffer_used + len) > self->buffer_size) {
513 if (flush_character_buffer(self) < 0)
514 return;
515 /* handler might have changed; drop the rest on the floor
516 * if there isn't a handler anymore
517 */
518 if (!have_handler(self, CharacterData))
519 return;
520 }
521 if (len > self->buffer_size) {
522 call_character_handler(self, data, len);
523 self->buffer_used = 0;
524 }
525 else {
526 memcpy(self->buffer + self->buffer_used,
527 data, len * sizeof(XML_Char));
528 self->buffer_used += len;
529 }
530 }
531}
532
Fred Drake85d835f2001-02-08 15:39:08 +0000533static void
534my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000535 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000536{
537 xmlparseobject *self = (xmlparseobject *)userData;
538
Fred Drake71b63ff2002-06-28 22:29:01 +0000539 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000540 PyObject *container, *rv, *args;
541 int i, max;
542
Fred Drake2a3d7db2002-06-28 22:56:48 +0000543 if (flush_character_buffer(self) < 0)
544 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000545 /* Set max to the number of slots filled in atts[]; max/2 is
546 * the number of attributes we need to process.
547 */
548 if (self->specified_attributes) {
549 max = XML_GetSpecifiedAttributeCount(self->itself);
550 }
551 else {
552 max = 0;
553 while (atts[max] != NULL)
554 max += 2;
555 }
556 /* Build the container. */
557 if (self->ordered_attributes)
558 container = PyList_New(max);
559 else
560 container = PyDict_New();
561 if (container == NULL) {
562 flag_error(self);
563 return;
564 }
565 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000566 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000567 PyObject *v;
568 if (n == NULL) {
569 flag_error(self);
570 Py_DECREF(container);
571 return;
572 }
573 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
574 if (v == NULL) {
575 flag_error(self);
576 Py_DECREF(container);
577 Py_DECREF(n);
578 return;
579 }
580 if (self->ordered_attributes) {
581 PyList_SET_ITEM(container, i, n);
582 PyList_SET_ITEM(container, i+1, v);
583 }
584 else if (PyDict_SetItem(container, n, v)) {
585 flag_error(self);
586 Py_DECREF(n);
587 Py_DECREF(v);
588 return;
589 }
590 else {
591 Py_DECREF(n);
592 Py_DECREF(v);
593 }
594 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000595 args = string_intern(self, name);
596 if (args != NULL)
597 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000598 if (args == NULL) {
599 Py_DECREF(container);
600 return;
601 }
602 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000603 self->in_callback = 1;
604 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000605 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000606 self->in_callback = 0;
607 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000608 if (rv == NULL) {
609 flag_error(self);
610 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000611 }
Fred Drake85d835f2001-02-08 15:39:08 +0000612 Py_DECREF(rv);
613 }
614}
615
/* RC_HANDLER generates the C callback my_<NAME>Handler, which forwards
 * one Expat event to the Python handler stored in slot NAME.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; also stringized for getcode()
 *   PARAMS        parenthesized parameter list of the generated function
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on the handler result `rv` before it
 *                 is released
 *   RETURN        value returned on every path (empty for void)
 *   GETUSERDATA   expression producing the xmlparseobject pointer
 *
 * The generated function does nothing but return RETURN when no handler
 * is installed.  Otherwise it flushes buffered character data, builds
 * the arguments, calls the handler, and calls flag_error() when building
 * the arguments or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ; \
        if (!args) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
644
/* Generate a handler returning void: no extra locals, no result
 * conversion, and the parser object taken from `userData`.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000648
/* Generate a handler returning int: the Python result is converted with
 * PyInt_AsLong() into `rc`, which starts out as 0 and is the value
 * returned.  The parser object is taken from `userData`.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyInt_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000653
Fred Drake71b63ff2002-06-28 22:29:01 +0000654VOID_HANDLER(EndElement,
655 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000656 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000657
Fred Drake6f987622000-08-25 18:03:30 +0000658VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000659 (void *userData,
660 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000661 const XML_Char *data),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000662 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000663
Fred Drake6f987622000-08-25 18:03:30 +0000664VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000665 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000666 const XML_Char *entityName,
667 const XML_Char *base,
668 const XML_Char *systemId,
669 const XML_Char *publicId,
670 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000671 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000672 string_intern(self, entityName), string_intern(self, base),
673 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000674 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000675
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000676#ifndef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000677VOID_HANDLER(EntityDecl,
678 (void *userData,
679 const XML_Char *entityName,
680 int is_parameter_entity,
681 const XML_Char *value,
682 int value_length,
683 const XML_Char *base,
684 const XML_Char *systemId,
685 const XML_Char *publicId,
686 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000687 ("NiNNNNN",
688 string_intern(self, entityName), is_parameter_entity,
Fred Drake85d835f2001-02-08 15:39:08 +0000689 conv_string_len_to_utf8(value, value_length),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000690 string_intern(self, base), string_intern(self, systemId),
691 string_intern(self, publicId),
692 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000693#else
694VOID_HANDLER(EntityDecl,
695 (void *userData,
696 const XML_Char *entityName,
697 int is_parameter_entity,
698 const XML_Char *value,
699 int value_length,
700 const XML_Char *base,
701 const XML_Char *systemId,
702 const XML_Char *publicId,
703 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000704 ("NiNNNNN",
705 string_intern(self, entityName), is_parameter_entity,
Fred Drake71b63ff2002-06-28 22:29:01 +0000706 (self->returns_unicode
707 ? conv_string_len_to_unicode(value, value_length)
Fred Drake85d835f2001-02-08 15:39:08 +0000708 : conv_string_len_to_utf8(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000709 string_intern(self, base), string_intern(self, systemId),
710 string_intern(self, publicId),
711 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000712#endif
713
714VOID_HANDLER(XmlDecl,
715 (void *userData,
716 const XML_Char *version,
717 const XML_Char *encoding,
718 int standalone),
719 ("(O&O&i)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000720 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000721 standalone))
722
723static PyObject *
724conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000725 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000726{
727 PyObject *result = NULL;
728 PyObject *children = PyTuple_New(model->numchildren);
729 int i;
730
731 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000732 assert(model->numchildren < INT_MAX);
733 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000734 PyObject *child = conv_content_model(&model->children[i],
735 conv_string);
736 if (child == NULL) {
737 Py_XDECREF(children);
738 return NULL;
739 }
740 PyTuple_SET_ITEM(children, i, child);
741 }
742 result = Py_BuildValue("(iiO&N)",
743 model->type, model->quant,
744 conv_string,model->name, children);
745 }
746 return result;
747}
748
Fred Drake06dd8cf2003-02-02 03:54:17 +0000749static void
750my_ElementDeclHandler(void *userData,
751 const XML_Char *name,
752 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000753{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000754 xmlparseobject *self = (xmlparseobject *)userData;
755 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000756
Fred Drake06dd8cf2003-02-02 03:54:17 +0000757 if (have_handler(self, ElementDecl)) {
758 PyObject *rv = NULL;
759 PyObject *modelobj, *nameobj;
760
761 if (flush_character_buffer(self) < 0)
762 goto finally;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000763#ifdef Py_USING_UNICODE
Fred Drake06dd8cf2003-02-02 03:54:17 +0000764 modelobj = conv_content_model(model,
765 (self->returns_unicode
766 ? conv_string_to_unicode
767 : conv_string_to_utf8));
Fred Drake85d835f2001-02-08 15:39:08 +0000768#else
Fred Drake06dd8cf2003-02-02 03:54:17 +0000769 modelobj = conv_content_model(model, conv_string_to_utf8);
Fred Drake85d835f2001-02-08 15:39:08 +0000770#endif
Fred Drake06dd8cf2003-02-02 03:54:17 +0000771 if (modelobj == NULL) {
772 flag_error(self);
773 goto finally;
774 }
775 nameobj = string_intern(self, name);
776 if (nameobj == NULL) {
777 Py_DECREF(modelobj);
778 flag_error(self);
779 goto finally;
780 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000781 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000782 if (args == NULL) {
783 Py_DECREF(modelobj);
784 flag_error(self);
785 goto finally;
786 }
787 self->in_callback = 1;
788 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000789 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000790 self->in_callback = 0;
791 if (rv == NULL) {
792 flag_error(self);
793 goto finally;
794 }
795 Py_DECREF(rv);
796 }
797 finally:
798 Py_XDECREF(args);
799 XML_FreeContentModel(self->itself, model);
800 return;
801}
Fred Drake85d835f2001-02-08 15:39:08 +0000802
803VOID_HANDLER(AttlistDecl,
804 (void *userData,
805 const XML_Char *elname,
806 const XML_Char *attname,
807 const XML_Char *att_type,
808 const XML_Char *dflt,
809 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000810 ("(NNO&O&i)",
811 string_intern(self, elname), string_intern(self, attname),
Fred Drake85d835f2001-02-08 15:39:08 +0000812 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
813 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000814
Martin v. Löwisc847f402003-01-21 11:09:21 +0000815#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000816VOID_HANDLER(SkippedEntity,
817 (void *userData,
818 const XML_Char *entityName,
819 int is_parameter_entity),
820 ("Ni",
821 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000822#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000823
Fred Drake71b63ff2002-06-28 22:29:01 +0000824VOID_HANDLER(NotationDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000825 (void *userData,
826 const XML_Char *notationName,
827 const XML_Char *base,
828 const XML_Char *systemId,
829 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000830 ("(NNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000831 string_intern(self, notationName), string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000832 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000833
Fred Drake6f987622000-08-25 18:03:30 +0000834VOID_HANDLER(StartNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000835 (void *userData,
836 const XML_Char *prefix,
837 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000838 ("(NN)",
839 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000840
Fred Drake6f987622000-08-25 18:03:30 +0000841VOID_HANDLER(EndNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000842 (void *userData,
843 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000844 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845
Fred Drake6f987622000-08-25 18:03:30 +0000846VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000847 (void *userData, const XML_Char *data),
848 ("(O&)", STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000849
Fred Drake6f987622000-08-25 18:03:30 +0000850VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000851 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000852 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000853
Fred Drake6f987622000-08-25 18:03:30 +0000854VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000855 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000856 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000857
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000858#ifndef Py_USING_UNICODE
Fred Drake6f987622000-08-25 18:03:30 +0000859VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000860 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000861 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000862
Fred Drake6f987622000-08-25 18:03:30 +0000863VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000864 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000865 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000866#else
Fred Drake6f987622000-08-25 18:03:30 +0000867VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000868 (void *userData, const XML_Char *s, int len),
869 ("(N)", (self->returns_unicode
870 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000871 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000872
Fred Drake6f987622000-08-25 18:03:30 +0000873VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000874 (void *userData, const XML_Char *s, int len),
875 ("(N)", (self->returns_unicode
876 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000877 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000878#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000879
Fred Drake71b63ff2002-06-28 22:29:01 +0000880INT_HANDLER(NotStandalone,
881 (void *userData),
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000882 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000883
Fred Drake6f987622000-08-25 18:03:30 +0000884RC_HANDLER(int, ExternalEntityRef,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000885 (XML_Parser parser,
886 const XML_Char *context,
887 const XML_Char *base,
888 const XML_Char *systemId,
889 const XML_Char *publicId),
890 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000891 ("(O&NNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000892 STRING_CONV_FUNC,context, string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000893 string_intern(self, systemId), string_intern(self, publicId)),
Fred Drake6f987622000-08-25 18:03:30 +0000894 rc = PyInt_AsLong(rv);, rc,
895 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000896
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000897/* XXX UnknownEncodingHandler */
898
Fred Drake85d835f2001-02-08 15:39:08 +0000899VOID_HANDLER(StartDoctypeDecl,
900 (void *userData, const XML_Char *doctypeName,
901 const XML_Char *sysid, const XML_Char *pubid,
902 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000903 ("(NNNi)", string_intern(self, doctypeName),
904 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000905 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000906
907VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000908
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000909/* ---------------------------------------------------------------- */
910
Fred Drake71b63ff2002-06-28 22:29:01 +0000911static PyObject *
912get_parse_result(xmlparseobject *self, int rv)
913{
914 if (PyErr_Occurred()) {
915 return NULL;
916 }
917 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000918 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000919 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000920 if (flush_character_buffer(self) < 0) {
921 return NULL;
922 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000923 return PyInt_FromLong(rv);
924}
925
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000926PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000927"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000928Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000929
930static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000931xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000932{
Fred Drake0582df92000-07-12 04:49:00 +0000933 char *s;
934 int slen;
935 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000936
Fred Drake0582df92000-07-12 04:49:00 +0000937 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
938 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000939
940 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000941}
942
Fred Drakeca1f4262000-09-21 20:10:23 +0000943/* File reading copied from cPickle */
944
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000945#define BUF_SIZE 2048
946
Fred Drake0582df92000-07-12 04:49:00 +0000947static int
948readinst(char *buf, int buf_size, PyObject *meth)
949{
950 PyObject *arg = NULL;
951 PyObject *bytes = NULL;
952 PyObject *str = NULL;
953 int len = -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000954
Fred Drake676940b2000-09-22 15:21:31 +0000955 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000956 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000957
Fred Drake7b6caff2003-07-21 17:05:56 +0000958 if ((arg = PyTuple_New(1)) == NULL) {
959 Py_DECREF(bytes);
Fred Drake0582df92000-07-12 04:49:00 +0000960 goto finally;
Fred Drake7b6caff2003-07-21 17:05:56 +0000961 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000962
Tim Peters954eef72000-09-22 06:01:11 +0000963 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000964
Martin v. Löwis9171f022004-10-13 19:50:11 +0000965#if PY_VERSION_HEX < 0x02020000
966 str = PyObject_CallObject(meth, arg);
967#else
968 str = PyObject_Call(meth, arg, NULL);
969#endif
970 if (str == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000971 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000972
Fred Drake0582df92000-07-12 04:49:00 +0000973 /* XXX what to do if it returns a Unicode string? */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000974 if (!PyString_Check(str)) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000975 PyErr_Format(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000976 "read() did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000977 Py_TYPE(str)->tp_name);
Fred Drake0582df92000-07-12 04:49:00 +0000978 goto finally;
979 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000980 len = PyString_GET_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000981 if (len > buf_size) {
982 PyErr_Format(PyExc_ValueError,
983 "read() returned too much data: "
984 "%i bytes requested, %i returned",
985 buf_size, len);
Fred Drake0582df92000-07-12 04:49:00 +0000986 goto finally;
987 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000988 memcpy(buf, PyString_AsString(str), len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000989finally:
Fred Drake0582df92000-07-12 04:49:00 +0000990 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000991 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000992 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000993}
994
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000995PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000996"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000997Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000998
999static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +00001000xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001001{
Fred Drake0582df92000-07-12 04:49:00 +00001002 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +00001003 FILE *fp;
1004 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001005
Fred Drake0582df92000-07-12 04:49:00 +00001006 if (PyFile_Check(f)) {
1007 fp = PyFile_AsFile(f);
1008 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +00001009 else {
Fred Drake0582df92000-07-12 04:49:00 +00001010 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +00001011 readmethod = PyObject_GetAttrString(f, "read");
1012 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001013 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +00001014 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +00001015 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +00001016 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001017 }
1018 }
1019 for (;;) {
1020 int bytes_read;
1021 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +00001022 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +00001023 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +00001024 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +00001025 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001026
Fred Drake0582df92000-07-12 04:49:00 +00001027 if (fp) {
1028 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
1029 if (bytes_read < 0) {
1030 PyErr_SetFromErrno(PyExc_IOError);
1031 return NULL;
1032 }
1033 }
1034 else {
1035 bytes_read = readinst(buf, BUF_SIZE, readmethod);
Fred Drake7b6caff2003-07-21 17:05:56 +00001036 if (bytes_read < 0) {
1037 Py_DECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +00001038 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +00001039 }
Fred Drake0582df92000-07-12 04:49:00 +00001040 }
1041 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +00001042 if (PyErr_Occurred()) {
1043 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +00001044 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +00001045 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001046
Fred Drake0582df92000-07-12 04:49:00 +00001047 if (!rv || bytes_read == 0)
1048 break;
1049 }
Fred Drake7b6caff2003-07-21 17:05:56 +00001050 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +00001051 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001052}
1053
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001054PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001055"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001056Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001057
1058static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001059xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1060{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001061 char *base;
1062
Fred Drake0582df92000-07-12 04:49:00 +00001063 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001064 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001065 if (!XML_SetBase(self->itself, base)) {
1066 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001067 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001068 Py_INCREF(Py_None);
1069 return Py_None;
1070}
1071
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001072PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001073"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001074Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001075
1076static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +00001077xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +00001078{
Fred Drake0582df92000-07-12 04:49:00 +00001079 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001080}
1081
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001082PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +00001083"GetInputContext() -> string\n\
1084Return the untranslated text of the input that caused the current event.\n\
1085If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001086for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +00001087
1088static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +00001089xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +00001090{
Georg Brandl96a8c392006-05-29 21:04:52 +00001091 if (self->in_callback) {
1092 int offset, size;
1093 const char *buffer
1094 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +00001095
Georg Brandl96a8c392006-05-29 21:04:52 +00001096 if (buffer != NULL)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001097 return PyString_FromStringAndSize(buffer + offset,
Georg Brandl96a8c392006-05-29 21:04:52 +00001098 size - offset);
1099 else
1100 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +00001101 }
Georg Brandl96a8c392006-05-29 21:04:52 +00001102 else
1103 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +00001104}
Fred Drakebd6101c2001-02-14 18:29:45 +00001105
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001106PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001107"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001108Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001109information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001110
1111static PyObject *
1112xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1113{
1114 char *context;
1115 char *encoding = NULL;
1116 xmlparseobject *new_parser;
1117 int i;
1118
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001119 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001120 &context, &encoding)) {
1121 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001122 }
1123
Martin v. Löwis894258c2001-09-23 10:20:10 +00001124#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001125 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001126 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001127#else
1128 /* Python versions 2.2 and later */
1129 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1130#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001131
1132 if (new_parser == NULL)
1133 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001134 new_parser->buffer_size = self->buffer_size;
1135 new_parser->buffer_used = 0;
1136 if (self->buffer != NULL) {
1137 new_parser->buffer = malloc(new_parser->buffer_size);
1138 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001139#ifndef Py_TPFLAGS_HAVE_GC
1140 /* Code for versions 2.0 and 2.1 */
1141 PyObject_Del(new_parser);
1142#else
1143 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001144 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001145#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001146 return PyErr_NoMemory();
1147 }
1148 }
1149 else
1150 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001151 new_parser->returns_unicode = self->returns_unicode;
1152 new_parser->ordered_attributes = self->ordered_attributes;
1153 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001154 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001155 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001156 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001157 encoding);
1158 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001159 new_parser->intern = self->intern;
1160 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001161#ifdef Py_TPFLAGS_HAVE_GC
1162 PyObject_GC_Track(new_parser);
1163#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001164 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001165#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001166
1167 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001168 Py_DECREF(new_parser);
1169 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001170 }
1171
1172 XML_SetUserData(new_parser->itself, (void *)new_parser);
1173
1174 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001175 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001176 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001177
Fred Drake2a3d7db2002-06-28 22:56:48 +00001178 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001179 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001180 Py_DECREF(new_parser);
1181 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001183 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001184
1185 /* then copy handlers from self */
1186 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001187 PyObject *handler = self->handlers[i];
1188 if (handler != NULL) {
1189 Py_INCREF(handler);
1190 new_parser->handlers[i] = handler;
1191 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001192 handler_info[i].handler);
1193 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001194 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001195 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001196}
1197
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001198PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001199"SetParamEntityParsing(flag) -> success\n\
1200Controls parsing of parameter entities (including the external DTD\n\
1201subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1202XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1203XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001204was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001205
1206static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001207xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001208{
Fred Drake85d835f2001-02-08 15:39:08 +00001209 int flag;
1210 if (!PyArg_ParseTuple(args, "i", &flag))
1211 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001212 flag = XML_SetParamEntityParsing(p->itself, flag);
Fred Drake85d835f2001-02-08 15:39:08 +00001213 return PyInt_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001214}
1215
Martin v. Löwisc847f402003-01-21 11:09:21 +00001216
1217#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001218PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1219"UseForeignDTD([flag])\n\
1220Allows the application to provide an artificial external subset if one is\n\
1221not specified as part of the document instance. This readily allows the\n\
1222use of a 'default' document type controlled by the application, while still\n\
1223getting the advantage of providing document type information to the parser.\n\
1224'flag' defaults to True if not provided.");
1225
1226static PyObject *
1227xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1228{
1229 PyObject *flagobj = NULL;
1230 XML_Bool flag = XML_TRUE;
1231 enum XML_Error rc;
Georg Brandl96a8c392006-05-29 21:04:52 +00001232 if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
Martin v. Löwis069dde22003-01-21 10:58:18 +00001233 return NULL;
1234 if (flagobj != NULL)
1235 flag = PyObject_IsTrue(flagobj) ? XML_TRUE : XML_FALSE;
1236 rc = XML_UseForeignDTD(self->itself, flag);
1237 if (rc != XML_ERROR_NONE) {
1238 return set_error(self, rc);
1239 }
1240 Py_INCREF(Py_None);
1241 return Py_None;
1242}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001243#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001244
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001245static struct PyMethodDef xmlparse_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001246 {"Parse", (PyCFunction)xmlparse_Parse,
Fred Drakebd6101c2001-02-14 18:29:45 +00001247 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001248 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Georg Brandl96a8c392006-05-29 21:04:52 +00001249 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001250 {"SetBase", (PyCFunction)xmlparse_SetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001251 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001252 {"GetBase", (PyCFunction)xmlparse_GetBase,
Georg Brandl96a8c392006-05-29 21:04:52 +00001253 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001254 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001255 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001256 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1257 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001258 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Georg Brandl96a8c392006-05-29 21:04:52 +00001259 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001260#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001261 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1262 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001263#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001264 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001265};
1266
1267/* ---------- */
1268
1269
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001270#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001271
Fred Drake71b63ff2002-06-28 22:29:01 +00001272/* pyexpat international encoding support.
1273 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001274*/
1275
Martin v. Löwis3af7cc02001-01-22 08:19:10 +00001276static char template_buffer[257];
Fred Drakebb66a202001-03-01 20:48:17 +00001277PyObject *template_string = NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001278
Fred Drake71b63ff2002-06-28 22:29:01 +00001279static void
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001280init_template_buffer(void)
1281{
1282 int i;
Fred Drakebb66a202001-03-01 20:48:17 +00001283 for (i = 0; i < 256; i++) {
1284 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001285 }
Fred Drakebb66a202001-03-01 20:48:17 +00001286 template_buffer[256] = 0;
Tim Peters63cb99e2001-02-17 18:12:50 +00001287}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001288
Fred Drake71b63ff2002-06-28 22:29:01 +00001289static int
1290PyUnknownEncodingHandler(void *encodingHandlerData,
1291 const XML_Char *name,
1292 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001293{
Fred Drakebb66a202001-03-01 20:48:17 +00001294 PyUnicodeObject *_u_string = NULL;
1295 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001296 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001297
Fred Drakebb66a202001-03-01 20:48:17 +00001298 /* Yes, supports only 8bit encodings */
1299 _u_string = (PyUnicodeObject *)
1300 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001301
Fred Drakebb66a202001-03-01 20:48:17 +00001302 if (_u_string == NULL)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001303 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001304
Fred Drakebb66a202001-03-01 20:48:17 +00001305 for (i = 0; i < 256; i++) {
1306 /* Stupid to access directly, but fast */
1307 Py_UNICODE c = _u_string->str[i];
1308 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001309 info->map[i] = -1;
Fred Drakebb66a202001-03-01 20:48:17 +00001310 else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001311 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001312 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001313 info->data = NULL;
1314 info->convert = NULL;
1315 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001316 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001317 Py_DECREF(_u_string);
1318 return result;
1319}
1320
1321#endif
1322
1323static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001324newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001325{
1326 int i;
1327 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001328
Martin v. Löwis894258c2001-09-23 10:20:10 +00001329#ifdef Py_TPFLAGS_HAVE_GC
1330 /* Code for versions 2.2 and later */
1331 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1332#else
Fred Drake0582df92000-07-12 04:49:00 +00001333 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001334#endif
Fred Drake0582df92000-07-12 04:49:00 +00001335 if (self == NULL)
1336 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001337
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001338#ifdef Py_USING_UNICODE
Fred Drake0582df92000-07-12 04:49:00 +00001339 self->returns_unicode = 1;
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001340#else
1341 self->returns_unicode = 0;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001342#endif
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001343
Fred Drake2a3d7db2002-06-28 22:56:48 +00001344 self->buffer = NULL;
1345 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1346 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001347 self->ordered_attributes = 0;
1348 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001349 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001350 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001351 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001352 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001353 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1354 }
Fred Drake85d835f2001-02-08 15:39:08 +00001355 else {
Fred Drake0582df92000-07-12 04:49:00 +00001356 self->itself = XML_ParserCreate(encoding);
1357 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001358 self->intern = intern;
1359 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001360#ifdef Py_TPFLAGS_HAVE_GC
1361 PyObject_GC_Track(self);
1362#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001363 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001364#endif
Fred Drake0582df92000-07-12 04:49:00 +00001365 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001366 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001367 "XML_ParserCreate failed");
1368 Py_DECREF(self);
1369 return NULL;
1370 }
1371 XML_SetUserData(self->itself, (void *)self);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001372#ifdef Py_USING_UNICODE
Fred Drake7c75bf22002-07-01 14:02:31 +00001373 XML_SetUnknownEncodingHandler(self->itself,
1374 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001375#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001376
Fred Drake2a3d7db2002-06-28 22:56:48 +00001377 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001378 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001379
Fred Drake7c75bf22002-07-01 14:02:31 +00001380 self->handlers = malloc(sizeof(PyObject *) * i);
1381 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001382 Py_DECREF(self);
1383 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001384 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001385 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001386
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001387 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001388}
1389
1390
1391static void
Fred Drake0582df92000-07-12 04:49:00 +00001392xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001393{
Fred Drake0582df92000-07-12 04:49:00 +00001394 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001395#ifdef Py_TPFLAGS_HAVE_GC
1396 PyObject_GC_UnTrack(self);
1397#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001398 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001399#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001400 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001401 XML_ParserFree(self->itself);
1402 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001403
Fred Drake85d835f2001-02-08 15:39:08 +00001404 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001405 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001406 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001407 temp = self->handlers[i];
1408 self->handlers[i] = NULL;
1409 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001410 }
1411 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001412 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001413 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001414 if (self->buffer != NULL) {
1415 free(self->buffer);
1416 self->buffer = NULL;
1417 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001418 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001419#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001420 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001421 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001422#else
1423 /* Code for versions 2.2 and later. */
1424 PyObject_GC_Del(self);
1425#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001426}
1427
Fred Drake0582df92000-07-12 04:49:00 +00001428static int
1429handlername2int(const char *name)
1430{
1431 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001432 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001433 if (strcmp(name, handler_info[i].name) == 0) {
1434 return i;
1435 }
1436 }
1437 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001438}
1439
1440static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001441get_pybool(int istrue)
1442{
1443 PyObject *result = istrue ? Py_True : Py_False;
1444 Py_INCREF(result);
1445 return result;
1446}
1447
1448static PyObject *
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001449xmlparse_getattr(xmlparseobject *self, char *name)
1450{
Fred Drake71b63ff2002-06-28 22:29:01 +00001451 int handlernum = handlername2int(name);
1452
1453 if (handlernum != -1) {
1454 PyObject *result = self->handlers[handlernum];
1455 if (result == NULL)
1456 result = Py_None;
1457 Py_INCREF(result);
1458 return result;
1459 }
1460 if (name[0] == 'E') {
1461 if (strcmp(name, "ErrorCode") == 0)
1462 return PyInt_FromLong((long)
1463 XML_GetErrorCode(self->itself));
1464 if (strcmp(name, "ErrorLineNumber") == 0)
1465 return PyInt_FromLong((long)
1466 XML_GetErrorLineNumber(self->itself));
1467 if (strcmp(name, "ErrorColumnNumber") == 0)
1468 return PyInt_FromLong((long)
1469 XML_GetErrorColumnNumber(self->itself));
1470 if (strcmp(name, "ErrorByteIndex") == 0)
1471 return PyInt_FromLong((long)
1472 XML_GetErrorByteIndex(self->itself));
1473 }
Dave Cole3203efb2004-08-26 00:37:31 +00001474 if (name[0] == 'C') {
1475 if (strcmp(name, "CurrentLineNumber") == 0)
1476 return PyInt_FromLong((long)
1477 XML_GetCurrentLineNumber(self->itself));
1478 if (strcmp(name, "CurrentColumnNumber") == 0)
1479 return PyInt_FromLong((long)
1480 XML_GetCurrentColumnNumber(self->itself));
1481 if (strcmp(name, "CurrentByteIndex") == 0)
1482 return PyInt_FromLong((long)
1483 XML_GetCurrentByteIndex(self->itself));
1484 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001485 if (name[0] == 'b') {
1486 if (strcmp(name, "buffer_size") == 0)
1487 return PyInt_FromLong((long) self->buffer_size);
1488 if (strcmp(name, "buffer_text") == 0)
1489 return get_pybool(self->buffer != NULL);
1490 if (strcmp(name, "buffer_used") == 0)
1491 return PyInt_FromLong((long) self->buffer_used);
1492 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001493 if (strcmp(name, "namespace_prefixes") == 0)
1494 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001495 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001496 return get_pybool(self->ordered_attributes);
Fred Drake0582df92000-07-12 04:49:00 +00001497 if (strcmp(name, "returns_unicode") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001498 return get_pybool((long) self->returns_unicode);
Fred Drake85d835f2001-02-08 15:39:08 +00001499 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001500 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001501 if (strcmp(name, "intern") == 0) {
1502 if (self->intern == NULL) {
1503 Py_INCREF(Py_None);
1504 return Py_None;
1505 }
1506 else {
1507 Py_INCREF(self->intern);
1508 return self->intern;
1509 }
1510 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001511
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001512#define APPEND(list, str) \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001513 do { \
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001514 PyObject *o = PyString_FromString(str); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001515 if (o != NULL) \
1516 PyList_Append(list, o); \
1517 Py_XDECREF(o); \
1518 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001519
Fred Drake0582df92000-07-12 04:49:00 +00001520 if (strcmp(name, "__members__") == 0) {
1521 int i;
1522 PyObject *rc = PyList_New(0);
Georg Brandl5c170fd2006-03-17 19:03:25 +00001523 if (!rc)
1524 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001525 for (i = 0; handler_info[i].name != NULL; i++) {
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001526 PyObject *o = get_handler_name(&handler_info[i]);
1527 if (o != NULL)
1528 PyList_Append(rc, o);
1529 Py_XDECREF(o);
Fred Drake0582df92000-07-12 04:49:00 +00001530 }
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001531 APPEND(rc, "ErrorCode");
1532 APPEND(rc, "ErrorLineNumber");
1533 APPEND(rc, "ErrorColumnNumber");
1534 APPEND(rc, "ErrorByteIndex");
Dave Cole3203efb2004-08-26 00:37:31 +00001535 APPEND(rc, "CurrentLineNumber");
1536 APPEND(rc, "CurrentColumnNumber");
1537 APPEND(rc, "CurrentByteIndex");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001538 APPEND(rc, "buffer_size");
1539 APPEND(rc, "buffer_text");
1540 APPEND(rc, "buffer_used");
Martin v. Löwis069dde22003-01-21 10:58:18 +00001541 APPEND(rc, "namespace_prefixes");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001542 APPEND(rc, "ordered_attributes");
1543 APPEND(rc, "returns_unicode");
1544 APPEND(rc, "specified_attributes");
1545 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001546
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001547#undef APPEND
Fred Drake0582df92000-07-12 04:49:00 +00001548 return rc;
1549 }
1550 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001551}
1552
Fred Drake6f987622000-08-25 18:03:30 +00001553static int
1554sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001555{
1556 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001557 if (handlernum >= 0) {
1558 xmlhandler c_handler = NULL;
1559 PyObject *temp = self->handlers[handlernum];
1560
Fred Drake24a0f412006-07-06 05:13:22 +00001561 if (v == Py_None) {
1562 /* If this is the character data handler, and a character
1563 data handler is already active, we need to be more
1564 careful. What we can safely do is replace the existing
1565 character data handler callback function with a no-op
1566 function that will refuse to call Python. The downside
1567 is that this doesn't completely remove the character
1568 data handler from the C layer if there's any callback
1569 active, so Expat does a little more work than it
1570 otherwise would, but that's really an odd case. A more
1571 elaborate system of handlers and state could remove the
1572 C handler more effectively. */
1573 if (handlernum == CharacterData && self->in_callback)
1574 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001575 v = NULL;
Fred Drake24a0f412006-07-06 05:13:22 +00001576 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001577 else if (v != NULL) {
1578 Py_INCREF(v);
1579 c_handler = handler_info[handlernum].handler;
1580 }
Fred Drake0582df92000-07-12 04:49:00 +00001581 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001582 Py_XDECREF(temp);
1583 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001584 return 1;
1585 }
1586 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001587}
1588
1589static int
Fred Drake6f987622000-08-25 18:03:30 +00001590xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001591{
Fred Drake6f987622000-08-25 18:03:30 +00001592 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001593 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001594 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1595 return -1;
1596 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001597 if (strcmp(name, "buffer_text") == 0) {
1598 if (PyObject_IsTrue(v)) {
1599 if (self->buffer == NULL) {
1600 self->buffer = malloc(self->buffer_size);
1601 if (self->buffer == NULL) {
1602 PyErr_NoMemory();
1603 return -1;
1604 }
1605 self->buffer_used = 0;
1606 }
1607 }
1608 else if (self->buffer != NULL) {
1609 if (flush_character_buffer(self) < 0)
1610 return -1;
1611 free(self->buffer);
1612 self->buffer = NULL;
1613 }
1614 return 0;
1615 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001616 if (strcmp(name, "namespace_prefixes") == 0) {
1617 if (PyObject_IsTrue(v))
1618 self->ns_prefixes = 1;
1619 else
1620 self->ns_prefixes = 0;
1621 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1622 return 0;
1623 }
Fred Drake85d835f2001-02-08 15:39:08 +00001624 if (strcmp(name, "ordered_attributes") == 0) {
1625 if (PyObject_IsTrue(v))
1626 self->ordered_attributes = 1;
1627 else
1628 self->ordered_attributes = 0;
1629 return 0;
1630 }
Fred Drake6f987622000-08-25 18:03:30 +00001631 if (strcmp(name, "returns_unicode") == 0) {
Fred Drake85d835f2001-02-08 15:39:08 +00001632 if (PyObject_IsTrue(v)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001633#ifndef Py_USING_UNICODE
Fred Drake71b63ff2002-06-28 22:29:01 +00001634 PyErr_SetString(PyExc_ValueError,
1635 "Unicode support not available");
Fred Drake6f987622000-08-25 18:03:30 +00001636 return -1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001637#else
Fred Drake6f987622000-08-25 18:03:30 +00001638 self->returns_unicode = 1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001639#endif
Fred Drake6f987622000-08-25 18:03:30 +00001640 }
1641 else
1642 self->returns_unicode = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001643 return 0;
1644 }
1645 if (strcmp(name, "specified_attributes") == 0) {
1646 if (PyObject_IsTrue(v))
1647 self->specified_attributes = 1;
1648 else
1649 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001650 return 0;
1651 }
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001652
1653 if (strcmp(name, "buffer_size") == 0) {
1654 long new_buffer_size;
1655 if (!PyInt_Check(v)) {
1656 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1657 return -1;
1658 }
1659
1660 new_buffer_size=PyInt_AS_LONG(v);
1661 /* trivial case -- no change */
1662 if (new_buffer_size == self->buffer_size) {
1663 return 0;
1664 }
1665
1666 if (new_buffer_size <= 0) {
1667 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1668 return -1;
1669 }
1670
1671 /* check maximum */
1672 if (new_buffer_size > INT_MAX) {
1673 char errmsg[100];
1674 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1675 PyErr_SetString(PyExc_ValueError, errmsg);
1676 return -1;
1677 }
1678
1679 if (self->buffer != NULL) {
1680 /* there is already a buffer */
1681 if (self->buffer_used != 0) {
1682 flush_character_buffer(self);
1683 }
1684 /* free existing buffer */
1685 free(self->buffer);
1686 }
1687 self->buffer = malloc(new_buffer_size);
1688 if (self->buffer == NULL) {
1689 PyErr_NoMemory();
1690 return -1;
1691 }
1692 self->buffer_size = new_buffer_size;
1693 return 0;
1694 }
1695
Fred Drake2a3d7db2002-06-28 22:56:48 +00001696 if (strcmp(name, "CharacterDataHandler") == 0) {
1697 /* If we're changing the character data handler, flush all
1698 * cached data with the old handler. Not sure there's a
1699 * "right" thing to do, though, but this probably won't
1700 * happen.
1701 */
1702 if (flush_character_buffer(self) < 0)
1703 return -1;
1704 }
Fred Drake6f987622000-08-25 18:03:30 +00001705 if (sethandler(self, name, v)) {
1706 return 0;
1707 }
1708 PyErr_SetString(PyExc_AttributeError, name);
1709 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001710}
1711
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001712#ifdef WITH_CYCLE_GC
1713static int
1714xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1715{
Neal Norwitz035b1852006-04-16 00:02:59 +00001716 int i;
Thomas Woutersc6e55062006-04-15 21:47:09 +00001717 for (i = 0; handler_info[i].name != NULL; i++)
1718 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001719 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001720}
1721
1722static int
1723xmlparse_clear(xmlparseobject *op)
1724{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001725 clear_handlers(op, 0);
Thomas Woutersedf17d82006-04-15 17:28:34 +00001726 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001727 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001728}
1729#endif
1730
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001731PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001732
1733static PyTypeObject Xmlparsetype = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001734 PyVarObject_HEAD_INIT(NULL, 0)
Guido van Rossum14648392001-12-08 18:02:58 +00001735 "pyexpat.xmlparser", /*tp_name*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001736 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001737 0, /*tp_itemsize*/
1738 /* methods */
1739 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1740 (printfunc)0, /*tp_print*/
1741 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1742 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1743 (cmpfunc)0, /*tp_compare*/
1744 (reprfunc)0, /*tp_repr*/
1745 0, /*tp_as_number*/
1746 0, /*tp_as_sequence*/
1747 0, /*tp_as_mapping*/
1748 (hashfunc)0, /*tp_hash*/
1749 (ternaryfunc)0, /*tp_call*/
1750 (reprfunc)0, /*tp_str*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001751 0, /* tp_getattro */
1752 0, /* tp_setattro */
1753 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001754#ifdef Py_TPFLAGS_HAVE_GC
Fred Drake71b63ff2002-06-28 22:29:01 +00001755 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001756#else
Fred Drake71b63ff2002-06-28 22:29:01 +00001757 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001758#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001759 Xmlparsetype__doc__, /* tp_doc - Documentation string */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001760#ifdef WITH_CYCLE_GC
1761 (traverseproc)xmlparse_traverse, /* tp_traverse */
1762 (inquiry)xmlparse_clear /* tp_clear */
1763#else
1764 0, 0
1765#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001766};
1767
1768/* End of code for xmlparser objects */
1769/* -------------------------------------------------------- */
1770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001772"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001773Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001774
1775static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001776pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1777{
Fred Drakecde79132001-04-25 16:01:30 +00001778 char *encoding = NULL;
1779 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001780 PyObject *intern = NULL;
1781 PyObject *result;
1782 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001783 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001784 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001785
Fred Drakeb91a36b2002-06-27 19:40:48 +00001786 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1787 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001788 return NULL;
1789 if (namespace_separator != NULL
1790 && strlen(namespace_separator) > 1) {
1791 PyErr_SetString(PyExc_ValueError,
1792 "namespace_separator must be at most one"
1793 " character, omitted, or None");
1794 return NULL;
1795 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001796 /* Explicitly passing None means no interning is desired.
1797 Not passing anything means that a new dictionary is used. */
1798 if (intern == Py_None)
1799 intern = NULL;
1800 else if (intern == NULL) {
1801 intern = PyDict_New();
1802 if (!intern)
1803 return NULL;
1804 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001805 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001806 else if (!PyDict_Check(intern)) {
1807 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1808 return NULL;
1809 }
1810
1811 result = newxmlparseobject(encoding, namespace_separator, intern);
1812 if (intern_decref) {
1813 Py_DECREF(intern);
1814 }
1815 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001816}
1817
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001818PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001819"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001820Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001821
1822static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001823pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001824{
Fred Drake0582df92000-07-12 04:49:00 +00001825 long code = 0;
1826
1827 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1828 return NULL;
1829 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001830}
1831
1832/* List of methods defined in the module */
1833
1834static struct PyMethodDef pyexpat_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001835 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1836 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1837 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1838 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001839
Fred Drake0582df92000-07-12 04:49:00 +00001840 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001841};
1842
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001843/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001844
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001845PyDoc_STRVAR(pyexpat_module_documentation,
1846"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001847
Fred Drake4113b132001-03-24 19:58:26 +00001848/* Return a Python string that represents the version number without the
1849 * extra cruft added by revision control, even if the right options were
1850 * given to the "cvs export" command to make it not include the extra
1851 * cruft.
1852 */
1853static PyObject *
1854get_version_string(void)
1855{
1856 static char *rcsid = "$Revision$";
1857 char *rev = rcsid;
1858 int i = 0;
1859
Neal Norwitz30b5c5d2005-12-19 06:05:18 +00001860 while (!isdigit(Py_CHARMASK(*rev)))
Fred Drake4113b132001-03-24 19:58:26 +00001861 ++rev;
1862 while (rev[i] != ' ' && rev[i] != '\0')
1863 ++i;
1864
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001865 return PyString_FromStringAndSize(rev, i);
Fred Drake4113b132001-03-24 19:58:26 +00001866}
1867
Fred Drakecde79132001-04-25 16:01:30 +00001868/* Initialization function for the module */
1869
1870#ifndef MODULE_NAME
1871#define MODULE_NAME "pyexpat"
1872#endif
1873
1874#ifndef MODULE_INITFUNC
1875#define MODULE_INITFUNC initpyexpat
1876#endif
1877
Martin v. Löwis069dde22003-01-21 10:58:18 +00001878#ifndef PyMODINIT_FUNC
1879# ifdef MS_WINDOWS
1880# define PyMODINIT_FUNC __declspec(dllexport) void
1881# else
1882# define PyMODINIT_FUNC void
1883# endif
1884#endif
1885
Mark Hammond8235ea12002-07-19 06:55:41 +00001886PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001887
Martin v. Löwis069dde22003-01-21 10:58:18 +00001888PyMODINIT_FUNC
1889MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001890{
1891 PyObject *m, *d;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001892 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001893 PyObject *errors_module;
1894 PyObject *modelmod_name;
1895 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001896 PyObject *sys_modules;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001897 static struct PyExpat_CAPI capi;
1898 PyObject* capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001899
Fred Drake6f987622000-08-25 18:03:30 +00001900 if (errmod_name == NULL)
1901 return;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001902 modelmod_name = PyString_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001903 if (modelmod_name == NULL)
1904 return;
Fred Drake6f987622000-08-25 18:03:30 +00001905
Christian Heimese93237d2007-12-19 02:37:44 +00001906 Py_TYPE(&Xmlparsetype) = &PyType_Type;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001907
Fred Drake0582df92000-07-12 04:49:00 +00001908 /* Create the module and add the functions */
Fred Drakecde79132001-04-25 16:01:30 +00001909 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
Fred Drake85d835f2001-02-08 15:39:08 +00001910 pyexpat_module_documentation);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001911 if (m == NULL)
1912 return;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001913
Fred Drake0582df92000-07-12 04:49:00 +00001914 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001915 if (ErrorObject == NULL) {
1916 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001917 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001918 if (ErrorObject == NULL)
1919 return;
1920 }
1921 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001922 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001923 Py_INCREF(ErrorObject);
1924 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001925 Py_INCREF(&Xmlparsetype);
1926 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001927
Fred Drake4113b132001-03-24 19:58:26 +00001928 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001929 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1930 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001931 {
1932 XML_Expat_Version info = XML_ExpatVersionInfo();
1933 PyModule_AddObject(m, "version_info",
1934 Py_BuildValue("(iii)", info.major,
1935 info.minor, info.micro));
1936 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001937#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001938 init_template_buffer();
1939#endif
Fred Drake0582df92000-07-12 04:49:00 +00001940 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001941 compiled, this should check and set native_encoding
1942 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001943 */
Fred Drake93adb692000-09-23 04:55:48 +00001944 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001945
Fred Drake85d835f2001-02-08 15:39:08 +00001946 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001947 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001948 errors_module = PyDict_GetItem(d, errmod_name);
1949 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001950 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001951 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001952 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001953 /* gives away the reference to errors_module */
1954 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001955 }
1956 }
Fred Drake6f987622000-08-25 18:03:30 +00001957 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001958 model_module = PyDict_GetItem(d, modelmod_name);
1959 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001960 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001961 if (model_module != NULL) {
1962 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1963 /* gives away the reference to model_module */
1964 PyModule_AddObject(m, "model", model_module);
1965 }
1966 }
1967 Py_DECREF(modelmod_name);
1968 if (errors_module == NULL || model_module == NULL)
1969 /* Don't core dump later! */
Fred Drake6f987622000-08-25 18:03:30 +00001970 return;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001971
Martin v. Löwisc847f402003-01-21 11:09:21 +00001972#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001973 {
1974 const XML_Feature *features = XML_GetFeatureList();
1975 PyObject *list = PyList_New(0);
1976 if (list == NULL)
1977 /* just ignore it */
1978 PyErr_Clear();
1979 else {
1980 int i = 0;
1981 for (; features[i].feature != XML_FEATURE_END; ++i) {
1982 int ok;
1983 PyObject *item = Py_BuildValue("si", features[i].name,
1984 features[i].value);
1985 if (item == NULL) {
1986 Py_DECREF(list);
1987 list = NULL;
1988 break;
1989 }
1990 ok = PyList_Append(list, item);
1991 Py_DECREF(item);
1992 if (ok < 0) {
1993 PyErr_Clear();
1994 break;
1995 }
1996 }
1997 if (list != NULL)
1998 PyModule_AddObject(m, "features", list);
1999 }
2000 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00002001#endif
Fred Drake6f987622000-08-25 18:03:30 +00002002
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002003#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00002004 PyModule_AddStringConstant(errors_module, #name, \
2005 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00002006
Fred Drake0582df92000-07-12 04:49:00 +00002007 MYCONST(XML_ERROR_NO_MEMORY);
2008 MYCONST(XML_ERROR_SYNTAX);
2009 MYCONST(XML_ERROR_NO_ELEMENTS);
2010 MYCONST(XML_ERROR_INVALID_TOKEN);
2011 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
2012 MYCONST(XML_ERROR_PARTIAL_CHAR);
2013 MYCONST(XML_ERROR_TAG_MISMATCH);
2014 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
2015 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
2016 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
2017 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
2018 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
2019 MYCONST(XML_ERROR_ASYNC_ENTITY);
2020 MYCONST(XML_ERROR_BAD_CHAR_REF);
2021 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
2022 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
2023 MYCONST(XML_ERROR_MISPLACED_XML_PI);
2024 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
2025 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002026 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
2027 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
2028 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00002029 MYCONST(XML_ERROR_UNEXPECTED_STATE);
2030 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
2031 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
2032 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
2033 /* Added in Expat 1.95.7. */
2034 MYCONST(XML_ERROR_UNBOUND_PREFIX);
2035 /* Added in Expat 1.95.8. */
2036 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
2037 MYCONST(XML_ERROR_INCOMPLETE_PE);
2038 MYCONST(XML_ERROR_XML_DECL);
2039 MYCONST(XML_ERROR_TEXT_DECL);
2040 MYCONST(XML_ERROR_PUBLICID);
2041 MYCONST(XML_ERROR_SUSPENDED);
2042 MYCONST(XML_ERROR_NOT_SUSPENDED);
2043 MYCONST(XML_ERROR_ABORTED);
2044 MYCONST(XML_ERROR_FINISHED);
2045 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002046
Fred Drake85d835f2001-02-08 15:39:08 +00002047 PyModule_AddStringConstant(errors_module, "__doc__",
2048 "Constants used to describe error conditions.");
2049
Fred Drake93adb692000-09-23 04:55:48 +00002050#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002051
Fred Drake85d835f2001-02-08 15:39:08 +00002052#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002053 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
2054 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2055 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00002056#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002057
Fred Drake85d835f2001-02-08 15:39:08 +00002058#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2059 PyModule_AddStringConstant(model_module, "__doc__",
2060 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002061
Fred Drake85d835f2001-02-08 15:39:08 +00002062 MYCONST(XML_CTYPE_EMPTY);
2063 MYCONST(XML_CTYPE_ANY);
2064 MYCONST(XML_CTYPE_MIXED);
2065 MYCONST(XML_CTYPE_NAME);
2066 MYCONST(XML_CTYPE_CHOICE);
2067 MYCONST(XML_CTYPE_SEQ);
2068
2069 MYCONST(XML_CQUANT_NONE);
2070 MYCONST(XML_CQUANT_OPT);
2071 MYCONST(XML_CQUANT_REP);
2072 MYCONST(XML_CQUANT_PLUS);
2073#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00002074
2075 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002076 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00002077 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002078 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2079 capi.MINOR_VERSION = XML_MINOR_VERSION;
2080 capi.MICRO_VERSION = XML_MICRO_VERSION;
2081 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00002082 capi.GetErrorCode = XML_GetErrorCode;
2083 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2084 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002085 capi.Parse = XML_Parse;
2086 capi.ParserCreate_MM = XML_ParserCreate_MM;
2087 capi.ParserFree = XML_ParserFree;
2088 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2089 capi.SetCommentHandler = XML_SetCommentHandler;
2090 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2091 capi.SetElementHandler = XML_SetElementHandler;
2092 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2093 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2094 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2095 capi.SetUserData = XML_SetUserData;
Fredrik Lundhc3345042005-12-13 19:49:55 +00002096
2097 /* export as cobject */
Fredrik Lundhcc117db2005-12-13 21:55:36 +00002098 capi_object = PyCObject_FromVoidPtr(&capi, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002099 if (capi_object)
2100 PyModule_AddObject(m, "expat_CAPI", capi_object);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002101}
2102
Fred Drake6f987622000-08-25 18:03:30 +00002103static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002104clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00002105{
Fred Drakecde79132001-04-25 16:01:30 +00002106 int i = 0;
2107 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002108
Fred Drake71b63ff2002-06-28 22:29:01 +00002109 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002110 if (initial)
Fred Drake71b63ff2002-06-28 22:29:01 +00002111 self->handlers[i] = NULL;
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002112 else {
Fred Drakecde79132001-04-25 16:01:30 +00002113 temp = self->handlers[i];
2114 self->handlers[i] = NULL;
2115 Py_XDECREF(temp);
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002116 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002117 }
Fred Drakecde79132001-04-25 16:01:30 +00002118 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002119}
2120
Tim Peters0c322792002-07-17 16:49:03 +00002121static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00002122 {"StartElementHandler",
2123 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002124 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002125 {"EndElementHandler",
2126 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002127 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002128 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002129 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2130 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002131 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002132 (xmlhandlersetter)XML_SetCharacterDataHandler,
2133 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002134 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002135 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002136 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002137 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002138 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002139 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002140 {"StartNamespaceDeclHandler",
2141 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002142 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002143 {"EndNamespaceDeclHandler",
2144 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002145 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002146 {"CommentHandler",
2147 (xmlhandlersetter)XML_SetCommentHandler,
2148 (xmlhandler)my_CommentHandler},
2149 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002150 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002151 (xmlhandler)my_StartCdataSectionHandler},
2152 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002153 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002154 (xmlhandler)my_EndCdataSectionHandler},
2155 {"DefaultHandler",
2156 (xmlhandlersetter)XML_SetDefaultHandler,
2157 (xmlhandler)my_DefaultHandler},
2158 {"DefaultHandlerExpand",
2159 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2160 (xmlhandler)my_DefaultHandlerExpandHandler},
2161 {"NotStandaloneHandler",
2162 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2163 (xmlhandler)my_NotStandaloneHandler},
2164 {"ExternalEntityRefHandler",
2165 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002166 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002167 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002168 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002169 (xmlhandler)my_StartDoctypeDeclHandler},
2170 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002171 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002172 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002173 {"EntityDeclHandler",
2174 (xmlhandlersetter)XML_SetEntityDeclHandler,
2175 (xmlhandler)my_EntityDeclHandler},
2176 {"XmlDeclHandler",
2177 (xmlhandlersetter)XML_SetXmlDeclHandler,
2178 (xmlhandler)my_XmlDeclHandler},
2179 {"ElementDeclHandler",
2180 (xmlhandlersetter)XML_SetElementDeclHandler,
2181 (xmlhandler)my_ElementDeclHandler},
2182 {"AttlistDeclHandler",
2183 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2184 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002185#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002186 {"SkippedEntityHandler",
2187 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2188 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002189#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002190
Fred Drake0582df92000-07-12 04:49:00 +00002191 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002192};