blob: 3b4ccfd0696d0ae89ba890396986c6e0620359cc [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
/* Expat version folded into one integer (major*10000 + minor*100 + micro)
 * so that it can be compared in a single #if expression.
 */
#define XML_COMBINED_VERSION \
    ((10000 * XML_MAJOR_VERSION) + (100 * XML_MINOR_VERSION) + XML_MICRO_VERSION)
10
#ifndef PyDoc_STRVAR

/*
 * Fallback definitions used only when the Python headers do not already
 * provide PyDoc_STRVAR.
 *
 * fdrake says:
 * Don't change the PyDoc_STR macro definition to (str), because
 * '''the parentheses cause compile failures
 * ("non-constant static initializer" or something like that)
 * on some platforms (Irix?)'''
 */
#define PyDoc_STR(str)          str
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name, str) PyDoc_VAR(name) = PyDoc_STR(str)

#endif /* !PyDoc_STRVAR */
24
#if !(PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
/* Everything except Python 2.0/2.1: enable the explicit trace hooks. */
#define FIX_TRACE
#else
/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
#define Py_USING_UNICODE
#endif
31
/* Slot numbers for the supported callbacks.  The values are used as
 * indexes into xmlparseobject.handlers and into handler_info, and they
 * are assigned positionally, so the order below must not be changed.
 */
enum HandlerTypes {
    /* element and character content */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    /* declarations and namespaces */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,

    /* comments and CDATA sections */
    Comment,
    StartCdataSection,
    EndCdataSection,

    /* default handlers and entity handling */
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,

    /* DTD related */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif

    /* terminator; keeps the list well-formed with or without SkippedEntity */
    _DummyDecl
};
59
60static PyObject *ErrorObject;
61
62/* ----------------------------------------------------- */
63
64/* Declarations for objects of type xmlparser */
65
66typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000067 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068
Fred Drake0582df92000-07-12 04:49:00 +000069 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000070 int returns_unicode; /* True if Unicode strings are returned;
71 if false, UTF-8 strings are returned */
72 int ordered_attributes; /* Return attributes as a list. */
73 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000074 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000075 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000076 XML_Char *buffer; /* Buffer used when accumulating characters */
77 /* NULL if not enabled */
78 int buffer_size; /* Size of buffer, in XML_Char units */
79 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000080 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000081 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000082} xmlparseobject;
83
Fred Drake2a3d7db2002-06-28 22:56:48 +000084#define CHARACTER_DATA_BUFFER_SIZE 8192
85
Jeremy Hylton938ace62002-07-17 16:30:39 +000086static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000087
Fred Drake117ac852002-09-24 16:24:54 +000088typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089typedef void* xmlhandler;
90
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000091struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000092 const char *name;
93 xmlhandlersetter setter;
94 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000095 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000096 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000097};
98
Jeremy Hylton938ace62002-07-17 16:30:39 +000099static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000100
Fred Drakebd6101c2001-02-14 18:29:45 +0000101/* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
103 */
104static int
105set_error_attr(PyObject *err, char *name, int value)
106{
107 PyObject *v = PyInt_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000108
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000109 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000111 return 0;
112 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000113 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000114 return 1;
115}
116
117/* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
119 */
Fred Drake85d835f2001-02-08 15:39:08 +0000120static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000121set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000122{
123 PyObject *err;
124 char buffer[256];
125 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000126 int lineno = XML_GetErrorLineNumber(parser);
127 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000128
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000132 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000133 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000134 if ( err != NULL
135 && set_error_attr(err, "code", code)
136 && set_error_attr(err, "offset", column)
137 && set_error_attr(err, "lineno", lineno)) {
138 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000139 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000140 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000141 return NULL;
142}
143
Fred Drake71b63ff2002-06-28 22:29:01 +0000144static int
145have_handler(xmlparseobject *self, int type)
146{
147 PyObject *handler = self->handlers[type];
148 return handler != NULL;
149}
150
151static PyObject *
152get_handler_name(struct HandlerInfo *hinfo)
153{
154 PyObject *name = hinfo->nameobj;
155 if (name == NULL) {
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000156 name = PyString_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000157 hinfo->nameobj = name;
158 }
159 Py_XINCREF(name);
160 return name;
161}
162
Fred Drake85d835f2001-02-08 15:39:08 +0000163
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000164#ifdef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000165/* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
167
Fred Drake0582df92000-07-12 04:49:00 +0000168static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000169conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000170{
Fred Drake71b63ff2002-06-28 22:29:01 +0000171 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
174 if (str == NULL) {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000178 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000179}
180
Fred Drake0582df92000-07-12 04:49:00 +0000181static PyObject *
182conv_string_len_to_unicode(const XML_Char *str, int len)
183{
Fred Drake71b63ff2002-06-28 22:29:01 +0000184 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
187 if (str == NULL) {
188 Py_INCREF(Py_None);
189 return Py_None;
190 }
Fred Drake6f987622000-08-25 18:03:30 +0000191 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000192}
193#endif
194
195/* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
197
Fred Drake6f987622000-08-25 18:03:30 +0000198static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000199conv_string_to_utf8(const XML_Char *str)
Fred Drake6f987622000-08-25 18:03:30 +0000200{
Fred Drake71b63ff2002-06-28 22:29:01 +0000201 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
204 if (str == NULL) {
205 Py_INCREF(Py_None);
206 return Py_None;
207 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000208 return PyString_FromString(str);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000209}
210
Fred Drake6f987622000-08-25 18:03:30 +0000211static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +0000212conv_string_len_to_utf8(const XML_Char *str, int len)
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000213{
Fred Drake71b63ff2002-06-28 22:29:01 +0000214 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
217 if (str == NULL) {
218 Py_INCREF(Py_None);
219 return Py_None;
220 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000221 return PyString_FromStringAndSize((const char *)str, len);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000222}
223
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000224/* Callback routines */
225
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000226static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000227
Martin v. Löwis069dde22003-01-21 10:58:18 +0000228/* This handler is used when an error has been detected, in the hope
229 that actual parsing can be terminated early. This will only help
230 if an external entity reference is encountered. */
231static int
232error_external_entity_ref_handler(XML_Parser parser,
233 const XML_Char *context,
234 const XML_Char *base,
235 const XML_Char *systemId,
236 const XML_Char *publicId)
237{
238 return 0;
239}
240
Fred Drake24a0f412006-07-06 05:13:22 +0000241/* Dummy character data handler used when an error (exception) has
242 been detected, and the actual parsing can be terminated early.
243 This is needed since character data handler can't be safely removed
244 from within the character data handler, but can be replaced. It is
245 used only from the character data handler trampoline, and must be
246 used right after `flag_error()` is called. */
247static void
248noop_character_data_handler(void *userData, const XML_Char *data, int len)
249{
250 /* Do nothing. */
251}
252
Fred Drake6f987622000-08-25 18:03:30 +0000253static void
254flag_error(xmlparseobject *self)
255{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000256 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000257 XML_SetExternalEntityRefHandler(self->itself,
258 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000259}
260
261static PyCodeObject*
262getcode(enum HandlerTypes slot, char* func_name, int lineno)
263{
Fred Drakebd6101c2001-02-14 18:29:45 +0000264 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000265 handler_info[slot].tb_code =
Jeffrey Yasskin1aa47002009-05-08 21:51:06 +0000266 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000267 }
268 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000269}
270
Jeremy Hylton9263f572003-06-27 16:13:17 +0000271#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000272static int
273trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
274{
275 int result = 0;
276 if (!tstate->use_tracing || tstate->tracing)
277 return 0;
278 if (tstate->c_profilefunc != NULL) {
279 tstate->tracing++;
280 result = tstate->c_profilefunc(tstate->c_profileobj,
281 f, code , val);
282 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
283 || (tstate->c_profilefunc != NULL));
284 tstate->tracing--;
285 if (result)
286 return result;
287 }
288 if (tstate->c_tracefunc != NULL) {
289 tstate->tracing++;
290 result = tstate->c_tracefunc(tstate->c_traceobj,
291 f, code , val);
292 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
293 || (tstate->c_profilefunc != NULL));
294 tstate->tracing--;
295 }
296 return result;
297}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000298
299static int
300trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
301{
302 PyObject *type, *value, *traceback, *arg;
303 int err;
304
305 if (tstate->c_tracefunc == NULL)
306 return 0;
307
308 PyErr_Fetch(&type, &value, &traceback);
309 if (value == NULL) {
310 value = Py_None;
311 Py_INCREF(value);
312 }
Martin v. Löwis9171f022004-10-13 19:50:11 +0000313#if PY_VERSION_HEX < 0x02040000
314 arg = Py_BuildValue("(OOO)", type, value, traceback);
315#else
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000316 arg = PyTuple_Pack(3, type, value, traceback);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000317#endif
Jeremy Hylton9263f572003-06-27 16:13:17 +0000318 if (arg == NULL) {
319 PyErr_Restore(type, value, traceback);
320 return 0;
321 }
322 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
323 Py_DECREF(arg);
324 if (err == 0)
325 PyErr_Restore(type, value, traceback);
326 else {
327 Py_XDECREF(type);
328 Py_XDECREF(value);
329 Py_XDECREF(traceback);
330 }
331 return err;
332}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000333#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000334
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000335static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000336call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
337 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000338{
Fred Drakebd6101c2001-02-14 18:29:45 +0000339 PyThreadState *tstate = PyThreadState_GET();
340 PyFrameObject *f;
341 PyObject *res;
342
343 if (c == NULL)
344 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000345
Jeremy Hylton9263f572003-06-27 16:13:17 +0000346 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000347 if (f == NULL)
348 return NULL;
349 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000350#ifdef FIX_TRACE
351 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000352 return NULL;
353 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000354#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000355 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000356 if (res == NULL) {
357 if (tstate->curexc_traceback == NULL)
358 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000359 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000360#ifdef FIX_TRACE
361 if (trace_frame_exc(tstate, f) < 0) {
362 return NULL;
363 }
364 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000365 else {
Jeremy Hylton9263f572003-06-27 16:13:17 +0000366 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000367 Py_XDECREF(res);
368 res = NULL;
369 }
370 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000371#else
372 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000373#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000374 tstate->frame = f->f_back;
375 Py_DECREF(f);
376 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000377}
378
/* Converter for NUL-terminated XML_Char strings.  With Unicode support
   the choice is made at run time from `self->returns_unicode`, so a
   variable named `self` must be in scope wherever this is used. */
#ifdef Py_USING_UNICODE
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#else
#define STRING_CONV_FUNC conv_string_to_utf8
#endif
Guido van Rossum5961f5a2000-03-31 16:18:11 +0000386
Fred Drakeb91a36b2002-06-27 19:40:48 +0000387static PyObject*
388string_intern(xmlparseobject *self, const char* str)
389{
390 PyObject *result = STRING_CONV_FUNC(str);
391 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000392 /* result can be NULL if the unicode conversion failed. */
393 if (!result)
394 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000395 if (!self->intern)
396 return result;
397 value = PyDict_GetItem(self->intern, result);
398 if (!value) {
399 if (PyDict_SetItem(self->intern, result, result) == 0)
400 return result;
401 else
402 return NULL;
403 }
404 Py_INCREF(value);
405 Py_DECREF(result);
406 return value;
407}
408
Fred Drake2a3d7db2002-06-28 22:56:48 +0000409/* Return 0 on success, -1 on exception.
410 * flag_error() will be called before return if needed.
411 */
412static int
413call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
414{
415 PyObject *args;
416 PyObject *temp;
417
418 args = PyTuple_New(1);
419 if (args == NULL)
420 return -1;
421#ifdef Py_USING_UNICODE
422 temp = (self->returns_unicode
423 ? conv_string_len_to_unicode(buffer, len)
424 : conv_string_len_to_utf8(buffer, len));
425#else
426 temp = conv_string_len_to_utf8(buffer, len);
427#endif
428 if (temp == NULL) {
429 Py_DECREF(args);
430 flag_error(self);
Fred Drake24a0f412006-07-06 05:13:22 +0000431 XML_SetCharacterDataHandler(self->itself,
432 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000433 return -1;
434 }
435 PyTuple_SET_ITEM(args, 0, temp);
436 /* temp is now a borrowed reference; consider it unused. */
437 self->in_callback = 1;
438 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000439 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000440 /* temp is an owned reference again, or NULL */
441 self->in_callback = 0;
442 Py_DECREF(args);
443 if (temp == NULL) {
444 flag_error(self);
Fred Drake24a0f412006-07-06 05:13:22 +0000445 XML_SetCharacterDataHandler(self->itself,
446 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000447 return -1;
448 }
449 Py_DECREF(temp);
450 return 0;
451}
452
453static int
454flush_character_buffer(xmlparseobject *self)
455{
456 int rc;
457 if (self->buffer == NULL || self->buffer_used == 0)
458 return 0;
459 rc = call_character_handler(self, self->buffer, self->buffer_used);
460 self->buffer_used = 0;
461 return rc;
462}
463
464static void
465my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
466{
467 xmlparseobject *self = (xmlparseobject *) userData;
468 if (self->buffer == NULL)
469 call_character_handler(self, data, len);
470 else {
471 if ((self->buffer_used + len) > self->buffer_size) {
472 if (flush_character_buffer(self) < 0)
473 return;
474 /* handler might have changed; drop the rest on the floor
475 * if there isn't a handler anymore
476 */
477 if (!have_handler(self, CharacterData))
478 return;
479 }
480 if (len > self->buffer_size) {
481 call_character_handler(self, data, len);
482 self->buffer_used = 0;
483 }
484 else {
485 memcpy(self->buffer + self->buffer_used,
486 data, len * sizeof(XML_Char));
487 self->buffer_used += len;
488 }
489 }
490}
491
Fred Drake85d835f2001-02-08 15:39:08 +0000492static void
493my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000494 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000495{
496 xmlparseobject *self = (xmlparseobject *)userData;
497
Fred Drake71b63ff2002-06-28 22:29:01 +0000498 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000499 PyObject *container, *rv, *args;
500 int i, max;
501
Fred Drake2a3d7db2002-06-28 22:56:48 +0000502 if (flush_character_buffer(self) < 0)
503 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000504 /* Set max to the number of slots filled in atts[]; max/2 is
505 * the number of attributes we need to process.
506 */
507 if (self->specified_attributes) {
508 max = XML_GetSpecifiedAttributeCount(self->itself);
509 }
510 else {
511 max = 0;
512 while (atts[max] != NULL)
513 max += 2;
514 }
515 /* Build the container. */
516 if (self->ordered_attributes)
517 container = PyList_New(max);
518 else
519 container = PyDict_New();
520 if (container == NULL) {
521 flag_error(self);
522 return;
523 }
524 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000525 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000526 PyObject *v;
527 if (n == NULL) {
528 flag_error(self);
529 Py_DECREF(container);
530 return;
531 }
532 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
533 if (v == NULL) {
534 flag_error(self);
535 Py_DECREF(container);
536 Py_DECREF(n);
537 return;
538 }
539 if (self->ordered_attributes) {
540 PyList_SET_ITEM(container, i, n);
541 PyList_SET_ITEM(container, i+1, v);
542 }
543 else if (PyDict_SetItem(container, n, v)) {
544 flag_error(self);
545 Py_DECREF(n);
546 Py_DECREF(v);
547 return;
548 }
549 else {
550 Py_DECREF(n);
551 Py_DECREF(v);
552 }
553 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000554 args = string_intern(self, name);
555 if (args != NULL)
556 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000557 if (args == NULL) {
558 Py_DECREF(container);
559 return;
560 }
561 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000562 self->in_callback = 1;
563 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000564 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000565 self->in_callback = 0;
566 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000567 if (rv == NULL) {
568 flag_error(self);
569 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000570 }
Fred Drake85d835f2001-02-08 15:39:08 +0000571 Py_DECREF(rv);
572 }
573}
574
/* Generate the C callback my_<NAME>Handler for handler slot NAME.
 *
 *   RC            return type of the generated function
 *   PARAMS        parenthesized parameter list
 *   INIT          extra declaration(s) placed after the locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on the handler result `rv`
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing unless a handler is installed;
 * otherwise it flushes buffered character data, builds the arguments,
 * calls the Python handler and calls flag_error() on failure.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (!have_handler(self, NAME)) { \
        return RETURN; \
    } \
    if (flush_character_buffer(self) < 0) { \
        return RETURN; \
    } \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
603
/* RC_HANDLER specialisation for callbacks that return nothing and
   receive the parser object as `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, \
               ;, \
               PARAM_FORMAT, \
               ;, \
               ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000607
/* RC_HANDLER specialisation for callbacks that return an int: the
   Python result is converted with PyInt_AsLong(); 0 is returned when
   no handler is installed or an error occurred. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, \
               int rc=0;, \
               PARAM_FORMAT, \
               rc = PyInt_AsLong(rv);, \
               rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000612
Fred Drake71b63ff2002-06-28 22:29:01 +0000613VOID_HANDLER(EndElement,
614 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000615 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000616
Fred Drake6f987622000-08-25 18:03:30 +0000617VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000618 (void *userData,
619 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000620 const XML_Char *data),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000621 ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000622
Fred Drake6f987622000-08-25 18:03:30 +0000623VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000624 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000625 const XML_Char *entityName,
626 const XML_Char *base,
627 const XML_Char *systemId,
628 const XML_Char *publicId,
629 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000630 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000631 string_intern(self, entityName), string_intern(self, base),
632 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000633 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000634
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000635#ifndef Py_USING_UNICODE
Fred Drake85d835f2001-02-08 15:39:08 +0000636VOID_HANDLER(EntityDecl,
637 (void *userData,
638 const XML_Char *entityName,
639 int is_parameter_entity,
640 const XML_Char *value,
641 int value_length,
642 const XML_Char *base,
643 const XML_Char *systemId,
644 const XML_Char *publicId,
645 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000646 ("NiNNNNN",
647 string_intern(self, entityName), is_parameter_entity,
Fred Drake85d835f2001-02-08 15:39:08 +0000648 conv_string_len_to_utf8(value, value_length),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000649 string_intern(self, base), string_intern(self, systemId),
650 string_intern(self, publicId),
651 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000652#else
653VOID_HANDLER(EntityDecl,
654 (void *userData,
655 const XML_Char *entityName,
656 int is_parameter_entity,
657 const XML_Char *value,
658 int value_length,
659 const XML_Char *base,
660 const XML_Char *systemId,
661 const XML_Char *publicId,
662 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000663 ("NiNNNNN",
664 string_intern(self, entityName), is_parameter_entity,
Fred Drake71b63ff2002-06-28 22:29:01 +0000665 (self->returns_unicode
666 ? conv_string_len_to_unicode(value, value_length)
Fred Drake85d835f2001-02-08 15:39:08 +0000667 : conv_string_len_to_utf8(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000668 string_intern(self, base), string_intern(self, systemId),
669 string_intern(self, publicId),
670 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000671#endif
672
673VOID_HANDLER(XmlDecl,
674 (void *userData,
675 const XML_Char *version,
676 const XML_Char *encoding,
677 int standalone),
678 ("(O&O&i)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000679 STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000680 standalone))
681
682static PyObject *
683conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000684 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000685{
686 PyObject *result = NULL;
687 PyObject *children = PyTuple_New(model->numchildren);
688 int i;
689
690 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000691 assert(model->numchildren < INT_MAX);
692 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000693 PyObject *child = conv_content_model(&model->children[i],
694 conv_string);
695 if (child == NULL) {
696 Py_XDECREF(children);
697 return NULL;
698 }
699 PyTuple_SET_ITEM(children, i, child);
700 }
701 result = Py_BuildValue("(iiO&N)",
702 model->type, model->quant,
703 conv_string,model->name, children);
704 }
705 return result;
706}
707
Fred Drake06dd8cf2003-02-02 03:54:17 +0000708static void
709my_ElementDeclHandler(void *userData,
710 const XML_Char *name,
711 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000712{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000713 xmlparseobject *self = (xmlparseobject *)userData;
714 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000715
Fred Drake06dd8cf2003-02-02 03:54:17 +0000716 if (have_handler(self, ElementDecl)) {
717 PyObject *rv = NULL;
718 PyObject *modelobj, *nameobj;
719
720 if (flush_character_buffer(self) < 0)
721 goto finally;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000722#ifdef Py_USING_UNICODE
Fred Drake06dd8cf2003-02-02 03:54:17 +0000723 modelobj = conv_content_model(model,
724 (self->returns_unicode
725 ? conv_string_to_unicode
726 : conv_string_to_utf8));
Fred Drake85d835f2001-02-08 15:39:08 +0000727#else
Fred Drake06dd8cf2003-02-02 03:54:17 +0000728 modelobj = conv_content_model(model, conv_string_to_utf8);
Fred Drake85d835f2001-02-08 15:39:08 +0000729#endif
Fred Drake06dd8cf2003-02-02 03:54:17 +0000730 if (modelobj == NULL) {
731 flag_error(self);
732 goto finally;
733 }
734 nameobj = string_intern(self, name);
735 if (nameobj == NULL) {
736 Py_DECREF(modelobj);
737 flag_error(self);
738 goto finally;
739 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000740 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000741 if (args == NULL) {
742 Py_DECREF(modelobj);
743 flag_error(self);
744 goto finally;
745 }
746 self->in_callback = 1;
747 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000748 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000749 self->in_callback = 0;
750 if (rv == NULL) {
751 flag_error(self);
752 goto finally;
753 }
754 Py_DECREF(rv);
755 }
756 finally:
757 Py_XDECREF(args);
758 XML_FreeContentModel(self->itself, model);
759 return;
760}
Fred Drake85d835f2001-02-08 15:39:08 +0000761
762VOID_HANDLER(AttlistDecl,
763 (void *userData,
764 const XML_Char *elname,
765 const XML_Char *attname,
766 const XML_Char *att_type,
767 const XML_Char *dflt,
768 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000769 ("(NNO&O&i)",
770 string_intern(self, elname), string_intern(self, attname),
Fred Drake85d835f2001-02-08 15:39:08 +0000771 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
772 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000773
#if XML_COMBINED_VERSION >= 19504
/* my_SkippedEntityHandler: calls the handler with
   (entityName, is_parameter_entity).  Only compiled when the Expat
   version check above succeeds. */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName),
              is_parameter_entity))
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000782
Fred Drake71b63ff2002-06-28 22:29:01 +0000783VOID_HANDLER(NotationDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000784 (void *userData,
785 const XML_Char *notationName,
786 const XML_Char *base,
787 const XML_Char *systemId,
788 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000789 ("(NNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000790 string_intern(self, notationName), string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000791 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000792
Fred Drake6f987622000-08-25 18:03:30 +0000793VOID_HANDLER(StartNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000794 (void *userData,
795 const XML_Char *prefix,
796 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000797 ("(NN)",
798 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000799
Fred Drake6f987622000-08-25 18:03:30 +0000800VOID_HANDLER(EndNamespaceDecl,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000801 (void *userData,
802 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000803 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000804
Fred Drake6f987622000-08-25 18:03:30 +0000805VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000806 (void *userData, const XML_Char *data),
807 ("(O&)", STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000808
Fred Drake6f987622000-08-25 18:03:30 +0000809VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000810 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000811 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000812
Fred Drake6f987622000-08-25 18:03:30 +0000813VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000814 (void *userData),
Fred Drake6f987622000-08-25 18:03:30 +0000815 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000816
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000817#ifndef Py_USING_UNICODE
Fred Drake6f987622000-08-25 18:03:30 +0000818VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000819 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000820 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000821
Fred Drake6f987622000-08-25 18:03:30 +0000822VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000823 (void *userData, const XML_Char *s, int len),
Fred Drakeca1f4262000-09-21 20:10:23 +0000824 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000825#else
Fred Drake6f987622000-08-25 18:03:30 +0000826VOID_HANDLER(Default,
Fred Drake71b63ff2002-06-28 22:29:01 +0000827 (void *userData, const XML_Char *s, int len),
828 ("(N)", (self->returns_unicode
829 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000830 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000831
Fred Drake6f987622000-08-25 18:03:30 +0000832VOID_HANDLER(DefaultHandlerExpand,
Fred Drake71b63ff2002-06-28 22:29:01 +0000833 (void *userData, const XML_Char *s, int len),
834 ("(N)", (self->returns_unicode
835 ? conv_string_len_to_unicode(s,len)
Fred Drake6f987622000-08-25 18:03:30 +0000836 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000837#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000838
Fred Drake71b63ff2002-06-28 22:29:01 +0000839INT_HANDLER(NotStandalone,
840 (void *userData),
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000841 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000842
Fred Drake6f987622000-08-25 18:03:30 +0000843RC_HANDLER(int, ExternalEntityRef,
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000844 (XML_Parser parser,
845 const XML_Char *context,
846 const XML_Char *base,
847 const XML_Char *systemId,
848 const XML_Char *publicId),
849 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000850 ("(O&NNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000851 STRING_CONV_FUNC,context, string_intern(self, base),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000852 string_intern(self, systemId), string_intern(self, publicId)),
Fred Drake6f987622000-08-25 18:03:30 +0000853 rc = PyInt_AsLong(rv);, rc,
854 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000855
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000856/* XXX UnknownEncodingHandler */
857
Fred Drake85d835f2001-02-08 15:39:08 +0000858VOID_HANDLER(StartDoctypeDecl,
859 (void *userData, const XML_Char *doctypeName,
860 const XML_Char *sysid, const XML_Char *pubid,
861 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000862 ("(NNNi)", string_intern(self, doctypeName),
863 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000864 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000865
866VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000867
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000868/* ---------------------------------------------------------------- */
869
Fred Drake71b63ff2002-06-28 22:29:01 +0000870static PyObject *
871get_parse_result(xmlparseobject *self, int rv)
872{
873 if (PyErr_Occurred()) {
874 return NULL;
875 }
876 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000877 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000878 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000879 if (flush_character_buffer(self) < 0) {
880 return NULL;
881 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000882 return PyInt_FromLong(rv);
883}
884
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000885PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000886"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000887Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000888
889static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000890xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000891{
Fred Drake0582df92000-07-12 04:49:00 +0000892 char *s;
893 int slen;
894 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000895
Fred Drake0582df92000-07-12 04:49:00 +0000896 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
897 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000898
899 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000900}
901
Fred Drakeca1f4262000-09-21 20:10:23 +0000902/* File reading copied from cPickle */
903
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000904#define BUF_SIZE 2048
905
Fred Drake0582df92000-07-12 04:49:00 +0000906static int
907readinst(char *buf, int buf_size, PyObject *meth)
908{
909 PyObject *arg = NULL;
910 PyObject *bytes = NULL;
911 PyObject *str = NULL;
912 int len = -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000913
Fred Drake676940b2000-09-22 15:21:31 +0000914 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000915 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000916
Fred Drake7b6caff2003-07-21 17:05:56 +0000917 if ((arg = PyTuple_New(1)) == NULL) {
918 Py_DECREF(bytes);
Fred Drake0582df92000-07-12 04:49:00 +0000919 goto finally;
Fred Drake7b6caff2003-07-21 17:05:56 +0000920 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000921
Tim Peters954eef72000-09-22 06:01:11 +0000922 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000923
Martin v. Löwis9171f022004-10-13 19:50:11 +0000924#if PY_VERSION_HEX < 0x02020000
925 str = PyObject_CallObject(meth, arg);
926#else
927 str = PyObject_Call(meth, arg, NULL);
928#endif
929 if (str == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000930 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000931
Fred Drake0582df92000-07-12 04:49:00 +0000932 /* XXX what to do if it returns a Unicode string? */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000933 if (!PyString_Check(str)) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000934 PyErr_Format(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000935 "read() did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000936 Py_TYPE(str)->tp_name);
Fred Drake0582df92000-07-12 04:49:00 +0000937 goto finally;
938 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000939 len = PyString_GET_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000940 if (len > buf_size) {
941 PyErr_Format(PyExc_ValueError,
942 "read() returned too much data: "
943 "%i bytes requested, %i returned",
944 buf_size, len);
Fred Drake0582df92000-07-12 04:49:00 +0000945 goto finally;
946 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000947 memcpy(buf, PyString_AsString(str), len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000948finally:
Fred Drake0582df92000-07-12 04:49:00 +0000949 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000950 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000951 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000952}
953
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000954PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000955"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000956Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000957
958static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +0000959xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000960{
Fred Drake0582df92000-07-12 04:49:00 +0000961 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000962 FILE *fp;
963 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000964
Fred Drake0582df92000-07-12 04:49:00 +0000965 if (PyFile_Check(f)) {
966 fp = PyFile_AsFile(f);
967 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000968 else {
Fred Drake0582df92000-07-12 04:49:00 +0000969 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000970 readmethod = PyObject_GetAttrString(f, "read");
971 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000972 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000973 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000974 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000975 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000976 }
977 }
978 for (;;) {
979 int bytes_read;
980 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000981 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000982 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000983 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000984 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000985
Fred Drake0582df92000-07-12 04:49:00 +0000986 if (fp) {
987 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
988 if (bytes_read < 0) {
989 PyErr_SetFromErrno(PyExc_IOError);
990 return NULL;
991 }
992 }
993 else {
994 bytes_read = readinst(buf, BUF_SIZE, readmethod);
Fred Drake7b6caff2003-07-21 17:05:56 +0000995 if (bytes_read < 0) {
Brett Cannon33e5dd82010-05-04 00:48:11 +0000996 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000997 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000998 }
Fred Drake0582df92000-07-12 04:49:00 +0000999 }
1000 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +00001001 if (PyErr_Occurred()) {
1002 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +00001003 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +00001004 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001005
Fred Drake0582df92000-07-12 04:49:00 +00001006 if (!rv || bytes_read == 0)
1007 break;
1008 }
Fred Drake7b6caff2003-07-21 17:05:56 +00001009 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +00001010 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001011}
1012
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001013PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001014"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001015Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001016
1017static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001018xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1019{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001020 char *base;
1021
Fred Drake0582df92000-07-12 04:49:00 +00001022 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001023 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001024 if (!XML_SetBase(self->itself, base)) {
1025 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001026 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001027 Py_INCREF(Py_None);
1028 return Py_None;
1029}
1030
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001031PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001032"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001033Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001034
1035static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +00001036xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +00001037{
Fred Drake0582df92000-07-12 04:49:00 +00001038 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001039}
1040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001041PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +00001042"GetInputContext() -> string\n\
1043Return the untranslated text of the input that caused the current event.\n\
1044If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001045for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +00001046
1047static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +00001048xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +00001049{
Georg Brandl96a8c392006-05-29 21:04:52 +00001050 if (self->in_callback) {
1051 int offset, size;
1052 const char *buffer
1053 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +00001054
Georg Brandl96a8c392006-05-29 21:04:52 +00001055 if (buffer != NULL)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001056 return PyString_FromStringAndSize(buffer + offset,
Georg Brandl96a8c392006-05-29 21:04:52 +00001057 size - offset);
1058 else
1059 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +00001060 }
Georg Brandl96a8c392006-05-29 21:04:52 +00001061 else
1062 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +00001063}
Fred Drakebd6101c2001-02-14 18:29:45 +00001064
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001065PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001066"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001067Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001068information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001069
1070static PyObject *
1071xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1072{
1073 char *context;
1074 char *encoding = NULL;
1075 xmlparseobject *new_parser;
1076 int i;
1077
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001078 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001079 &context, &encoding)) {
1080 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001081 }
1082
Martin v. Löwis894258c2001-09-23 10:20:10 +00001083#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001084 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001085 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001086#else
1087 /* Python versions 2.2 and later */
1088 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1089#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001090
1091 if (new_parser == NULL)
1092 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001093 new_parser->buffer_size = self->buffer_size;
1094 new_parser->buffer_used = 0;
1095 if (self->buffer != NULL) {
1096 new_parser->buffer = malloc(new_parser->buffer_size);
1097 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001098#ifndef Py_TPFLAGS_HAVE_GC
1099 /* Code for versions 2.0 and 2.1 */
1100 PyObject_Del(new_parser);
1101#else
1102 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001103 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001104#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001105 return PyErr_NoMemory();
1106 }
1107 }
1108 else
1109 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001110 new_parser->returns_unicode = self->returns_unicode;
1111 new_parser->ordered_attributes = self->ordered_attributes;
1112 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001113 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001114 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001115 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001116 encoding);
1117 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001118 new_parser->intern = self->intern;
1119 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001120#ifdef Py_TPFLAGS_HAVE_GC
1121 PyObject_GC_Track(new_parser);
1122#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001123 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001124#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001125
1126 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001127 Py_DECREF(new_parser);
1128 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001129 }
1130
1131 XML_SetUserData(new_parser->itself, (void *)new_parser);
1132
1133 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001134 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001135 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001136
Fred Drake2a3d7db2002-06-28 22:56:48 +00001137 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001138 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001139 Py_DECREF(new_parser);
1140 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001141 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001142 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001143
1144 /* then copy handlers from self */
1145 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001146 PyObject *handler = self->handlers[i];
1147 if (handler != NULL) {
1148 Py_INCREF(handler);
1149 new_parser->handlers[i] = handler;
1150 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001151 handler_info[i].handler);
1152 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001153 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001154 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001155}
1156
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001157PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001158"SetParamEntityParsing(flag) -> success\n\
1159Controls parsing of parameter entities (including the external DTD\n\
1160subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1161XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1162XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001163was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001164
1165static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001166xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001167{
Fred Drake85d835f2001-02-08 15:39:08 +00001168 int flag;
1169 if (!PyArg_ParseTuple(args, "i", &flag))
1170 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001171 flag = XML_SetParamEntityParsing(p->itself, flag);
Fred Drake85d835f2001-02-08 15:39:08 +00001172 return PyInt_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001173}
1174
Martin v. Löwisc847f402003-01-21 11:09:21 +00001175
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): call XML_UseForeignDTD() with the truth value of
 * `flag` (XML_TRUE when omitted).
 *
 * Returns None on success.  Returns NULL with an exception set when the
 * argument tuple is malformed, when evaluating the truth of `flag` raises,
 * or -- via set_error() -- when Expat reports an error code.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
        return NULL;

    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 when the truth test itself raises;
           that must propagate rather than count as "true". */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }

    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001203
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001204static struct PyMethodDef xmlparse_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001205 {"Parse", (PyCFunction)xmlparse_Parse,
Fred Drakebd6101c2001-02-14 18:29:45 +00001206 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001207 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Georg Brandl96a8c392006-05-29 21:04:52 +00001208 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001209 {"SetBase", (PyCFunction)xmlparse_SetBase,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001210 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001211 {"GetBase", (PyCFunction)xmlparse_GetBase,
Georg Brandl96a8c392006-05-29 21:04:52 +00001212 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001213 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Martin v. Löwis069dde22003-01-21 10:58:18 +00001214 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001215 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
1216 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001217 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Georg Brandl96a8c392006-05-29 21:04:52 +00001218 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001219#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001220 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
1221 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001222#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001223 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001224};
1225
1226/* ---------- */
1227
1228
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001229#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001230
Fred Drake71b63ff2002-06-28 22:29:01 +00001231/* pyexpat international encoding support.
1232 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001233*/
1234
Martin v. Löwis3af7cc02001-01-22 08:19:10 +00001235static char template_buffer[257];
Fred Drakebb66a202001-03-01 20:48:17 +00001236PyObject *template_string = NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001237
Fred Drake71b63ff2002-06-28 22:29:01 +00001238static void
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001239init_template_buffer(void)
1240{
1241 int i;
Fred Drakebb66a202001-03-01 20:48:17 +00001242 for (i = 0; i < 256; i++) {
1243 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001244 }
Fred Drakebb66a202001-03-01 20:48:17 +00001245 template_buffer[256] = 0;
Tim Peters63cb99e2001-02-17 18:12:50 +00001246}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001247
Fred Drake71b63ff2002-06-28 22:29:01 +00001248static int
1249PyUnknownEncodingHandler(void *encodingHandlerData,
1250 const XML_Char *name,
1251 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001252{
Fred Drakebb66a202001-03-01 20:48:17 +00001253 PyUnicodeObject *_u_string = NULL;
1254 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001255 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001256
Fred Drakebb66a202001-03-01 20:48:17 +00001257 /* Yes, supports only 8bit encodings */
1258 _u_string = (PyUnicodeObject *)
1259 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001260
Fred Drakebb66a202001-03-01 20:48:17 +00001261 if (_u_string == NULL)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001262 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001263
Fred Drakebb66a202001-03-01 20:48:17 +00001264 for (i = 0; i < 256; i++) {
1265 /* Stupid to access directly, but fast */
1266 Py_UNICODE c = _u_string->str[i];
1267 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001268 info->map[i] = -1;
Fred Drakebb66a202001-03-01 20:48:17 +00001269 else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001270 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001271 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001272 info->data = NULL;
1273 info->convert = NULL;
1274 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001275 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001276 Py_DECREF(_u_string);
1277 return result;
1278}
1279
1280#endif
1281
1282static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001283newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001284{
1285 int i;
1286 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001287
Martin v. Löwis894258c2001-09-23 10:20:10 +00001288#ifdef Py_TPFLAGS_HAVE_GC
1289 /* Code for versions 2.2 and later */
1290 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1291#else
Fred Drake0582df92000-07-12 04:49:00 +00001292 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001293#endif
Fred Drake0582df92000-07-12 04:49:00 +00001294 if (self == NULL)
1295 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001296
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001297#ifdef Py_USING_UNICODE
Fred Drake0582df92000-07-12 04:49:00 +00001298 self->returns_unicode = 1;
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001299#else
1300 self->returns_unicode = 0;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001301#endif
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001302
Fred Drake2a3d7db2002-06-28 22:56:48 +00001303 self->buffer = NULL;
1304 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1305 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001306 self->ordered_attributes = 0;
1307 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001308 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001309 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001310 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001311 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001312 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1313 }
Fred Drake85d835f2001-02-08 15:39:08 +00001314 else {
Fred Drake0582df92000-07-12 04:49:00 +00001315 self->itself = XML_ParserCreate(encoding);
1316 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001317 self->intern = intern;
1318 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001319#ifdef Py_TPFLAGS_HAVE_GC
1320 PyObject_GC_Track(self);
1321#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001322 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001323#endif
Fred Drake0582df92000-07-12 04:49:00 +00001324 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001325 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001326 "XML_ParserCreate failed");
1327 Py_DECREF(self);
1328 return NULL;
1329 }
1330 XML_SetUserData(self->itself, (void *)self);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001331#ifdef Py_USING_UNICODE
Fred Drake7c75bf22002-07-01 14:02:31 +00001332 XML_SetUnknownEncodingHandler(self->itself,
1333 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001334#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001335
Fred Drake2a3d7db2002-06-28 22:56:48 +00001336 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001337 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001338
Fred Drake7c75bf22002-07-01 14:02:31 +00001339 self->handlers = malloc(sizeof(PyObject *) * i);
1340 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001341 Py_DECREF(self);
1342 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001343 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001344 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001345
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001346 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001347}
1348
1349
1350static void
Fred Drake0582df92000-07-12 04:49:00 +00001351xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001352{
Fred Drake0582df92000-07-12 04:49:00 +00001353 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001354#ifdef Py_TPFLAGS_HAVE_GC
1355 PyObject_GC_UnTrack(self);
1356#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001357 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001358#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001359 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001360 XML_ParserFree(self->itself);
1361 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001362
Fred Drake85d835f2001-02-08 15:39:08 +00001363 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001364 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001365 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001366 temp = self->handlers[i];
1367 self->handlers[i] = NULL;
1368 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001369 }
1370 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001371 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001372 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001373 if (self->buffer != NULL) {
1374 free(self->buffer);
1375 self->buffer = NULL;
1376 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001377 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001378#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001379 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001380 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001381#else
1382 /* Code for versions 2.2 and later. */
1383 PyObject_GC_Del(self);
1384#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001385}
1386
Fred Drake0582df92000-07-12 04:49:00 +00001387static int
1388handlername2int(const char *name)
1389{
1390 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001391 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001392 if (strcmp(name, handler_info[i].name) == 0) {
1393 return i;
1394 }
1395 }
1396 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001397}
1398
1399static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001400get_pybool(int istrue)
1401{
1402 PyObject *result = istrue ? Py_True : Py_False;
1403 Py_INCREF(result);
1404 return result;
1405}
1406
1407static PyObject *
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001408xmlparse_getattr(xmlparseobject *self, char *name)
1409{
Fred Drake71b63ff2002-06-28 22:29:01 +00001410 int handlernum = handlername2int(name);
1411
1412 if (handlernum != -1) {
1413 PyObject *result = self->handlers[handlernum];
1414 if (result == NULL)
1415 result = Py_None;
1416 Py_INCREF(result);
1417 return result;
1418 }
1419 if (name[0] == 'E') {
1420 if (strcmp(name, "ErrorCode") == 0)
1421 return PyInt_FromLong((long)
1422 XML_GetErrorCode(self->itself));
1423 if (strcmp(name, "ErrorLineNumber") == 0)
1424 return PyInt_FromLong((long)
1425 XML_GetErrorLineNumber(self->itself));
1426 if (strcmp(name, "ErrorColumnNumber") == 0)
1427 return PyInt_FromLong((long)
1428 XML_GetErrorColumnNumber(self->itself));
1429 if (strcmp(name, "ErrorByteIndex") == 0)
1430 return PyInt_FromLong((long)
1431 XML_GetErrorByteIndex(self->itself));
1432 }
Dave Cole3203efb2004-08-26 00:37:31 +00001433 if (name[0] == 'C') {
1434 if (strcmp(name, "CurrentLineNumber") == 0)
1435 return PyInt_FromLong((long)
1436 XML_GetCurrentLineNumber(self->itself));
1437 if (strcmp(name, "CurrentColumnNumber") == 0)
1438 return PyInt_FromLong((long)
1439 XML_GetCurrentColumnNumber(self->itself));
1440 if (strcmp(name, "CurrentByteIndex") == 0)
1441 return PyInt_FromLong((long)
1442 XML_GetCurrentByteIndex(self->itself));
1443 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001444 if (name[0] == 'b') {
1445 if (strcmp(name, "buffer_size") == 0)
1446 return PyInt_FromLong((long) self->buffer_size);
1447 if (strcmp(name, "buffer_text") == 0)
1448 return get_pybool(self->buffer != NULL);
1449 if (strcmp(name, "buffer_used") == 0)
1450 return PyInt_FromLong((long) self->buffer_used);
1451 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001452 if (strcmp(name, "namespace_prefixes") == 0)
1453 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001454 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001455 return get_pybool(self->ordered_attributes);
Fred Drake0582df92000-07-12 04:49:00 +00001456 if (strcmp(name, "returns_unicode") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001457 return get_pybool((long) self->returns_unicode);
Fred Drake85d835f2001-02-08 15:39:08 +00001458 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001459 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001460 if (strcmp(name, "intern") == 0) {
1461 if (self->intern == NULL) {
1462 Py_INCREF(Py_None);
1463 return Py_None;
1464 }
1465 else {
1466 Py_INCREF(self->intern);
1467 return self->intern;
1468 }
1469 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001470
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001471#define APPEND(list, str) \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001472 do { \
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001473 PyObject *o = PyString_FromString(str); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001474 if (o != NULL) \
1475 PyList_Append(list, o); \
1476 Py_XDECREF(o); \
1477 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001478
Fred Drake0582df92000-07-12 04:49:00 +00001479 if (strcmp(name, "__members__") == 0) {
1480 int i;
1481 PyObject *rc = PyList_New(0);
Georg Brandl5c170fd2006-03-17 19:03:25 +00001482 if (!rc)
1483 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001484 for (i = 0; handler_info[i].name != NULL; i++) {
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001485 PyObject *o = get_handler_name(&handler_info[i]);
1486 if (o != NULL)
1487 PyList_Append(rc, o);
1488 Py_XDECREF(o);
Fred Drake0582df92000-07-12 04:49:00 +00001489 }
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001490 APPEND(rc, "ErrorCode");
1491 APPEND(rc, "ErrorLineNumber");
1492 APPEND(rc, "ErrorColumnNumber");
1493 APPEND(rc, "ErrorByteIndex");
Dave Cole3203efb2004-08-26 00:37:31 +00001494 APPEND(rc, "CurrentLineNumber");
1495 APPEND(rc, "CurrentColumnNumber");
1496 APPEND(rc, "CurrentByteIndex");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001497 APPEND(rc, "buffer_size");
1498 APPEND(rc, "buffer_text");
1499 APPEND(rc, "buffer_used");
Martin v. Löwis069dde22003-01-21 10:58:18 +00001500 APPEND(rc, "namespace_prefixes");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001501 APPEND(rc, "ordered_attributes");
1502 APPEND(rc, "returns_unicode");
1503 APPEND(rc, "specified_attributes");
1504 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001505
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001506#undef APPEND
Fred Drake0582df92000-07-12 04:49:00 +00001507 return rc;
1508 }
1509 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001510}
1511
Fred Drake6f987622000-08-25 18:03:30 +00001512static int
1513sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001514{
1515 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001516 if (handlernum >= 0) {
1517 xmlhandler c_handler = NULL;
1518 PyObject *temp = self->handlers[handlernum];
1519
Fred Drake24a0f412006-07-06 05:13:22 +00001520 if (v == Py_None) {
1521 /* If this is the character data handler, and a character
1522 data handler is already active, we need to be more
1523 careful. What we can safely do is replace the existing
1524 character data handler callback function with a no-op
1525 function that will refuse to call Python. The downside
1526 is that this doesn't completely remove the character
1527 data handler from the C layer if there's any callback
1528 active, so Expat does a little more work than it
1529 otherwise would, but that's really an odd case. A more
1530 elaborate system of handlers and state could remove the
1531 C handler more effectively. */
1532 if (handlernum == CharacterData && self->in_callback)
1533 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001534 v = NULL;
Fred Drake24a0f412006-07-06 05:13:22 +00001535 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001536 else if (v != NULL) {
1537 Py_INCREF(v);
1538 c_handler = handler_info[handlernum].handler;
1539 }
Fred Drake0582df92000-07-12 04:49:00 +00001540 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001541 Py_XDECREF(temp);
1542 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001543 return 1;
1544 }
1545 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001546}
1547
1548static int
Fred Drake6f987622000-08-25 18:03:30 +00001549xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001550{
Fred Drake6f987622000-08-25 18:03:30 +00001551 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001552 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001553 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1554 return -1;
1555 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001556 if (strcmp(name, "buffer_text") == 0) {
1557 if (PyObject_IsTrue(v)) {
1558 if (self->buffer == NULL) {
1559 self->buffer = malloc(self->buffer_size);
1560 if (self->buffer == NULL) {
1561 PyErr_NoMemory();
1562 return -1;
1563 }
1564 self->buffer_used = 0;
1565 }
1566 }
1567 else if (self->buffer != NULL) {
1568 if (flush_character_buffer(self) < 0)
1569 return -1;
1570 free(self->buffer);
1571 self->buffer = NULL;
1572 }
1573 return 0;
1574 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001575 if (strcmp(name, "namespace_prefixes") == 0) {
1576 if (PyObject_IsTrue(v))
1577 self->ns_prefixes = 1;
1578 else
1579 self->ns_prefixes = 0;
1580 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1581 return 0;
1582 }
Fred Drake85d835f2001-02-08 15:39:08 +00001583 if (strcmp(name, "ordered_attributes") == 0) {
1584 if (PyObject_IsTrue(v))
1585 self->ordered_attributes = 1;
1586 else
1587 self->ordered_attributes = 0;
1588 return 0;
1589 }
Fred Drake6f987622000-08-25 18:03:30 +00001590 if (strcmp(name, "returns_unicode") == 0) {
Fred Drake85d835f2001-02-08 15:39:08 +00001591 if (PyObject_IsTrue(v)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001592#ifndef Py_USING_UNICODE
Fred Drake71b63ff2002-06-28 22:29:01 +00001593 PyErr_SetString(PyExc_ValueError,
1594 "Unicode support not available");
Fred Drake6f987622000-08-25 18:03:30 +00001595 return -1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001596#else
Fred Drake6f987622000-08-25 18:03:30 +00001597 self->returns_unicode = 1;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001598#endif
Fred Drake6f987622000-08-25 18:03:30 +00001599 }
1600 else
1601 self->returns_unicode = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001602 return 0;
1603 }
1604 if (strcmp(name, "specified_attributes") == 0) {
1605 if (PyObject_IsTrue(v))
1606 self->specified_attributes = 1;
1607 else
1608 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001609 return 0;
1610 }
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001611
1612 if (strcmp(name, "buffer_size") == 0) {
1613 long new_buffer_size;
1614 if (!PyInt_Check(v)) {
1615 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1616 return -1;
1617 }
1618
1619 new_buffer_size=PyInt_AS_LONG(v);
1620 /* trivial case -- no change */
1621 if (new_buffer_size == self->buffer_size) {
1622 return 0;
1623 }
1624
1625 if (new_buffer_size <= 0) {
1626 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1627 return -1;
1628 }
1629
1630 /* check maximum */
1631 if (new_buffer_size > INT_MAX) {
1632 char errmsg[100];
1633 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1634 PyErr_SetString(PyExc_ValueError, errmsg);
1635 return -1;
1636 }
1637
1638 if (self->buffer != NULL) {
1639 /* there is already a buffer */
1640 if (self->buffer_used != 0) {
1641 flush_character_buffer(self);
1642 }
1643 /* free existing buffer */
1644 free(self->buffer);
1645 }
1646 self->buffer = malloc(new_buffer_size);
1647 if (self->buffer == NULL) {
1648 PyErr_NoMemory();
1649 return -1;
1650 }
1651 self->buffer_size = new_buffer_size;
1652 return 0;
1653 }
1654
Fred Drake2a3d7db2002-06-28 22:56:48 +00001655 if (strcmp(name, "CharacterDataHandler") == 0) {
1656 /* If we're changing the character data handler, flush all
1657 * cached data with the old handler. Not sure there's a
1658 * "right" thing to do, though, but this probably won't
1659 * happen.
1660 */
1661 if (flush_character_buffer(self) < 0)
1662 return -1;
1663 }
Fred Drake6f987622000-08-25 18:03:30 +00001664 if (sethandler(self, name, v)) {
1665 return 0;
1666 }
1667 PyErr_SetString(PyExc_AttributeError, name);
1668 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001669}
1670
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001671#ifdef WITH_CYCLE_GC
1672static int
1673xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1674{
Neal Norwitz035b1852006-04-16 00:02:59 +00001675 int i;
Thomas Woutersc6e55062006-04-15 21:47:09 +00001676 for (i = 0; handler_info[i].name != NULL; i++)
1677 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001678 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001679}
1680
1681static int
1682xmlparse_clear(xmlparseobject *op)
1683{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001684 clear_handlers(op, 0);
Thomas Woutersedf17d82006-04-15 17:28:34 +00001685 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001686 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001687}
1688#endif
1689
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001690PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001691
1692static PyTypeObject Xmlparsetype = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001693 PyVarObject_HEAD_INIT(NULL, 0)
Guido van Rossum14648392001-12-08 18:02:58 +00001694 "pyexpat.xmlparser", /*tp_name*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001695 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001696 0, /*tp_itemsize*/
1697 /* methods */
1698 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1699 (printfunc)0, /*tp_print*/
1700 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1701 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1702 (cmpfunc)0, /*tp_compare*/
1703 (reprfunc)0, /*tp_repr*/
1704 0, /*tp_as_number*/
1705 0, /*tp_as_sequence*/
1706 0, /*tp_as_mapping*/
1707 (hashfunc)0, /*tp_hash*/
1708 (ternaryfunc)0, /*tp_call*/
1709 (reprfunc)0, /*tp_str*/
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001710 0, /* tp_getattro */
1711 0, /* tp_setattro */
1712 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001713#ifdef Py_TPFLAGS_HAVE_GC
Fred Drake71b63ff2002-06-28 22:29:01 +00001714 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001715#else
Fred Drake71b63ff2002-06-28 22:29:01 +00001716 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001717#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001718 Xmlparsetype__doc__, /* tp_doc - Documentation string */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001719#ifdef WITH_CYCLE_GC
1720 (traverseproc)xmlparse_traverse, /* tp_traverse */
1721 (inquiry)xmlparse_clear /* tp_clear */
1722#else
1723 0, 0
1724#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001725};
1726
1727/* End of code for xmlparser objects */
1728/* -------------------------------------------------------- */
1729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001730PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001731"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001732Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001733
1734static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001735pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1736{
Fred Drakecde79132001-04-25 16:01:30 +00001737 char *encoding = NULL;
1738 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001739 PyObject *intern = NULL;
1740 PyObject *result;
1741 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001742 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001743 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001744
Fred Drakeb91a36b2002-06-27 19:40:48 +00001745 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1746 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001747 return NULL;
1748 if (namespace_separator != NULL
1749 && strlen(namespace_separator) > 1) {
1750 PyErr_SetString(PyExc_ValueError,
1751 "namespace_separator must be at most one"
1752 " character, omitted, or None");
1753 return NULL;
1754 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001755 /* Explicitly passing None means no interning is desired.
1756 Not passing anything means that a new dictionary is used. */
1757 if (intern == Py_None)
1758 intern = NULL;
1759 else if (intern == NULL) {
1760 intern = PyDict_New();
1761 if (!intern)
1762 return NULL;
1763 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001764 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001765 else if (!PyDict_Check(intern)) {
1766 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1767 return NULL;
1768 }
1769
1770 result = newxmlparseobject(encoding, namespace_separator, intern);
1771 if (intern_decref) {
1772 Py_DECREF(intern);
1773 }
1774 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001775}
1776
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001777PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001778"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001779Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001780
1781static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001782pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001783{
Fred Drake0582df92000-07-12 04:49:00 +00001784 long code = 0;
1785
1786 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1787 return NULL;
1788 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001789}
1790
1791/* List of methods defined in the module */
1792
1793static struct PyMethodDef pyexpat_methods[] = {
Fred Drake0582df92000-07-12 04:49:00 +00001794 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
1795 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
1796 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1797 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001798
Fred Drake0582df92000-07-12 04:49:00 +00001799 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001800};
1801
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001802/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001803
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001804PyDoc_STRVAR(pyexpat_module_documentation,
1805"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001806
Fred Drake4113b132001-03-24 19:58:26 +00001807/* Return a Python string that represents the version number without the
1808 * extra cruft added by revision control, even if the right options were
1809 * given to the "cvs export" command to make it not include the extra
1810 * cruft.
1811 */
1812static PyObject *
1813get_version_string(void)
1814{
1815 static char *rcsid = "$Revision$";
1816 char *rev = rcsid;
1817 int i = 0;
1818
Neal Norwitz30b5c5d2005-12-19 06:05:18 +00001819 while (!isdigit(Py_CHARMASK(*rev)))
Fred Drake4113b132001-03-24 19:58:26 +00001820 ++rev;
1821 while (rev[i] != ' ' && rev[i] != '\0')
1822 ++i;
1823
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001824 return PyString_FromStringAndSize(rev, i);
Fred Drake4113b132001-03-24 19:58:26 +00001825}
1826
Fred Drakecde79132001-04-25 16:01:30 +00001827/* Initialization function for the module */
1828
1829#ifndef MODULE_NAME
1830#define MODULE_NAME "pyexpat"
1831#endif
1832
1833#ifndef MODULE_INITFUNC
1834#define MODULE_INITFUNC initpyexpat
1835#endif
1836
Martin v. Löwis069dde22003-01-21 10:58:18 +00001837#ifndef PyMODINIT_FUNC
1838# ifdef MS_WINDOWS
1839# define PyMODINIT_FUNC __declspec(dllexport) void
1840# else
1841# define PyMODINIT_FUNC void
1842# endif
1843#endif
1844
Mark Hammond8235ea12002-07-19 06:55:41 +00001845PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001846
Martin v. Löwis069dde22003-01-21 10:58:18 +00001847PyMODINIT_FUNC
1848MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001849{
1850 PyObject *m, *d;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001851 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001852 PyObject *errors_module;
1853 PyObject *modelmod_name;
1854 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001855 PyObject *sys_modules;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001856 static struct PyExpat_CAPI capi;
1857 PyObject* capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001858
Fred Drake6f987622000-08-25 18:03:30 +00001859 if (errmod_name == NULL)
1860 return;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001861 modelmod_name = PyString_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001862 if (modelmod_name == NULL)
1863 return;
Fred Drake6f987622000-08-25 18:03:30 +00001864
Christian Heimese93237d2007-12-19 02:37:44 +00001865 Py_TYPE(&Xmlparsetype) = &PyType_Type;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001866
Fred Drake0582df92000-07-12 04:49:00 +00001867 /* Create the module and add the functions */
Fred Drakecde79132001-04-25 16:01:30 +00001868 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
Fred Drake85d835f2001-02-08 15:39:08 +00001869 pyexpat_module_documentation);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001870 if (m == NULL)
1871 return;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001872
Fred Drake0582df92000-07-12 04:49:00 +00001873 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001874 if (ErrorObject == NULL) {
1875 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001876 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001877 if (ErrorObject == NULL)
1878 return;
1879 }
1880 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001881 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001882 Py_INCREF(ErrorObject);
1883 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001884 Py_INCREF(&Xmlparsetype);
1885 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001886
Fred Drake4113b132001-03-24 19:58:26 +00001887 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001888 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1889 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001890 {
1891 XML_Expat_Version info = XML_ExpatVersionInfo();
1892 PyModule_AddObject(m, "version_info",
1893 Py_BuildValue("(iii)", info.major,
1894 info.minor, info.micro));
1895 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001896#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001897 init_template_buffer();
1898#endif
Fred Drake0582df92000-07-12 04:49:00 +00001899 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001900 compiled, this should check and set native_encoding
1901 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001902 */
Fred Drake93adb692000-09-23 04:55:48 +00001903 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001904
Fred Drake85d835f2001-02-08 15:39:08 +00001905 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001906 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001907 errors_module = PyDict_GetItem(d, errmod_name);
1908 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001909 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001910 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001911 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001912 /* gives away the reference to errors_module */
1913 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001914 }
1915 }
Fred Drake6f987622000-08-25 18:03:30 +00001916 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001917 model_module = PyDict_GetItem(d, modelmod_name);
1918 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001919 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001920 if (model_module != NULL) {
1921 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1922 /* gives away the reference to model_module */
1923 PyModule_AddObject(m, "model", model_module);
1924 }
1925 }
1926 Py_DECREF(modelmod_name);
1927 if (errors_module == NULL || model_module == NULL)
1928 /* Don't core dump later! */
Fred Drake6f987622000-08-25 18:03:30 +00001929 return;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001930
Martin v. Löwisc847f402003-01-21 11:09:21 +00001931#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001932 {
1933 const XML_Feature *features = XML_GetFeatureList();
1934 PyObject *list = PyList_New(0);
1935 if (list == NULL)
1936 /* just ignore it */
1937 PyErr_Clear();
1938 else {
1939 int i = 0;
1940 for (; features[i].feature != XML_FEATURE_END; ++i) {
1941 int ok;
1942 PyObject *item = Py_BuildValue("si", features[i].name,
1943 features[i].value);
1944 if (item == NULL) {
1945 Py_DECREF(list);
1946 list = NULL;
1947 break;
1948 }
1949 ok = PyList_Append(list, item);
1950 Py_DECREF(item);
1951 if (ok < 0) {
1952 PyErr_Clear();
1953 break;
1954 }
1955 }
1956 if (list != NULL)
1957 PyModule_AddObject(m, "features", list);
1958 }
1959 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001960#endif
Fred Drake6f987622000-08-25 18:03:30 +00001961
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001962#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001963 PyModule_AddStringConstant(errors_module, #name, \
1964 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001965
Fred Drake0582df92000-07-12 04:49:00 +00001966 MYCONST(XML_ERROR_NO_MEMORY);
1967 MYCONST(XML_ERROR_SYNTAX);
1968 MYCONST(XML_ERROR_NO_ELEMENTS);
1969 MYCONST(XML_ERROR_INVALID_TOKEN);
1970 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1971 MYCONST(XML_ERROR_PARTIAL_CHAR);
1972 MYCONST(XML_ERROR_TAG_MISMATCH);
1973 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1974 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1975 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1976 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1977 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1978 MYCONST(XML_ERROR_ASYNC_ENTITY);
1979 MYCONST(XML_ERROR_BAD_CHAR_REF);
1980 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1981 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1982 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1983 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1984 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001985 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1986 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1987 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001988 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1989 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1990 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1991 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1992 /* Added in Expat 1.95.7. */
1993 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1994 /* Added in Expat 1.95.8. */
1995 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1996 MYCONST(XML_ERROR_INCOMPLETE_PE);
1997 MYCONST(XML_ERROR_XML_DECL);
1998 MYCONST(XML_ERROR_TEXT_DECL);
1999 MYCONST(XML_ERROR_PUBLICID);
2000 MYCONST(XML_ERROR_SUSPENDED);
2001 MYCONST(XML_ERROR_NOT_SUSPENDED);
2002 MYCONST(XML_ERROR_ABORTED);
2003 MYCONST(XML_ERROR_FINISHED);
2004 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002005
Fred Drake85d835f2001-02-08 15:39:08 +00002006 PyModule_AddStringConstant(errors_module, "__doc__",
2007 "Constants used to describe error conditions.");
2008
Fred Drake93adb692000-09-23 04:55:48 +00002009#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002010
Fred Drake85d835f2001-02-08 15:39:08 +00002011#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002012 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
2013 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2014 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00002015#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002016
Fred Drake85d835f2001-02-08 15:39:08 +00002017#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2018 PyModule_AddStringConstant(model_module, "__doc__",
2019 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002020
Fred Drake85d835f2001-02-08 15:39:08 +00002021 MYCONST(XML_CTYPE_EMPTY);
2022 MYCONST(XML_CTYPE_ANY);
2023 MYCONST(XML_CTYPE_MIXED);
2024 MYCONST(XML_CTYPE_NAME);
2025 MYCONST(XML_CTYPE_CHOICE);
2026 MYCONST(XML_CTYPE_SEQ);
2027
2028 MYCONST(XML_CQUANT_NONE);
2029 MYCONST(XML_CQUANT_OPT);
2030 MYCONST(XML_CQUANT_REP);
2031 MYCONST(XML_CQUANT_PLUS);
2032#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00002033
2034 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002035 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00002036 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002037 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2038 capi.MINOR_VERSION = XML_MINOR_VERSION;
2039 capi.MICRO_VERSION = XML_MICRO_VERSION;
2040 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00002041 capi.GetErrorCode = XML_GetErrorCode;
2042 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2043 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002044 capi.Parse = XML_Parse;
2045 capi.ParserCreate_MM = XML_ParserCreate_MM;
2046 capi.ParserFree = XML_ParserFree;
2047 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2048 capi.SetCommentHandler = XML_SetCommentHandler;
2049 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2050 capi.SetElementHandler = XML_SetElementHandler;
2051 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2052 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2053 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2054 capi.SetUserData = XML_SetUserData;
Fredrik Lundhc3345042005-12-13 19:49:55 +00002055
Larry Hastings402b73f2010-03-25 00:54:54 +00002056 /* export using capsule */
2057 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002058 if (capi_object)
2059 PyModule_AddObject(m, "expat_CAPI", capi_object);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002060}
2061
Fred Drake6f987622000-08-25 18:03:30 +00002062static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002063clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00002064{
Fred Drakecde79132001-04-25 16:01:30 +00002065 int i = 0;
2066 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002067
Fred Drake71b63ff2002-06-28 22:29:01 +00002068 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002069 if (initial)
Fred Drake71b63ff2002-06-28 22:29:01 +00002070 self->handlers[i] = NULL;
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002071 else {
Fred Drakecde79132001-04-25 16:01:30 +00002072 temp = self->handlers[i];
2073 self->handlers[i] = NULL;
2074 Py_XDECREF(temp);
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002075 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002076 }
Fred Drakecde79132001-04-25 16:01:30 +00002077 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002078}
2079
Tim Peters0c322792002-07-17 16:49:03 +00002080static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00002081 {"StartElementHandler",
2082 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002083 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002084 {"EndElementHandler",
2085 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002086 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002087 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002088 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2089 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002090 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002091 (xmlhandlersetter)XML_SetCharacterDataHandler,
2092 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002093 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002094 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002095 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002096 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002097 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002098 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002099 {"StartNamespaceDeclHandler",
2100 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002101 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002102 {"EndNamespaceDeclHandler",
2103 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002104 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002105 {"CommentHandler",
2106 (xmlhandlersetter)XML_SetCommentHandler,
2107 (xmlhandler)my_CommentHandler},
2108 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002109 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002110 (xmlhandler)my_StartCdataSectionHandler},
2111 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002112 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002113 (xmlhandler)my_EndCdataSectionHandler},
2114 {"DefaultHandler",
2115 (xmlhandlersetter)XML_SetDefaultHandler,
2116 (xmlhandler)my_DefaultHandler},
2117 {"DefaultHandlerExpand",
2118 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2119 (xmlhandler)my_DefaultHandlerExpandHandler},
2120 {"NotStandaloneHandler",
2121 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2122 (xmlhandler)my_NotStandaloneHandler},
2123 {"ExternalEntityRefHandler",
2124 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002125 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002126 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002127 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002128 (xmlhandler)my_StartDoctypeDeclHandler},
2129 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002130 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002131 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002132 {"EntityDeclHandler",
2133 (xmlhandlersetter)XML_SetEntityDeclHandler,
2134 (xmlhandler)my_EntityDeclHandler},
2135 {"XmlDeclHandler",
2136 (xmlhandlersetter)XML_SetXmlDeclHandler,
2137 (xmlhandler)my_XmlDeclHandler},
2138 {"ElementDeclHandler",
2139 (xmlhandlersetter)XML_SetElementDeclHandler,
2140 (xmlhandler)my_ElementDeclHandler},
2141 {"AttlistDeclHandler",
2142 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2143 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002144#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002145 {"SkippedEntityHandler",
2146 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2147 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002148#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002149
Fred Drake0582df92000-07-12 04:49:00 +00002150 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002151};