blob: cddbdbe1b1ec37d624a30b02d13f2f82003d499f [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000011#ifndef PyDoc_STRVAR
Martin v. Löwis069dde22003-01-21 10:58:18 +000012
13/*
14 * fdrake says:
15 * Don't change the PyDoc_STR macro definition to (str), because
16 * '''the parentheses cause compile failures
17 * ("non-constant static initializer" or something like that)
18 * on some platforms (Irix?)'''
19 */
Fred Drakef57b22a2002-09-02 15:54:06 +000020#define PyDoc_STR(str) str
Fred Drake7c75bf22002-07-01 14:02:31 +000021#define PyDoc_VAR(name) static char name[]
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000022#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000023#endif
24
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +000025#if (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 2)
26/* In Python 2.0 and 2.1, disabling Unicode was not possible. */
Martin v. Löwis339d0f72001-08-17 18:39:25 +000027#define Py_USING_UNICODE
Jeremy Hylton9263f572003-06-27 16:13:17 +000028#else
29#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000030#endif
31
/* Identifiers for the supported Expat callbacks.  The enumerators are
 * used as indices into each parser's `handlers` array and into the
 * file-level `handler_info` table.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    /* NOTE(review): presumably a trailing sentinel -- confirm with users. */
    _DummyDecl
};
59
60static PyObject *ErrorObject;
61
62/* ----------------------------------------------------- */
63
64/* Declarations for objects of type xmlparser */
65
/* Instance layout of an xmlparser object: the wrapped Expat parser plus
 * the per-parser options and state consulted by the callbacks below.
 */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The Expat parser handed to XML_* calls */
    int returns_unicode;        /* True if Unicode strings are returned;
                                   if false, UTF-8 strings are returned */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Indexed by enum HandlerTypes; a NULL
                                   entry means no handler is installed */
} xmlparseobject;
83
Fred Drake2a3d7db2002-06-28 22:56:48 +000084#define CHARACTER_DATA_BUFFER_SIZE 8192
85
Jeremy Hylton938ace62002-07-17 16:30:39 +000086static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000087
Fred Drake117ac852002-09-24 16:24:54 +000088typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000089typedef void* xmlhandler;
90
/* Static description of one handler slot (see handler_info[]). */
struct HandlerInfo {
    const char *name;           /* C string used to build nameobj */
    xmlhandlersetter setter;    /* NOTE(review): presumably the Expat
                                   XML_Set*Handler function -- confirm */
    xmlhandler handler;         /* NOTE(review): presumably the my_*Handler
                                   trampoline for this slot -- confirm */
    PyCodeObject *tb_code;      /* Code object created lazily by getcode() */
    PyObject *nameobj;          /* Python string for `name`, created lazily
                                   by get_handler_name() */
};
98
Jeremy Hylton938ace62002-07-17 16:30:39 +000099static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000100
Fred Drakebd6101c2001-02-14 18:29:45 +0000101/* Set an integer attribute on the error object; return true on success,
102 * false on an exception.
103 */
104static int
105set_error_attr(PyObject *err, char *name, int value)
106{
107 PyObject *v = PyInt_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +0000108
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000109 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
110 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000111 return 0;
112 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000113 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +0000114 return 1;
115}
116
117/* Build and set an Expat exception, including positioning
118 * information. Always returns NULL.
119 */
Fred Drake85d835f2001-02-08 15:39:08 +0000120static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000121set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000122{
123 PyObject *err;
124 char buffer[256];
125 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000126 int lineno = XML_GetErrorLineNumber(parser);
127 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000128
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000129 /* There is no risk of overflowing this buffer, since
130 even for 64-bit integers, there is sufficient space. */
131 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000132 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000133 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000134 if ( err != NULL
135 && set_error_attr(err, "code", code)
136 && set_error_attr(err, "offset", column)
137 && set_error_attr(err, "lineno", lineno)) {
138 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000139 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000140 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000141 return NULL;
142}
143
Fred Drake71b63ff2002-06-28 22:29:01 +0000144static int
145have_handler(xmlparseobject *self, int type)
146{
147 PyObject *handler = self->handlers[type];
148 return handler != NULL;
149}
150
151static PyObject *
152get_handler_name(struct HandlerInfo *hinfo)
153{
154 PyObject *name = hinfo->nameobj;
155 if (name == NULL) {
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000156 name = PyString_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000157 hinfo->nameobj = name;
158 }
159 Py_XINCREF(name);
160 return name;
161}
162
Fred Drake85d835f2001-02-08 15:39:08 +0000163
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000164#ifdef Py_USING_UNICODE
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000165/* Convert a string of XML_Chars into a Unicode string.
166 Returns None if str is a null pointer. */
167
Fred Drake0582df92000-07-12 04:49:00 +0000168static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000169conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000170{
Fred Drake71b63ff2002-06-28 22:29:01 +0000171 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000172 and hence in UTF-8. */
173 /* UTF-8 from Expat, Unicode desired */
174 if (str == NULL) {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000178 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000179}
180
Fred Drake0582df92000-07-12 04:49:00 +0000181static PyObject *
182conv_string_len_to_unicode(const XML_Char *str, int len)
183{
Fred Drake71b63ff2002-06-28 22:29:01 +0000184 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000185 and hence in UTF-8. */
186 /* UTF-8 from Expat, Unicode desired */
187 if (str == NULL) {
188 Py_INCREF(Py_None);
189 return Py_None;
190 }
Fred Drake6f987622000-08-25 18:03:30 +0000191 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000192}
193#endif
194
195/* Convert a string of XML_Chars into an 8-bit Python string.
196 Returns None if str is a null pointer. */
197
Fred Drake6f987622000-08-25 18:03:30 +0000198static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000199conv_string_to_utf8(const XML_Char *str)
Fred Drake6f987622000-08-25 18:03:30 +0000200{
Fred Drake71b63ff2002-06-28 22:29:01 +0000201 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000202 and hence in UTF-8. */
203 /* UTF-8 from Expat, UTF-8 desired */
204 if (str == NULL) {
205 Py_INCREF(Py_None);
206 return Py_None;
207 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000208 return PyString_FromString(str);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000209}
210
Fred Drake6f987622000-08-25 18:03:30 +0000211static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +0000212conv_string_len_to_utf8(const XML_Char *str, int len)
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000213{
Fred Drake71b63ff2002-06-28 22:29:01 +0000214 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake6f987622000-08-25 18:03:30 +0000215 and hence in UTF-8. */
216 /* UTF-8 from Expat, UTF-8 desired */
217 if (str == NULL) {
218 Py_INCREF(Py_None);
219 return Py_None;
220 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000221 return PyString_FromStringAndSize((const char *)str, len);
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000222}
223
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000224/* Callback routines */
225
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000226static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000227
/* This handler is used when an error has been detected, in the hope
   that actual parsing can be terminated early.  This will only help
   if an external entity reference is encountered.  It ignores all of
   its arguments and unconditionally returns 0; flag_error() installs
   it as the parser's external entity reference handler. */
static int
error_external_entity_ref_handler(XML_Parser parser,
                                  const XML_Char *context,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId)
{
    return 0;
}
240
/* Dummy character data handler used when an error (exception) has
   been detected, and the actual parsing can be terminated early.
   This is needed since the character data handler can't be safely
   removed from within the character data handler, but can be replaced.
   It is used only from the character data handler trampoline, and must
   be installed right after `flag_error()` is called. */
static void
noop_character_data_handler(void *userData, const XML_Char *data, int len)
{
    /* Do nothing. */
}
252
/* Put the parser into its "error detected" state: call
 * clear_handlers(self, 0) and then install
 * error_external_entity_ref_handler as the external entity reference
 * handler.
 */
static void
flag_error(xmlparseobject *self)
{
    clear_handlers(self, 0);
    XML_SetExternalEntityRefHandler(self->itself,
                                    error_external_entity_ref_handler);
}
260
261static PyCodeObject*
262getcode(enum HandlerTypes slot, char* func_name, int lineno)
263{
Fred Drakebd6101c2001-02-14 18:29:45 +0000264 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000265 handler_info[slot].tb_code =
Jeffrey Yasskin1aa47002009-05-08 21:51:06 +0000266 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000267 }
268 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000269}
270
Jeremy Hylton9263f572003-06-27 16:13:17 +0000271#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000272static int
273trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
274{
275 int result = 0;
276 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000277 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000278 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000279 tstate->tracing++;
280 result = tstate->c_profilefunc(tstate->c_profileobj,
281 f, code , val);
282 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
283 || (tstate->c_profilefunc != NULL));
284 tstate->tracing--;
285 if (result)
286 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000287 }
288 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000289 tstate->tracing++;
290 result = tstate->c_tracefunc(tstate->c_traceobj,
291 f, code , val);
292 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
293 || (tstate->c_profilefunc != NULL));
294 tstate->tracing--;
295 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000296 return result;
297}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000298
299static int
300trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
301{
302 PyObject *type, *value, *traceback, *arg;
303 int err;
304
305 if (tstate->c_tracefunc == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000306 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000307
308 PyErr_Fetch(&type, &value, &traceback);
309 if (value == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 value = Py_None;
311 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000312 }
Martin v. Löwis9171f022004-10-13 19:50:11 +0000313#if PY_VERSION_HEX < 0x02040000
314 arg = Py_BuildValue("(OOO)", type, value, traceback);
315#else
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000316 arg = PyTuple_Pack(3, type, value, traceback);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000317#endif
Jeremy Hylton9263f572003-06-27 16:13:17 +0000318 if (arg == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000319 PyErr_Restore(type, value, traceback);
320 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000321 }
322 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
323 Py_DECREF(arg);
324 if (err == 0)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000326 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000327 Py_XDECREF(type);
328 Py_XDECREF(value);
329 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000330 }
331 return err;
332}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000333#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000334
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000335static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000336call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
337 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000338{
Fred Drakebd6101c2001-02-14 18:29:45 +0000339 PyThreadState *tstate = PyThreadState_GET();
340 PyFrameObject *f;
341 PyObject *res;
342
343 if (c == NULL)
344 return NULL;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345
Jeremy Hylton9263f572003-06-27 16:13:17 +0000346 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000347 if (f == NULL)
348 return NULL;
349 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000350#ifdef FIX_TRACE
351 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000352 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000353 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000354#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000355 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000356 if (res == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000357 if (tstate->curexc_traceback == NULL)
358 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000359 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000360#ifdef FIX_TRACE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000361 if (trace_frame_exc(tstate, f) < 0) {
362 return NULL;
363 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000364 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000365 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000366 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
367 Py_XDECREF(res);
368 res = NULL;
369 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000370 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000371#else
372 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000373#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000374 tstate->frame = f->f_back;
375 Py_DECREF(f);
376 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000377}
378
/* STRING_CONV_FUNC evaluates to the converter to use for NUL-terminated
 * XML_Char strings.  In the Unicode-enabled variant the expansion reads
 * `self->returns_unicode`, so a variable named `self` must be in scope
 * wherever the macro is used.
 */
#ifndef Py_USING_UNICODE
#define STRING_CONV_FUNC conv_string_to_utf8
#else
/* Python 2.0 and later versions, when built with Unicode support */
#define STRING_CONV_FUNC (self->returns_unicode \
                          ? conv_string_to_unicode : conv_string_to_utf8)
#endif
Guido van Rossum5961f5a2000-03-31 16:18:11 +0000386
Fred Drakeb91a36b2002-06-27 19:40:48 +0000387static PyObject*
388string_intern(xmlparseobject *self, const char* str)
389{
390 PyObject *result = STRING_CONV_FUNC(str);
391 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000392 /* result can be NULL if the unicode conversion failed. */
393 if (!result)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000395 if (!self->intern)
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000396 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000397 value = PyDict_GetItem(self->intern, result);
398 if (!value) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000399 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000400 return result;
401 else
402 return NULL;
403 }
404 Py_INCREF(value);
405 Py_DECREF(result);
406 return value;
407}
408
Fred Drake2a3d7db2002-06-28 22:56:48 +0000409/* Return 0 on success, -1 on exception.
410 * flag_error() will be called before return if needed.
411 */
412static int
413call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
414{
415 PyObject *args;
416 PyObject *temp;
417
Georg Brandl67c27332010-10-24 14:16:05 +0000418 if (!have_handler(self, CharacterData))
419 return -1;
420
Fred Drake2a3d7db2002-06-28 22:56:48 +0000421 args = PyTuple_New(1);
422 if (args == NULL)
423 return -1;
424#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000425 temp = (self->returns_unicode
426 ? conv_string_len_to_unicode(buffer, len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000427 : conv_string_len_to_utf8(buffer, len));
428#else
429 temp = conv_string_len_to_utf8(buffer, len);
430#endif
431 if (temp == NULL) {
432 Py_DECREF(args);
433 flag_error(self);
Fred Drake24a0f412006-07-06 05:13:22 +0000434 XML_SetCharacterDataHandler(self->itself,
435 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000436 return -1;
437 }
438 PyTuple_SET_ITEM(args, 0, temp);
439 /* temp is now a borrowed reference; consider it unused. */
440 self->in_callback = 1;
441 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000442 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000443 /* temp is an owned reference again, or NULL */
444 self->in_callback = 0;
445 Py_DECREF(args);
446 if (temp == NULL) {
447 flag_error(self);
Fred Drake24a0f412006-07-06 05:13:22 +0000448 XML_SetCharacterDataHandler(self->itself,
449 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000450 return -1;
451 }
452 Py_DECREF(temp);
453 return 0;
454}
455
456static int
457flush_character_buffer(xmlparseobject *self)
458{
459 int rc;
460 if (self->buffer == NULL || self->buffer_used == 0)
461 return 0;
462 rc = call_character_handler(self, self->buffer, self->buffer_used);
463 self->buffer_used = 0;
464 return rc;
465}
466
467static void
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000468my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000469{
470 xmlparseobject *self = (xmlparseobject *) userData;
471 if (self->buffer == NULL)
472 call_character_handler(self, data, len);
473 else {
474 if ((self->buffer_used + len) > self->buffer_size) {
475 if (flush_character_buffer(self) < 0)
476 return;
477 /* handler might have changed; drop the rest on the floor
478 * if there isn't a handler anymore
479 */
480 if (!have_handler(self, CharacterData))
481 return;
482 }
483 if (len > self->buffer_size) {
484 call_character_handler(self, data, len);
485 self->buffer_used = 0;
486 }
487 else {
488 memcpy(self->buffer + self->buffer_used,
489 data, len * sizeof(XML_Char));
490 self->buffer_used += len;
491 }
492 }
493}
494
Fred Drake85d835f2001-02-08 15:39:08 +0000495static void
496my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000497 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000498{
499 xmlparseobject *self = (xmlparseobject *)userData;
500
Fred Drake71b63ff2002-06-28 22:29:01 +0000501 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000502 PyObject *container, *rv, *args;
503 int i, max;
504
Fred Drake2a3d7db2002-06-28 22:56:48 +0000505 if (flush_character_buffer(self) < 0)
506 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000507 /* Set max to the number of slots filled in atts[]; max/2 is
508 * the number of attributes we need to process.
509 */
510 if (self->specified_attributes) {
511 max = XML_GetSpecifiedAttributeCount(self->itself);
512 }
513 else {
514 max = 0;
515 while (atts[max] != NULL)
516 max += 2;
517 }
518 /* Build the container. */
519 if (self->ordered_attributes)
520 container = PyList_New(max);
521 else
522 container = PyDict_New();
523 if (container == NULL) {
524 flag_error(self);
525 return;
526 }
527 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000528 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000529 PyObject *v;
530 if (n == NULL) {
531 flag_error(self);
532 Py_DECREF(container);
533 return;
534 }
535 v = STRING_CONV_FUNC((XML_Char *) atts[i+1]);
536 if (v == NULL) {
537 flag_error(self);
538 Py_DECREF(container);
539 Py_DECREF(n);
540 return;
541 }
542 if (self->ordered_attributes) {
543 PyList_SET_ITEM(container, i, n);
544 PyList_SET_ITEM(container, i+1, v);
545 }
546 else if (PyDict_SetItem(container, n, v)) {
547 flag_error(self);
548 Py_DECREF(n);
549 Py_DECREF(v);
550 return;
551 }
552 else {
553 Py_DECREF(n);
554 Py_DECREF(v);
555 }
556 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000557 args = string_intern(self, name);
558 if (args != NULL)
559 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000560 if (args == NULL) {
561 Py_DECREF(container);
562 return;
563 }
564 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000565 self->in_callback = 1;
566 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000567 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000568 self->in_callback = 0;
569 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000570 if (rv == NULL) {
571 flag_error(self);
572 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000573 }
Fred Drake85d835f2001-02-08 15:39:08 +0000574 Py_DECREF(rv);
575 }
576}
577
/* Generate the Expat trampoline `my_<NAME>Handler`.
 *
 *   RC            C return type of the generated function
 *   NAME          enum HandlerTypes member; also names the function
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on the handler's result `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing but `return RETURN` when no
 * handler is installed.  Otherwise it flushes buffered character data,
 * builds the argument object, calls the handler through
 * call_with_frame(), and calls flag_error() if building the arguments
 * or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
606
/* Trampoline returning void: the handler's result is discarded.  The
 * parser object is taken from the `userData` parameter, so PARAMS must
 * declare one.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Trampoline returning int: the handler's result is converted with
 * PyInt_AsLong() and returned; 0 is returned when no handler is
 * installed or an error occurs before the conversion.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyInt_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000615
/* my_EndElementHandler: calls the handler with (name,), where name is
   passed through string_intern(). */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000619
/* my_ProcessingInstructionHandler: calls the handler with
   (target, data); only the target goes through string_intern(). */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000625
/* my_UnparsedEntityDeclHandler: calls the handler with
   (entityName, base, systemId, publicId, notationName), each passed
   through string_intern(). */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000637
/* my_EntityDeclHandler: calls the handler with (entityName,
   is_parameter_entity, value, base, systemId, publicId, notationName).
   `value` is converted using its explicit length (value_length); the
   other strings go through string_intern().  The two variants differ
   only in how `value` is converted: without Unicode support it is
   always an 8-bit string, with it the choice follows
   self->returns_unicode. */
#ifndef Py_USING_UNICODE
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              conv_string_len_to_utf8(value, value_length),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))
#else
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (self->returns_unicode
               ? conv_string_len_to_unicode(value, value_length)
               : conv_string_len_to_utf8(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))
#endif
675
/* my_XmlDeclHandler: calls the handler with
   (version, encoding, standalone); both strings are converted with
   STRING_CONV_FUNC and are not interned. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              STRING_CONV_FUNC,version, STRING_CONV_FUNC,encoding,
              standalone))
684
685static PyObject *
686conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000687 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000688{
689 PyObject *result = NULL;
690 PyObject *children = PyTuple_New(model->numchildren);
691 int i;
692
693 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000694 assert(model->numchildren < INT_MAX);
695 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000696 PyObject *child = conv_content_model(&model->children[i],
697 conv_string);
698 if (child == NULL) {
699 Py_XDECREF(children);
700 return NULL;
701 }
702 PyTuple_SET_ITEM(children, i, child);
703 }
704 result = Py_BuildValue("(iiO&N)",
705 model->type, model->quant,
706 conv_string,model->name, children);
707 }
708 return result;
709}
710
Fred Drake06dd8cf2003-02-02 03:54:17 +0000711static void
712my_ElementDeclHandler(void *userData,
713 const XML_Char *name,
714 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000715{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000716 xmlparseobject *self = (xmlparseobject *)userData;
717 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000718
Fred Drake06dd8cf2003-02-02 03:54:17 +0000719 if (have_handler(self, ElementDecl)) {
720 PyObject *rv = NULL;
721 PyObject *modelobj, *nameobj;
722
723 if (flush_character_buffer(self) < 0)
724 goto finally;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000725#ifdef Py_USING_UNICODE
Fred Drake06dd8cf2003-02-02 03:54:17 +0000726 modelobj = conv_content_model(model,
727 (self->returns_unicode
728 ? conv_string_to_unicode
729 : conv_string_to_utf8));
Fred Drake85d835f2001-02-08 15:39:08 +0000730#else
Fred Drake06dd8cf2003-02-02 03:54:17 +0000731 modelobj = conv_content_model(model, conv_string_to_utf8);
Fred Drake85d835f2001-02-08 15:39:08 +0000732#endif
Fred Drake06dd8cf2003-02-02 03:54:17 +0000733 if (modelobj == NULL) {
734 flag_error(self);
735 goto finally;
736 }
737 nameobj = string_intern(self, name);
738 if (nameobj == NULL) {
739 Py_DECREF(modelobj);
740 flag_error(self);
741 goto finally;
742 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000743 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000744 if (args == NULL) {
745 Py_DECREF(modelobj);
746 flag_error(self);
747 goto finally;
748 }
749 self->in_callback = 1;
750 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000751 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000752 self->in_callback = 0;
753 if (rv == NULL) {
754 flag_error(self);
755 goto finally;
756 }
757 Py_DECREF(rv);
758 }
759 finally:
760 Py_XDECREF(args);
761 XML_FreeContentModel(self->itself, model);
762 return;
763}
Fred Drake85d835f2001-02-08 15:39:08 +0000764
765VOID_HANDLER(AttlistDecl,
766 (void *userData,
767 const XML_Char *elname,
768 const XML_Char *attname,
769 const XML_Char *att_type,
770 const XML_Char *dflt,
771 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000772 ("(NNO&O&i)",
773 string_intern(self, elname), string_intern(self, attname),
Fred Drake85d835f2001-02-08 15:39:08 +0000774 STRING_CONV_FUNC,att_type, STRING_CONV_FUNC,dflt,
775 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000776
Martin v. Löwisc847f402003-01-21 11:09:21 +0000777#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000778VOID_HANDLER(SkippedEntity,
779 (void *userData,
780 const XML_Char *entityName,
781 int is_parameter_entity),
782 ("Ni",
783 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000784#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000785
Fred Drake71b63ff2002-06-28 22:29:01 +0000786VOID_HANDLER(NotationDecl,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000787 (void *userData,
788 const XML_Char *notationName,
789 const XML_Char *base,
790 const XML_Char *systemId,
791 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000792 ("(NNNN)",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000793 string_intern(self, notationName), string_intern(self, base),
794 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000795
Fred Drake6f987622000-08-25 18:03:30 +0000796VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000797 (void *userData,
798 const XML_Char *prefix,
799 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000800 ("(NN)",
801 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000802
Fred Drake6f987622000-08-25 18:03:30 +0000803VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 (void *userData,
805 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000806 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000807
Fred Drake6f987622000-08-25 18:03:30 +0000808VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000809 (void *userData, const XML_Char *data),
810 ("(O&)", STRING_CONV_FUNC,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000811
Fred Drake6f987622000-08-25 18:03:30 +0000812VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000813 (void *userData),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000814 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000815
Fred Drake6f987622000-08-25 18:03:30 +0000816VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000817 (void *userData),
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000818 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000819
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000820#ifndef Py_USING_UNICODE
Fred Drake6f987622000-08-25 18:03:30 +0000821VOID_HANDLER(Default,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000822 (void *userData, const XML_Char *s, int len),
823 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000824
Fred Drake6f987622000-08-25 18:03:30 +0000825VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000826 (void *userData, const XML_Char *s, int len),
827 ("(N)", conv_string_len_to_utf8(s,len)))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000828#else
Fred Drake6f987622000-08-25 18:03:30 +0000829VOID_HANDLER(Default,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000830 (void *userData, const XML_Char *s, int len),
831 ("(N)", (self->returns_unicode
832 ? conv_string_len_to_unicode(s,len)
833 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000834
Fred Drake6f987622000-08-25 18:03:30 +0000835VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000836 (void *userData, const XML_Char *s, int len),
837 ("(N)", (self->returns_unicode
838 ? conv_string_len_to_unicode(s,len)
839 : conv_string_len_to_utf8(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000840#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000841
Fred Drake71b63ff2002-06-28 22:29:01 +0000842INT_HANDLER(NotStandalone,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000843 (void *userData),
844 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845
Fred Drake6f987622000-08-25 18:03:30 +0000846RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000847 (XML_Parser parser,
848 const XML_Char *context,
849 const XML_Char *base,
850 const XML_Char *systemId,
851 const XML_Char *publicId),
852 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000853 ("(O&NNN)",
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000854 STRING_CONV_FUNC,context, string_intern(self, base),
855 string_intern(self, systemId), string_intern(self, publicId)),
856 rc = PyInt_AsLong(rv);, rc,
857 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000858
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000859/* XXX UnknownEncodingHandler */
860
Fred Drake85d835f2001-02-08 15:39:08 +0000861VOID_HANDLER(StartDoctypeDecl,
862 (void *userData, const XML_Char *doctypeName,
863 const XML_Char *sysid, const XML_Char *pubid,
864 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000865 ("(NNNi)", string_intern(self, doctypeName),
866 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000867 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000868
869VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000870
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000871/* ---------------------------------------------------------------- */
872
Fred Drake71b63ff2002-06-28 22:29:01 +0000873static PyObject *
874get_parse_result(xmlparseobject *self, int rv)
875{
876 if (PyErr_Occurred()) {
877 return NULL;
878 }
879 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000880 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000881 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000882 if (flush_character_buffer(self) < 0) {
883 return NULL;
884 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000885 return PyInt_FromLong(rv);
886}
887
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000888PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000889"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000890Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000891
892static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000893xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000894{
Fred Drake0582df92000-07-12 04:49:00 +0000895 char *s;
896 int slen;
897 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000898
Fred Drake0582df92000-07-12 04:49:00 +0000899 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
900 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000901
902 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000903}
904
Fred Drakeca1f4262000-09-21 20:10:23 +0000905/* File reading copied from cPickle */
906
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000907#define BUF_SIZE 2048
908
Fred Drake0582df92000-07-12 04:49:00 +0000909static int
910readinst(char *buf, int buf_size, PyObject *meth)
911{
912 PyObject *arg = NULL;
913 PyObject *bytes = NULL;
914 PyObject *str = NULL;
915 int len = -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000916
Fred Drake676940b2000-09-22 15:21:31 +0000917 if ((bytes = PyInt_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000918 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000919
Fred Drake7b6caff2003-07-21 17:05:56 +0000920 if ((arg = PyTuple_New(1)) == NULL) {
921 Py_DECREF(bytes);
Fred Drake0582df92000-07-12 04:49:00 +0000922 goto finally;
Fred Drake7b6caff2003-07-21 17:05:56 +0000923 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000924
Tim Peters954eef72000-09-22 06:01:11 +0000925 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000926
Martin v. Löwis9171f022004-10-13 19:50:11 +0000927#if PY_VERSION_HEX < 0x02020000
928 str = PyObject_CallObject(meth, arg);
929#else
930 str = PyObject_Call(meth, arg, NULL);
931#endif
932 if (str == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000933 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000934
Fred Drake0582df92000-07-12 04:49:00 +0000935 /* XXX what to do if it returns a Unicode string? */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000936 if (!PyString_Check(str)) {
Fred Drake71b63ff2002-06-28 22:29:01 +0000937 PyErr_Format(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000938 "read() did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000939 Py_TYPE(str)->tp_name);
Fred Drake0582df92000-07-12 04:49:00 +0000940 goto finally;
941 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000942 len = PyString_GET_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000943 if (len > buf_size) {
944 PyErr_Format(PyExc_ValueError,
945 "read() returned too much data: "
946 "%i bytes requested, %i returned",
947 buf_size, len);
Fred Drake0582df92000-07-12 04:49:00 +0000948 goto finally;
949 }
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000950 memcpy(buf, PyString_AsString(str), len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000951finally:
Fred Drake0582df92000-07-12 04:49:00 +0000952 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000953 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000954 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000955}
956
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000957PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000958"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000959Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000960
961static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +0000962xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000963{
Fred Drake0582df92000-07-12 04:49:00 +0000964 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000965 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000966
Ezio Melotti8b4367e2011-04-11 03:44:28 +0300967 readmethod = PyObject_GetAttrString(f, "read");
968 if (readmethod == NULL) {
Ezio Melotti8b4367e2011-04-11 03:44:28 +0300969 PyErr_SetString(PyExc_TypeError,
970 "argument must have 'read' attribute");
971 return NULL;
972
Fred Drake0582df92000-07-12 04:49:00 +0000973 }
974 for (;;) {
975 int bytes_read;
976 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000977 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000978 Py_XDECREF(readmethod);
Ned Deilyb693e9f2014-03-27 16:38:32 -0700979 return get_parse_result(self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000980 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000981
Ezio Melotti8b4367e2011-04-11 03:44:28 +0300982 bytes_read = readinst(buf, BUF_SIZE, readmethod);
983 if (bytes_read < 0) {
984 Py_XDECREF(readmethod);
985 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000986 }
Ezio Melotti8b4367e2011-04-11 03:44:28 +0300987
Fred Drake0582df92000-07-12 04:49:00 +0000988 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000989 if (PyErr_Occurred()) {
990 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000991 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000992 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000993
Fred Drake0582df92000-07-12 04:49:00 +0000994 if (!rv || bytes_read == 0)
995 break;
996 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000997 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000998 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000999}
1000
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001001PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001002"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001003Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001004
1005static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001006xmlparse_SetBase(xmlparseobject *self, PyObject *args)
1007{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001008 char *base;
1009
Fred Drake0582df92000-07-12 04:49:00 +00001010 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001011 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001012 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001014 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001015 Py_INCREF(Py_None);
1016 return Py_None;
1017}
1018
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001019PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +00001020"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001021Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001022
1023static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +00001024xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +00001025{
Fred Drake0582df92000-07-12 04:49:00 +00001026 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001027}
1028
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001029PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +00001030"GetInputContext() -> string\n\
1031Return the untranslated text of the input that caused the current event.\n\
1032If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001033for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +00001034
1035static PyObject *
Georg Brandl96a8c392006-05-29 21:04:52 +00001036xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +00001037{
Georg Brandl96a8c392006-05-29 21:04:52 +00001038 if (self->in_callback) {
1039 int offset, size;
1040 const char *buffer
1041 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +00001042
Georg Brandl96a8c392006-05-29 21:04:52 +00001043 if (buffer != NULL)
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001044 return PyString_FromStringAndSize(buffer + offset,
Georg Brandl96a8c392006-05-29 21:04:52 +00001045 size - offset);
1046 else
1047 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +00001048 }
Georg Brandl96a8c392006-05-29 21:04:52 +00001049 else
1050 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +00001051}
Fred Drakebd6101c2001-02-14 18:29:45 +00001052
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001053PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001054"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001055Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001056information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001057
1058static PyObject *
1059xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1060{
1061 char *context;
1062 char *encoding = NULL;
1063 xmlparseobject *new_parser;
1064 int i;
1065
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001066 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001067 &context, &encoding)) {
1068 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001069 }
1070
Martin v. Löwis894258c2001-09-23 10:20:10 +00001071#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001072 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001073 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001074#else
1075 /* Python versions 2.2 and later */
1076 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1077#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001078
1079 if (new_parser == NULL)
1080 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001081 new_parser->buffer_size = self->buffer_size;
1082 new_parser->buffer_used = 0;
1083 if (self->buffer != NULL) {
1084 new_parser->buffer = malloc(new_parser->buffer_size);
1085 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001086#ifndef Py_TPFLAGS_HAVE_GC
1087 /* Code for versions 2.0 and 2.1 */
1088 PyObject_Del(new_parser);
1089#else
1090 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001091 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001092#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001093 return PyErr_NoMemory();
1094 }
1095 }
1096 else
1097 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001098 new_parser->returns_unicode = self->returns_unicode;
1099 new_parser->ordered_attributes = self->ordered_attributes;
1100 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001101 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001102 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001103 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001104 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001105 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001106 new_parser->intern = self->intern;
1107 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001108#ifdef Py_TPFLAGS_HAVE_GC
1109 PyObject_GC_Track(new_parser);
1110#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001111 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001112#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001113
1114 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001115 Py_DECREF(new_parser);
1116 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001117 }
1118
1119 XML_SetUserData(new_parser->itself, (void *)new_parser);
1120
1121 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001122 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001123 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001124
Fred Drake2a3d7db2002-06-28 22:56:48 +00001125 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001126 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001127 Py_DECREF(new_parser);
1128 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001129 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001130 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001131
1132 /* then copy handlers from self */
1133 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001134 PyObject *handler = self->handlers[i];
1135 if (handler != NULL) {
1136 Py_INCREF(handler);
1137 new_parser->handlers[i] = handler;
1138 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001139 handler_info[i].handler);
1140 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001141 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001142 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001143}
1144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001145PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001146"SetParamEntityParsing(flag) -> success\n\
1147Controls parsing of parameter entities (including the external DTD\n\
1148subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1149XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1150XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001151was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001152
1153static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001154xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001155{
Fred Drake85d835f2001-02-08 15:39:08 +00001156 int flag;
1157 if (!PyArg_ParseTuple(args, "i", &flag))
1158 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001159 flag = XML_SetParamEntityParsing(p->itself, flag);
Fred Drake85d835f2001-02-08 15:39:08 +00001160 return PyInt_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001161}
1162
Martin v. Löwisc847f402003-01-21 11:09:21 +00001163
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): the optional argument is evaluated for truth and
   forwarded to XML_UseForeignDTD(); any Expat error code is raised through
   set_error(). */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    int enable = 1;
    enum XML_Error status;

    if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj)) {
        return NULL;
    }
    if (flagobj != NULL) {
        enable = PyObject_IsTrue(flagobj);
        if (enable < 0) {
            return NULL;
        }
    }

    status = XML_UseForeignDTD(self->itself, enable ? XML_TRUE : XML_FALSE);
    if (status == XML_ERROR_NONE) {
        Py_RETURN_NONE;
    }
    return set_error(self, status);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001194
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001195static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001196 {"Parse", (PyCFunction)xmlparse_Parse,
1197 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001198 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001199 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001200 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001201 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001202 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001203 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001204 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001205 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001206 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001207 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001208 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001209 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001210#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001211 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001212 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001213#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001214 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001215};
1216
1217/* ---------- */
1218
1219
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001220#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001221
Fred Drake71b63ff2002-06-28 22:29:01 +00001222/* pyexpat international encoding support.
1223 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001224*/
1225
Martin v. Löwis3af7cc02001-01-22 08:19:10 +00001226static char template_buffer[257];
Fred Drakebb66a202001-03-01 20:48:17 +00001227PyObject *template_string = NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001228
Fred Drake71b63ff2002-06-28 22:29:01 +00001229static void
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001230init_template_buffer(void)
1231{
1232 int i;
Fred Drakebb66a202001-03-01 20:48:17 +00001233 for (i = 0; i < 256; i++) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001234 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001235 }
Fred Drakebb66a202001-03-01 20:48:17 +00001236 template_buffer[256] = 0;
Tim Peters63cb99e2001-02-17 18:12:50 +00001237}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001238
Fred Drake71b63ff2002-06-28 22:29:01 +00001239static int
1240PyUnknownEncodingHandler(void *encodingHandlerData,
1241 const XML_Char *name,
1242 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001243{
Fred Drakebb66a202001-03-01 20:48:17 +00001244 PyUnicodeObject *_u_string = NULL;
1245 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001246 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001247
Fred Drakebb66a202001-03-01 20:48:17 +00001248 /* Yes, supports only 8bit encodings */
1249 _u_string = (PyUnicodeObject *)
1250 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001251
Fred Drakebb66a202001-03-01 20:48:17 +00001252 if (_u_string == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001253 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001254
Eli Benderskyb6717012013-08-04 06:09:49 -07001255 if (PyUnicode_GET_SIZE(_u_string) != 256) {
1256 Py_DECREF(_u_string);
1257 PyErr_SetString(PyExc_ValueError,
1258 "multi-byte encodings are not supported");
1259 return result;
1260 }
1261
Fred Drakebb66a202001-03-01 20:48:17 +00001262 for (i = 0; i < 256; i++) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001263 /* Stupid to access directly, but fast */
1264 Py_UNICODE c = _u_string->str[i];
1265 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1266 info->map[i] = -1;
1267 else
1268 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001269 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001270 info->data = NULL;
1271 info->convert = NULL;
1272 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001273 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001274 Py_DECREF(_u_string);
1275 return result;
1276}
1277
1278#endif
1279
1280static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001281newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001282{
1283 int i;
1284 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001285
Martin v. Löwis894258c2001-09-23 10:20:10 +00001286#ifdef Py_TPFLAGS_HAVE_GC
1287 /* Code for versions 2.2 and later */
1288 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1289#else
Fred Drake0582df92000-07-12 04:49:00 +00001290 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001291#endif
Fred Drake0582df92000-07-12 04:49:00 +00001292 if (self == NULL)
1293 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001294
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001295#ifdef Py_USING_UNICODE
Fred Drake0582df92000-07-12 04:49:00 +00001296 self->returns_unicode = 1;
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001297#else
1298 self->returns_unicode = 0;
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001299#endif
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001300
Fred Drake2a3d7db2002-06-28 22:56:48 +00001301 self->buffer = NULL;
1302 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1303 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001304 self->ordered_attributes = 0;
1305 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001306 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001307 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001308 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001309 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001310 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1311 }
Fred Drake85d835f2001-02-08 15:39:08 +00001312 else {
Fred Drake0582df92000-07-12 04:49:00 +00001313 self->itself = XML_ParserCreate(encoding);
1314 }
svelankarde1c7d52017-03-09 00:27:48 -05001315 if (self->itself == NULL) {
1316 PyErr_SetString(PyExc_RuntimeError,
1317 "XML_ParserCreate failed");
1318 Py_DECREF(self);
1319 return NULL;
1320 }
1321
Serhiy Storchaka0c2ed762017-03-09 10:52:50 +02001322#if XML_COMBINED_VERSION >= 20100 || defined(XML_HAS_SET_HASH_SALT)
Gregory P. Smithc10f5c22012-03-14 18:12:23 -07001323 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1324 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1325 * to indicate that we can still use it. */
Gregory P. Smithc8ff4602012-03-14 15:28:10 -07001326 XML_SetHashSalt(self->itself,
1327 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smithc10f5c22012-03-14 18:12:23 -07001328#endif
Fred Drakeb91a36b2002-06-27 19:40:48 +00001329 self->intern = intern;
1330 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001331#ifdef Py_TPFLAGS_HAVE_GC
1332 PyObject_GC_Track(self);
1333#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001334 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001335#endif
Fred Drake0582df92000-07-12 04:49:00 +00001336 XML_SetUserData(self->itself, (void *)self);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001337#ifdef Py_USING_UNICODE
Fred Drake7c75bf22002-07-01 14:02:31 +00001338 XML_SetUnknownEncodingHandler(self->itself,
1339 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001340#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001341
Fred Drake2a3d7db2002-06-28 22:56:48 +00001342 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001343 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001344
Fred Drake7c75bf22002-07-01 14:02:31 +00001345 self->handlers = malloc(sizeof(PyObject *) * i);
1346 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001347 Py_DECREF(self);
1348 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001349 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001350 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001351
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001352 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001353}
1354
1355
1356static void
Fred Drake0582df92000-07-12 04:49:00 +00001357xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001358{
Fred Drake0582df92000-07-12 04:49:00 +00001359 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001360#ifdef Py_TPFLAGS_HAVE_GC
1361 PyObject_GC_UnTrack(self);
1362#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001363 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001364#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001365 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001366 XML_ParserFree(self->itself);
1367 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001368
Fred Drake85d835f2001-02-08 15:39:08 +00001369 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001370 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001371 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001372 temp = self->handlers[i];
1373 self->handlers[i] = NULL;
1374 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001375 }
1376 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001377 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001378 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001379 if (self->buffer != NULL) {
1380 free(self->buffer);
1381 self->buffer = NULL;
1382 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001383 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001384#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001385 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001386 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001387#else
1388 /* Code for versions 2.2 and later. */
1389 PyObject_GC_Del(self);
1390#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001391}
1392
Fred Drake0582df92000-07-12 04:49:00 +00001393static int
1394handlername2int(const char *name)
1395{
1396 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001397 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001398 if (strcmp(name, handler_info[i].name) == 0) {
1399 return i;
1400 }
1401 }
1402 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001403}
1404
1405static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001406get_pybool(int istrue)
1407{
1408 PyObject *result = istrue ? Py_True : Py_False;
1409 Py_INCREF(result);
1410 return result;
1411}
1412
1413static PyObject *
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001414xmlparse_getattr(xmlparseobject *self, char *name)
1415{
Fred Drake71b63ff2002-06-28 22:29:01 +00001416 int handlernum = handlername2int(name);
1417
1418 if (handlernum != -1) {
1419 PyObject *result = self->handlers[handlernum];
1420 if (result == NULL)
1421 result = Py_None;
1422 Py_INCREF(result);
1423 return result;
1424 }
1425 if (name[0] == 'E') {
1426 if (strcmp(name, "ErrorCode") == 0)
1427 return PyInt_FromLong((long)
1428 XML_GetErrorCode(self->itself));
1429 if (strcmp(name, "ErrorLineNumber") == 0)
1430 return PyInt_FromLong((long)
1431 XML_GetErrorLineNumber(self->itself));
1432 if (strcmp(name, "ErrorColumnNumber") == 0)
1433 return PyInt_FromLong((long)
1434 XML_GetErrorColumnNumber(self->itself));
1435 if (strcmp(name, "ErrorByteIndex") == 0)
1436 return PyInt_FromLong((long)
1437 XML_GetErrorByteIndex(self->itself));
1438 }
Dave Cole3203efb2004-08-26 00:37:31 +00001439 if (name[0] == 'C') {
1440 if (strcmp(name, "CurrentLineNumber") == 0)
1441 return PyInt_FromLong((long)
1442 XML_GetCurrentLineNumber(self->itself));
1443 if (strcmp(name, "CurrentColumnNumber") == 0)
1444 return PyInt_FromLong((long)
1445 XML_GetCurrentColumnNumber(self->itself));
1446 if (strcmp(name, "CurrentByteIndex") == 0)
1447 return PyInt_FromLong((long)
1448 XML_GetCurrentByteIndex(self->itself));
1449 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001450 if (name[0] == 'b') {
1451 if (strcmp(name, "buffer_size") == 0)
1452 return PyInt_FromLong((long) self->buffer_size);
1453 if (strcmp(name, "buffer_text") == 0)
1454 return get_pybool(self->buffer != NULL);
1455 if (strcmp(name, "buffer_used") == 0)
1456 return PyInt_FromLong((long) self->buffer_used);
1457 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001458 if (strcmp(name, "namespace_prefixes") == 0)
1459 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001460 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001461 return get_pybool(self->ordered_attributes);
Fred Drake0582df92000-07-12 04:49:00 +00001462 if (strcmp(name, "returns_unicode") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001463 return get_pybool((long) self->returns_unicode);
Fred Drake85d835f2001-02-08 15:39:08 +00001464 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001465 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001466 if (strcmp(name, "intern") == 0) {
1467 if (self->intern == NULL) {
1468 Py_INCREF(Py_None);
1469 return Py_None;
1470 }
1471 else {
1472 Py_INCREF(self->intern);
1473 return self->intern;
1474 }
1475 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001476
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001477#define APPEND(list, str) \
1478 do { \
1479 PyObject *o = PyString_FromString(str); \
1480 if (o != NULL) \
1481 PyList_Append(list, o); \
1482 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001483 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001484
Fred Drake0582df92000-07-12 04:49:00 +00001485 if (strcmp(name, "__members__") == 0) {
1486 int i;
1487 PyObject *rc = PyList_New(0);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001488 if (!rc)
1489 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001490 for (i = 0; handler_info[i].name != NULL; i++) {
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001491 PyObject *o = get_handler_name(&handler_info[i]);
1492 if (o != NULL)
1493 PyList_Append(rc, o);
1494 Py_XDECREF(o);
Fred Drake0582df92000-07-12 04:49:00 +00001495 }
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001496 APPEND(rc, "ErrorCode");
1497 APPEND(rc, "ErrorLineNumber");
1498 APPEND(rc, "ErrorColumnNumber");
1499 APPEND(rc, "ErrorByteIndex");
Dave Cole3203efb2004-08-26 00:37:31 +00001500 APPEND(rc, "CurrentLineNumber");
1501 APPEND(rc, "CurrentColumnNumber");
1502 APPEND(rc, "CurrentByteIndex");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001503 APPEND(rc, "buffer_size");
1504 APPEND(rc, "buffer_text");
1505 APPEND(rc, "buffer_used");
Martin v. Löwis069dde22003-01-21 10:58:18 +00001506 APPEND(rc, "namespace_prefixes");
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001507 APPEND(rc, "ordered_attributes");
1508 APPEND(rc, "returns_unicode");
1509 APPEND(rc, "specified_attributes");
1510 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001511
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001512#undef APPEND
Fred Drake0582df92000-07-12 04:49:00 +00001513 return rc;
1514 }
1515 return Py_FindMethod(xmlparse_methods, (PyObject *)self, name);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001516}
1517
Fred Drake6f987622000-08-25 18:03:30 +00001518static int
1519sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001520{
1521 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001522 if (handlernum >= 0) {
1523 xmlhandler c_handler = NULL;
1524 PyObject *temp = self->handlers[handlernum];
1525
Fred Drake24a0f412006-07-06 05:13:22 +00001526 if (v == Py_None) {
1527 /* If this is the character data handler, and a character
1528 data handler is already active, we need to be more
1529 careful. What we can safely do is replace the existing
1530 character data handler callback function with a no-op
1531 function that will refuse to call Python. The downside
1532 is that this doesn't completely remove the character
1533 data handler from the C layer if there's any callback
1534 active, so Expat does a little more work than it
1535 otherwise would, but that's really an odd case. A more
1536 elaborate system of handlers and state could remove the
1537 C handler more effectively. */
1538 if (handlernum == CharacterData && self->in_callback)
1539 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001540 v = NULL;
Fred Drake24a0f412006-07-06 05:13:22 +00001541 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001542 else if (v != NULL) {
1543 Py_INCREF(v);
1544 c_handler = handler_info[handlernum].handler;
1545 }
Fred Drake0582df92000-07-12 04:49:00 +00001546 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001547 Py_XDECREF(temp);
1548 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001549 return 1;
1550 }
1551 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001552}
1553
1554static int
Fred Drake6f987622000-08-25 18:03:30 +00001555xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001556{
Fred Drake6f987622000-08-25 18:03:30 +00001557 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001558 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001559 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1560 return -1;
1561 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001562 if (strcmp(name, "buffer_text") == 0) {
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001563 int b = PyObject_IsTrue(v);
1564 if (b < 0)
1565 return -1;
1566 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001567 if (self->buffer == NULL) {
1568 self->buffer = malloc(self->buffer_size);
1569 if (self->buffer == NULL) {
1570 PyErr_NoMemory();
1571 return -1;
1572 }
1573 self->buffer_used = 0;
1574 }
1575 }
1576 else if (self->buffer != NULL) {
1577 if (flush_character_buffer(self) < 0)
1578 return -1;
1579 free(self->buffer);
1580 self->buffer = NULL;
1581 }
1582 return 0;
1583 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001584 if (strcmp(name, "namespace_prefixes") == 0) {
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001585 int b = PyObject_IsTrue(v);
1586 if (b < 0)
1587 return -1;
1588 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001589 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1590 return 0;
1591 }
Fred Drake85d835f2001-02-08 15:39:08 +00001592 if (strcmp(name, "ordered_attributes") == 0) {
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001593 int b = PyObject_IsTrue(v);
1594 if (b < 0)
1595 return -1;
1596 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001597 return 0;
1598 }
Fred Drake6f987622000-08-25 18:03:30 +00001599 if (strcmp(name, "returns_unicode") == 0) {
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001600 int b = PyObject_IsTrue(v);
1601 if (b < 0)
1602 return -1;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001603#ifndef Py_USING_UNICODE
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001604 if (b) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001605 PyErr_SetString(PyExc_ValueError,
1606 "Unicode support not available");
Fred Drake6f987622000-08-25 18:03:30 +00001607 return -1;
Fred Drake6f987622000-08-25 18:03:30 +00001608 }
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001609#endif
1610 self->returns_unicode = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001611 return 0;
1612 }
1613 if (strcmp(name, "specified_attributes") == 0) {
Antoine Pitrouc5bef752012-08-15 23:16:51 +02001614 int b = PyObject_IsTrue(v);
1615 if (b < 0)
1616 return -1;
1617 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001618 return 0;
1619 }
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001620
1621 if (strcmp(name, "buffer_size") == 0) {
1622 long new_buffer_size;
1623 if (!PyInt_Check(v)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001624 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1625 return -1;
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001626 }
1627
1628 new_buffer_size=PyInt_AS_LONG(v);
1629 /* trivial case -- no change */
1630 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001631 return 0;
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001632 }
1633
1634 if (new_buffer_size <= 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001635 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1636 return -1;
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001637 }
1638
1639 /* check maximum */
1640 if (new_buffer_size > INT_MAX) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001641 char errmsg[100];
1642 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1643 PyErr_SetString(PyExc_ValueError, errmsg);
1644 return -1;
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001645 }
1646
1647 if (self->buffer != NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001648 /* there is already a buffer */
1649 if (self->buffer_used != 0) {
1650 flush_character_buffer(self);
1651 }
1652 /* free existing buffer */
1653 free(self->buffer);
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001654 }
1655 self->buffer = malloc(new_buffer_size);
1656 if (self->buffer == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001657 PyErr_NoMemory();
1658 return -1;
1659 }
Andrew M. Kuchlinge0a49b62008-01-08 14:30:55 +00001660 self->buffer_size = new_buffer_size;
1661 return 0;
1662 }
1663
Fred Drake2a3d7db2002-06-28 22:56:48 +00001664 if (strcmp(name, "CharacterDataHandler") == 0) {
1665 /* If we're changing the character data handler, flush all
1666 * cached data with the old handler. Not sure there's a
1667 * "right" thing to do, though, but this probably won't
1668 * happen.
1669 */
1670 if (flush_character_buffer(self) < 0)
1671 return -1;
1672 }
Fred Drake6f987622000-08-25 18:03:30 +00001673 if (sethandler(self, name, v)) {
1674 return 0;
1675 }
1676 PyErr_SetString(PyExc_AttributeError, name);
1677 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001678}
1679
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001680#ifdef WITH_CYCLE_GC
1681static int
1682xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1683{
Neal Norwitz035b1852006-04-16 00:02:59 +00001684 int i;
Thomas Woutersc6e55062006-04-15 21:47:09 +00001685 for (i = 0; handler_info[i].name != NULL; i++)
1686 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001687 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001688}
1689
1690static int
1691xmlparse_clear(xmlparseobject *op)
1692{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001693 clear_handlers(op, 0);
Thomas Woutersedf17d82006-04-15 17:28:34 +00001694 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001695 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001696}
1697#endif
1698
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001699PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001700
1701static PyTypeObject Xmlparsetype = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001702 PyVarObject_HEAD_INIT(NULL, 0)
1703 "pyexpat.xmlparser", /*tp_name*/
1704 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1705 0, /*tp_itemsize*/
1706 /* methods */
1707 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1708 (printfunc)0, /*tp_print*/
1709 (getattrfunc)xmlparse_getattr, /*tp_getattr*/
1710 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1711 (cmpfunc)0, /*tp_compare*/
1712 (reprfunc)0, /*tp_repr*/
1713 0, /*tp_as_number*/
1714 0, /*tp_as_sequence*/
1715 0, /*tp_as_mapping*/
1716 (hashfunc)0, /*tp_hash*/
1717 (ternaryfunc)0, /*tp_call*/
1718 (reprfunc)0, /*tp_str*/
1719 0, /* tp_getattro */
1720 0, /* tp_setattro */
1721 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001722#ifdef Py_TPFLAGS_HAVE_GC
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001723 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001724#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001725 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001726#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001727 Xmlparsetype__doc__, /* tp_doc - Documentation string */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001728#ifdef WITH_CYCLE_GC
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001729 (traverseproc)xmlparse_traverse, /* tp_traverse */
1730 (inquiry)xmlparse_clear /* tp_clear */
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001731#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001732 0, 0
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001733#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001734};
1735
1736/* End of code for xmlparser objects */
1737/* -------------------------------------------------------- */
1738
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001739PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001740"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001741Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001742
1743static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001744pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1745{
Fred Drakecde79132001-04-25 16:01:30 +00001746 char *encoding = NULL;
1747 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001748 PyObject *intern = NULL;
1749 PyObject *result;
1750 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001751 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001752 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001753
Fred Drakeb91a36b2002-06-27 19:40:48 +00001754 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1755 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001756 return NULL;
1757 if (namespace_separator != NULL
1758 && strlen(namespace_separator) > 1) {
1759 PyErr_SetString(PyExc_ValueError,
1760 "namespace_separator must be at most one"
1761 " character, omitted, or None");
1762 return NULL;
1763 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001764 /* Explicitly passing None means no interning is desired.
1765 Not passing anything means that a new dictionary is used. */
1766 if (intern == Py_None)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001767 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001768 else if (intern == NULL) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001769 intern = PyDict_New();
1770 if (!intern)
1771 return NULL;
1772 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001773 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001774 else if (!PyDict_Check(intern)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001775 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1776 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001777 }
1778
1779 result = newxmlparseobject(encoding, namespace_separator, intern);
1780 if (intern_decref) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001781 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001782 }
1783 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001784}
1785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001787"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001788Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001789
1790static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001791pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001792{
Fred Drake0582df92000-07-12 04:49:00 +00001793 long code = 0;
1794
1795 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1796 return NULL;
1797 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001798}
1799
1800/* List of methods defined in the module */
1801
1802static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001803 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001804 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001805 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1806 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001807
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001808 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001809};
1810
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001811/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001812
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001813PyDoc_STRVAR(pyexpat_module_documentation,
1814"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001815
Fred Drakecde79132001-04-25 16:01:30 +00001816/* Initialization function for the module */
1817
1818#ifndef MODULE_NAME
1819#define MODULE_NAME "pyexpat"
1820#endif
1821
1822#ifndef MODULE_INITFUNC
1823#define MODULE_INITFUNC initpyexpat
1824#endif
1825
Martin v. Löwis069dde22003-01-21 10:58:18 +00001826#ifndef PyMODINIT_FUNC
1827# ifdef MS_WINDOWS
1828# define PyMODINIT_FUNC __declspec(dllexport) void
1829# else
1830# define PyMODINIT_FUNC void
1831# endif
1832#endif
1833
Mark Hammond8235ea12002-07-19 06:55:41 +00001834PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001835
Martin v. Löwis069dde22003-01-21 10:58:18 +00001836PyMODINIT_FUNC
1837MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001838{
1839 PyObject *m, *d;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001840 PyObject *errmod_name = PyString_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001841 PyObject *errors_module;
1842 PyObject *modelmod_name;
1843 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001844 PyObject *sys_modules;
Benjamin Peterson4ba009d2011-05-31 18:59:49 -05001845 PyObject *version;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001846 static struct PyExpat_CAPI capi;
1847 PyObject* capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001848
Fred Drake6f987622000-08-25 18:03:30 +00001849 if (errmod_name == NULL)
1850 return;
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001851 modelmod_name = PyString_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001852 if (modelmod_name == NULL)
1853 return;
Fred Drake6f987622000-08-25 18:03:30 +00001854
Christian Heimese93237d2007-12-19 02:37:44 +00001855 Py_TYPE(&Xmlparsetype) = &PyType_Type;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001856
Fred Drake0582df92000-07-12 04:49:00 +00001857 /* Create the module and add the functions */
Fred Drakecde79132001-04-25 16:01:30 +00001858 m = Py_InitModule3(MODULE_NAME, pyexpat_methods,
Fred Drake85d835f2001-02-08 15:39:08 +00001859 pyexpat_module_documentation);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001860 if (m == NULL)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001861 return;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001862
Fred Drake0582df92000-07-12 04:49:00 +00001863 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001864 if (ErrorObject == NULL) {
1865 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001866 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001867 if (ErrorObject == NULL)
1868 return;
1869 }
1870 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001871 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001872 Py_INCREF(ErrorObject);
1873 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001874 Py_INCREF(&Xmlparsetype);
1875 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001876
Benjamin Peterson4ba009d2011-05-31 18:59:49 -05001877 version = PyString_FromString(PY_VERSION);
1878 if (!version)
1879 return;
1880 PyModule_AddObject(m, "__version__", version);
Fred Drake738293d2000-12-21 17:25:07 +00001881 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1882 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001883 {
1884 XML_Expat_Version info = XML_ExpatVersionInfo();
1885 PyModule_AddObject(m, "version_info",
1886 Py_BuildValue("(iii)", info.major,
1887 info.minor, info.micro));
1888 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001889#ifdef Py_USING_UNICODE
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001890 init_template_buffer();
1891#endif
Fred Drake0582df92000-07-12 04:49:00 +00001892 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001893 compiled, this should check and set native_encoding
1894 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001895 */
Fred Drake93adb692000-09-23 04:55:48 +00001896 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001897
Fred Drake85d835f2001-02-08 15:39:08 +00001898 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001899 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001900 errors_module = PyDict_GetItem(d, errmod_name);
1901 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001902 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001903 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001904 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001905 /* gives away the reference to errors_module */
1906 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001907 }
1908 }
Fred Drake6f987622000-08-25 18:03:30 +00001909 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001910 model_module = PyDict_GetItem(d, modelmod_name);
1911 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001912 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001913 if (model_module != NULL) {
1914 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1915 /* gives away the reference to model_module */
1916 PyModule_AddObject(m, "model", model_module);
1917 }
1918 }
1919 Py_DECREF(modelmod_name);
1920 if (errors_module == NULL || model_module == NULL)
1921 /* Don't core dump later! */
Fred Drake6f987622000-08-25 18:03:30 +00001922 return;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001923
Martin v. Löwisc847f402003-01-21 11:09:21 +00001924#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001925 {
1926 const XML_Feature *features = XML_GetFeatureList();
1927 PyObject *list = PyList_New(0);
1928 if (list == NULL)
1929 /* just ignore it */
1930 PyErr_Clear();
1931 else {
1932 int i = 0;
1933 for (; features[i].feature != XML_FEATURE_END; ++i) {
1934 int ok;
1935 PyObject *item = Py_BuildValue("si", features[i].name,
1936 features[i].value);
1937 if (item == NULL) {
1938 Py_DECREF(list);
1939 list = NULL;
1940 break;
1941 }
1942 ok = PyList_Append(list, item);
1943 Py_DECREF(item);
1944 if (ok < 0) {
1945 PyErr_Clear();
1946 break;
1947 }
1948 }
1949 if (list != NULL)
1950 PyModule_AddObject(m, "features", list);
1951 }
1952 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001953#endif
Fred Drake6f987622000-08-25 18:03:30 +00001954
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001955#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001956 PyModule_AddStringConstant(errors_module, #name, \
1957 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001958
Fred Drake0582df92000-07-12 04:49:00 +00001959 MYCONST(XML_ERROR_NO_MEMORY);
1960 MYCONST(XML_ERROR_SYNTAX);
1961 MYCONST(XML_ERROR_NO_ELEMENTS);
1962 MYCONST(XML_ERROR_INVALID_TOKEN);
1963 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1964 MYCONST(XML_ERROR_PARTIAL_CHAR);
1965 MYCONST(XML_ERROR_TAG_MISMATCH);
1966 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1967 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1968 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1969 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1970 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1971 MYCONST(XML_ERROR_ASYNC_ENTITY);
1972 MYCONST(XML_ERROR_BAD_CHAR_REF);
1973 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1974 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1975 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1976 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1977 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001978 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1979 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1980 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001981 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1982 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1983 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1984 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1985 /* Added in Expat 1.95.7. */
1986 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1987 /* Added in Expat 1.95.8. */
1988 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1989 MYCONST(XML_ERROR_INCOMPLETE_PE);
1990 MYCONST(XML_ERROR_XML_DECL);
1991 MYCONST(XML_ERROR_TEXT_DECL);
1992 MYCONST(XML_ERROR_PUBLICID);
1993 MYCONST(XML_ERROR_SUSPENDED);
1994 MYCONST(XML_ERROR_NOT_SUSPENDED);
1995 MYCONST(XML_ERROR_ABORTED);
1996 MYCONST(XML_ERROR_FINISHED);
1997 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001998
Fred Drake85d835f2001-02-08 15:39:08 +00001999 PyModule_AddStringConstant(errors_module, "__doc__",
2000 "Constants used to describe error conditions.");
2001
Fred Drake93adb692000-09-23 04:55:48 +00002002#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002003
Fred Drake85d835f2001-02-08 15:39:08 +00002004#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002005 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
2006 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
2007 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00002008#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002009
Fred Drake85d835f2001-02-08 15:39:08 +00002010#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
2011 PyModule_AddStringConstant(model_module, "__doc__",
2012 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002013
Fred Drake85d835f2001-02-08 15:39:08 +00002014 MYCONST(XML_CTYPE_EMPTY);
2015 MYCONST(XML_CTYPE_ANY);
2016 MYCONST(XML_CTYPE_MIXED);
2017 MYCONST(XML_CTYPE_NAME);
2018 MYCONST(XML_CTYPE_CHOICE);
2019 MYCONST(XML_CTYPE_SEQ);
2020
2021 MYCONST(XML_CQUANT_NONE);
2022 MYCONST(XML_CQUANT_OPT);
2023 MYCONST(XML_CQUANT_REP);
2024 MYCONST(XML_CQUANT_PLUS);
2025#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00002026
2027 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002028 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00002029 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002030 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
2031 capi.MINOR_VERSION = XML_MINOR_VERSION;
2032 capi.MICRO_VERSION = XML_MICRO_VERSION;
2033 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00002034 capi.GetErrorCode = XML_GetErrorCode;
2035 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
2036 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002037 capi.Parse = XML_Parse;
2038 capi.ParserCreate_MM = XML_ParserCreate_MM;
2039 capi.ParserFree = XML_ParserFree;
2040 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
2041 capi.SetCommentHandler = XML_SetCommentHandler;
2042 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
2043 capi.SetElementHandler = XML_SetElementHandler;
2044 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
2045 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
2046 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
2047 capi.SetUserData = XML_SetUserData;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002048
Larry Hastings402b73f2010-03-25 00:54:54 +00002049 /* export using capsule */
2050 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00002051 if (capi_object)
2052 PyModule_AddObject(m, "expat_CAPI", capi_object);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002053}
2054
Fred Drake6f987622000-08-25 18:03:30 +00002055static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002056clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00002057{
Fred Drakecde79132001-04-25 16:01:30 +00002058 int i = 0;
2059 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002060
Fred Drake71b63ff2002-06-28 22:29:01 +00002061 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002062 if (initial)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002063 self->handlers[i] = NULL;
2064 else {
Fred Drakecde79132001-04-25 16:01:30 +00002065 temp = self->handlers[i];
2066 self->handlers[i] = NULL;
2067 Py_XDECREF(temp);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002068 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002069 }
Fred Drakecde79132001-04-25 16:01:30 +00002070 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002071}
2072
Tim Peters0c322792002-07-17 16:49:03 +00002073static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00002074 {"StartElementHandler",
2075 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002076 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002077 {"EndElementHandler",
2078 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002079 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002080 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002081 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2082 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002083 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002084 (xmlhandlersetter)XML_SetCharacterDataHandler,
2085 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002086 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002087 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002088 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002089 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002090 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002091 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002092 {"StartNamespaceDeclHandler",
2093 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002094 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002095 {"EndNamespaceDeclHandler",
2096 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002097 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002098 {"CommentHandler",
2099 (xmlhandlersetter)XML_SetCommentHandler,
2100 (xmlhandler)my_CommentHandler},
2101 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002102 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002103 (xmlhandler)my_StartCdataSectionHandler},
2104 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002105 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002106 (xmlhandler)my_EndCdataSectionHandler},
2107 {"DefaultHandler",
2108 (xmlhandlersetter)XML_SetDefaultHandler,
2109 (xmlhandler)my_DefaultHandler},
2110 {"DefaultHandlerExpand",
2111 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2112 (xmlhandler)my_DefaultHandlerExpandHandler},
2113 {"NotStandaloneHandler",
2114 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2115 (xmlhandler)my_NotStandaloneHandler},
2116 {"ExternalEntityRefHandler",
2117 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002118 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002119 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002120 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002121 (xmlhandler)my_StartDoctypeDeclHandler},
2122 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002123 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002124 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002125 {"EntityDeclHandler",
2126 (xmlhandlersetter)XML_SetEntityDeclHandler,
2127 (xmlhandler)my_EntityDeclHandler},
2128 {"XmlDeclHandler",
2129 (xmlhandlersetter)XML_SetXmlDeclHandler,
2130 (xmlhandler)my_XmlDeclHandler},
2131 {"ElementDeclHandler",
2132 (xmlhandlersetter)XML_SetElementDeclHandler,
2133 (xmlhandler)my_ElementDeclHandler},
2134 {"AttlistDeclHandler",
2135 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2136 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002137#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002138 {"SkippedEntityHandler",
2139 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2140 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002141#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002142
Fred Drake0582df92000-07-12 04:49:00 +00002143 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002144};