blob: 7e51d35e622006b46cd4940e5751016d8dc9b166 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
/* Expat's version folded into one comparable integer:
 * major * 10000 + minor * 100 + micro.
 */
#define XML_COMBINED_VERSION \
    (10000 * XML_MAJOR_VERSION + 100 * XML_MINOR_VERSION + XML_MICRO_VERSION)

/* When defined, the trace/profile support guarded by #ifdef FIX_TRACE
 * further down is compiled in.
 */
#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* One enumerator per callback slot.  The values are used as indexes into
 * handler_info[] and into each parser's handlers array, so the order of
 * the enumerators is significant.
 */
enum HandlerTypes {
    StartElement = 0,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when the Expat headers are version 1.95.4 or newer. */
    SkippedEntity,
#endif
    /* Sentinel: always the last enumerator. */
    _DummyDecl
};
40
41static PyObject *ErrorObject;
42
43/* ----------------------------------------------------- */
44
45/* Declarations for objects of type xmlparser */
46
47typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000048 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000049
Fred Drake0582df92000-07-12 04:49:00 +000050 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000051 int ordered_attributes; /* Return attributes as a list. */
52 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000053 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000054 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000055 XML_Char *buffer; /* Buffer used when accumulating characters */
56 /* NULL if not enabled */
57 int buffer_size; /* Size of buffer, in XML_Char units */
58 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000059 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000060 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000061} xmlparseobject;
62
Fred Drake2a3d7db2002-06-28 22:56:48 +000063#define CHARACTER_DATA_BUFFER_SIZE 8192
64
Jeremy Hylton938ace62002-07-17 16:30:39 +000065static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake117ac852002-09-24 16:24:54 +000067typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068typedef void* xmlhandler;
69
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000070struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000071 const char *name;
72 xmlhandlersetter setter;
73 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000074 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000075 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076};
77
Jeremy Hylton938ace62002-07-17 16:30:39 +000078static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079
Fred Drakebd6101c2001-02-14 18:29:45 +000080/* Set an integer attribute on the error object; return true on success,
81 * false on an exception.
82 */
83static int
84set_error_attr(PyObject *err, char *name, int value)
85{
Christian Heimes217cfd12007-12-02 14:31:20 +000086 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000087
Neal Norwitz2f5e9902006-03-08 06:36:45 +000088 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
89 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000090 return 0;
91 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000092 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 1;
94}
95
96/* Build and set an Expat exception, including positioning
97 * information. Always returns NULL.
98 */
Fred Drake85d835f2001-02-08 15:39:08 +000099static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000100set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000101{
102 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100103 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000104 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000105 int lineno = XML_GetErrorLineNumber(parser);
106 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Victor Stinner499dfcf2011-03-21 13:26:24 +0100108 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
109 XML_ErrorString(code), lineno, column);
110 if (buffer == NULL)
111 return NULL;
112 err = PyObject_CallFunction(ErrorObject, "O", buffer);
113 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000114 if ( err != NULL
115 && set_error_attr(err, "code", code)
116 && set_error_attr(err, "offset", column)
117 && set_error_attr(err, "lineno", lineno)) {
118 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000119 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000120 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000121 return NULL;
122}
123
Fred Drake71b63ff2002-06-28 22:29:01 +0000124static int
125have_handler(xmlparseobject *self, int type)
126{
127 PyObject *handler = self->handlers[type];
128 return handler != NULL;
129}
130
131static PyObject *
132get_handler_name(struct HandlerInfo *hinfo)
133{
134 PyObject *name = hinfo->nameobj;
135 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000136 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000137 hinfo->nameobj = name;
138 }
139 Py_XINCREF(name);
140 return name;
141}
142
Fred Drake85d835f2001-02-08 15:39:08 +0000143
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000144/* Convert a string of XML_Chars into a Unicode string.
145 Returns None if str is a null pointer. */
146
Fred Drake0582df92000-07-12 04:49:00 +0000147static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000148conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000149{
Fred Drake71b63ff2002-06-28 22:29:01 +0000150 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000151 and hence in UTF-8. */
152 /* UTF-8 from Expat, Unicode desired */
153 if (str == NULL) {
154 Py_INCREF(Py_None);
155 return Py_None;
156 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000157 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000158}
159
Fred Drake0582df92000-07-12 04:49:00 +0000160static PyObject *
161conv_string_len_to_unicode(const XML_Char *str, int len)
162{
Fred Drake71b63ff2002-06-28 22:29:01 +0000163 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000164 and hence in UTF-8. */
165 /* UTF-8 from Expat, Unicode desired */
166 if (str == NULL) {
167 Py_INCREF(Py_None);
168 return Py_None;
169 }
Fred Drake6f987622000-08-25 18:03:30 +0000170 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000172
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000173/* Callback routines */
174
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000175static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176
Martin v. Löwis069dde22003-01-21 10:58:18 +0000177/* This handler is used when an error has been detected, in the hope
178 that actual parsing can be terminated early. This will only help
179 if an external entity reference is encountered. */
180static int
181error_external_entity_ref_handler(XML_Parser parser,
182 const XML_Char *context,
183 const XML_Char *base,
184 const XML_Char *systemId,
185 const XML_Char *publicId)
186{
187 return 0;
188}
189
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000190/* Dummy character data handler used when an error (exception) has
191 been detected, and the actual parsing can be terminated early.
192 This is needed since character data handler can't be safely removed
193 from within the character data handler, but can be replaced. It is
194 used only from the character data handler trampoline, and must be
195 used right after `flag_error()` is called. */
196static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000198{
199 /* Do nothing. */
200}
201
Fred Drake6f987622000-08-25 18:03:30 +0000202static void
203flag_error(xmlparseobject *self)
204{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000205 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000206 XML_SetExternalEntityRefHandler(self->itself,
207 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208}
209
210static PyCodeObject*
211getcode(enum HandlerTypes slot, char* func_name, int lineno)
212{
Fred Drakebd6101c2001-02-14 18:29:45 +0000213 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000214 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000215 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 }
217 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000218}
219
Jeremy Hylton9263f572003-06-27 16:13:17 +0000220#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000221static int
222trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
223{
224 int result = 0;
225 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000227 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 tstate->tracing++;
229 result = tstate->c_profilefunc(tstate->c_profileobj,
230 f, code , val);
231 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
232 || (tstate->c_profilefunc != NULL));
233 tstate->tracing--;
234 if (result)
235 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000236 }
237 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 tstate->tracing++;
239 result = tstate->c_tracefunc(tstate->c_traceobj,
240 f, code , val);
241 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
242 || (tstate->c_profilefunc != NULL));
243 tstate->tracing--;
244 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000245 return result;
246}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000247
248static int
249trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
250{
251 PyObject *type, *value, *traceback, *arg;
252 int err;
253
254 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000256
257 PyErr_Fetch(&type, &value, &traceback);
258 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 value = Py_None;
260 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000261 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000262 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000263 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 PyErr_Restore(type, value, traceback);
265 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 }
267 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
268 Py_DECREF(arg);
269 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000271 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 Py_XDECREF(type);
273 Py_XDECREF(value);
274 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000275 }
276 return err;
277}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000278#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000279
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000280static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000281call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
282 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283{
Fred Drakebd6101c2001-02-14 18:29:45 +0000284 PyThreadState *tstate = PyThreadState_GET();
285 PyFrameObject *f;
Christian Heimesa6404ad2013-07-20 22:54:25 +0200286 PyObject *res, *globals;
Fred Drakebd6101c2001-02-14 18:29:45 +0000287
288 if (c == NULL)
289 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290
Christian Heimesa6404ad2013-07-20 22:54:25 +0200291 globals = PyEval_GetGlobals();
292 if (globals == NULL) {
293 return NULL;
294 }
295
296 f = PyFrame_New(tstate, c, globals, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000297 if (f == NULL)
298 return NULL;
299 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000300#ifdef FIX_TRACE
301 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000303 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000304#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000305 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000306 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 if (tstate->curexc_traceback == NULL)
308 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000309 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000310#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 if (trace_frame_exc(tstate, f) < 0) {
312 return NULL;
313 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000314 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000315 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000316 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
317 Py_XDECREF(res);
318 res = NULL;
319 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000320 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000321#else
322 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000323#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000324 tstate->frame = f->f_back;
325 Py_DECREF(f);
326 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000327}
328
Fred Drakeb91a36b2002-06-27 19:40:48 +0000329static PyObject*
330string_intern(xmlparseobject *self, const char* str)
331{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000332 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000333 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000334 /* result can be NULL if the unicode conversion failed. */
335 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000336 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000337 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000339 value = PyDict_GetItem(self->intern, result);
340 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000342 return result;
343 else
344 return NULL;
345 }
346 Py_INCREF(value);
347 Py_DECREF(result);
348 return value;
349}
350
Fred Drake2a3d7db2002-06-28 22:56:48 +0000351/* Return 0 on success, -1 on exception.
352 * flag_error() will be called before return if needed.
353 */
354static int
355call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
356{
357 PyObject *args;
358 PyObject *temp;
359
Georg Brandlc01537f2010-10-15 16:26:08 +0000360 if (!have_handler(self, CharacterData))
361 return -1;
362
Fred Drake2a3d7db2002-06-28 22:56:48 +0000363 args = PyTuple_New(1);
364 if (args == NULL)
365 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000366 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000367 if (temp == NULL) {
368 Py_DECREF(args);
369 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000370 XML_SetCharacterDataHandler(self->itself,
371 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000372 return -1;
373 }
374 PyTuple_SET_ITEM(args, 0, temp);
375 /* temp is now a borrowed reference; consider it unused. */
376 self->in_callback = 1;
377 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000378 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000379 /* temp is an owned reference again, or NULL */
380 self->in_callback = 0;
381 Py_DECREF(args);
382 if (temp == NULL) {
383 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000384 XML_SetCharacterDataHandler(self->itself,
385 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000386 return -1;
387 }
388 Py_DECREF(temp);
389 return 0;
390}
391
392static int
393flush_character_buffer(xmlparseobject *self)
394{
395 int rc;
396 if (self->buffer == NULL || self->buffer_used == 0)
397 return 0;
398 rc = call_character_handler(self, self->buffer, self->buffer_used);
399 self->buffer_used = 0;
400 return rc;
401}
402
403static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000405{
406 xmlparseobject *self = (xmlparseobject *) userData;
407 if (self->buffer == NULL)
408 call_character_handler(self, data, len);
409 else {
410 if ((self->buffer_used + len) > self->buffer_size) {
411 if (flush_character_buffer(self) < 0)
412 return;
413 /* handler might have changed; drop the rest on the floor
414 * if there isn't a handler anymore
415 */
416 if (!have_handler(self, CharacterData))
417 return;
418 }
419 if (len > self->buffer_size) {
420 call_character_handler(self, data, len);
421 self->buffer_used = 0;
422 }
423 else {
424 memcpy(self->buffer + self->buffer_used,
425 data, len * sizeof(XML_Char));
426 self->buffer_used += len;
427 }
428 }
429}
430
Fred Drake85d835f2001-02-08 15:39:08 +0000431static void
432my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000433 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000434{
435 xmlparseobject *self = (xmlparseobject *)userData;
436
Fred Drake71b63ff2002-06-28 22:29:01 +0000437 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000438 PyObject *container, *rv, *args;
439 int i, max;
440
Fred Drake2a3d7db2002-06-28 22:56:48 +0000441 if (flush_character_buffer(self) < 0)
442 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000443 /* Set max to the number of slots filled in atts[]; max/2 is
444 * the number of attributes we need to process.
445 */
446 if (self->specified_attributes) {
447 max = XML_GetSpecifiedAttributeCount(self->itself);
448 }
449 else {
450 max = 0;
451 while (atts[max] != NULL)
452 max += 2;
453 }
454 /* Build the container. */
455 if (self->ordered_attributes)
456 container = PyList_New(max);
457 else
458 container = PyDict_New();
459 if (container == NULL) {
460 flag_error(self);
461 return;
462 }
463 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000464 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000465 PyObject *v;
466 if (n == NULL) {
467 flag_error(self);
468 Py_DECREF(container);
469 return;
470 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000471 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000472 if (v == NULL) {
473 flag_error(self);
474 Py_DECREF(container);
475 Py_DECREF(n);
476 return;
477 }
478 if (self->ordered_attributes) {
479 PyList_SET_ITEM(container, i, n);
480 PyList_SET_ITEM(container, i+1, v);
481 }
482 else if (PyDict_SetItem(container, n, v)) {
483 flag_error(self);
484 Py_DECREF(n);
485 Py_DECREF(v);
486 return;
487 }
488 else {
489 Py_DECREF(n);
490 Py_DECREF(v);
491 }
492 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000493 args = string_intern(self, name);
494 if (args != NULL)
495 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000496 if (args == NULL) {
497 Py_DECREF(container);
498 return;
499 }
500 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000501 self->in_callback = 1;
502 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000503 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000504 self->in_callback = 0;
505 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000506 if (rv == NULL) {
507 flag_error(self);
508 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000509 }
Fred Drake85d835f2001-02-08 15:39:08 +0000510 Py_DECREF(rv);
511 }
512}
513
/* Generate the C callback my_<NAME>Handler that forwards an Expat event
 * to the Python handler stored in slot NAME.
 *
 *   RC           - return type of the generated function
 *   NAME         - enum HandlerTypes member; also forms the function name
 *   PARAMS       - parenthesized parameter list of the function
 *   INIT         - extra declarations placed after the standard locals
 *   PARAM_FORMAT - parenthesized argument list for Py_BuildValue(); it
 *                  may refer to `self`
 *   CONVERSION   - statement(s) run on a successful call; may refer to
 *                  the handler's result `rv`
 *   RETURN       - expression placed after every `return`
 *   GETUSERDATA  - expression yielding the xmlparseobject pointer
 *
 * The generated function returns RETURN unchanged when no handler is
 * installed, when flushing the character buffer fails, or after
 * flag_error() has been called because building the arguments or calling
 * the handler failed.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ;\
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
542
/* RC_HANDLER for callbacks that return nothing and receive the parser
 * object as `userData`; the handler's result is discarded.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000546
/* RC_HANDLER for callbacks that return an int and receive the parser
 * object as `userData`.  The result starts at 0 and is replaced by the
 * handler's return value converted with PyLong_AsLong().
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000551
Fred Drake71b63ff2002-06-28 22:29:01 +0000552VOID_HANDLER(EndElement,
553 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000554 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000555
Fred Drake6f987622000-08-25 18:03:30 +0000556VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000557 (void *userData,
558 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000559 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000560 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000561
Fred Drake6f987622000-08-25 18:03:30 +0000562VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000563 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000564 const XML_Char *entityName,
565 const XML_Char *base,
566 const XML_Char *systemId,
567 const XML_Char *publicId,
568 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000569 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000570 string_intern(self, entityName), string_intern(self, base),
571 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000572 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000573
Fred Drake85d835f2001-02-08 15:39:08 +0000574VOID_HANDLER(EntityDecl,
575 (void *userData,
576 const XML_Char *entityName,
577 int is_parameter_entity,
578 const XML_Char *value,
579 int value_length,
580 const XML_Char *base,
581 const XML_Char *systemId,
582 const XML_Char *publicId,
583 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000584 ("NiNNNNN",
585 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000586 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000587 string_intern(self, base), string_intern(self, systemId),
588 string_intern(self, publicId),
589 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000590
591VOID_HANDLER(XmlDecl,
592 (void *userData,
593 const XML_Char *version,
594 const XML_Char *encoding,
595 int standalone),
596 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000597 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000598 standalone))
599
600static PyObject *
601conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000602 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000603{
604 PyObject *result = NULL;
605 PyObject *children = PyTuple_New(model->numchildren);
606 int i;
607
608 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000609 assert(model->numchildren < INT_MAX);
610 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000611 PyObject *child = conv_content_model(&model->children[i],
612 conv_string);
613 if (child == NULL) {
614 Py_XDECREF(children);
615 return NULL;
616 }
617 PyTuple_SET_ITEM(children, i, child);
618 }
619 result = Py_BuildValue("(iiO&N)",
620 model->type, model->quant,
621 conv_string,model->name, children);
622 }
623 return result;
624}
625
Fred Drake06dd8cf2003-02-02 03:54:17 +0000626static void
627my_ElementDeclHandler(void *userData,
628 const XML_Char *name,
629 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000630{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000631 xmlparseobject *self = (xmlparseobject *)userData;
632 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000633
Fred Drake06dd8cf2003-02-02 03:54:17 +0000634 if (have_handler(self, ElementDecl)) {
635 PyObject *rv = NULL;
636 PyObject *modelobj, *nameobj;
637
638 if (flush_character_buffer(self) < 0)
639 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000640 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000641 if (modelobj == NULL) {
642 flag_error(self);
643 goto finally;
644 }
645 nameobj = string_intern(self, name);
646 if (nameobj == NULL) {
647 Py_DECREF(modelobj);
648 flag_error(self);
649 goto finally;
650 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000651 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000652 if (args == NULL) {
653 Py_DECREF(modelobj);
654 flag_error(self);
655 goto finally;
656 }
657 self->in_callback = 1;
658 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000659 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000660 self->in_callback = 0;
661 if (rv == NULL) {
662 flag_error(self);
663 goto finally;
664 }
665 Py_DECREF(rv);
666 }
667 finally:
668 Py_XDECREF(args);
669 XML_FreeContentModel(self->itself, model);
670 return;
671}
Fred Drake85d835f2001-02-08 15:39:08 +0000672
673VOID_HANDLER(AttlistDecl,
674 (void *userData,
675 const XML_Char *elname,
676 const XML_Char *attname,
677 const XML_Char *att_type,
678 const XML_Char *dflt,
679 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000680 ("(NNO&O&i)",
681 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000682 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000683 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000684
Martin v. Löwisc847f402003-01-21 11:09:21 +0000685#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000686VOID_HANDLER(SkippedEntity,
687 (void *userData,
688 const XML_Char *entityName,
689 int is_parameter_entity),
690 ("Ni",
691 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000692#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000693
Fred Drake71b63ff2002-06-28 22:29:01 +0000694VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 (void *userData,
696 const XML_Char *notationName,
697 const XML_Char *base,
698 const XML_Char *systemId,
699 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000700 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 string_intern(self, notationName), string_intern(self, base),
702 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000703
Fred Drake6f987622000-08-25 18:03:30 +0000704VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000705 (void *userData,
706 const XML_Char *prefix,
707 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000708 ("(NN)",
709 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000710
Fred Drake6f987622000-08-25 18:03:30 +0000711VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 (void *userData,
713 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000714 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000715
Fred Drake6f987622000-08-25 18:03:30 +0000716VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000717 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000718 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000719
Fred Drake6f987622000-08-25 18:03:30 +0000720VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000721 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000723
Fred Drake6f987622000-08-25 18:03:30 +0000724VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000725 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000727
Fred Drake6f987622000-08-25 18:03:30 +0000728VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 (void *userData, const XML_Char *s, int len),
730 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000731
Fred Drake6f987622000-08-25 18:03:30 +0000732VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 (void *userData, const XML_Char *s, int len),
734 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000735
Fred Drake71b63ff2002-06-28 22:29:01 +0000736INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 (void *userData),
738 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000739
Fred Drake6f987622000-08-25 18:03:30 +0000740RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 (XML_Parser parser,
742 const XML_Char *context,
743 const XML_Char *base,
744 const XML_Char *systemId,
745 const XML_Char *publicId),
746 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000747 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000748 conv_string_to_unicode ,context, string_intern(self, base),
749 string_intern(self, systemId), string_intern(self, publicId)),
750 rc = PyLong_AsLong(rv);, rc,
751 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000752
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000753/* XXX UnknownEncodingHandler */
754
Fred Drake85d835f2001-02-08 15:39:08 +0000755VOID_HANDLER(StartDoctypeDecl,
756 (void *userData, const XML_Char *doctypeName,
757 const XML_Char *sysid, const XML_Char *pubid,
758 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000759 ("(NNNi)", string_intern(self, doctypeName),
760 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000761 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000762
763VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000764
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000765/* ---------------------------------------------------------------- */
766
Fred Drake71b63ff2002-06-28 22:29:01 +0000767static PyObject *
768get_parse_result(xmlparseobject *self, int rv)
769{
770 if (PyErr_Occurred()) {
771 return NULL;
772 }
773 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000774 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000775 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000776 if (flush_character_buffer(self) < 0) {
777 return NULL;
778 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000779 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000780}
781
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000782PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000783"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000784Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000785
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200786#define MAX_CHUNK_SIZE (1 << 20)
787
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000788static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000789xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000790{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200791 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000792 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200793 const char *s;
794 Py_ssize_t slen;
795 Py_buffer view;
796 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000797
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200798 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000799 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000800
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200801 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200802 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200803 s = PyUnicode_AsUTF8AndSize(data, &slen);
804 if (s == NULL)
805 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200806 /* Explicitly set UTF-8 encoding. Return code ignored. */
807 (void)XML_SetEncoding(self->itself, "utf-8");
808 }
809 else {
810 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
811 return NULL;
812 s = view.buf;
813 slen = view.len;
814 }
815
816 while (slen > MAX_CHUNK_SIZE) {
817 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
818 if (!rc)
819 goto done;
820 s += MAX_CHUNK_SIZE;
821 slen -= MAX_CHUNK_SIZE;
822 }
823 rc = XML_Parse(self->itself, s, slen, isFinal);
824
825done:
826 if (view.buf != NULL)
827 PyBuffer_Release(&view);
828 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000829}
830
Fred Drakeca1f4262000-09-21 20:10:23 +0000831/* File reading copied from cPickle */
832
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000833#define BUF_SIZE 2048
834
Fred Drake0582df92000-07-12 04:49:00 +0000835static int
836readinst(char *buf, int buf_size, PyObject *meth)
837{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000838 PyObject *str;
839 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000840 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000841
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000842 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000843 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000844 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845
Christian Heimes72b710a2008-05-26 13:28:38 +0000846 if (PyBytes_Check(str))
847 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000848 else if (PyByteArray_Check(str))
849 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000850 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000851 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000852 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000853 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000854 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000855 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000856 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000857 if (len > buf_size) {
858 PyErr_Format(PyExc_ValueError,
859 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000860 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000861 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000862 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000863 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000864 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000865 Py_DECREF(str);
866 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000867 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000868
869error:
870 Py_XDECREF(str);
871 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000872}
873
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000874PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000875"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000876Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000877
878static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000879xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000880{
Fred Drake0582df92000-07-12 04:49:00 +0000881 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000882 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200883 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000884
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200885 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000886 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000887 PyErr_SetString(PyExc_TypeError,
888 "argument must have 'read' attribute");
889 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000890 }
891 for (;;) {
892 int bytes_read;
893 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000894 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000895 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000896 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000897 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000898
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000899 bytes_read = readinst(buf, BUF_SIZE, readmethod);
900 if (bytes_read < 0) {
901 Py_DECREF(readmethod);
902 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000903 }
904 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000905 if (PyErr_Occurred()) {
906 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000907 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000908 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000909
Fred Drake0582df92000-07-12 04:49:00 +0000910 if (!rv || bytes_read == 0)
911 break;
912 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000913 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000914 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000915}
916
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000917PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000918"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000919Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000920
921static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000922xmlparse_SetBase(xmlparseobject *self, PyObject *args)
923{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000924 char *base;
925
Fred Drake0582df92000-07-12 04:49:00 +0000926 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000927 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000928 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000930 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000931 Py_INCREF(Py_None);
932 return Py_None;
933}
934
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000935PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000936"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000937Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000938
939static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000940xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000941{
Fred Drake0582df92000-07-12 04:49:00 +0000942 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000943}
944
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000945PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000946"GetInputContext() -> string\n\
947Return the untranslated text of the input that caused the current event.\n\
948If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000949for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000950
951static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000952xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000953{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000954 if (self->in_callback) {
955 int offset, size;
956 const char *buffer
957 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000958
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000959 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000960 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000961 size - offset);
962 else
963 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000964 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000965 else
966 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000967}
Fred Drakebd6101c2001-02-14 18:29:45 +0000968
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000969PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000970"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000971Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000972information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000973
974static PyObject *
975xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
976{
977 char *context;
978 char *encoding = NULL;
979 xmlparseobject *new_parser;
980 int i;
981
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000982 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000983 &context, &encoding)) {
984 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000985 }
986
Martin v. Löwis894258c2001-09-23 10:20:10 +0000987 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000988 if (new_parser == NULL)
989 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000990 new_parser->buffer_size = self->buffer_size;
991 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000992 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000993 new_parser->ordered_attributes = self->ordered_attributes;
994 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000995 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000996 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000997 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000998 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000999 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001000 new_parser->intern = self->intern;
1001 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001002 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001003
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001004 if (self->buffer != NULL) {
1005 new_parser->buffer = malloc(new_parser->buffer_size);
1006 if (new_parser->buffer == NULL) {
1007 Py_DECREF(new_parser);
1008 return PyErr_NoMemory();
1009 }
1010 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001011 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001012 Py_DECREF(new_parser);
1013 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001014 }
1015
1016 XML_SetUserData(new_parser->itself, (void *)new_parser);
1017
1018 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001019 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001020 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001021
Fred Drake2a3d7db2002-06-28 22:56:48 +00001022 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001023 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001024 Py_DECREF(new_parser);
1025 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001026 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001027 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001028
1029 /* then copy handlers from self */
1030 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001031 PyObject *handler = self->handlers[i];
1032 if (handler != NULL) {
1033 Py_INCREF(handler);
1034 new_parser->handlers[i] = handler;
1035 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001036 handler_info[i].handler);
1037 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001038 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001039 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001040}
1041
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001042PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001043"SetParamEntityParsing(flag) -> success\n\
1044Controls parsing of parameter entities (including the external DTD\n\
1045subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1046XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1047XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001048was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001049
1050static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001051xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001052{
Fred Drake85d835f2001-02-08 15:39:08 +00001053 int flag;
1054 if (!PyArg_ParseTuple(args, "i", &flag))
1055 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001056 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001057 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001058}
1059
Martin v. Löwisc847f402003-01-21 11:09:21 +00001060
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n"
"Allows the application to provide an artificial external subset if one is\n"
"not specified as part of the document instance. This readily allows the\n"
"use of a 'default' document type controlled by the application, while still\n"
"getting the advantage of providing document type information to the parser.\n"
"'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): pass the truth value of `flag` (default true) to
 * XML_UseForeignDTD().  Any Expat error code other than XML_ERROR_NONE is
 * reported through set_error(); otherwise None is returned.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    int flag = 1;
    enum XML_Error rc;

    if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag))
        return NULL;

    rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    if (rc == XML_ERROR_NONE)
        Py_RETURN_NONE;

    return set_error(self, rc);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001085
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001086static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1087
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001088static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 {"Parse", (PyCFunction)xmlparse_Parse,
1090 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001091 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001093 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001095 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001097 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001099 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001101 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001103#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001104 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001106#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001107 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001109};
1110
1111/* ---------- */
1112
1113
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001114
Fred Drake71b63ff2002-06-28 22:29:01 +00001115/* pyexpat international encoding support.
1116 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001117*/
1118
Fred Drake71b63ff2002-06-28 22:29:01 +00001119static int
1120PyUnknownEncodingHandler(void *encodingHandlerData,
1121 const XML_Char *name,
1122 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001123{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001124 static unsigned char template_buffer[256] = {0};
1125 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001126 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001127 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001128 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001129
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001130 if (template_buffer[1] == 0) {
1131 for (i = 0; i < 256; i++)
1132 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001133 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001134
1135 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1136 if (u == NULL || PyUnicode_READY(u))
1137 return XML_STATUS_ERROR;
1138
1139 if (PyUnicode_GET_LENGTH(u) != 256) {
1140 Py_DECREF(u);
1141 PyErr_SetString(PyExc_ValueError,
1142 "multi-byte encodings are not supported");
1143 return XML_STATUS_ERROR;
1144 }
1145
1146 kind = PyUnicode_KIND(u);
1147 data = PyUnicode_DATA(u);
1148 for (i = 0; i < 256; i++) {
1149 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1150 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1151 info->map[i] = ch;
1152 else
1153 info->map[i] = -1;
1154 }
1155
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001156 info->data = NULL;
1157 info->convert = NULL;
1158 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001159 Py_DECREF(u);
1160
1161 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001162}
1163
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001164
1165static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001166newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001167{
1168 int i;
1169 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001170
Martin v. Löwis894258c2001-09-23 10:20:10 +00001171 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001172 if (self == NULL)
1173 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001174
Fred Drake2a3d7db2002-06-28 22:56:48 +00001175 self->buffer = NULL;
1176 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1177 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001178 self->ordered_attributes = 0;
1179 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001180 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001181 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001183 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001184 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1185 }
Fred Drake85d835f2001-02-08 15:39:08 +00001186 else {
Fred Drake0582df92000-07-12 04:49:00 +00001187 self->itself = XML_ParserCreate(encoding);
1188 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001189#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1190 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1191 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1192 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001193 XML_SetHashSalt(self->itself,
1194 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smith25227712012-03-14 18:10:37 -07001195#endif
Fred Drakeb91a36b2002-06-27 19:40:48 +00001196 self->intern = intern;
1197 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001198 PyObject_GC_Track(self);
Fred Drake0582df92000-07-12 04:49:00 +00001199 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001200 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001201 "XML_ParserCreate failed");
1202 Py_DECREF(self);
1203 return NULL;
1204 }
1205 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001206 XML_SetUnknownEncodingHandler(self->itself,
1207 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001208
Fred Drake2a3d7db2002-06-28 22:56:48 +00001209 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001210 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001211
Fred Drake7c75bf22002-07-01 14:02:31 +00001212 self->handlers = malloc(sizeof(PyObject *) * i);
1213 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001214 Py_DECREF(self);
1215 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001216 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001217 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001218
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001219 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001220}
1221
1222
1223static void
Fred Drake0582df92000-07-12 04:49:00 +00001224xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001225{
Fred Drake0582df92000-07-12 04:49:00 +00001226 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001227 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001228 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001229 XML_ParserFree(self->itself);
1230 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001231
Fred Drake85d835f2001-02-08 15:39:08 +00001232 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001233 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001234 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001235 temp = self->handlers[i];
1236 self->handlers[i] = NULL;
1237 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001238 }
1239 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001240 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001241 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001242 if (self->buffer != NULL) {
1243 free(self->buffer);
1244 self->buffer = NULL;
1245 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001246 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001247 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001248}
1249
Fred Drake0582df92000-07-12 04:49:00 +00001250static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001251handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001252{
1253 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001254 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001255 if (PyUnicode_CompareWithASCIIString(
1256 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001257 return i;
1258 }
1259 }
1260 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001261}
1262
1263static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001264get_pybool(int istrue)
1265{
1266 PyObject *result = istrue ? Py_True : Py_False;
1267 Py_INCREF(result);
1268 return result;
1269}
1270
1271static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001272xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001273{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001274 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001275 int handlernum = -1;
1276
Alexander Belopolskye239d232010-12-08 23:31:48 +00001277 if (!PyUnicode_Check(nameobj))
1278 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001279 if (PyUnicode_READY(nameobj))
1280 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281
Alexander Belopolskye239d232010-12-08 23:31:48 +00001282 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001283
1284 if (handlernum != -1) {
1285 PyObject *result = self->handlers[handlernum];
1286 if (result == NULL)
1287 result = Py_None;
1288 Py_INCREF(result);
1289 return result;
1290 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001291
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001292 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1293 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001294 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001295 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001296 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001297 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001298 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001299 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001300 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001301 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001302 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001303 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001304 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001305 XML_GetErrorByteIndex(self->itself));
1306 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001307 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001308 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001309 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001310 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001311 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001312 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001313 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001314 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001315 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001316 XML_GetCurrentByteIndex(self->itself));
1317 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001318 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001319 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001320 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001321 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001322 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001323 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001324 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001325 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001326 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001327 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001328 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001329 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001330 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001331 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001332 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001333 if (self->intern == NULL) {
1334 Py_INCREF(Py_None);
1335 return Py_None;
1336 }
1337 else {
1338 Py_INCREF(self->intern);
1339 return self->intern;
1340 }
1341 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001342 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001343 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001344}
1345
1346static PyObject *
1347xmlparse_dir(PyObject *self, PyObject* noargs)
1348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349#define APPEND(list, str) \
1350 do { \
1351 PyObject *o = PyUnicode_FromString(str); \
1352 if (o != NULL) \
1353 PyList_Append(list, o); \
1354 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001355 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001356
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001357 int i;
1358 PyObject *rc = PyList_New(0);
1359 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001361 for (i = 0; handler_info[i].name != NULL; i++) {
1362 PyObject *o = get_handler_name(&handler_info[i]);
1363 if (o != NULL)
1364 PyList_Append(rc, o);
1365 Py_XDECREF(o);
1366 }
1367 APPEND(rc, "ErrorCode");
1368 APPEND(rc, "ErrorLineNumber");
1369 APPEND(rc, "ErrorColumnNumber");
1370 APPEND(rc, "ErrorByteIndex");
1371 APPEND(rc, "CurrentLineNumber");
1372 APPEND(rc, "CurrentColumnNumber");
1373 APPEND(rc, "CurrentByteIndex");
1374 APPEND(rc, "buffer_size");
1375 APPEND(rc, "buffer_text");
1376 APPEND(rc, "buffer_used");
1377 APPEND(rc, "namespace_prefixes");
1378 APPEND(rc, "ordered_attributes");
1379 APPEND(rc, "specified_attributes");
1380 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001381
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001382#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001383
1384 if (PyErr_Occurred()) {
1385 Py_DECREF(rc);
1386 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001387 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001388
1389 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001390}
1391
Fred Drake6f987622000-08-25 18:03:30 +00001392static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001393sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001394{
1395 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001396 if (handlernum >= 0) {
1397 xmlhandler c_handler = NULL;
1398 PyObject *temp = self->handlers[handlernum];
1399
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001400 if (v == Py_None) {
1401 /* If this is the character data handler, and a character
1402 data handler is already active, we need to be more
1403 careful. What we can safely do is replace the existing
1404 character data handler callback function with a no-op
1405 function that will refuse to call Python. The downside
1406 is that this doesn't completely remove the character
1407 data handler from the C layer if there's any callback
1408 active, so Expat does a little more work than it
1409 otherwise would, but that's really an odd case. A more
1410 elaborate system of handlers and state could remove the
1411 C handler more effectively. */
1412 if (handlernum == CharacterData && self->in_callback)
1413 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001414 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001415 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001416 else if (v != NULL) {
1417 Py_INCREF(v);
1418 c_handler = handler_info[handlernum].handler;
1419 }
Fred Drake0582df92000-07-12 04:49:00 +00001420 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001421 Py_XDECREF(temp);
1422 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001423 return 1;
1424 }
1425 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001426}
1427
1428static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001429xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001430{
Fred Drake6f987622000-08-25 18:03:30 +00001431 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001432 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001433 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1434 return -1;
1435 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001436 assert(PyUnicode_Check(name));
1437 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001438 int b = PyObject_IsTrue(v);
1439 if (b < 0)
1440 return -1;
1441 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001442 if (self->buffer == NULL) {
1443 self->buffer = malloc(self->buffer_size);
1444 if (self->buffer == NULL) {
1445 PyErr_NoMemory();
1446 return -1;
1447 }
1448 self->buffer_used = 0;
1449 }
1450 }
1451 else if (self->buffer != NULL) {
1452 if (flush_character_buffer(self) < 0)
1453 return -1;
1454 free(self->buffer);
1455 self->buffer = NULL;
1456 }
1457 return 0;
1458 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001459 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001460 int b = PyObject_IsTrue(v);
1461 if (b < 0)
1462 return -1;
1463 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001464 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1465 return 0;
1466 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001467 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001468 int b = PyObject_IsTrue(v);
1469 if (b < 0)
1470 return -1;
1471 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001472 return 0;
1473 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001474 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001475 int b = PyObject_IsTrue(v);
1476 if (b < 0)
1477 return -1;
1478 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001479 return 0;
1480 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001481
Alexander Belopolskye239d232010-12-08 23:31:48 +00001482 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001483 long new_buffer_size;
1484 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1486 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001487 }
1488
1489 new_buffer_size=PyLong_AS_LONG(v);
1490 /* trivial case -- no change */
1491 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001493 }
1494
1495 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1497 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001498 }
1499
1500 /* check maximum */
1501 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 char errmsg[100];
1503 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1504 PyErr_SetString(PyExc_ValueError, errmsg);
1505 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001506 }
1507
1508 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 /* there is already a buffer */
1510 if (self->buffer_used != 0) {
Christian Heimes09994a92013-07-20 22:41:58 +02001511 if (flush_character_buffer(self) < 0) {
1512 return -1;
1513 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 }
1515 /* free existing buffer */
1516 free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001517 }
1518 self->buffer = malloc(new_buffer_size);
1519 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 PyErr_NoMemory();
1521 return -1;
1522 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001523 self->buffer_size = new_buffer_size;
1524 return 0;
1525 }
1526
Alexander Belopolskye239d232010-12-08 23:31:48 +00001527 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001528 /* If we're changing the character data handler, flush all
1529 * cached data with the old handler. Not sure there's a
1530 * "right" thing to do, though, but this probably won't
1531 * happen.
1532 */
1533 if (flush_character_buffer(self) < 0)
1534 return -1;
1535 }
Fred Drake6f987622000-08-25 18:03:30 +00001536 if (sethandler(self, name, v)) {
1537 return 0;
1538 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001539 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001540 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001541}
1542
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001543static int
1544xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1545{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001546 int i;
1547 for (i = 0; handler_info[i].name != NULL; i++)
1548 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001549 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001550}
1551
1552static int
1553xmlparse_clear(xmlparseobject *op)
1554{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001555 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001556 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001557 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001558}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001559
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001560PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001561
1562static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 PyVarObject_HEAD_INIT(NULL, 0)
1564 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001565 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 0, /*tp_itemsize*/
1567 /* methods */
1568 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1569 (printfunc)0, /*tp_print*/
1570 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001571 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 0, /*tp_reserved*/
1573 (reprfunc)0, /*tp_repr*/
1574 0, /*tp_as_number*/
1575 0, /*tp_as_sequence*/
1576 0, /*tp_as_mapping*/
1577 (hashfunc)0, /*tp_hash*/
1578 (ternaryfunc)0, /*tp_call*/
1579 (reprfunc)0, /*tp_str*/
1580 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001581 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1585 (traverseproc)xmlparse_traverse, /* tp_traverse */
1586 (inquiry)xmlparse_clear, /* tp_clear */
1587 0, /* tp_richcompare */
1588 0, /* tp_weaklistoffset */
1589 0, /* tp_iter */
1590 0, /* tp_iternext */
1591 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001592};
1593
1594/* End of code for xmlparser objects */
1595/* -------------------------------------------------------- */
1596
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001597PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001598"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001599Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001600
1601static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001602pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1603{
Fred Drakecde79132001-04-25 16:01:30 +00001604 char *encoding = NULL;
1605 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001606 PyObject *intern = NULL;
1607 PyObject *result;
1608 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001609 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001610 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001611
Fred Drakeb91a36b2002-06-27 19:40:48 +00001612 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1613 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001614 return NULL;
1615 if (namespace_separator != NULL
1616 && strlen(namespace_separator) > 1) {
1617 PyErr_SetString(PyExc_ValueError,
1618 "namespace_separator must be at most one"
1619 " character, omitted, or None");
1620 return NULL;
1621 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001622 /* Explicitly passing None means no interning is desired.
1623 Not passing anything means that a new dictionary is used. */
1624 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001626 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 intern = PyDict_New();
1628 if (!intern)
1629 return NULL;
1630 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001631 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001632 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1634 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001635 }
1636
1637 result = newxmlparseobject(encoding, namespace_separator, intern);
1638 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001640 }
1641 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001642}
1643
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001644PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001645"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001646Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001647
1648static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001649pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001650{
Fred Drake0582df92000-07-12 04:49:00 +00001651 long code = 0;
1652
1653 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1654 return NULL;
1655 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001656}
1657
1658/* List of methods defined in the module */
1659
1660static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001662 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1664 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001667};
1668
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001669/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001670
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001671PyDoc_STRVAR(pyexpat_module_documentation,
1672"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001673
Fred Drakecde79132001-04-25 16:01:30 +00001674/* Initialization function for the module */
1675
1676#ifndef MODULE_NAME
1677#define MODULE_NAME "pyexpat"
1678#endif
1679
1680#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001681#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001682#endif
1683
Martin v. Löwis069dde22003-01-21 10:58:18 +00001684#ifndef PyMODINIT_FUNC
1685# ifdef MS_WINDOWS
1686# define PyMODINIT_FUNC __declspec(dllexport) void
1687# else
1688# define PyMODINIT_FUNC void
1689# endif
1690#endif
1691
Mark Hammond8235ea12002-07-19 06:55:41 +00001692PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001693
Martin v. Löwis1a214512008-06-11 05:26:20 +00001694static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 PyModuleDef_HEAD_INIT,
1696 MODULE_NAME,
1697 pyexpat_module_documentation,
1698 -1,
1699 pyexpat_methods,
1700 NULL,
1701 NULL,
1702 NULL,
1703 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001704};
1705
Martin v. Löwis069dde22003-01-21 10:58:18 +00001706PyMODINIT_FUNC
1707MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001708{
1709 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001710 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001711 PyObject *errors_module;
1712 PyObject *modelmod_name;
1713 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001714 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001715 PyObject *tmpnum, *tmpstr;
1716 PyObject *codes_dict;
1717 PyObject *rev_codes_dict;
1718 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001719 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001720 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001721
Fred Drake6f987622000-08-25 18:03:30 +00001722 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001723 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001724 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001725 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001726 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001727
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001728 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001730
Fred Drake0582df92000-07-12 04:49:00 +00001731 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001732 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001733 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001734 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001735
Fred Drake0582df92000-07-12 04:49:00 +00001736 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001737 if (ErrorObject == NULL) {
1738 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001739 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001740 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001741 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001742 }
1743 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001744 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001745 Py_INCREF(ErrorObject);
1746 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001747 Py_INCREF(&Xmlparsetype);
1748 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001749
Fred Drake738293d2000-12-21 17:25:07 +00001750 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1751 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001752 {
1753 XML_Expat_Version info = XML_ExpatVersionInfo();
1754 PyModule_AddObject(m, "version_info",
1755 Py_BuildValue("(iii)", info.major,
1756 info.minor, info.micro));
1757 }
Fred Drake0582df92000-07-12 04:49:00 +00001758 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001759 compiled, this should check and set native_encoding
1760 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001761 */
Fred Drake93adb692000-09-23 04:55:48 +00001762 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001763
Fred Drake85d835f2001-02-08 15:39:08 +00001764 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001765 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001766 errors_module = PyDict_GetItem(d, errmod_name);
1767 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001768 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001769 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001770 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001771 /* gives away the reference to errors_module */
1772 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001773 }
1774 }
Fred Drake6f987622000-08-25 18:03:30 +00001775 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001776 model_module = PyDict_GetItem(d, modelmod_name);
1777 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001778 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001779 if (model_module != NULL) {
1780 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1781 /* gives away the reference to model_module */
1782 PyModule_AddObject(m, "model", model_module);
1783 }
1784 }
1785 Py_DECREF(modelmod_name);
1786 if (errors_module == NULL || model_module == NULL)
1787 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001788 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001789
Martin v. Löwisc847f402003-01-21 11:09:21 +00001790#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001791 {
1792 const XML_Feature *features = XML_GetFeatureList();
1793 PyObject *list = PyList_New(0);
1794 if (list == NULL)
1795 /* just ignore it */
1796 PyErr_Clear();
1797 else {
1798 int i = 0;
1799 for (; features[i].feature != XML_FEATURE_END; ++i) {
1800 int ok;
1801 PyObject *item = Py_BuildValue("si", features[i].name,
1802 features[i].value);
1803 if (item == NULL) {
1804 Py_DECREF(list);
1805 list = NULL;
1806 break;
1807 }
1808 ok = PyList_Append(list, item);
1809 Py_DECREF(item);
1810 if (ok < 0) {
1811 PyErr_Clear();
1812 break;
1813 }
1814 }
1815 if (list != NULL)
1816 PyModule_AddObject(m, "features", list);
1817 }
1818 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001819#endif
Fred Drake6f987622000-08-25 18:03:30 +00001820
Georg Brandlb4dac712010-10-15 14:46:48 +00001821 codes_dict = PyDict_New();
1822 rev_codes_dict = PyDict_New();
1823 if (codes_dict == NULL || rev_codes_dict == NULL) {
1824 Py_XDECREF(codes_dict);
1825 Py_XDECREF(rev_codes_dict);
1826 return NULL;
1827 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001828
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001829#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001830 if (PyModule_AddStringConstant(errors_module, #name, \
1831 (char *)XML_ErrorString(name)) < 0) \
1832 return NULL; \
1833 tmpnum = PyLong_FromLong(name); \
1834 if (tmpnum == NULL) return NULL; \
1835 res = PyDict_SetItemString(codes_dict, \
1836 XML_ErrorString(name), tmpnum); \
1837 if (res < 0) return NULL; \
1838 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1839 if (tmpstr == NULL) return NULL; \
1840 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1841 Py_DECREF(tmpstr); \
1842 Py_DECREF(tmpnum); \
1843 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001844
Fred Drake0582df92000-07-12 04:49:00 +00001845 MYCONST(XML_ERROR_NO_MEMORY);
1846 MYCONST(XML_ERROR_SYNTAX);
1847 MYCONST(XML_ERROR_NO_ELEMENTS);
1848 MYCONST(XML_ERROR_INVALID_TOKEN);
1849 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1850 MYCONST(XML_ERROR_PARTIAL_CHAR);
1851 MYCONST(XML_ERROR_TAG_MISMATCH);
1852 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1853 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1854 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1855 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1856 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1857 MYCONST(XML_ERROR_ASYNC_ENTITY);
1858 MYCONST(XML_ERROR_BAD_CHAR_REF);
1859 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1860 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1861 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1862 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1863 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001864 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1865 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1866 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001867 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1868 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1869 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1870 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1871 /* Added in Expat 1.95.7. */
1872 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1873 /* Added in Expat 1.95.8. */
1874 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1875 MYCONST(XML_ERROR_INCOMPLETE_PE);
1876 MYCONST(XML_ERROR_XML_DECL);
1877 MYCONST(XML_ERROR_TEXT_DECL);
1878 MYCONST(XML_ERROR_PUBLICID);
1879 MYCONST(XML_ERROR_SUSPENDED);
1880 MYCONST(XML_ERROR_NOT_SUSPENDED);
1881 MYCONST(XML_ERROR_ABORTED);
1882 MYCONST(XML_ERROR_FINISHED);
1883 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001884
Georg Brandlb4dac712010-10-15 14:46:48 +00001885 if (PyModule_AddStringConstant(errors_module, "__doc__",
1886 "Constants used to describe "
1887 "error conditions.") < 0)
1888 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001889
Georg Brandlb4dac712010-10-15 14:46:48 +00001890 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1891 return NULL;
1892 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1893 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001894
Fred Drake93adb692000-09-23 04:55:48 +00001895#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001896
Fred Drake85d835f2001-02-08 15:39:08 +00001897#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001898 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1899 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1900 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001901#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001902
Fred Drake85d835f2001-02-08 15:39:08 +00001903#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1904 PyModule_AddStringConstant(model_module, "__doc__",
1905 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001906
Fred Drake85d835f2001-02-08 15:39:08 +00001907 MYCONST(XML_CTYPE_EMPTY);
1908 MYCONST(XML_CTYPE_ANY);
1909 MYCONST(XML_CTYPE_MIXED);
1910 MYCONST(XML_CTYPE_NAME);
1911 MYCONST(XML_CTYPE_CHOICE);
1912 MYCONST(XML_CTYPE_SEQ);
1913
1914 MYCONST(XML_CQUANT_NONE);
1915 MYCONST(XML_CQUANT_OPT);
1916 MYCONST(XML_CQUANT_REP);
1917 MYCONST(XML_CQUANT_PLUS);
1918#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001919
1920 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001921 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001922 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001923 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1924 capi.MINOR_VERSION = XML_MINOR_VERSION;
1925 capi.MICRO_VERSION = XML_MICRO_VERSION;
1926 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001927 capi.GetErrorCode = XML_GetErrorCode;
1928 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1929 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001930 capi.Parse = XML_Parse;
1931 capi.ParserCreate_MM = XML_ParserCreate_MM;
1932 capi.ParserFree = XML_ParserFree;
1933 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1934 capi.SetCommentHandler = XML_SetCommentHandler;
1935 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1936 capi.SetElementHandler = XML_SetElementHandler;
1937 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1938 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1939 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1940 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001941 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001942 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001943 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001944
Benjamin Petersonb173f782009-05-05 22:31:58 +00001945 /* export using capsule */
1946 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001947 if (capi_object)
1948 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001949 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001950}
1951
Fred Drake6f987622000-08-25 18:03:30 +00001952static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001953clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001954{
Fred Drakecde79132001-04-25 16:01:30 +00001955 int i = 0;
1956 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001957
Fred Drake71b63ff2002-06-28 22:29:01 +00001958 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001959 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 self->handlers[i] = NULL;
1961 else {
Fred Drakecde79132001-04-25 16:01:30 +00001962 temp = self->handlers[i];
1963 self->handlers[i] = NULL;
1964 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001966 }
Fred Drakecde79132001-04-25 16:01:30 +00001967 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001968}
1969
Tim Peters0c322792002-07-17 16:49:03 +00001970static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001971 {"StartElementHandler",
1972 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001973 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001974 {"EndElementHandler",
1975 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001976 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001977 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001978 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1979 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001980 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001981 (xmlhandlersetter)XML_SetCharacterDataHandler,
1982 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001983 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001984 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001985 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001986 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001987 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001988 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001989 {"StartNamespaceDeclHandler",
1990 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001991 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001992 {"EndNamespaceDeclHandler",
1993 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001994 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001995 {"CommentHandler",
1996 (xmlhandlersetter)XML_SetCommentHandler,
1997 (xmlhandler)my_CommentHandler},
1998 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001999 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002000 (xmlhandler)my_StartCdataSectionHandler},
2001 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002002 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002003 (xmlhandler)my_EndCdataSectionHandler},
2004 {"DefaultHandler",
2005 (xmlhandlersetter)XML_SetDefaultHandler,
2006 (xmlhandler)my_DefaultHandler},
2007 {"DefaultHandlerExpand",
2008 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2009 (xmlhandler)my_DefaultHandlerExpandHandler},
2010 {"NotStandaloneHandler",
2011 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2012 (xmlhandler)my_NotStandaloneHandler},
2013 {"ExternalEntityRefHandler",
2014 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002015 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002016 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002017 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002018 (xmlhandler)my_StartDoctypeDeclHandler},
2019 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002020 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002021 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002022 {"EntityDeclHandler",
2023 (xmlhandlersetter)XML_SetEntityDeclHandler,
2024 (xmlhandler)my_EntityDeclHandler},
2025 {"XmlDeclHandler",
2026 (xmlhandlersetter)XML_SetXmlDeclHandler,
2027 (xmlhandler)my_XmlDeclHandler},
2028 {"ElementDeclHandler",
2029 (xmlhandlersetter)XML_SetElementDeclHandler,
2030 (xmlhandler)my_ElementDeclHandler},
2031 {"AttlistDeclHandler",
2032 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2033 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002034#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002035 {"SkippedEntityHandler",
2036 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2037 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002038#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002039
Fred Drake0582df92000-07-12 04:49:00 +00002040 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002041};