blob: 01ac14ee6cde51295d5fe569d3fd8249d5e5e1b7 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Jeremy Hylton9263f572003-06-27 16:13:17 +000011#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* One slot number per supported callback.  The values are used as indices
 * into handler_info[] (see getcode()) and into the per-parser handlers
 * array (see have_handler()).
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when the Expat version macro is at least 19504. */
    SkippedEntity,
#endif
    _DummyDecl
};
40
41static PyObject *ErrorObject;
42
43/* ----------------------------------------------------- */
44
45/* Declarations for objects of type xmlparser */
46
47typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000048 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000049
Fred Drake0582df92000-07-12 04:49:00 +000050 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000051 int ordered_attributes; /* Return attributes as a list. */
52 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000053 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000054 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000055 XML_Char *buffer; /* Buffer used when accumulating characters */
56 /* NULL if not enabled */
57 int buffer_size; /* Size of buffer, in XML_Char units */
58 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000059 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000060 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000061} xmlparseobject;
62
Fred Drake2a3d7db2002-06-28 22:56:48 +000063#define CHARACTER_DATA_BUFFER_SIZE 8192
64
Jeremy Hylton938ace62002-07-17 16:30:39 +000065static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake117ac852002-09-24 16:24:54 +000067typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068typedef void* xmlhandler;
69
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000070struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000071 const char *name;
72 xmlhandlersetter setter;
73 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000074 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000075 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076};
77
Jeremy Hylton938ace62002-07-17 16:30:39 +000078static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079
Fred Drakebd6101c2001-02-14 18:29:45 +000080/* Set an integer attribute on the error object; return true on success,
81 * false on an exception.
82 */
83static int
84set_error_attr(PyObject *err, char *name, int value)
85{
Christian Heimes217cfd12007-12-02 14:31:20 +000086 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000087
Neal Norwitz2f5e9902006-03-08 06:36:45 +000088 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
89 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000090 return 0;
91 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000092 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 1;
94}
95
96/* Build and set an Expat exception, including positioning
97 * information. Always returns NULL.
98 */
Fred Drake85d835f2001-02-08 15:39:08 +000099static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000100set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000101{
102 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100103 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000104 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000105 int lineno = XML_GetErrorLineNumber(parser);
106 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Victor Stinner499dfcf2011-03-21 13:26:24 +0100108 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
109 XML_ErrorString(code), lineno, column);
110 if (buffer == NULL)
111 return NULL;
112 err = PyObject_CallFunction(ErrorObject, "O", buffer);
113 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000114 if ( err != NULL
115 && set_error_attr(err, "code", code)
116 && set_error_attr(err, "offset", column)
117 && set_error_attr(err, "lineno", lineno)) {
118 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000119 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000120 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000121 return NULL;
122}
123
Fred Drake71b63ff2002-06-28 22:29:01 +0000124static int
125have_handler(xmlparseobject *self, int type)
126{
127 PyObject *handler = self->handlers[type];
128 return handler != NULL;
129}
130
131static PyObject *
132get_handler_name(struct HandlerInfo *hinfo)
133{
134 PyObject *name = hinfo->nameobj;
135 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000136 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000137 hinfo->nameobj = name;
138 }
139 Py_XINCREF(name);
140 return name;
141}
142
Fred Drake85d835f2001-02-08 15:39:08 +0000143
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000144/* Convert a string of XML_Chars into a Unicode string.
145 Returns None if str is a null pointer. */
146
Fred Drake0582df92000-07-12 04:49:00 +0000147static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000148conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000149{
Fred Drake71b63ff2002-06-28 22:29:01 +0000150 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000151 and hence in UTF-8. */
152 /* UTF-8 from Expat, Unicode desired */
153 if (str == NULL) {
154 Py_INCREF(Py_None);
155 return Py_None;
156 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000157 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000158}
159
Fred Drake0582df92000-07-12 04:49:00 +0000160static PyObject *
161conv_string_len_to_unicode(const XML_Char *str, int len)
162{
Fred Drake71b63ff2002-06-28 22:29:01 +0000163 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000164 and hence in UTF-8. */
165 /* UTF-8 from Expat, Unicode desired */
166 if (str == NULL) {
167 Py_INCREF(Py_None);
168 return Py_None;
169 }
Fred Drake6f987622000-08-25 18:03:30 +0000170 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000172
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000173/* Callback routines */
174
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000175static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176
Martin v. Löwis069dde22003-01-21 10:58:18 +0000177/* This handler is used when an error has been detected, in the hope
178 that actual parsing can be terminated early. This will only help
179 if an external entity reference is encountered. */
180static int
181error_external_entity_ref_handler(XML_Parser parser,
182 const XML_Char *context,
183 const XML_Char *base,
184 const XML_Char *systemId,
185 const XML_Char *publicId)
186{
187 return 0;
188}
189
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000190/* Dummy character data handler used when an error (exception) has
191 been detected, and the actual parsing can be terminated early.
192 This is needed since character data handler can't be safely removed
193 from within the character data handler, but can be replaced. It is
194 used only from the character data handler trampoline, and must be
195 used right after `flag_error()` is called. */
196static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000198{
199 /* Do nothing. */
200}
201
Fred Drake6f987622000-08-25 18:03:30 +0000202static void
203flag_error(xmlparseobject *self)
204{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000205 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000206 XML_SetExternalEntityRefHandler(self->itself,
207 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208}
209
210static PyCodeObject*
211getcode(enum HandlerTypes slot, char* func_name, int lineno)
212{
Fred Drakebd6101c2001-02-14 18:29:45 +0000213 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000214 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000215 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 }
217 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000218}
219
Jeremy Hylton9263f572003-06-27 16:13:17 +0000220#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000221static int
222trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
223{
224 int result = 0;
225 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000227 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 tstate->tracing++;
229 result = tstate->c_profilefunc(tstate->c_profileobj,
230 f, code , val);
231 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
232 || (tstate->c_profilefunc != NULL));
233 tstate->tracing--;
234 if (result)
235 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000236 }
237 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 tstate->tracing++;
239 result = tstate->c_tracefunc(tstate->c_traceobj,
240 f, code , val);
241 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
242 || (tstate->c_profilefunc != NULL));
243 tstate->tracing--;
244 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000245 return result;
246}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000247
248static int
249trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
250{
251 PyObject *type, *value, *traceback, *arg;
252 int err;
253
254 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000256
257 PyErr_Fetch(&type, &value, &traceback);
258 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 value = Py_None;
260 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000261 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000262 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000263 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 PyErr_Restore(type, value, traceback);
265 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 }
267 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
268 Py_DECREF(arg);
269 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000271 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 Py_XDECREF(type);
273 Py_XDECREF(value);
274 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000275 }
276 return err;
277}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000278#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000279
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000280static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000281call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
282 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283{
Fred Drakebd6101c2001-02-14 18:29:45 +0000284 PyThreadState *tstate = PyThreadState_GET();
285 PyFrameObject *f;
286 PyObject *res;
287
288 if (c == NULL)
289 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290
Jeremy Hylton9263f572003-06-27 16:13:17 +0000291 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000292 if (f == NULL)
293 return NULL;
294 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000295#ifdef FIX_TRACE
296 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000298 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000299#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000300 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000301 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 if (tstate->curexc_traceback == NULL)
303 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000304 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000305#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 if (trace_frame_exc(tstate, f) < 0) {
307 return NULL;
308 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000309 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000310 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
312 Py_XDECREF(res);
313 res = NULL;
314 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000315 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000316#else
317 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000318#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000319 tstate->frame = f->f_back;
320 Py_DECREF(f);
321 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000322}
323
Fred Drakeb91a36b2002-06-27 19:40:48 +0000324static PyObject*
325string_intern(xmlparseobject *self, const char* str)
326{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000327 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000328 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000329 /* result can be NULL if the unicode conversion failed. */
330 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000332 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000334 value = PyDict_GetItem(self->intern, result);
335 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000336 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000337 return result;
338 else
339 return NULL;
340 }
341 Py_INCREF(value);
342 Py_DECREF(result);
343 return value;
344}
345
Fred Drake2a3d7db2002-06-28 22:56:48 +0000346/* Return 0 on success, -1 on exception.
347 * flag_error() will be called before return if needed.
348 */
349static int
350call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
351{
352 PyObject *args;
353 PyObject *temp;
354
Georg Brandlc01537f2010-10-15 16:26:08 +0000355 if (!have_handler(self, CharacterData))
356 return -1;
357
Fred Drake2a3d7db2002-06-28 22:56:48 +0000358 args = PyTuple_New(1);
359 if (args == NULL)
360 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000361 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000362 if (temp == NULL) {
363 Py_DECREF(args);
364 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000365 XML_SetCharacterDataHandler(self->itself,
366 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000367 return -1;
368 }
369 PyTuple_SET_ITEM(args, 0, temp);
370 /* temp is now a borrowed reference; consider it unused. */
371 self->in_callback = 1;
372 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000373 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000374 /* temp is an owned reference again, or NULL */
375 self->in_callback = 0;
376 Py_DECREF(args);
377 if (temp == NULL) {
378 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000379 XML_SetCharacterDataHandler(self->itself,
380 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000381 return -1;
382 }
383 Py_DECREF(temp);
384 return 0;
385}
386
387static int
388flush_character_buffer(xmlparseobject *self)
389{
390 int rc;
391 if (self->buffer == NULL || self->buffer_used == 0)
392 return 0;
393 rc = call_character_handler(self, self->buffer, self->buffer_used);
394 self->buffer_used = 0;
395 return rc;
396}
397
398static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000400{
401 xmlparseobject *self = (xmlparseobject *) userData;
402 if (self->buffer == NULL)
403 call_character_handler(self, data, len);
404 else {
405 if ((self->buffer_used + len) > self->buffer_size) {
406 if (flush_character_buffer(self) < 0)
407 return;
408 /* handler might have changed; drop the rest on the floor
409 * if there isn't a handler anymore
410 */
411 if (!have_handler(self, CharacterData))
412 return;
413 }
414 if (len > self->buffer_size) {
415 call_character_handler(self, data, len);
416 self->buffer_used = 0;
417 }
418 else {
419 memcpy(self->buffer + self->buffer_used,
420 data, len * sizeof(XML_Char));
421 self->buffer_used += len;
422 }
423 }
424}
425
Fred Drake85d835f2001-02-08 15:39:08 +0000426static void
427my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000428 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000429{
430 xmlparseobject *self = (xmlparseobject *)userData;
431
Fred Drake71b63ff2002-06-28 22:29:01 +0000432 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000433 PyObject *container, *rv, *args;
434 int i, max;
435
Fred Drake2a3d7db2002-06-28 22:56:48 +0000436 if (flush_character_buffer(self) < 0)
437 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000438 /* Set max to the number of slots filled in atts[]; max/2 is
439 * the number of attributes we need to process.
440 */
441 if (self->specified_attributes) {
442 max = XML_GetSpecifiedAttributeCount(self->itself);
443 }
444 else {
445 max = 0;
446 while (atts[max] != NULL)
447 max += 2;
448 }
449 /* Build the container. */
450 if (self->ordered_attributes)
451 container = PyList_New(max);
452 else
453 container = PyDict_New();
454 if (container == NULL) {
455 flag_error(self);
456 return;
457 }
458 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000459 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000460 PyObject *v;
461 if (n == NULL) {
462 flag_error(self);
463 Py_DECREF(container);
464 return;
465 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000466 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000467 if (v == NULL) {
468 flag_error(self);
469 Py_DECREF(container);
470 Py_DECREF(n);
471 return;
472 }
473 if (self->ordered_attributes) {
474 PyList_SET_ITEM(container, i, n);
475 PyList_SET_ITEM(container, i+1, v);
476 }
477 else if (PyDict_SetItem(container, n, v)) {
478 flag_error(self);
479 Py_DECREF(n);
480 Py_DECREF(v);
481 return;
482 }
483 else {
484 Py_DECREF(n);
485 Py_DECREF(v);
486 }
487 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000488 args = string_intern(self, name);
489 if (args != NULL)
490 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000491 if (args == NULL) {
492 Py_DECREF(container);
493 return;
494 }
495 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000496 self->in_callback = 1;
497 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000498 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000499 self->in_callback = 0;
500 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000501 if (rv == NULL) {
502 flag_error(self);
503 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000504 }
Fred Drake85d835f2001-02-08 15:39:08 +0000505 Py_DECREF(rv);
506 }
507}
508
/* Generate the C trampoline my_<NAME>Handler for handler slot NAME.
 *
 *   RC            C return type of the trampoline
 *   PARAMS        parenthesised parameter list
 *   INIT          extra declarations placed at the top of the body
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue()
 *   CONVERSION    statement(s) run on the Python result `rv`
 *   RETURN        expression returned on every path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS { \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
537
/* Trampoline returning void whose user data is the `userData` argument;
   the Python result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000541
/* Trampoline returning int: the Python result is converted with
   PyLong_AsLong() and returned; 0 is returned on the error paths. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000546
Fred Drake71b63ff2002-06-28 22:29:01 +0000547VOID_HANDLER(EndElement,
548 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000549 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000550
Fred Drake6f987622000-08-25 18:03:30 +0000551VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000552 (void *userData,
553 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000554 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000555 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000556
Fred Drake6f987622000-08-25 18:03:30 +0000557VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000558 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000559 const XML_Char *entityName,
560 const XML_Char *base,
561 const XML_Char *systemId,
562 const XML_Char *publicId,
563 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000564 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000565 string_intern(self, entityName), string_intern(self, base),
566 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000567 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000568
Fred Drake85d835f2001-02-08 15:39:08 +0000569VOID_HANDLER(EntityDecl,
570 (void *userData,
571 const XML_Char *entityName,
572 int is_parameter_entity,
573 const XML_Char *value,
574 int value_length,
575 const XML_Char *base,
576 const XML_Char *systemId,
577 const XML_Char *publicId,
578 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000579 ("NiNNNNN",
580 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000581 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000582 string_intern(self, base), string_intern(self, systemId),
583 string_intern(self, publicId),
584 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000585
586VOID_HANDLER(XmlDecl,
587 (void *userData,
588 const XML_Char *version,
589 const XML_Char *encoding,
590 int standalone),
591 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000592 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000593 standalone))
594
595static PyObject *
596conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000597 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000598{
599 PyObject *result = NULL;
600 PyObject *children = PyTuple_New(model->numchildren);
601 int i;
602
603 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000604 assert(model->numchildren < INT_MAX);
605 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000606 PyObject *child = conv_content_model(&model->children[i],
607 conv_string);
608 if (child == NULL) {
609 Py_XDECREF(children);
610 return NULL;
611 }
612 PyTuple_SET_ITEM(children, i, child);
613 }
614 result = Py_BuildValue("(iiO&N)",
615 model->type, model->quant,
616 conv_string,model->name, children);
617 }
618 return result;
619}
620
Fred Drake06dd8cf2003-02-02 03:54:17 +0000621static void
622my_ElementDeclHandler(void *userData,
623 const XML_Char *name,
624 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000625{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000626 xmlparseobject *self = (xmlparseobject *)userData;
627 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000628
Fred Drake06dd8cf2003-02-02 03:54:17 +0000629 if (have_handler(self, ElementDecl)) {
630 PyObject *rv = NULL;
631 PyObject *modelobj, *nameobj;
632
633 if (flush_character_buffer(self) < 0)
634 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000635 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000636 if (modelobj == NULL) {
637 flag_error(self);
638 goto finally;
639 }
640 nameobj = string_intern(self, name);
641 if (nameobj == NULL) {
642 Py_DECREF(modelobj);
643 flag_error(self);
644 goto finally;
645 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000646 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000647 if (args == NULL) {
648 Py_DECREF(modelobj);
649 flag_error(self);
650 goto finally;
651 }
652 self->in_callback = 1;
653 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000654 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000655 self->in_callback = 0;
656 if (rv == NULL) {
657 flag_error(self);
658 goto finally;
659 }
660 Py_DECREF(rv);
661 }
662 finally:
663 Py_XDECREF(args);
664 XML_FreeContentModel(self->itself, model);
665 return;
666}
Fred Drake85d835f2001-02-08 15:39:08 +0000667
668VOID_HANDLER(AttlistDecl,
669 (void *userData,
670 const XML_Char *elname,
671 const XML_Char *attname,
672 const XML_Char *att_type,
673 const XML_Char *dflt,
674 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000675 ("(NNO&O&i)",
676 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000677 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000678 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000679
Martin v. Löwisc847f402003-01-21 11:09:21 +0000680#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000681VOID_HANDLER(SkippedEntity,
682 (void *userData,
683 const XML_Char *entityName,
684 int is_parameter_entity),
685 ("Ni",
686 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000687#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000688
Fred Drake71b63ff2002-06-28 22:29:01 +0000689VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 (void *userData,
691 const XML_Char *notationName,
692 const XML_Char *base,
693 const XML_Char *systemId,
694 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000695 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 string_intern(self, notationName), string_intern(self, base),
697 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000698
Fred Drake6f987622000-08-25 18:03:30 +0000699VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 (void *userData,
701 const XML_Char *prefix,
702 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000703 ("(NN)",
704 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000705
Fred Drake6f987622000-08-25 18:03:30 +0000706VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 (void *userData,
708 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000709 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000710
Fred Drake6f987622000-08-25 18:03:30 +0000711VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000712 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000713 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000714
Fred Drake6f987622000-08-25 18:03:30 +0000715VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000716 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000718
Fred Drake6f987622000-08-25 18:03:30 +0000719VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000720 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000722
Fred Drake6f987622000-08-25 18:03:30 +0000723VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 (void *userData, const XML_Char *s, int len),
725 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000726
Fred Drake6f987622000-08-25 18:03:30 +0000727VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000728 (void *userData, const XML_Char *s, int len),
729 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000730
Fred Drake71b63ff2002-06-28 22:29:01 +0000731INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000732 (void *userData),
733 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000734
Fred Drake6f987622000-08-25 18:03:30 +0000735RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 (XML_Parser parser,
737 const XML_Char *context,
738 const XML_Char *base,
739 const XML_Char *systemId,
740 const XML_Char *publicId),
741 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000742 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 conv_string_to_unicode ,context, string_intern(self, base),
744 string_intern(self, systemId), string_intern(self, publicId)),
745 rc = PyLong_AsLong(rv);, rc,
746 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000747
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000748/* XXX UnknownEncodingHandler */
749
Fred Drake85d835f2001-02-08 15:39:08 +0000750VOID_HANDLER(StartDoctypeDecl,
751 (void *userData, const XML_Char *doctypeName,
752 const XML_Char *sysid, const XML_Char *pubid,
753 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000754 ("(NNNi)", string_intern(self, doctypeName),
755 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000756 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000757
758VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000759
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000760/* ---------------------------------------------------------------- */
761
Fred Drake71b63ff2002-06-28 22:29:01 +0000762static PyObject *
763get_parse_result(xmlparseobject *self, int rv)
764{
765 if (PyErr_Occurred()) {
766 return NULL;
767 }
768 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000769 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000770 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000771 if (flush_character_buffer(self) < 0) {
772 return NULL;
773 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000774 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000775}
776
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000777PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000778"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000779Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000780
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200781#define MAX_CHUNK_SIZE (1 << 20)
782
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000783static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000784xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000785{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200786 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000787 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200788 const char *s;
789 Py_ssize_t slen;
790 Py_buffer view;
791 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000792
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200793 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000794 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000795
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200796 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200797 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200798 s = PyUnicode_AsUTF8AndSize(data, &slen);
799 if (s == NULL)
800 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200801 /* Explicitly set UTF-8 encoding. Return code ignored. */
802 (void)XML_SetEncoding(self->itself, "utf-8");
803 }
804 else {
805 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
806 return NULL;
807 s = view.buf;
808 slen = view.len;
809 }
810
811 while (slen > MAX_CHUNK_SIZE) {
812 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
813 if (!rc)
814 goto done;
815 s += MAX_CHUNK_SIZE;
816 slen -= MAX_CHUNK_SIZE;
817 }
818 rc = XML_Parse(self->itself, s, slen, isFinal);
819
820done:
821 if (view.buf != NULL)
822 PyBuffer_Release(&view);
823 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000824}
825
Fred Drakeca1f4262000-09-21 20:10:23 +0000826/* File reading copied from cPickle */
827
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000828#define BUF_SIZE 2048
829
Fred Drake0582df92000-07-12 04:49:00 +0000830static int
831readinst(char *buf, int buf_size, PyObject *meth)
832{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000833 PyObject *str;
834 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000835 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000836
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000837 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000838 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000839 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000840
Christian Heimes72b710a2008-05-26 13:28:38 +0000841 if (PyBytes_Check(str))
842 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000843 else if (PyByteArray_Check(str))
844 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000845 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000846 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000847 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000848 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000849 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000850 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000851 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000852 if (len > buf_size) {
853 PyErr_Format(PyExc_ValueError,
854 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000855 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000856 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000857 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000858 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000859 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000860 Py_DECREF(str);
861 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000862 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000863
864error:
865 Py_XDECREF(str);
866 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000867}
868
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000869PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000870"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000871Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000872
873static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000874xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000875{
Fred Drake0582df92000-07-12 04:49:00 +0000876 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000877 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200878 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000879
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200880 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000881 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000882 PyErr_SetString(PyExc_TypeError,
883 "argument must have 'read' attribute");
884 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000885 }
886 for (;;) {
887 int bytes_read;
888 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000889 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000890 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000891 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000892 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000893
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000894 bytes_read = readinst(buf, BUF_SIZE, readmethod);
895 if (bytes_read < 0) {
896 Py_DECREF(readmethod);
897 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000898 }
899 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000900 if (PyErr_Occurred()) {
901 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000902 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000903 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000904
Fred Drake0582df92000-07-12 04:49:00 +0000905 if (!rv || bytes_read == 0)
906 break;
907 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000908 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000909 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000910}
911
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000912PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000913"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000914Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000915
916static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000917xmlparse_SetBase(xmlparseobject *self, PyObject *args)
918{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000919 char *base;
920
Fred Drake0582df92000-07-12 04:49:00 +0000921 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000922 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000923 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000925 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000926 Py_INCREF(Py_None);
927 return Py_None;
928}
929
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000930PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000931"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000932Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000933
934static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000935xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000936{
Fred Drake0582df92000-07-12 04:49:00 +0000937 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000938}
939
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000940PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000941"GetInputContext() -> string\n\
942Return the untranslated text of the input that caused the current event.\n\
943If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000944for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000945
946static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000947xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000948{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000949 if (self->in_callback) {
950 int offset, size;
951 const char *buffer
952 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000953
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000954 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000955 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000956 size - offset);
957 else
958 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000959 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000960 else
961 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000962}
Fred Drakebd6101c2001-02-14 18:29:45 +0000963
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000964PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000965"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000966Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000967information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000968
969static PyObject *
970xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
971{
972 char *context;
973 char *encoding = NULL;
974 xmlparseobject *new_parser;
975 int i;
976
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000977 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000978 &context, &encoding)) {
979 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000980 }
981
Martin v. Löwis894258c2001-09-23 10:20:10 +0000982 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000983 if (new_parser == NULL)
984 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000985 new_parser->buffer_size = self->buffer_size;
986 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000987 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000988 new_parser->ordered_attributes = self->ordered_attributes;
989 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000990 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000991 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000992 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000994 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000995 new_parser->intern = self->intern;
996 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +0000997 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000998
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000999 if (self->buffer != NULL) {
1000 new_parser->buffer = malloc(new_parser->buffer_size);
1001 if (new_parser->buffer == NULL) {
1002 Py_DECREF(new_parser);
1003 return PyErr_NoMemory();
1004 }
1005 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001006 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001007 Py_DECREF(new_parser);
1008 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001009 }
1010
1011 XML_SetUserData(new_parser->itself, (void *)new_parser);
1012
1013 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001014 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001015 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001016
Fred Drake2a3d7db2002-06-28 22:56:48 +00001017 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001018 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001019 Py_DECREF(new_parser);
1020 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001021 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001022 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001023
1024 /* then copy handlers from self */
1025 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001026 PyObject *handler = self->handlers[i];
1027 if (handler != NULL) {
1028 Py_INCREF(handler);
1029 new_parser->handlers[i] = handler;
1030 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001031 handler_info[i].handler);
1032 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001033 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001034 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001035}
1036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001037PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001038"SetParamEntityParsing(flag) -> success\n\
1039Controls parsing of parameter entities (including the external DTD\n\
1040subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1041XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1042XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001043was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001044
1045static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001046xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001047{
Fred Drake85d835f2001-02-08 15:39:08 +00001048 int flag;
1049 if (!PyArg_ParseTuple(args, "i", &flag))
1050 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001051 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001052 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001053}
1054
Martin v. Löwisc847f402003-01-21 11:09:21 +00001055
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): forward the truth value of `flag` (default true)
   to XML_UseForeignDTD().  Returns None on XML_ERROR_NONE; any other code
   is reported through set_error(). */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    int flag = 1;
    enum XML_Error status;

    if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag)) {
        return NULL;
    }

    status = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    if (status == XML_ERROR_NONE) {
        Py_RETURN_NONE;
    }
    return set_error(self, status);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001080
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001081static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1082
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001083static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 {"Parse", (PyCFunction)xmlparse_Parse,
1085 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001086 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001088 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001090 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001092 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001094 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001096 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001098#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001099 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001101#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001102 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001104};
1105
1106/* ---------- */
1107
1108
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001109
Fred Drake71b63ff2002-06-28 22:29:01 +00001110/* pyexpat international encoding support.
1111 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001112*/
1113
Fred Drake71b63ff2002-06-28 22:29:01 +00001114static int
1115PyUnknownEncodingHandler(void *encodingHandlerData,
1116 const XML_Char *name,
1117 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001118{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001119 static unsigned char template_buffer[256] = {0};
1120 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001121 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001122 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001123 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001124
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001125 if (template_buffer[1] == 0) {
1126 for (i = 0; i < 256; i++)
1127 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001128 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001129
1130 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1131 if (u == NULL || PyUnicode_READY(u))
1132 return XML_STATUS_ERROR;
1133
1134 if (PyUnicode_GET_LENGTH(u) != 256) {
1135 Py_DECREF(u);
1136 PyErr_SetString(PyExc_ValueError,
1137 "multi-byte encodings are not supported");
1138 return XML_STATUS_ERROR;
1139 }
1140
1141 kind = PyUnicode_KIND(u);
1142 data = PyUnicode_DATA(u);
1143 for (i = 0; i < 256; i++) {
1144 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1145 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1146 info->map[i] = ch;
1147 else
1148 info->map[i] = -1;
1149 }
1150
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001151 info->data = NULL;
1152 info->convert = NULL;
1153 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001154 Py_DECREF(u);
1155
1156 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001157}
1158
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001159
1160static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001161newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001162{
1163 int i;
1164 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001165
Martin v. Löwis894258c2001-09-23 10:20:10 +00001166 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001167 if (self == NULL)
1168 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001169
Fred Drake2a3d7db2002-06-28 22:56:48 +00001170 self->buffer = NULL;
1171 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1172 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001173 self->ordered_attributes = 0;
1174 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001175 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001176 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001177 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001178 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001179 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1180 }
Fred Drake85d835f2001-02-08 15:39:08 +00001181 else {
Fred Drake0582df92000-07-12 04:49:00 +00001182 self->itself = XML_ParserCreate(encoding);
1183 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001184#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1185 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1186 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1187 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001188 XML_SetHashSalt(self->itself,
1189 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smith25227712012-03-14 18:10:37 -07001190#endif
Fred Drakeb91a36b2002-06-27 19:40:48 +00001191 self->intern = intern;
1192 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001193 PyObject_GC_Track(self);
Fred Drake0582df92000-07-12 04:49:00 +00001194 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001195 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001196 "XML_ParserCreate failed");
1197 Py_DECREF(self);
1198 return NULL;
1199 }
1200 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001201 XML_SetUnknownEncodingHandler(self->itself,
1202 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001203
Fred Drake2a3d7db2002-06-28 22:56:48 +00001204 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001205 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001206
Fred Drake7c75bf22002-07-01 14:02:31 +00001207 self->handlers = malloc(sizeof(PyObject *) * i);
1208 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001209 Py_DECREF(self);
1210 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001211 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001212 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001213
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001214 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001215}
1216
1217
1218static void
Fred Drake0582df92000-07-12 04:49:00 +00001219xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001220{
Fred Drake0582df92000-07-12 04:49:00 +00001221 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001222 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001223 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001224 XML_ParserFree(self->itself);
1225 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001226
Fred Drake85d835f2001-02-08 15:39:08 +00001227 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001228 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001229 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001230 temp = self->handlers[i];
1231 self->handlers[i] = NULL;
1232 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001233 }
1234 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001235 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001236 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001237 if (self->buffer != NULL) {
1238 free(self->buffer);
1239 self->buffer = NULL;
1240 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001241 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001242 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001243}
1244
Fred Drake0582df92000-07-12 04:49:00 +00001245static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001246handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001247{
1248 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001249 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001250 if (PyUnicode_CompareWithASCIIString(
1251 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001252 return i;
1253 }
1254 }
1255 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001256}
1257
1258static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001259get_pybool(int istrue)
1260{
1261 PyObject *result = istrue ? Py_True : Py_False;
1262 Py_INCREF(result);
1263 return result;
1264}
1265
1266static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001267xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001268{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001269 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001270 int handlernum = -1;
1271
Alexander Belopolskye239d232010-12-08 23:31:48 +00001272 if (!PyUnicode_Check(nameobj))
1273 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001274 if (PyUnicode_READY(nameobj))
1275 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276
Alexander Belopolskye239d232010-12-08 23:31:48 +00001277 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001278
1279 if (handlernum != -1) {
1280 PyObject *result = self->handlers[handlernum];
1281 if (result == NULL)
1282 result = Py_None;
1283 Py_INCREF(result);
1284 return result;
1285 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001286
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001287 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1288 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001289 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001290 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001291 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001292 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001293 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001294 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001295 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001296 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001297 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001298 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001299 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001300 XML_GetErrorByteIndex(self->itself));
1301 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001302 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001303 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001304 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001305 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001306 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001307 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001308 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001309 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001310 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001311 XML_GetCurrentByteIndex(self->itself));
1312 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001313 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001314 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001315 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001316 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001317 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001318 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001319 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001320 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001321 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001322 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001323 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001324 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001325 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001326 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001327 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001328 if (self->intern == NULL) {
1329 Py_INCREF(Py_None);
1330 return Py_None;
1331 }
1332 else {
1333 Py_INCREF(self->intern);
1334 return self->intern;
1335 }
1336 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001337 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001338 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001339}
1340
1341static PyObject *
1342xmlparse_dir(PyObject *self, PyObject* noargs)
1343{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344#define APPEND(list, str) \
1345 do { \
1346 PyObject *o = PyUnicode_FromString(str); \
1347 if (o != NULL) \
1348 PyList_Append(list, o); \
1349 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001350 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001351
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001352 int i;
1353 PyObject *rc = PyList_New(0);
1354 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001356 for (i = 0; handler_info[i].name != NULL; i++) {
1357 PyObject *o = get_handler_name(&handler_info[i]);
1358 if (o != NULL)
1359 PyList_Append(rc, o);
1360 Py_XDECREF(o);
1361 }
1362 APPEND(rc, "ErrorCode");
1363 APPEND(rc, "ErrorLineNumber");
1364 APPEND(rc, "ErrorColumnNumber");
1365 APPEND(rc, "ErrorByteIndex");
1366 APPEND(rc, "CurrentLineNumber");
1367 APPEND(rc, "CurrentColumnNumber");
1368 APPEND(rc, "CurrentByteIndex");
1369 APPEND(rc, "buffer_size");
1370 APPEND(rc, "buffer_text");
1371 APPEND(rc, "buffer_used");
1372 APPEND(rc, "namespace_prefixes");
1373 APPEND(rc, "ordered_attributes");
1374 APPEND(rc, "specified_attributes");
1375 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001376
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001377#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001378
1379 if (PyErr_Occurred()) {
1380 Py_DECREF(rc);
1381 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001382 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001383
1384 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001385}
1386
Fred Drake6f987622000-08-25 18:03:30 +00001387static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001388sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001389{
1390 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001391 if (handlernum >= 0) {
1392 xmlhandler c_handler = NULL;
1393 PyObject *temp = self->handlers[handlernum];
1394
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001395 if (v == Py_None) {
1396 /* If this is the character data handler, and a character
1397 data handler is already active, we need to be more
1398 careful. What we can safely do is replace the existing
1399 character data handler callback function with a no-op
1400 function that will refuse to call Python. The downside
1401 is that this doesn't completely remove the character
1402 data handler from the C layer if there's any callback
1403 active, so Expat does a little more work than it
1404 otherwise would, but that's really an odd case. A more
1405 elaborate system of handlers and state could remove the
1406 C handler more effectively. */
1407 if (handlernum == CharacterData && self->in_callback)
1408 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001409 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001410 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001411 else if (v != NULL) {
1412 Py_INCREF(v);
1413 c_handler = handler_info[handlernum].handler;
1414 }
Fred Drake0582df92000-07-12 04:49:00 +00001415 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001416 Py_XDECREF(temp);
1417 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001418 return 1;
1419 }
1420 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001421}
1422
1423static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001424xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001425{
Fred Drake6f987622000-08-25 18:03:30 +00001426 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001427 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001428 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1429 return -1;
1430 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001431 assert(PyUnicode_Check(name));
1432 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001433 int b = PyObject_IsTrue(v);
1434 if (b < 0)
1435 return -1;
1436 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001437 if (self->buffer == NULL) {
1438 self->buffer = malloc(self->buffer_size);
1439 if (self->buffer == NULL) {
1440 PyErr_NoMemory();
1441 return -1;
1442 }
1443 self->buffer_used = 0;
1444 }
1445 }
1446 else if (self->buffer != NULL) {
1447 if (flush_character_buffer(self) < 0)
1448 return -1;
1449 free(self->buffer);
1450 self->buffer = NULL;
1451 }
1452 return 0;
1453 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001454 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001455 int b = PyObject_IsTrue(v);
1456 if (b < 0)
1457 return -1;
1458 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001459 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1460 return 0;
1461 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001462 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001463 int b = PyObject_IsTrue(v);
1464 if (b < 0)
1465 return -1;
1466 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001467 return 0;
1468 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001469 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001470 int b = PyObject_IsTrue(v);
1471 if (b < 0)
1472 return -1;
1473 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001474 return 0;
1475 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001476
Alexander Belopolskye239d232010-12-08 23:31:48 +00001477 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001478 long new_buffer_size;
1479 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1481 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001482 }
1483
1484 new_buffer_size=PyLong_AS_LONG(v);
1485 /* trivial case -- no change */
1486 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001488 }
1489
1490 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1492 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001493 }
1494
1495 /* check maximum */
1496 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 char errmsg[100];
1498 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1499 PyErr_SetString(PyExc_ValueError, errmsg);
1500 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001501 }
1502
1503 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 /* there is already a buffer */
1505 if (self->buffer_used != 0) {
1506 flush_character_buffer(self);
1507 }
1508 /* free existing buffer */
1509 free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001510 }
1511 self->buffer = malloc(new_buffer_size);
1512 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 PyErr_NoMemory();
1514 return -1;
1515 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001516 self->buffer_size = new_buffer_size;
1517 return 0;
1518 }
1519
Alexander Belopolskye239d232010-12-08 23:31:48 +00001520 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001521 /* If we're changing the character data handler, flush all
1522 * cached data with the old handler. Not sure there's a
1523 * "right" thing to do, though, but this probably won't
1524 * happen.
1525 */
1526 if (flush_character_buffer(self) < 0)
1527 return -1;
1528 }
Fred Drake6f987622000-08-25 18:03:30 +00001529 if (sethandler(self, name, v)) {
1530 return 0;
1531 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001532 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001533 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001534}
1535
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001536static int
1537xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1538{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001539 int i;
1540 for (i = 0; handler_info[i].name != NULL; i++)
1541 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001542 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001543}
1544
1545static int
1546xmlparse_clear(xmlparseobject *op)
1547{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001548 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001549 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001550 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001551}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001552
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001553PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001554
1555static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001556 PyVarObject_HEAD_INIT(NULL, 0)
1557 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001558 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 0, /*tp_itemsize*/
1560 /* methods */
1561 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1562 (printfunc)0, /*tp_print*/
1563 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001564 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 0, /*tp_reserved*/
1566 (reprfunc)0, /*tp_repr*/
1567 0, /*tp_as_number*/
1568 0, /*tp_as_sequence*/
1569 0, /*tp_as_mapping*/
1570 (hashfunc)0, /*tp_hash*/
1571 (ternaryfunc)0, /*tp_call*/
1572 (reprfunc)0, /*tp_str*/
1573 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001574 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1578 (traverseproc)xmlparse_traverse, /* tp_traverse */
1579 (inquiry)xmlparse_clear, /* tp_clear */
1580 0, /* tp_richcompare */
1581 0, /* tp_weaklistoffset */
1582 0, /* tp_iter */
1583 0, /* tp_iternext */
1584 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001585};
1586
1587/* End of code for xmlparser objects */
1588/* -------------------------------------------------------- */
1589
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001590PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001591"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001592Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001593
1594static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001595pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1596{
Fred Drakecde79132001-04-25 16:01:30 +00001597 char *encoding = NULL;
1598 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001599 PyObject *intern = NULL;
1600 PyObject *result;
1601 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001602 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001603 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001604
Fred Drakeb91a36b2002-06-27 19:40:48 +00001605 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1606 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001607 return NULL;
1608 if (namespace_separator != NULL
1609 && strlen(namespace_separator) > 1) {
1610 PyErr_SetString(PyExc_ValueError,
1611 "namespace_separator must be at most one"
1612 " character, omitted, or None");
1613 return NULL;
1614 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001615 /* Explicitly passing None means no interning is desired.
1616 Not passing anything means that a new dictionary is used. */
1617 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001619 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001620 intern = PyDict_New();
1621 if (!intern)
1622 return NULL;
1623 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001624 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001625 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1627 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001628 }
1629
1630 result = newxmlparseobject(encoding, namespace_separator, intern);
1631 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001633 }
1634 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001635}
1636
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001637PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001638"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001639Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001640
1641static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001642pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001643{
Fred Drake0582df92000-07-12 04:49:00 +00001644 long code = 0;
1645
1646 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1647 return NULL;
1648 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001649}
1650
1651/* List of methods defined in the module */
1652
1653static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001655 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1657 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001660};
1661
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001662/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001663
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001664PyDoc_STRVAR(pyexpat_module_documentation,
1665"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001666
Fred Drakecde79132001-04-25 16:01:30 +00001667/* Initialization function for the module */
1668
1669#ifndef MODULE_NAME
1670#define MODULE_NAME "pyexpat"
1671#endif
1672
1673#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001674#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001675#endif
1676
Martin v. Löwis069dde22003-01-21 10:58:18 +00001677#ifndef PyMODINIT_FUNC
1678# ifdef MS_WINDOWS
1679# define PyMODINIT_FUNC __declspec(dllexport) void
1680# else
1681# define PyMODINIT_FUNC void
1682# endif
1683#endif
1684
Mark Hammond8235ea12002-07-19 06:55:41 +00001685PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001686
Martin v. Löwis1a214512008-06-11 05:26:20 +00001687static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 PyModuleDef_HEAD_INIT,
1689 MODULE_NAME,
1690 pyexpat_module_documentation,
1691 -1,
1692 pyexpat_methods,
1693 NULL,
1694 NULL,
1695 NULL,
1696 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001697};
1698
Martin v. Löwis069dde22003-01-21 10:58:18 +00001699PyMODINIT_FUNC
1700MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001701{
1702 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001703 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001704 PyObject *errors_module;
1705 PyObject *modelmod_name;
1706 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001707 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001708 PyObject *tmpnum, *tmpstr;
1709 PyObject *codes_dict;
1710 PyObject *rev_codes_dict;
1711 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001712 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001713 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001714
Fred Drake6f987622000-08-25 18:03:30 +00001715 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001716 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001717 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001718 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001719 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001720
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001721 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001723
Fred Drake0582df92000-07-12 04:49:00 +00001724 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001725 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001726 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001728
Fred Drake0582df92000-07-12 04:49:00 +00001729 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001730 if (ErrorObject == NULL) {
1731 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001732 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001733 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001734 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001735 }
1736 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001737 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001738 Py_INCREF(ErrorObject);
1739 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001740 Py_INCREF(&Xmlparsetype);
1741 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001742
Fred Drake738293d2000-12-21 17:25:07 +00001743 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1744 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001745 {
1746 XML_Expat_Version info = XML_ExpatVersionInfo();
1747 PyModule_AddObject(m, "version_info",
1748 Py_BuildValue("(iii)", info.major,
1749 info.minor, info.micro));
1750 }
Fred Drake0582df92000-07-12 04:49:00 +00001751 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001752 compiled, this should check and set native_encoding
1753 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001754 */
Fred Drake93adb692000-09-23 04:55:48 +00001755 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001756
Fred Drake85d835f2001-02-08 15:39:08 +00001757 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001758 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001759 errors_module = PyDict_GetItem(d, errmod_name);
1760 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001761 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001762 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001763 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001764 /* gives away the reference to errors_module */
1765 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001766 }
1767 }
Fred Drake6f987622000-08-25 18:03:30 +00001768 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001769 model_module = PyDict_GetItem(d, modelmod_name);
1770 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001771 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001772 if (model_module != NULL) {
1773 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1774 /* gives away the reference to model_module */
1775 PyModule_AddObject(m, "model", model_module);
1776 }
1777 }
1778 Py_DECREF(modelmod_name);
1779 if (errors_module == NULL || model_module == NULL)
1780 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001781 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782
Martin v. Löwisc847f402003-01-21 11:09:21 +00001783#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001784 {
1785 const XML_Feature *features = XML_GetFeatureList();
1786 PyObject *list = PyList_New(0);
1787 if (list == NULL)
1788 /* just ignore it */
1789 PyErr_Clear();
1790 else {
1791 int i = 0;
1792 for (; features[i].feature != XML_FEATURE_END; ++i) {
1793 int ok;
1794 PyObject *item = Py_BuildValue("si", features[i].name,
1795 features[i].value);
1796 if (item == NULL) {
1797 Py_DECREF(list);
1798 list = NULL;
1799 break;
1800 }
1801 ok = PyList_Append(list, item);
1802 Py_DECREF(item);
1803 if (ok < 0) {
1804 PyErr_Clear();
1805 break;
1806 }
1807 }
1808 if (list != NULL)
1809 PyModule_AddObject(m, "features", list);
1810 }
1811 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001812#endif
Fred Drake6f987622000-08-25 18:03:30 +00001813
Georg Brandlb4dac712010-10-15 14:46:48 +00001814 codes_dict = PyDict_New();
1815 rev_codes_dict = PyDict_New();
1816 if (codes_dict == NULL || rev_codes_dict == NULL) {
1817 Py_XDECREF(codes_dict);
1818 Py_XDECREF(rev_codes_dict);
1819 return NULL;
1820 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001821
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001822#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001823 if (PyModule_AddStringConstant(errors_module, #name, \
1824 (char *)XML_ErrorString(name)) < 0) \
1825 return NULL; \
1826 tmpnum = PyLong_FromLong(name); \
1827 if (tmpnum == NULL) return NULL; \
1828 res = PyDict_SetItemString(codes_dict, \
1829 XML_ErrorString(name), tmpnum); \
1830 if (res < 0) return NULL; \
1831 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1832 if (tmpstr == NULL) return NULL; \
1833 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1834 Py_DECREF(tmpstr); \
1835 Py_DECREF(tmpnum); \
1836 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001837
Fred Drake0582df92000-07-12 04:49:00 +00001838 MYCONST(XML_ERROR_NO_MEMORY);
1839 MYCONST(XML_ERROR_SYNTAX);
1840 MYCONST(XML_ERROR_NO_ELEMENTS);
1841 MYCONST(XML_ERROR_INVALID_TOKEN);
1842 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1843 MYCONST(XML_ERROR_PARTIAL_CHAR);
1844 MYCONST(XML_ERROR_TAG_MISMATCH);
1845 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1846 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1847 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1848 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1849 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1850 MYCONST(XML_ERROR_ASYNC_ENTITY);
1851 MYCONST(XML_ERROR_BAD_CHAR_REF);
1852 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1853 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1854 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1855 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1856 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001857 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1858 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1859 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001860 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1861 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1862 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1863 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1864 /* Added in Expat 1.95.7. */
1865 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1866 /* Added in Expat 1.95.8. */
1867 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1868 MYCONST(XML_ERROR_INCOMPLETE_PE);
1869 MYCONST(XML_ERROR_XML_DECL);
1870 MYCONST(XML_ERROR_TEXT_DECL);
1871 MYCONST(XML_ERROR_PUBLICID);
1872 MYCONST(XML_ERROR_SUSPENDED);
1873 MYCONST(XML_ERROR_NOT_SUSPENDED);
1874 MYCONST(XML_ERROR_ABORTED);
1875 MYCONST(XML_ERROR_FINISHED);
1876 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001877
Georg Brandlb4dac712010-10-15 14:46:48 +00001878 if (PyModule_AddStringConstant(errors_module, "__doc__",
1879 "Constants used to describe "
1880 "error conditions.") < 0)
1881 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001882
Georg Brandlb4dac712010-10-15 14:46:48 +00001883 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1884 return NULL;
1885 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1886 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001887
Fred Drake93adb692000-09-23 04:55:48 +00001888#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001889
Fred Drake85d835f2001-02-08 15:39:08 +00001890#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001891 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1892 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1893 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001894#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001895
Fred Drake85d835f2001-02-08 15:39:08 +00001896#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1897 PyModule_AddStringConstant(model_module, "__doc__",
1898 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001899
Fred Drake85d835f2001-02-08 15:39:08 +00001900 MYCONST(XML_CTYPE_EMPTY);
1901 MYCONST(XML_CTYPE_ANY);
1902 MYCONST(XML_CTYPE_MIXED);
1903 MYCONST(XML_CTYPE_NAME);
1904 MYCONST(XML_CTYPE_CHOICE);
1905 MYCONST(XML_CTYPE_SEQ);
1906
1907 MYCONST(XML_CQUANT_NONE);
1908 MYCONST(XML_CQUANT_OPT);
1909 MYCONST(XML_CQUANT_REP);
1910 MYCONST(XML_CQUANT_PLUS);
1911#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001912
1913 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001914 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001915 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001916 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1917 capi.MINOR_VERSION = XML_MINOR_VERSION;
1918 capi.MICRO_VERSION = XML_MICRO_VERSION;
1919 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001920 capi.GetErrorCode = XML_GetErrorCode;
1921 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1922 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001923 capi.Parse = XML_Parse;
1924 capi.ParserCreate_MM = XML_ParserCreate_MM;
1925 capi.ParserFree = XML_ParserFree;
1926 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1927 capi.SetCommentHandler = XML_SetCommentHandler;
1928 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1929 capi.SetElementHandler = XML_SetElementHandler;
1930 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1931 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1932 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1933 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001934 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001935 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001936 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001937
Benjamin Petersonb173f782009-05-05 22:31:58 +00001938 /* export using capsule */
1939 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001940 if (capi_object)
1941 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001942 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001943}
1944
Fred Drake6f987622000-08-25 18:03:30 +00001945static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001946clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001947{
Fred Drakecde79132001-04-25 16:01:30 +00001948 int i = 0;
1949 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001950
Fred Drake71b63ff2002-06-28 22:29:01 +00001951 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001952 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 self->handlers[i] = NULL;
1954 else {
Fred Drakecde79132001-04-25 16:01:30 +00001955 temp = self->handlers[i];
1956 self->handlers[i] = NULL;
1957 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001958 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001959 }
Fred Drakecde79132001-04-25 16:01:30 +00001960 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001961}
1962
Tim Peters0c322792002-07-17 16:49:03 +00001963static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001964 {"StartElementHandler",
1965 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001966 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001967 {"EndElementHandler",
1968 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001969 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001970 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001971 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1972 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001973 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001974 (xmlhandlersetter)XML_SetCharacterDataHandler,
1975 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001976 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001977 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001978 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001979 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001980 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001981 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001982 {"StartNamespaceDeclHandler",
1983 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001984 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001985 {"EndNamespaceDeclHandler",
1986 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001987 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001988 {"CommentHandler",
1989 (xmlhandlersetter)XML_SetCommentHandler,
1990 (xmlhandler)my_CommentHandler},
1991 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001992 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001993 (xmlhandler)my_StartCdataSectionHandler},
1994 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001995 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001996 (xmlhandler)my_EndCdataSectionHandler},
1997 {"DefaultHandler",
1998 (xmlhandlersetter)XML_SetDefaultHandler,
1999 (xmlhandler)my_DefaultHandler},
2000 {"DefaultHandlerExpand",
2001 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2002 (xmlhandler)my_DefaultHandlerExpandHandler},
2003 {"NotStandaloneHandler",
2004 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2005 (xmlhandler)my_NotStandaloneHandler},
2006 {"ExternalEntityRefHandler",
2007 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002008 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002009 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002010 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002011 (xmlhandler)my_StartDoctypeDeclHandler},
2012 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002013 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002014 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002015 {"EntityDeclHandler",
2016 (xmlhandlersetter)XML_SetEntityDeclHandler,
2017 (xmlhandler)my_EntityDeclHandler},
2018 {"XmlDeclHandler",
2019 (xmlhandlersetter)XML_SetXmlDeclHandler,
2020 (xmlhandler)my_XmlDeclHandler},
2021 {"ElementDeclHandler",
2022 (xmlhandlersetter)XML_SetElementDeclHandler,
2023 (xmlhandler)my_ElementDeclHandler},
2024 {"AttlistDeclHandler",
2025 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2026 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002027#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002028 {"SkippedEntityHandler",
2029 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2030 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002031#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002032
Fred Drake0582df92000-07-12 04:49:00 +00002033 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002034};