blob: 45680ae96296c18314ba3037bb1b75721e141216 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
/* Expat version folded into one integer (major*10000 + minor*100 + micro)
 * so that it can be compared in #if directives.
 */
#define XML_COMBINED_VERSION \
    (10000 * XML_MAJOR_VERSION + 100 * XML_MINOR_VERSION + XML_MICRO_VERSION)

/* Enables the trace_frame()/trace_frame_exc() hooks used by
 * call_with_frame().
 */
#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* Slot numbers for the supported callbacks.  A value of this type is used
 * as a subscript into both the per-parser handlers array and the global
 * handler_info table.
 */
enum HandlerTypes {
    /* document content */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    /* DTD declarations */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* only compiled in when the Expat version is at least 1.95.4 */
    SkippedEntity,
#endif
    /* last enumerator; no handler uses it in the code shown here */
    _DummyDecl
};
40
41static PyObject *ErrorObject;
42
43/* ----------------------------------------------------- */
44
45/* Declarations for objects of type xmlparser */
46
47typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000048 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000049
Fred Drake0582df92000-07-12 04:49:00 +000050 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000051 int ordered_attributes; /* Return attributes as a list. */
52 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000053 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000054 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000055 XML_Char *buffer; /* Buffer used when accumulating characters */
56 /* NULL if not enabled */
57 int buffer_size; /* Size of buffer, in XML_Char units */
58 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000059 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000060 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000061} xmlparseobject;
62
Fred Drake2a3d7db2002-06-28 22:56:48 +000063#define CHARACTER_DATA_BUFFER_SIZE 8192
64
Jeremy Hylton938ace62002-07-17 16:30:39 +000065static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake117ac852002-09-24 16:24:54 +000067typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068typedef void* xmlhandler;
69
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000070struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000071 const char *name;
72 xmlhandlersetter setter;
73 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000074 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000075 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076};
77
Jeremy Hylton938ace62002-07-17 16:30:39 +000078static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079
Fred Drakebd6101c2001-02-14 18:29:45 +000080/* Set an integer attribute on the error object; return true on success,
81 * false on an exception.
82 */
83static int
84set_error_attr(PyObject *err, char *name, int value)
85{
Christian Heimes217cfd12007-12-02 14:31:20 +000086 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000087
Neal Norwitz2f5e9902006-03-08 06:36:45 +000088 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
89 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000090 return 0;
91 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000092 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 1;
94}
95
96/* Build and set an Expat exception, including positioning
97 * information. Always returns NULL.
98 */
Fred Drake85d835f2001-02-08 15:39:08 +000099static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000100set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000101{
102 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100103 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000104 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000105 int lineno = XML_GetErrorLineNumber(parser);
106 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Victor Stinner499dfcf2011-03-21 13:26:24 +0100108 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
109 XML_ErrorString(code), lineno, column);
110 if (buffer == NULL)
111 return NULL;
112 err = PyObject_CallFunction(ErrorObject, "O", buffer);
113 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000114 if ( err != NULL
115 && set_error_attr(err, "code", code)
116 && set_error_attr(err, "offset", column)
117 && set_error_attr(err, "lineno", lineno)) {
118 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000119 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000120 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000121 return NULL;
122}
123
Fred Drake71b63ff2002-06-28 22:29:01 +0000124static int
125have_handler(xmlparseobject *self, int type)
126{
127 PyObject *handler = self->handlers[type];
128 return handler != NULL;
129}
130
131static PyObject *
132get_handler_name(struct HandlerInfo *hinfo)
133{
134 PyObject *name = hinfo->nameobj;
135 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000136 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000137 hinfo->nameobj = name;
138 }
139 Py_XINCREF(name);
140 return name;
141}
142
Fred Drake85d835f2001-02-08 15:39:08 +0000143
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000144/* Convert a string of XML_Chars into a Unicode string.
145 Returns None if str is a null pointer. */
146
Fred Drake0582df92000-07-12 04:49:00 +0000147static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000148conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000149{
Fred Drake71b63ff2002-06-28 22:29:01 +0000150 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000151 and hence in UTF-8. */
152 /* UTF-8 from Expat, Unicode desired */
153 if (str == NULL) {
154 Py_INCREF(Py_None);
155 return Py_None;
156 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000157 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000158}
159
Fred Drake0582df92000-07-12 04:49:00 +0000160static PyObject *
161conv_string_len_to_unicode(const XML_Char *str, int len)
162{
Fred Drake71b63ff2002-06-28 22:29:01 +0000163 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000164 and hence in UTF-8. */
165 /* UTF-8 from Expat, Unicode desired */
166 if (str == NULL) {
167 Py_INCREF(Py_None);
168 return Py_None;
169 }
Fred Drake6f987622000-08-25 18:03:30 +0000170 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000172
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000173/* Callback routines */
174
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000175static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176
Martin v. Löwis069dde22003-01-21 10:58:18 +0000177/* This handler is used when an error has been detected, in the hope
178 that actual parsing can be terminated early. This will only help
179 if an external entity reference is encountered. */
180static int
181error_external_entity_ref_handler(XML_Parser parser,
182 const XML_Char *context,
183 const XML_Char *base,
184 const XML_Char *systemId,
185 const XML_Char *publicId)
186{
187 return 0;
188}
189
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000190/* Dummy character data handler used when an error (exception) has
191 been detected, and the actual parsing can be terminated early.
192 This is needed since character data handler can't be safely removed
193 from within the character data handler, but can be replaced. It is
194 used only from the character data handler trampoline, and must be
195 used right after `flag_error()` is called. */
196static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000198{
199 /* Do nothing. */
200}
201
Fred Drake6f987622000-08-25 18:03:30 +0000202static void
203flag_error(xmlparseobject *self)
204{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000205 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000206 XML_SetExternalEntityRefHandler(self->itself,
207 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000208}
209
210static PyCodeObject*
211getcode(enum HandlerTypes slot, char* func_name, int lineno)
212{
Fred Drakebd6101c2001-02-14 18:29:45 +0000213 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000214 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000215 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 }
217 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000218}
219
Jeremy Hylton9263f572003-06-27 16:13:17 +0000220#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000221static int
222trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
223{
224 int result = 0;
225 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000227 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 tstate->tracing++;
229 result = tstate->c_profilefunc(tstate->c_profileobj,
230 f, code , val);
231 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
232 || (tstate->c_profilefunc != NULL));
233 tstate->tracing--;
234 if (result)
235 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000236 }
237 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 tstate->tracing++;
239 result = tstate->c_tracefunc(tstate->c_traceobj,
240 f, code , val);
241 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
242 || (tstate->c_profilefunc != NULL));
243 tstate->tracing--;
244 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000245 return result;
246}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000247
248static int
249trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
250{
251 PyObject *type, *value, *traceback, *arg;
252 int err;
253
254 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000256
257 PyErr_Fetch(&type, &value, &traceback);
258 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 value = Py_None;
260 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000261 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000262 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000263 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 PyErr_Restore(type, value, traceback);
265 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 }
267 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
268 Py_DECREF(arg);
269 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000271 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 Py_XDECREF(type);
273 Py_XDECREF(value);
274 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000275 }
276 return err;
277}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000278#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000279
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000280static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000281call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
282 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283{
Fred Drakebd6101c2001-02-14 18:29:45 +0000284 PyThreadState *tstate = PyThreadState_GET();
285 PyFrameObject *f;
Christian Heimesa6404ad2013-07-20 22:54:25 +0200286 PyObject *res, *globals;
Fred Drakebd6101c2001-02-14 18:29:45 +0000287
288 if (c == NULL)
289 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000290
Christian Heimesa6404ad2013-07-20 22:54:25 +0200291 globals = PyEval_GetGlobals();
292 if (globals == NULL) {
293 return NULL;
294 }
295
296 f = PyFrame_New(tstate, c, globals, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000297 if (f == NULL)
298 return NULL;
299 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000300#ifdef FIX_TRACE
301 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000303 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000304#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000305 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000306 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 if (tstate->curexc_traceback == NULL)
308 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000309 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000310#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 if (trace_frame_exc(tstate, f) < 0) {
312 return NULL;
313 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000314 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000315 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000316 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
Serhiy Storchaka505ff752014-02-09 13:33:53 +0200317 Py_CLEAR(res);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000319 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000320#else
321 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000322#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000323 tstate->frame = f->f_back;
324 Py_DECREF(f);
325 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000326}
327
Fred Drakeb91a36b2002-06-27 19:40:48 +0000328static PyObject*
329string_intern(xmlparseobject *self, const char* str)
330{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000331 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000332 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000333 /* result can be NULL if the unicode conversion failed. */
334 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000336 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000338 value = PyDict_GetItem(self->intern, result);
339 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000341 return result;
342 else
343 return NULL;
344 }
345 Py_INCREF(value);
346 Py_DECREF(result);
347 return value;
348}
349
Fred Drake2a3d7db2002-06-28 22:56:48 +0000350/* Return 0 on success, -1 on exception.
351 * flag_error() will be called before return if needed.
352 */
353static int
354call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
355{
356 PyObject *args;
357 PyObject *temp;
358
Georg Brandlc01537f2010-10-15 16:26:08 +0000359 if (!have_handler(self, CharacterData))
360 return -1;
361
Fred Drake2a3d7db2002-06-28 22:56:48 +0000362 args = PyTuple_New(1);
363 if (args == NULL)
364 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000365 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000366 if (temp == NULL) {
367 Py_DECREF(args);
368 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000369 XML_SetCharacterDataHandler(self->itself,
370 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000371 return -1;
372 }
373 PyTuple_SET_ITEM(args, 0, temp);
374 /* temp is now a borrowed reference; consider it unused. */
375 self->in_callback = 1;
376 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000377 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000378 /* temp is an owned reference again, or NULL */
379 self->in_callback = 0;
380 Py_DECREF(args);
381 if (temp == NULL) {
382 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000383 XML_SetCharacterDataHandler(self->itself,
384 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000385 return -1;
386 }
387 Py_DECREF(temp);
388 return 0;
389}
390
391static int
392flush_character_buffer(xmlparseobject *self)
393{
394 int rc;
395 if (self->buffer == NULL || self->buffer_used == 0)
396 return 0;
397 rc = call_character_handler(self, self->buffer, self->buffer_used);
398 self->buffer_used = 0;
399 return rc;
400}
401
402static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000404{
405 xmlparseobject *self = (xmlparseobject *) userData;
406 if (self->buffer == NULL)
407 call_character_handler(self, data, len);
408 else {
409 if ((self->buffer_used + len) > self->buffer_size) {
410 if (flush_character_buffer(self) < 0)
411 return;
412 /* handler might have changed; drop the rest on the floor
413 * if there isn't a handler anymore
414 */
415 if (!have_handler(self, CharacterData))
416 return;
417 }
418 if (len > self->buffer_size) {
419 call_character_handler(self, data, len);
420 self->buffer_used = 0;
421 }
422 else {
423 memcpy(self->buffer + self->buffer_used,
424 data, len * sizeof(XML_Char));
425 self->buffer_used += len;
426 }
427 }
428}
429
Fred Drake85d835f2001-02-08 15:39:08 +0000430static void
431my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000432 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000433{
434 xmlparseobject *self = (xmlparseobject *)userData;
435
Fred Drake71b63ff2002-06-28 22:29:01 +0000436 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000437 PyObject *container, *rv, *args;
438 int i, max;
439
Fred Drake2a3d7db2002-06-28 22:56:48 +0000440 if (flush_character_buffer(self) < 0)
441 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000442 /* Set max to the number of slots filled in atts[]; max/2 is
443 * the number of attributes we need to process.
444 */
445 if (self->specified_attributes) {
446 max = XML_GetSpecifiedAttributeCount(self->itself);
447 }
448 else {
449 max = 0;
450 while (atts[max] != NULL)
451 max += 2;
452 }
453 /* Build the container. */
454 if (self->ordered_attributes)
455 container = PyList_New(max);
456 else
457 container = PyDict_New();
458 if (container == NULL) {
459 flag_error(self);
460 return;
461 }
462 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000463 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000464 PyObject *v;
465 if (n == NULL) {
466 flag_error(self);
467 Py_DECREF(container);
468 return;
469 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000470 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000471 if (v == NULL) {
472 flag_error(self);
473 Py_DECREF(container);
474 Py_DECREF(n);
475 return;
476 }
477 if (self->ordered_attributes) {
478 PyList_SET_ITEM(container, i, n);
479 PyList_SET_ITEM(container, i+1, v);
480 }
481 else if (PyDict_SetItem(container, n, v)) {
482 flag_error(self);
483 Py_DECREF(n);
484 Py_DECREF(v);
485 return;
486 }
487 else {
488 Py_DECREF(n);
489 Py_DECREF(v);
490 }
491 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000492 args = string_intern(self, name);
493 if (args != NULL)
494 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000495 if (args == NULL) {
496 Py_DECREF(container);
497 return;
498 }
499 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000500 self->in_callback = 1;
501 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000502 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000503 self->in_callback = 0;
504 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000505 if (rv == NULL) {
506 flag_error(self);
507 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000508 }
Fred Drake85d835f2001-02-08 15:39:08 +0000509 Py_DECREF(rv);
510 }
511}
512
/* Generate the C callback my_<NAME>Handler for handler slot NAME.
 *
 *   RC            return type of the generated function
 *   PARAMS        parenthesised parameter list
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue()
 *   CONVERSION    statement run on the handler result `rv` before it is
 *                 released
 *   RETURN        expression used by every return statement
 *   GETUSERDATA   expression yielding the xmlparseobject
 *
 * The generated function does nothing when no handler is installed.
 * Otherwise it flushes pending character data, builds the arguments and
 * calls the handler; flag_error() is called when building the arguments
 * or the call itself fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ;\
    if (args == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
541
/* Callback returning void whose userData is the xmlparseobject; the
 * handler result is discarded.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* Callback returning int whose userData is the xmlparseobject; the
 * handler result is converted with PyLong_AsLong(), and 0 is returned
 * when the handler was not called or failed.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000550
Fred Drake71b63ff2002-06-28 22:29:01 +0000551VOID_HANDLER(EndElement,
552 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000553 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000554
Fred Drake6f987622000-08-25 18:03:30 +0000555VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000556 (void *userData,
557 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000558 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000559 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000560
Fred Drake6f987622000-08-25 18:03:30 +0000561VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000562 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000563 const XML_Char *entityName,
564 const XML_Char *base,
565 const XML_Char *systemId,
566 const XML_Char *publicId,
567 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000568 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000569 string_intern(self, entityName), string_intern(self, base),
570 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000571 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000572
Fred Drake85d835f2001-02-08 15:39:08 +0000573VOID_HANDLER(EntityDecl,
574 (void *userData,
575 const XML_Char *entityName,
576 int is_parameter_entity,
577 const XML_Char *value,
578 int value_length,
579 const XML_Char *base,
580 const XML_Char *systemId,
581 const XML_Char *publicId,
582 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000583 ("NiNNNNN",
584 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000585 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000586 string_intern(self, base), string_intern(self, systemId),
587 string_intern(self, publicId),
588 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000589
590VOID_HANDLER(XmlDecl,
591 (void *userData,
592 const XML_Char *version,
593 const XML_Char *encoding,
594 int standalone),
595 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000596 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000597 standalone))
598
599static PyObject *
600conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000601 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000602{
603 PyObject *result = NULL;
604 PyObject *children = PyTuple_New(model->numchildren);
605 int i;
606
607 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000608 assert(model->numchildren < INT_MAX);
609 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000610 PyObject *child = conv_content_model(&model->children[i],
611 conv_string);
612 if (child == NULL) {
613 Py_XDECREF(children);
614 return NULL;
615 }
616 PyTuple_SET_ITEM(children, i, child);
617 }
618 result = Py_BuildValue("(iiO&N)",
619 model->type, model->quant,
620 conv_string,model->name, children);
621 }
622 return result;
623}
624
Fred Drake06dd8cf2003-02-02 03:54:17 +0000625static void
626my_ElementDeclHandler(void *userData,
627 const XML_Char *name,
628 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000629{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000630 xmlparseobject *self = (xmlparseobject *)userData;
631 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000632
Fred Drake06dd8cf2003-02-02 03:54:17 +0000633 if (have_handler(self, ElementDecl)) {
634 PyObject *rv = NULL;
635 PyObject *modelobj, *nameobj;
636
637 if (flush_character_buffer(self) < 0)
638 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000639 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000640 if (modelobj == NULL) {
641 flag_error(self);
642 goto finally;
643 }
644 nameobj = string_intern(self, name);
645 if (nameobj == NULL) {
646 Py_DECREF(modelobj);
647 flag_error(self);
648 goto finally;
649 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000650 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000651 if (args == NULL) {
652 Py_DECREF(modelobj);
653 flag_error(self);
654 goto finally;
655 }
656 self->in_callback = 1;
657 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000658 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000659 self->in_callback = 0;
660 if (rv == NULL) {
661 flag_error(self);
662 goto finally;
663 }
664 Py_DECREF(rv);
665 }
666 finally:
667 Py_XDECREF(args);
668 XML_FreeContentModel(self->itself, model);
669 return;
670}
Fred Drake85d835f2001-02-08 15:39:08 +0000671
672VOID_HANDLER(AttlistDecl,
673 (void *userData,
674 const XML_Char *elname,
675 const XML_Char *attname,
676 const XML_Char *att_type,
677 const XML_Char *dflt,
678 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000679 ("(NNO&O&i)",
680 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000681 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000682 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000683
Martin v. Löwisc847f402003-01-21 11:09:21 +0000684#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000685VOID_HANDLER(SkippedEntity,
686 (void *userData,
687 const XML_Char *entityName,
688 int is_parameter_entity),
689 ("Ni",
690 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000691#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000692
Fred Drake71b63ff2002-06-28 22:29:01 +0000693VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000694 (void *userData,
695 const XML_Char *notationName,
696 const XML_Char *base,
697 const XML_Char *systemId,
698 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000699 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 string_intern(self, notationName), string_intern(self, base),
701 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000702
Fred Drake6f987622000-08-25 18:03:30 +0000703VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000704 (void *userData,
705 const XML_Char *prefix,
706 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000707 ("(NN)",
708 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000709
Fred Drake6f987622000-08-25 18:03:30 +0000710VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000711 (void *userData,
712 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000713 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000714
Fred Drake6f987622000-08-25 18:03:30 +0000715VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000716 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000717 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000718
Fred Drake6f987622000-08-25 18:03:30 +0000719VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000720 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000722
Fred Drake6f987622000-08-25 18:03:30 +0000723VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000724 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000726
Fred Drake6f987622000-08-25 18:03:30 +0000727VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000728 (void *userData, const XML_Char *s, int len),
729 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000730
Fred Drake6f987622000-08-25 18:03:30 +0000731VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000732 (void *userData, const XML_Char *s, int len),
733 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000734
Fred Drake71b63ff2002-06-28 22:29:01 +0000735INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 (void *userData),
737 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000738
Fred Drake6f987622000-08-25 18:03:30 +0000739RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 (XML_Parser parser,
741 const XML_Char *context,
742 const XML_Char *base,
743 const XML_Char *systemId,
744 const XML_Char *publicId),
745 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000746 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 conv_string_to_unicode ,context, string_intern(self, base),
748 string_intern(self, systemId), string_intern(self, publicId)),
749 rc = PyLong_AsLong(rv);, rc,
750 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000751
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000752/* XXX UnknownEncodingHandler */
753
Fred Drake85d835f2001-02-08 15:39:08 +0000754VOID_HANDLER(StartDoctypeDecl,
755 (void *userData, const XML_Char *doctypeName,
756 const XML_Char *sysid, const XML_Char *pubid,
757 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000758 ("(NNNi)", string_intern(self, doctypeName),
759 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000760 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000761
762VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000764/* ---------------------------------------------------------------- */
765
Fred Drake71b63ff2002-06-28 22:29:01 +0000766static PyObject *
767get_parse_result(xmlparseobject *self, int rv)
768{
769 if (PyErr_Occurred()) {
770 return NULL;
771 }
772 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000773 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000774 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000775 if (flush_character_buffer(self) < 0) {
776 return NULL;
777 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000778 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000779}
780
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000781PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000782"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000783Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000784
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200785#define MAX_CHUNK_SIZE (1 << 20)
786
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000787static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000788xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000789{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200790 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000791 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200792 const char *s;
793 Py_ssize_t slen;
794 Py_buffer view;
795 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000796
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200797 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000798 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000799
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200800 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200801 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200802 s = PyUnicode_AsUTF8AndSize(data, &slen);
803 if (s == NULL)
804 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200805 /* Explicitly set UTF-8 encoding. Return code ignored. */
806 (void)XML_SetEncoding(self->itself, "utf-8");
807 }
808 else {
809 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
810 return NULL;
811 s = view.buf;
812 slen = view.len;
813 }
814
815 while (slen > MAX_CHUNK_SIZE) {
816 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
817 if (!rc)
818 goto done;
819 s += MAX_CHUNK_SIZE;
820 slen -= MAX_CHUNK_SIZE;
821 }
822 rc = XML_Parse(self->itself, s, slen, isFinal);
823
824done:
825 if (view.buf != NULL)
826 PyBuffer_Release(&view);
827 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000828}
829
Fred Drakeca1f4262000-09-21 20:10:23 +0000830/* File reading copied from cPickle */
831
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000832#define BUF_SIZE 2048
833
Fred Drake0582df92000-07-12 04:49:00 +0000834static int
835readinst(char *buf, int buf_size, PyObject *meth)
836{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000837 PyObject *str;
838 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000839 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000840
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000841 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000842 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000843 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000844
Christian Heimes72b710a2008-05-26 13:28:38 +0000845 if (PyBytes_Check(str))
846 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000847 else if (PyByteArray_Check(str))
848 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000849 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000850 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000851 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000852 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000853 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000854 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000855 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000856 if (len > buf_size) {
857 PyErr_Format(PyExc_ValueError,
858 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000859 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000860 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000861 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000862 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000863 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000864 Py_DECREF(str);
865 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000866 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000867
868error:
869 Py_XDECREF(str);
870 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000871}
872
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000873PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000874"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000875Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000876
877static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000878xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000879{
Fred Drake0582df92000-07-12 04:49:00 +0000880 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000881 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200882 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000883
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200884 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000885 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000886 PyErr_SetString(PyExc_TypeError,
887 "argument must have 'read' attribute");
888 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000889 }
890 for (;;) {
891 int bytes_read;
892 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000893 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000894 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000895 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000896 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000897
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000898 bytes_read = readinst(buf, BUF_SIZE, readmethod);
899 if (bytes_read < 0) {
900 Py_DECREF(readmethod);
901 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000902 }
903 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000904 if (PyErr_Occurred()) {
905 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000906 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000907 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000908
Fred Drake0582df92000-07-12 04:49:00 +0000909 if (!rv || bytes_read == 0)
910 break;
911 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000912 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000913 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000914}
915
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000916PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000917"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000918Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000919
920static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000921xmlparse_SetBase(xmlparseobject *self, PyObject *args)
922{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000923 char *base;
924
Fred Drake0582df92000-07-12 04:49:00 +0000925 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000926 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000927 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000928 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000929 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000930 Py_INCREF(Py_None);
931 return Py_None;
932}
933
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000934PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000935"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000936Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000937
938static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000939xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000940{
Fred Drake0582df92000-07-12 04:49:00 +0000941 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000942}
943
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000944PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000945"GetInputContext() -> string\n\
946Return the untranslated text of the input that caused the current event.\n\
947If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000948for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000949
950static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000951xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000952{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000953 if (self->in_callback) {
954 int offset, size;
955 const char *buffer
956 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000957
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000958 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000959 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000960 size - offset);
961 else
962 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000963 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000964 else
965 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000966}
Fred Drakebd6101c2001-02-14 18:29:45 +0000967
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000968PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000969"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000970Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000971information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000972
973static PyObject *
974xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
975{
976 char *context;
977 char *encoding = NULL;
978 xmlparseobject *new_parser;
979 int i;
980
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000981 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000982 &context, &encoding)) {
983 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000984 }
985
Martin v. Löwis894258c2001-09-23 10:20:10 +0000986 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000987 if (new_parser == NULL)
988 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000989 new_parser->buffer_size = self->buffer_size;
990 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000991 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000992 new_parser->ordered_attributes = self->ordered_attributes;
993 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000994 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000995 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000996 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000998 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000999 new_parser->intern = self->intern;
1000 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001001 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001002
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001003 if (self->buffer != NULL) {
1004 new_parser->buffer = malloc(new_parser->buffer_size);
1005 if (new_parser->buffer == NULL) {
1006 Py_DECREF(new_parser);
1007 return PyErr_NoMemory();
1008 }
1009 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001010 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001011 Py_DECREF(new_parser);
1012 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001013 }
1014
1015 XML_SetUserData(new_parser->itself, (void *)new_parser);
1016
1017 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001018 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001019 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001020
Fred Drake2a3d7db2002-06-28 22:56:48 +00001021 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001022 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001023 Py_DECREF(new_parser);
1024 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001025 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001026 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001027
1028 /* then copy handlers from self */
1029 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001030 PyObject *handler = self->handlers[i];
1031 if (handler != NULL) {
1032 Py_INCREF(handler);
1033 new_parser->handlers[i] = handler;
1034 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001035 handler_info[i].handler);
1036 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001037 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001038 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001039}
1040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001041PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001042"SetParamEntityParsing(flag) -> success\n\
1043Controls parsing of parameter entities (including the external DTD\n\
1044subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1045XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1046XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001047was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001048
1049static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001050xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001051{
Fred Drake85d835f2001-02-08 15:39:08 +00001052 int flag;
1053 if (!PyArg_ParseTuple(args, "i", &flag))
1054 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001055 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001056 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001057}
1058
Martin v. Löwisc847f402003-01-21 11:09:21 +00001059
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): call XML_UseForeignDTD() with XML_TRUE or
   XML_FALSE according to the truth value of flag (default: true).
   Returns None on XML_ERROR_NONE; any other code goes to set_error(). */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    enum XML_Error rc;
    int flag = 1;

    if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag)) {
        return NULL;
    }
    rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    if (rc == XML_ERROR_NONE) {
        Py_RETURN_NONE;
    }
    return set_error(self, rc);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001084
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001085static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1086
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001087static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 {"Parse", (PyCFunction)xmlparse_Parse,
1089 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001090 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001092 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001094 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001096 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001098 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001100 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001102#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001103 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001105#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001106 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001108};
1109
1110/* ---------- */
1111
1112
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001113
Fred Drake71b63ff2002-06-28 22:29:01 +00001114/* pyexpat international encoding support.
1115 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001116*/
1117
Fred Drake71b63ff2002-06-28 22:29:01 +00001118static int
1119PyUnknownEncodingHandler(void *encodingHandlerData,
1120 const XML_Char *name,
1121 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001122{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001123 static unsigned char template_buffer[256] = {0};
1124 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001125 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001126 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001127 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001128
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001129 if (template_buffer[1] == 0) {
1130 for (i = 0; i < 256; i++)
1131 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001132 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001133
1134 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
1135 if (u == NULL || PyUnicode_READY(u))
1136 return XML_STATUS_ERROR;
1137
1138 if (PyUnicode_GET_LENGTH(u) != 256) {
1139 Py_DECREF(u);
1140 PyErr_SetString(PyExc_ValueError,
1141 "multi-byte encodings are not supported");
1142 return XML_STATUS_ERROR;
1143 }
1144
1145 kind = PyUnicode_KIND(u);
1146 data = PyUnicode_DATA(u);
1147 for (i = 0; i < 256; i++) {
1148 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1149 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1150 info->map[i] = ch;
1151 else
1152 info->map[i] = -1;
1153 }
1154
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001155 info->data = NULL;
1156 info->convert = NULL;
1157 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001158 Py_DECREF(u);
1159
1160 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001161}
1162
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001163
1164static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001165newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001166{
1167 int i;
1168 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001169
Martin v. Löwis894258c2001-09-23 10:20:10 +00001170 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001171 if (self == NULL)
1172 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001173
Fred Drake2a3d7db2002-06-28 22:56:48 +00001174 self->buffer = NULL;
1175 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1176 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001177 self->ordered_attributes = 0;
1178 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001179 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001180 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001181 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001182 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001183 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1184 }
Fred Drake85d835f2001-02-08 15:39:08 +00001185 else {
Fred Drake0582df92000-07-12 04:49:00 +00001186 self->itself = XML_ParserCreate(encoding);
1187 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001188#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1189 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1190 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1191 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001192 XML_SetHashSalt(self->itself,
1193 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smith25227712012-03-14 18:10:37 -07001194#endif
Fred Drakeb91a36b2002-06-27 19:40:48 +00001195 self->intern = intern;
1196 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001197 PyObject_GC_Track(self);
Fred Drake0582df92000-07-12 04:49:00 +00001198 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001199 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001200 "XML_ParserCreate failed");
1201 Py_DECREF(self);
1202 return NULL;
1203 }
1204 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001205 XML_SetUnknownEncodingHandler(self->itself,
1206 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001207
Fred Drake2a3d7db2002-06-28 22:56:48 +00001208 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001209 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001210
Fred Drake7c75bf22002-07-01 14:02:31 +00001211 self->handlers = malloc(sizeof(PyObject *) * i);
1212 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001213 Py_DECREF(self);
1214 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001215 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001216 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001217
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001218 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001219}
1220
1221
1222static void
Fred Drake0582df92000-07-12 04:49:00 +00001223xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001224{
Fred Drake0582df92000-07-12 04:49:00 +00001225 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001226 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001227 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001228 XML_ParserFree(self->itself);
1229 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001230
Fred Drake85d835f2001-02-08 15:39:08 +00001231 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001232 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001233 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001234 temp = self->handlers[i];
1235 self->handlers[i] = NULL;
1236 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001237 }
1238 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001239 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001240 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001241 if (self->buffer != NULL) {
1242 free(self->buffer);
1243 self->buffer = NULL;
1244 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001245 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001246 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001247}
1248
Fred Drake0582df92000-07-12 04:49:00 +00001249static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001250handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001251{
1252 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001253 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001254 if (PyUnicode_CompareWithASCIIString(
1255 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001256 return i;
1257 }
1258 }
1259 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001260}
1261
1262static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001263get_pybool(int istrue)
1264{
1265 PyObject *result = istrue ? Py_True : Py_False;
1266 Py_INCREF(result);
1267 return result;
1268}
1269
1270static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001271xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001272{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001273 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001274 int handlernum = -1;
1275
Alexander Belopolskye239d232010-12-08 23:31:48 +00001276 if (!PyUnicode_Check(nameobj))
1277 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001278 if (PyUnicode_READY(nameobj))
1279 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280
Alexander Belopolskye239d232010-12-08 23:31:48 +00001281 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001282
1283 if (handlernum != -1) {
1284 PyObject *result = self->handlers[handlernum];
1285 if (result == NULL)
1286 result = Py_None;
1287 Py_INCREF(result);
1288 return result;
1289 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001290
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001291 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1292 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001293 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001294 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001295 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001296 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001297 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001298 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001299 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001300 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001301 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001302 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001303 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001304 XML_GetErrorByteIndex(self->itself));
1305 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001306 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001307 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001308 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001309 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001310 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001311 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001312 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001313 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001314 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001315 XML_GetCurrentByteIndex(self->itself));
1316 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001317 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001318 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001319 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001320 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001321 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001322 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001323 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001324 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001325 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001326 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001327 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001328 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001329 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001330 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001331 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001332 if (self->intern == NULL) {
1333 Py_INCREF(Py_None);
1334 return Py_None;
1335 }
1336 else {
1337 Py_INCREF(self->intern);
1338 return self->intern;
1339 }
1340 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001341 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001342 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001343}
1344
1345static PyObject *
1346xmlparse_dir(PyObject *self, PyObject* noargs)
1347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348#define APPEND(list, str) \
1349 do { \
1350 PyObject *o = PyUnicode_FromString(str); \
1351 if (o != NULL) \
1352 PyList_Append(list, o); \
1353 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001354 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001355
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001356 int i;
1357 PyObject *rc = PyList_New(0);
1358 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001360 for (i = 0; handler_info[i].name != NULL; i++) {
1361 PyObject *o = get_handler_name(&handler_info[i]);
1362 if (o != NULL)
1363 PyList_Append(rc, o);
1364 Py_XDECREF(o);
1365 }
1366 APPEND(rc, "ErrorCode");
1367 APPEND(rc, "ErrorLineNumber");
1368 APPEND(rc, "ErrorColumnNumber");
1369 APPEND(rc, "ErrorByteIndex");
1370 APPEND(rc, "CurrentLineNumber");
1371 APPEND(rc, "CurrentColumnNumber");
1372 APPEND(rc, "CurrentByteIndex");
1373 APPEND(rc, "buffer_size");
1374 APPEND(rc, "buffer_text");
1375 APPEND(rc, "buffer_used");
1376 APPEND(rc, "namespace_prefixes");
1377 APPEND(rc, "ordered_attributes");
1378 APPEND(rc, "specified_attributes");
1379 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001380
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001381#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001382
1383 if (PyErr_Occurred()) {
1384 Py_DECREF(rc);
1385 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001386 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001387
1388 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001389}
1390
Fred Drake6f987622000-08-25 18:03:30 +00001391static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001392sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001393{
1394 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001395 if (handlernum >= 0) {
1396 xmlhandler c_handler = NULL;
1397 PyObject *temp = self->handlers[handlernum];
1398
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001399 if (v == Py_None) {
1400 /* If this is the character data handler, and a character
1401 data handler is already active, we need to be more
1402 careful. What we can safely do is replace the existing
1403 character data handler callback function with a no-op
1404 function that will refuse to call Python. The downside
1405 is that this doesn't completely remove the character
1406 data handler from the C layer if there's any callback
1407 active, so Expat does a little more work than it
1408 otherwise would, but that's really an odd case. A more
1409 elaborate system of handlers and state could remove the
1410 C handler more effectively. */
1411 if (handlernum == CharacterData && self->in_callback)
1412 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001413 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001414 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001415 else if (v != NULL) {
1416 Py_INCREF(v);
1417 c_handler = handler_info[handlernum].handler;
1418 }
Fred Drake0582df92000-07-12 04:49:00 +00001419 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001420 Py_XDECREF(temp);
1421 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001422 return 1;
1423 }
1424 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001425}
1426
1427static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001428xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001429{
Fred Drake6f987622000-08-25 18:03:30 +00001430 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001431 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001432 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1433 return -1;
1434 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001435 assert(PyUnicode_Check(name));
1436 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001437 int b = PyObject_IsTrue(v);
1438 if (b < 0)
1439 return -1;
1440 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001441 if (self->buffer == NULL) {
1442 self->buffer = malloc(self->buffer_size);
1443 if (self->buffer == NULL) {
1444 PyErr_NoMemory();
1445 return -1;
1446 }
1447 self->buffer_used = 0;
1448 }
1449 }
1450 else if (self->buffer != NULL) {
1451 if (flush_character_buffer(self) < 0)
1452 return -1;
1453 free(self->buffer);
1454 self->buffer = NULL;
1455 }
1456 return 0;
1457 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001458 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001459 int b = PyObject_IsTrue(v);
1460 if (b < 0)
1461 return -1;
1462 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001463 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1464 return 0;
1465 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001466 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001467 int b = PyObject_IsTrue(v);
1468 if (b < 0)
1469 return -1;
1470 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001471 return 0;
1472 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001473 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001474 int b = PyObject_IsTrue(v);
1475 if (b < 0)
1476 return -1;
1477 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001478 return 0;
1479 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001480
Alexander Belopolskye239d232010-12-08 23:31:48 +00001481 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001482 long new_buffer_size;
1483 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1485 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001486 }
1487
1488 new_buffer_size=PyLong_AS_LONG(v);
1489 /* trivial case -- no change */
1490 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001492 }
1493
1494 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1496 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001497 }
1498
1499 /* check maximum */
1500 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 char errmsg[100];
1502 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1503 PyErr_SetString(PyExc_ValueError, errmsg);
1504 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001505 }
1506
1507 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 /* there is already a buffer */
1509 if (self->buffer_used != 0) {
Christian Heimes09994a92013-07-20 22:41:58 +02001510 if (flush_character_buffer(self) < 0) {
1511 return -1;
1512 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001513 }
1514 /* free existing buffer */
1515 free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001516 }
1517 self->buffer = malloc(new_buffer_size);
1518 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 PyErr_NoMemory();
1520 return -1;
1521 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001522 self->buffer_size = new_buffer_size;
1523 return 0;
1524 }
1525
Alexander Belopolskye239d232010-12-08 23:31:48 +00001526 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001527 /* If we're changing the character data handler, flush all
1528 * cached data with the old handler. Not sure there's a
1529 * "right" thing to do, though, but this probably won't
1530 * happen.
1531 */
1532 if (flush_character_buffer(self) < 0)
1533 return -1;
1534 }
Fred Drake6f987622000-08-25 18:03:30 +00001535 if (sethandler(self, name, v)) {
1536 return 0;
1537 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001538 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001539 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001540}
1541
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001542static int
1543xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1544{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001545 int i;
1546 for (i = 0; handler_info[i].name != NULL; i++)
1547 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001548 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001549}
1550
1551static int
1552xmlparse_clear(xmlparseobject *op)
1553{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001554 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001555 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001556 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001557}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001558
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Type object for xmlparser instances.

   Attribute access goes through the custom tp_getattro/tp_setattro
   slots (xmlparse_getattro / xmlparse_setattro).  The type takes part
   in cyclic garbage collection (Py_TPFLAGS_HAVE_GC) via
   xmlparse_traverse and xmlparse_clear.  Slots after tp_methods are
   left to the zero-initialization of the static object. */
static PyTypeObject Xmlparsetype = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "pyexpat.xmlparser",                /*tp_name*/
    sizeof(xmlparseobject),             /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    /* methods */
    (destructor)xmlparse_dealloc,       /*tp_dealloc*/
    (printfunc)0,                       /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    (reprfunc)0,                        /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    (hashfunc)0,                        /*tp_hash*/
    (ternaryfunc)0,                     /*tp_call*/
    (reprfunc)0,                        /*tp_str*/
    (getattrofunc)xmlparse_getattro,    /* tp_getattro */
    (setattrofunc)xmlparse_setattro,    /* tp_setattro */
    0,                                  /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Xmlparsetype__doc__,                /* tp_doc - Documentation string */
    (traverseproc)xmlparse_traverse,    /* tp_traverse */
    (inquiry)xmlparse_clear,            /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    xmlparse_methods,                   /* tp_methods */
};
1592
1593/* End of code for xmlparser objects */
1594/* -------------------------------------------------------- */
1595
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001596PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001597"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001598Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001599
1600static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001601pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1602{
Fred Drakecde79132001-04-25 16:01:30 +00001603 char *encoding = NULL;
1604 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001605 PyObject *intern = NULL;
1606 PyObject *result;
1607 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001608 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001609 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001610
Fred Drakeb91a36b2002-06-27 19:40:48 +00001611 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1612 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001613 return NULL;
1614 if (namespace_separator != NULL
1615 && strlen(namespace_separator) > 1) {
1616 PyErr_SetString(PyExc_ValueError,
1617 "namespace_separator must be at most one"
1618 " character, omitted, or None");
1619 return NULL;
1620 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001621 /* Explicitly passing None means no interning is desired.
1622 Not passing anything means that a new dictionary is used. */
1623 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001625 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 intern = PyDict_New();
1627 if (!intern)
1628 return NULL;
1629 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001630 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001631 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1633 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001634 }
1635
1636 result = newxmlparseobject(encoding, namespace_separator, intern);
1637 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001639 }
1640 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001641}
1642
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001643PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001644"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001645Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001646
1647static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001648pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001649{
Fred Drake0582df92000-07-12 04:49:00 +00001650 long code = 0;
1651
1652 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1653 return NULL;
1654 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001655}
1656
/* List of methods defined in the module */

/* Each entry gives the Python-visible name, the C implementation, the
   calling-convention flags and the docstring.  ParserCreate accepts
   keyword arguments; ErrorString takes positional arguments only. */
static struct PyMethodDef pyexpat_methods[] = {
    {"ParserCreate",    (PyCFunction)pyexpat_ParserCreate,
     METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
    {"ErrorString",     (PyCFunction)pyexpat_ErrorString,
     METH_VARARGS,      pyexpat_ErrorString__doc__},

    {NULL,       (PyCFunction)NULL, 0, NULL}            /* sentinel */
};

/* Module docstring */

PyDoc_STRVAR(pyexpat_module_documentation,
"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001672
/* Initialization function for the module */

/* MODULE_NAME and MODULE_INITFUNC may be predefined by the build to
   give the module a different name; these are the defaults. */
#ifndef MODULE_NAME
#define MODULE_NAME "pyexpat"
#endif

#ifndef MODULE_INITFUNC
#define MODULE_INITFUNC PyInit_pyexpat
#endif

/* Fallback used only when PyMODINIT_FUNC is not already defined.
   NOTE(review): this fallback expands to a void return type, while
   MODULE_INITFUNC below returns a value -- confirm whether the
   fallback is still reachable. */
#ifndef PyMODINIT_FUNC
#   ifdef MS_WINDOWS
#       define PyMODINIT_FUNC __declspec(dllexport) void
#   else
#       define PyMODINIT_FUNC void
#   endif
#endif

PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */

/* Module definition: name, docstring, m_size of -1 and the method
   table; the remaining four slots are left NULL. */
static struct PyModuleDef pyexpatmodule = {
        PyModuleDef_HEAD_INIT,
        MODULE_NAME,
        pyexpat_module_documentation,
        -1,
        pyexpat_methods,
        NULL,
        NULL,
        NULL,
        NULL
};
1704
Martin v. Löwis069dde22003-01-21 10:58:18 +00001705PyMODINIT_FUNC
1706MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001707{
1708 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001709 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001710 PyObject *errors_module;
1711 PyObject *modelmod_name;
1712 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001713 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001714 PyObject *tmpnum, *tmpstr;
1715 PyObject *codes_dict;
1716 PyObject *rev_codes_dict;
1717 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001718 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001719 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001720
Fred Drake6f987622000-08-25 18:03:30 +00001721 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001722 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001723 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001724 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001725 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001726
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001727 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001729
Fred Drake0582df92000-07-12 04:49:00 +00001730 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001731 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001732 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001734
Fred Drake0582df92000-07-12 04:49:00 +00001735 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001736 if (ErrorObject == NULL) {
1737 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001738 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001739 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001740 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001741 }
1742 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001743 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001744 Py_INCREF(ErrorObject);
1745 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001746 Py_INCREF(&Xmlparsetype);
1747 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001748
Fred Drake738293d2000-12-21 17:25:07 +00001749 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1750 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001751 {
1752 XML_Expat_Version info = XML_ExpatVersionInfo();
1753 PyModule_AddObject(m, "version_info",
1754 Py_BuildValue("(iii)", info.major,
1755 info.minor, info.micro));
1756 }
Fred Drake0582df92000-07-12 04:49:00 +00001757 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001758 compiled, this should check and set native_encoding
1759 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001760 */
Fred Drake93adb692000-09-23 04:55:48 +00001761 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001762
Fred Drake85d835f2001-02-08 15:39:08 +00001763 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001764 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001765 errors_module = PyDict_GetItem(d, errmod_name);
1766 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001767 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001768 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001769 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001770 /* gives away the reference to errors_module */
1771 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001772 }
1773 }
Fred Drake6f987622000-08-25 18:03:30 +00001774 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001775 model_module = PyDict_GetItem(d, modelmod_name);
1776 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001777 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001778 if (model_module != NULL) {
1779 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1780 /* gives away the reference to model_module */
1781 PyModule_AddObject(m, "model", model_module);
1782 }
1783 }
1784 Py_DECREF(modelmod_name);
1785 if (errors_module == NULL || model_module == NULL)
1786 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001787 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001788
Martin v. Löwisc847f402003-01-21 11:09:21 +00001789#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001790 {
1791 const XML_Feature *features = XML_GetFeatureList();
1792 PyObject *list = PyList_New(0);
1793 if (list == NULL)
1794 /* just ignore it */
1795 PyErr_Clear();
1796 else {
1797 int i = 0;
1798 for (; features[i].feature != XML_FEATURE_END; ++i) {
1799 int ok;
1800 PyObject *item = Py_BuildValue("si", features[i].name,
1801 features[i].value);
1802 if (item == NULL) {
1803 Py_DECREF(list);
1804 list = NULL;
1805 break;
1806 }
1807 ok = PyList_Append(list, item);
1808 Py_DECREF(item);
1809 if (ok < 0) {
1810 PyErr_Clear();
1811 break;
1812 }
1813 }
1814 if (list != NULL)
1815 PyModule_AddObject(m, "features", list);
1816 }
1817 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001818#endif
Fred Drake6f987622000-08-25 18:03:30 +00001819
Georg Brandlb4dac712010-10-15 14:46:48 +00001820 codes_dict = PyDict_New();
1821 rev_codes_dict = PyDict_New();
1822 if (codes_dict == NULL || rev_codes_dict == NULL) {
1823 Py_XDECREF(codes_dict);
1824 Py_XDECREF(rev_codes_dict);
1825 return NULL;
1826 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001827
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001828#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001829 if (PyModule_AddStringConstant(errors_module, #name, \
1830 (char *)XML_ErrorString(name)) < 0) \
1831 return NULL; \
1832 tmpnum = PyLong_FromLong(name); \
1833 if (tmpnum == NULL) return NULL; \
1834 res = PyDict_SetItemString(codes_dict, \
1835 XML_ErrorString(name), tmpnum); \
1836 if (res < 0) return NULL; \
1837 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1838 if (tmpstr == NULL) return NULL; \
1839 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1840 Py_DECREF(tmpstr); \
1841 Py_DECREF(tmpnum); \
1842 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001843
Fred Drake0582df92000-07-12 04:49:00 +00001844 MYCONST(XML_ERROR_NO_MEMORY);
1845 MYCONST(XML_ERROR_SYNTAX);
1846 MYCONST(XML_ERROR_NO_ELEMENTS);
1847 MYCONST(XML_ERROR_INVALID_TOKEN);
1848 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1849 MYCONST(XML_ERROR_PARTIAL_CHAR);
1850 MYCONST(XML_ERROR_TAG_MISMATCH);
1851 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1852 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1853 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1854 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1855 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1856 MYCONST(XML_ERROR_ASYNC_ENTITY);
1857 MYCONST(XML_ERROR_BAD_CHAR_REF);
1858 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1859 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1860 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1861 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1862 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001863 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1864 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1865 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001866 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1867 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1868 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1869 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1870 /* Added in Expat 1.95.7. */
1871 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1872 /* Added in Expat 1.95.8. */
1873 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1874 MYCONST(XML_ERROR_INCOMPLETE_PE);
1875 MYCONST(XML_ERROR_XML_DECL);
1876 MYCONST(XML_ERROR_TEXT_DECL);
1877 MYCONST(XML_ERROR_PUBLICID);
1878 MYCONST(XML_ERROR_SUSPENDED);
1879 MYCONST(XML_ERROR_NOT_SUSPENDED);
1880 MYCONST(XML_ERROR_ABORTED);
1881 MYCONST(XML_ERROR_FINISHED);
1882 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001883
Georg Brandlb4dac712010-10-15 14:46:48 +00001884 if (PyModule_AddStringConstant(errors_module, "__doc__",
1885 "Constants used to describe "
1886 "error conditions.") < 0)
1887 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001888
Georg Brandlb4dac712010-10-15 14:46:48 +00001889 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1890 return NULL;
1891 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1892 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001893
Fred Drake93adb692000-09-23 04:55:48 +00001894#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001895
Fred Drake85d835f2001-02-08 15:39:08 +00001896#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001897 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1898 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1899 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001900#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001901
Fred Drake85d835f2001-02-08 15:39:08 +00001902#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1903 PyModule_AddStringConstant(model_module, "__doc__",
1904 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001905
Fred Drake85d835f2001-02-08 15:39:08 +00001906 MYCONST(XML_CTYPE_EMPTY);
1907 MYCONST(XML_CTYPE_ANY);
1908 MYCONST(XML_CTYPE_MIXED);
1909 MYCONST(XML_CTYPE_NAME);
1910 MYCONST(XML_CTYPE_CHOICE);
1911 MYCONST(XML_CTYPE_SEQ);
1912
1913 MYCONST(XML_CQUANT_NONE);
1914 MYCONST(XML_CQUANT_OPT);
1915 MYCONST(XML_CQUANT_REP);
1916 MYCONST(XML_CQUANT_PLUS);
1917#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001918
1919 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001920 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001921 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001922 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1923 capi.MINOR_VERSION = XML_MINOR_VERSION;
1924 capi.MICRO_VERSION = XML_MICRO_VERSION;
1925 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001926 capi.GetErrorCode = XML_GetErrorCode;
1927 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1928 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001929 capi.Parse = XML_Parse;
1930 capi.ParserCreate_MM = XML_ParserCreate_MM;
1931 capi.ParserFree = XML_ParserFree;
1932 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1933 capi.SetCommentHandler = XML_SetCommentHandler;
1934 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1935 capi.SetElementHandler = XML_SetElementHandler;
1936 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1937 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1938 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1939 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001940 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001941 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001942 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943
Benjamin Petersonb173f782009-05-05 22:31:58 +00001944 /* export using capsule */
1945 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001946 if (capi_object)
1947 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001948 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001949}
1950
Fred Drake6f987622000-08-25 18:03:30 +00001951static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001952clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001953{
Fred Drakecde79132001-04-25 16:01:30 +00001954 int i = 0;
1955 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001956
Fred Drake71b63ff2002-06-28 22:29:01 +00001957 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001958 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001959 self->handlers[i] = NULL;
1960 else {
Fred Drakecde79132001-04-25 16:01:30 +00001961 temp = self->handlers[i];
1962 self->handlers[i] = NULL;
1963 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001965 }
Fred Drakecde79132001-04-25 16:01:30 +00001966 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001967}
1968
Tim Peters0c322792002-07-17 16:49:03 +00001969static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001970 {"StartElementHandler",
1971 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001972 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001973 {"EndElementHandler",
1974 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001975 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001976 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001977 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1978 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001979 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001980 (xmlhandlersetter)XML_SetCharacterDataHandler,
1981 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001982 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001983 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001984 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001985 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001986 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001987 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001988 {"StartNamespaceDeclHandler",
1989 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001990 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001991 {"EndNamespaceDeclHandler",
1992 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001993 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001994 {"CommentHandler",
1995 (xmlhandlersetter)XML_SetCommentHandler,
1996 (xmlhandler)my_CommentHandler},
1997 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001998 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001999 (xmlhandler)my_StartCdataSectionHandler},
2000 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002001 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002002 (xmlhandler)my_EndCdataSectionHandler},
2003 {"DefaultHandler",
2004 (xmlhandlersetter)XML_SetDefaultHandler,
2005 (xmlhandler)my_DefaultHandler},
2006 {"DefaultHandlerExpand",
2007 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2008 (xmlhandler)my_DefaultHandlerExpandHandler},
2009 {"NotStandaloneHandler",
2010 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2011 (xmlhandler)my_NotStandaloneHandler},
2012 {"ExternalEntityRefHandler",
2013 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002014 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002015 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002016 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002017 (xmlhandler)my_StartDoctypeDeclHandler},
2018 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002019 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002020 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002021 {"EntityDeclHandler",
2022 (xmlhandlersetter)XML_SetEntityDeclHandler,
2023 (xmlhandler)my_EntityDeclHandler},
2024 {"XmlDeclHandler",
2025 (xmlhandlersetter)XML_SetXmlDeclHandler,
2026 (xmlhandler)my_XmlDeclHandler},
2027 {"ElementDeclHandler",
2028 (xmlhandlersetter)XML_SetElementDeclHandler,
2029 (xmlhandler)my_ElementDeclHandler},
2030 {"AttlistDeclHandler",
2031 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2032 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002033#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002034 {"SkippedEntityHandler",
2035 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2036 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002037#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002038
Fred Drake0582df92000-07-12 04:49:00 +00002039 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002040};