blob: 3f51c12cebce8ec36f41a230e0e11d165bb7d0f0 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Jeremy Hylton9263f572003-06-27 16:13:17 +000011#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
Christian Heimesfa535f52013-07-07 17:35:11 +020013static XML_Memory_Handling_Suite ExpatMemoryHandler = {
14 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
15
/* Slot numbers for the supported callbacks.  The values are used as
   indices into a parser's `handlers` array and into the module-level
   `handler_info` table, so the order of the enumerators matters. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    /* Only present when building against Expat 1.95.4 or newer. */
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    _DummyDecl
};
43
44static PyObject *ErrorObject;
45
46/* ----------------------------------------------------- */
47
48/* Declarations for objects of type xmlparser */
49
50typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000051 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000052
Fred Drake0582df92000-07-12 04:49:00 +000053 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000054 int ordered_attributes; /* Return attributes as a list. */
55 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000056 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000057 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000058 XML_Char *buffer; /* Buffer used when accumulating characters */
59 /* NULL if not enabled */
60 int buffer_size; /* Size of buffer, in XML_Char units */
61 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000062 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000063 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000064} xmlparseobject;
65
Fred Drake2a3d7db2002-06-28 22:56:48 +000066#define CHARACTER_DATA_BUFFER_SIZE 8192
67
Jeremy Hylton938ace62002-07-17 16:30:39 +000068static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000069
Fred Drake117ac852002-09-24 16:24:54 +000070typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000071typedef void* xmlhandler;
72
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000073struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000074 const char *name;
75 xmlhandlersetter setter;
76 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000077 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000078 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079};
80
Jeremy Hylton938ace62002-07-17 16:30:39 +000081static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000082
Fred Drakebd6101c2001-02-14 18:29:45 +000083/* Set an integer attribute on the error object; return true on success,
84 * false on an exception.
85 */
86static int
87set_error_attr(PyObject *err, char *name, int value)
88{
Christian Heimes217cfd12007-12-02 14:31:20 +000089 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000090
Neal Norwitz2f5e9902006-03-08 06:36:45 +000091 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
92 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 0;
94 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000095 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000096 return 1;
97}
98
99/* Build and set an Expat exception, including positioning
100 * information. Always returns NULL.
101 */
Fred Drake85d835f2001-02-08 15:39:08 +0000102static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000103set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000104{
105 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100106 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000107 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000108 int lineno = XML_GetErrorLineNumber(parser);
109 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000110
Victor Stinner499dfcf2011-03-21 13:26:24 +0100111 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
112 XML_ErrorString(code), lineno, column);
113 if (buffer == NULL)
114 return NULL;
115 err = PyObject_CallFunction(ErrorObject, "O", buffer);
116 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000117 if ( err != NULL
118 && set_error_attr(err, "code", code)
119 && set_error_attr(err, "offset", column)
120 && set_error_attr(err, "lineno", lineno)) {
121 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000122 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000123 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000124 return NULL;
125}
126
Fred Drake71b63ff2002-06-28 22:29:01 +0000127static int
128have_handler(xmlparseobject *self, int type)
129{
130 PyObject *handler = self->handlers[type];
131 return handler != NULL;
132}
133
134static PyObject *
135get_handler_name(struct HandlerInfo *hinfo)
136{
137 PyObject *name = hinfo->nameobj;
138 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000139 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000140 hinfo->nameobj = name;
141 }
142 Py_XINCREF(name);
143 return name;
144}
145
Fred Drake85d835f2001-02-08 15:39:08 +0000146
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000147/* Convert a string of XML_Chars into a Unicode string.
148 Returns None if str is a null pointer. */
149
Fred Drake0582df92000-07-12 04:49:00 +0000150static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000151conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000152{
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000154 and hence in UTF-8. */
155 /* UTF-8 from Expat, Unicode desired */
156 if (str == NULL) {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000160 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000161}
162
Fred Drake0582df92000-07-12 04:49:00 +0000163static PyObject *
164conv_string_len_to_unicode(const XML_Char *str, int len)
165{
Fred Drake71b63ff2002-06-28 22:29:01 +0000166 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000167 and hence in UTF-8. */
168 /* UTF-8 from Expat, Unicode desired */
169 if (str == NULL) {
170 Py_INCREF(Py_None);
171 return Py_None;
172 }
Fred Drake6f987622000-08-25 18:03:30 +0000173 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000174}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176/* Callback routines */
177
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000178static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000179
Martin v. Löwis069dde22003-01-21 10:58:18 +0000180/* This handler is used when an error has been detected, in the hope
181 that actual parsing can be terminated early. This will only help
182 if an external entity reference is encountered. */
183static int
184error_external_entity_ref_handler(XML_Parser parser,
185 const XML_Char *context,
186 const XML_Char *base,
187 const XML_Char *systemId,
188 const XML_Char *publicId)
189{
190 return 0;
191}
192
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193/* Dummy character data handler used when an error (exception) has
194 been detected, and the actual parsing can be terminated early.
195 This is needed since character data handler can't be safely removed
196 from within the character data handler, but can be replaced. It is
197 used only from the character data handler trampoline, and must be
198 used right after `flag_error()` is called. */
199static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000201{
202 /* Do nothing. */
203}
204
Fred Drake6f987622000-08-25 18:03:30 +0000205static void
206flag_error(xmlparseobject *self)
207{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000208 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000209 XML_SetExternalEntityRefHandler(self->itself,
210 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000211}
212
213static PyCodeObject*
214getcode(enum HandlerTypes slot, char* func_name, int lineno)
215{
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000217 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000218 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000219 }
220 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000221}
222
Jeremy Hylton9263f572003-06-27 16:13:17 +0000223#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000224static int
225trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
226{
227 int result = 0;
228 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000230 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 tstate->tracing++;
232 result = tstate->c_profilefunc(tstate->c_profileobj,
233 f, code , val);
234 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
235 || (tstate->c_profilefunc != NULL));
236 tstate->tracing--;
237 if (result)
238 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000239 }
240 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 tstate->tracing++;
242 result = tstate->c_tracefunc(tstate->c_traceobj,
243 f, code , val);
244 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
245 || (tstate->c_profilefunc != NULL));
246 tstate->tracing--;
247 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000248 return result;
249}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000250
251static int
252trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
253{
254 PyObject *type, *value, *traceback, *arg;
255 int err;
256
257 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000259
260 PyErr_Fetch(&type, &value, &traceback);
261 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 value = Py_None;
263 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000264 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000265 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 PyErr_Restore(type, value, traceback);
268 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000269 }
270 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
271 Py_DECREF(arg);
272 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000274 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 Py_XDECREF(type);
276 Py_XDECREF(value);
277 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000278 }
279 return err;
280}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000281#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000282
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000284call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
285 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000286{
Fred Drakebd6101c2001-02-14 18:29:45 +0000287 PyThreadState *tstate = PyThreadState_GET();
288 PyFrameObject *f;
Christian Heimesa6404ad2013-07-20 22:54:25 +0200289 PyObject *res, *globals;
Fred Drakebd6101c2001-02-14 18:29:45 +0000290
291 if (c == NULL)
292 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293
Christian Heimesa6404ad2013-07-20 22:54:25 +0200294 globals = PyEval_GetGlobals();
295 if (globals == NULL) {
296 return NULL;
297 }
298
299 f = PyFrame_New(tstate, c, globals, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000300 if (f == NULL)
301 return NULL;
302 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000303#ifdef FIX_TRACE
304 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000306 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000307#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000308 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000309 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 if (tstate->curexc_traceback == NULL)
311 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000312 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000313#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 if (trace_frame_exc(tstate, f) < 0) {
315 return NULL;
316 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000317 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000318 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
320 Py_XDECREF(res);
321 res = NULL;
322 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000323 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000324#else
325 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000326#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000327 tstate->frame = f->f_back;
328 Py_DECREF(f);
329 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000330}
331
Fred Drakeb91a36b2002-06-27 19:40:48 +0000332static PyObject*
333string_intern(xmlparseobject *self, const char* str)
334{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000335 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000336 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000337 /* result can be NULL if the unicode conversion failed. */
338 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000340 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000342 value = PyDict_GetItem(self->intern, result);
343 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000345 return result;
346 else
347 return NULL;
348 }
349 Py_INCREF(value);
350 Py_DECREF(result);
351 return value;
352}
353
Fred Drake2a3d7db2002-06-28 22:56:48 +0000354/* Return 0 on success, -1 on exception.
355 * flag_error() will be called before return if needed.
356 */
357static int
358call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
359{
360 PyObject *args;
361 PyObject *temp;
362
Georg Brandlc01537f2010-10-15 16:26:08 +0000363 if (!have_handler(self, CharacterData))
364 return -1;
365
Fred Drake2a3d7db2002-06-28 22:56:48 +0000366 args = PyTuple_New(1);
367 if (args == NULL)
368 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000369 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000370 if (temp == NULL) {
371 Py_DECREF(args);
372 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000373 XML_SetCharacterDataHandler(self->itself,
374 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000375 return -1;
376 }
377 PyTuple_SET_ITEM(args, 0, temp);
378 /* temp is now a borrowed reference; consider it unused. */
379 self->in_callback = 1;
380 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000381 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000382 /* temp is an owned reference again, or NULL */
383 self->in_callback = 0;
384 Py_DECREF(args);
385 if (temp == NULL) {
386 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000387 XML_SetCharacterDataHandler(self->itself,
388 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000389 return -1;
390 }
391 Py_DECREF(temp);
392 return 0;
393}
394
395static int
396flush_character_buffer(xmlparseobject *self)
397{
398 int rc;
399 if (self->buffer == NULL || self->buffer_used == 0)
400 return 0;
401 rc = call_character_handler(self, self->buffer, self->buffer_used);
402 self->buffer_used = 0;
403 return rc;
404}
405
406static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000407my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000408{
409 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200410
411 if (PyErr_Occurred())
412 return;
413
Fred Drake2a3d7db2002-06-28 22:56:48 +0000414 if (self->buffer == NULL)
415 call_character_handler(self, data, len);
416 else {
417 if ((self->buffer_used + len) > self->buffer_size) {
418 if (flush_character_buffer(self) < 0)
419 return;
420 /* handler might have changed; drop the rest on the floor
421 * if there isn't a handler anymore
422 */
423 if (!have_handler(self, CharacterData))
424 return;
425 }
426 if (len > self->buffer_size) {
427 call_character_handler(self, data, len);
428 self->buffer_used = 0;
429 }
430 else {
431 memcpy(self->buffer + self->buffer_used,
432 data, len * sizeof(XML_Char));
433 self->buffer_used += len;
434 }
435 }
436}
437
Fred Drake85d835f2001-02-08 15:39:08 +0000438static void
439my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000440 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000441{
442 xmlparseobject *self = (xmlparseobject *)userData;
443
Fred Drake71b63ff2002-06-28 22:29:01 +0000444 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000445 PyObject *container, *rv, *args;
446 int i, max;
447
Victor Stinner9e09c262013-07-18 23:17:01 +0200448 if (PyErr_Occurred())
449 return;
450
Fred Drake2a3d7db2002-06-28 22:56:48 +0000451 if (flush_character_buffer(self) < 0)
452 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000453 /* Set max to the number of slots filled in atts[]; max/2 is
454 * the number of attributes we need to process.
455 */
456 if (self->specified_attributes) {
457 max = XML_GetSpecifiedAttributeCount(self->itself);
458 }
459 else {
460 max = 0;
461 while (atts[max] != NULL)
462 max += 2;
463 }
464 /* Build the container. */
465 if (self->ordered_attributes)
466 container = PyList_New(max);
467 else
468 container = PyDict_New();
469 if (container == NULL) {
470 flag_error(self);
471 return;
472 }
473 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000474 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000475 PyObject *v;
476 if (n == NULL) {
477 flag_error(self);
478 Py_DECREF(container);
479 return;
480 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000481 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000482 if (v == NULL) {
483 flag_error(self);
484 Py_DECREF(container);
485 Py_DECREF(n);
486 return;
487 }
488 if (self->ordered_attributes) {
489 PyList_SET_ITEM(container, i, n);
490 PyList_SET_ITEM(container, i+1, v);
491 }
492 else if (PyDict_SetItem(container, n, v)) {
493 flag_error(self);
494 Py_DECREF(n);
495 Py_DECREF(v);
496 return;
497 }
498 else {
499 Py_DECREF(n);
500 Py_DECREF(v);
501 }
502 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000503 args = string_intern(self, name);
504 if (args != NULL)
505 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000506 if (args == NULL) {
507 Py_DECREF(container);
508 return;
509 }
510 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000511 self->in_callback = 1;
512 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000513 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000514 self->in_callback = 0;
515 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000516 if (rv == NULL) {
517 flag_error(self);
518 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000519 }
Fred Drake85d835f2001-02-08 15:39:08 +0000520 Py_DECREF(rv);
521 }
522}
523
/* Generate the trampoline my_<NAME>Handler.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes slot; also used for the function name
 *   PARAMS        parenthesised parameter list
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue
 *   CONVERSION    statement run on the handler result `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing (beyond returning RETURN) when no
 * handler is installed or an exception is pending; otherwise it flushes
 * buffered character data, builds the arguments and calls the handler,
 * calling flag_error() if building the arguments or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA)                               \
static RC                                                             \
my_##NAME##Handler PARAMS {                                           \
    xmlparseobject *self = GETUSERDATA ;                              \
    PyObject *args = NULL;                                            \
    PyObject *rv = NULL;                                              \
    INIT                                                              \
                                                                      \
    if (!have_handler(self, NAME))                                    \
        return RETURN;                                                \
    if (PyErr_Occurred())                                             \
        return RETURN;                                                \
    if (flush_character_buffer(self) < 0)                             \
        return RETURN;                                                \
    args = Py_BuildValue PARAM_FORMAT ;                               \
    if (!args) {                                                      \
        flag_error(self);                                             \
        return RETURN;                                                \
    }                                                                 \
    self->in_callback = 1;                                            \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__),                \
                         self->handlers[NAME], args, self);           \
    self->in_callback = 0;                                            \
    Py_DECREF(args);                                                  \
    if (rv == NULL) {                                                 \
        flag_error(self);                                             \
        return RETURN;                                                \
    }                                                                 \
    CONVERSION                                                        \
    Py_DECREF(rv);                                                    \
    return RETURN;                                                    \
}
554
/* Trampoline returning void: no extra locals, no result conversion; the
   parser object is taken from the `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT)            \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,   \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000558
/* Trampoline returning int: the handler result is converted with
   PyLong_AsLong() and returned; 0 is returned when the handler was not
   called or failed.  The parser object is taken from `userData`. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)             \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT,  \
               rc = PyLong_AsLong(rv);, rc,                 \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000563
Fred Drake71b63ff2002-06-28 22:29:01 +0000564VOID_HANDLER(EndElement,
565 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000566 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000567
Fred Drake6f987622000-08-25 18:03:30 +0000568VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000569 (void *userData,
570 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000571 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000572 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000573
Fred Drake6f987622000-08-25 18:03:30 +0000574VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000575 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000576 const XML_Char *entityName,
577 const XML_Char *base,
578 const XML_Char *systemId,
579 const XML_Char *publicId,
580 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000581 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000582 string_intern(self, entityName), string_intern(self, base),
583 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000584 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000585
Fred Drake85d835f2001-02-08 15:39:08 +0000586VOID_HANDLER(EntityDecl,
587 (void *userData,
588 const XML_Char *entityName,
589 int is_parameter_entity,
590 const XML_Char *value,
591 int value_length,
592 const XML_Char *base,
593 const XML_Char *systemId,
594 const XML_Char *publicId,
595 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000596 ("NiNNNNN",
597 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000598 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000599 string_intern(self, base), string_intern(self, systemId),
600 string_intern(self, publicId),
601 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000602
603VOID_HANDLER(XmlDecl,
604 (void *userData,
605 const XML_Char *version,
606 const XML_Char *encoding,
607 int standalone),
608 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000609 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000610 standalone))
611
612static PyObject *
613conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000614 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000615{
616 PyObject *result = NULL;
617 PyObject *children = PyTuple_New(model->numchildren);
618 int i;
619
620 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000621 assert(model->numchildren < INT_MAX);
622 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000623 PyObject *child = conv_content_model(&model->children[i],
624 conv_string);
625 if (child == NULL) {
626 Py_XDECREF(children);
627 return NULL;
628 }
629 PyTuple_SET_ITEM(children, i, child);
630 }
631 result = Py_BuildValue("(iiO&N)",
632 model->type, model->quant,
633 conv_string,model->name, children);
634 }
635 return result;
636}
637
Fred Drake06dd8cf2003-02-02 03:54:17 +0000638static void
639my_ElementDeclHandler(void *userData,
640 const XML_Char *name,
641 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000642{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000643 xmlparseobject *self = (xmlparseobject *)userData;
644 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000645
Fred Drake06dd8cf2003-02-02 03:54:17 +0000646 if (have_handler(self, ElementDecl)) {
647 PyObject *rv = NULL;
648 PyObject *modelobj, *nameobj;
649
Victor Stinner9e09c262013-07-18 23:17:01 +0200650 if (PyErr_Occurred())
651 return;
652
Fred Drake06dd8cf2003-02-02 03:54:17 +0000653 if (flush_character_buffer(self) < 0)
654 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000655 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000656 if (modelobj == NULL) {
657 flag_error(self);
658 goto finally;
659 }
660 nameobj = string_intern(self, name);
661 if (nameobj == NULL) {
662 Py_DECREF(modelobj);
663 flag_error(self);
664 goto finally;
665 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000666 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000667 if (args == NULL) {
668 Py_DECREF(modelobj);
669 flag_error(self);
670 goto finally;
671 }
672 self->in_callback = 1;
673 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000674 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000675 self->in_callback = 0;
676 if (rv == NULL) {
677 flag_error(self);
678 goto finally;
679 }
680 Py_DECREF(rv);
681 }
682 finally:
683 Py_XDECREF(args);
684 XML_FreeContentModel(self->itself, model);
685 return;
686}
Fred Drake85d835f2001-02-08 15:39:08 +0000687
688VOID_HANDLER(AttlistDecl,
689 (void *userData,
690 const XML_Char *elname,
691 const XML_Char *attname,
692 const XML_Char *att_type,
693 const XML_Char *dflt,
694 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000695 ("(NNO&O&i)",
696 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000697 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000698 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000699
Martin v. Löwisc847f402003-01-21 11:09:21 +0000700#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000701VOID_HANDLER(SkippedEntity,
702 (void *userData,
703 const XML_Char *entityName,
704 int is_parameter_entity),
705 ("Ni",
706 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000707#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000708
Fred Drake71b63ff2002-06-28 22:29:01 +0000709VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000710 (void *userData,
711 const XML_Char *notationName,
712 const XML_Char *base,
713 const XML_Char *systemId,
714 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000715 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 string_intern(self, notationName), string_intern(self, base),
717 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000718
Fred Drake6f987622000-08-25 18:03:30 +0000719VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 (void *userData,
721 const XML_Char *prefix,
722 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000723 ("(NN)",
724 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000725
Fred Drake6f987622000-08-25 18:03:30 +0000726VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 (void *userData,
728 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000729 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000730
Fred Drake6f987622000-08-25 18:03:30 +0000731VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000732 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000733 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000734
Fred Drake6f987622000-08-25 18:03:30 +0000735VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000736 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000738
Fred Drake6f987622000-08-25 18:03:30 +0000739VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000740 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000742
Fred Drake6f987622000-08-25 18:03:30 +0000743VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 (void *userData, const XML_Char *s, int len),
745 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000746
/* DefaultHandlerExpand handler (VOID_HANDLER instantiation).  Takes the
   same arguments as the Default handler: the (s, len) text run converted
   with conv_string_len_to_unicode(). */
VOID_HANDLER(DefaultHandlerExpand,
             (void *userData, const XML_Char *s, int len),
             ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000750
/* NotStandalone handler, instantiated through INT_HANDLER (defined outside
   this section; presumably the int-returning counterpart of VOID_HANDLER --
   confirm against the macro definition).  The argument tuple is empty. */
INT_HANDLER(NotStandalone,
            (void *userData),
            ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000754
/* ExternalEntityRef handler, instantiated through RC_HANDLER with an int
   return type.  Unlike the other handlers it receives the XML_Parser itself
   rather than the user data, so the last macro argument recovers the user
   data with XML_GetUserData(parser).
   The argument tuple "(O&NNN)" is built from the context (converted by
   conv_string_to_unicode) and string_intern() applied to base, systemId
   and publicId.  `rc' starts at 0 and is assigned from PyLong_AsLong() on
   the callback result `rv'.
   NOTE(review): where RC_HANDLER places the init / conversion / return
   fragments is not visible here -- confirm against the macro definition. */
RC_HANDLER(int, ExternalEntityRef,
           (XML_Parser parser,
            const XML_Char *context,
            const XML_Char *base,
            const XML_Char *systemId,
            const XML_Char *publicId),
           int rc=0;,
           ("(O&NNN)",
            conv_string_to_unicode ,context, string_intern(self, base),
            string_intern(self, systemId), string_intern(self, publicId)),
           rc = PyLong_AsLong(rv);, rc,
           XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000767
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000768/* XXX UnknownEncodingHandler */
769
/* StartDoctypeDecl handler (VOID_HANDLER instantiation).  The argument
   tuple "(NNNi)" holds string_intern() applied to doctypeName, sysid and
   pubid, followed by has_internal_subset as an int. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000777
/* EndDoctypeDecl handler (VOID_HANDLER instantiation); empty argument tuple. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000780/* ---------------------------------------------------------------- */
781
Fred Drake71b63ff2002-06-28 22:29:01 +0000782static PyObject *
783get_parse_result(xmlparseobject *self, int rv)
784{
785 if (PyErr_Occurred()) {
786 return NULL;
787 }
788 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000789 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000790 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000791 if (flush_character_buffer(self) < 0) {
792 return NULL;
793 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000794 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000795}
796
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000797PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000798"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000799Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000800
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200801#define MAX_CHUNK_SIZE (1 << 20)
802
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000803static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000804xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200806 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000807 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200808 const char *s;
809 Py_ssize_t slen;
810 Py_buffer view;
811 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000812
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200813 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000814 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000815
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200816 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200817 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200818 s = PyUnicode_AsUTF8AndSize(data, &slen);
819 if (s == NULL)
820 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200821 /* Explicitly set UTF-8 encoding. Return code ignored. */
822 (void)XML_SetEncoding(self->itself, "utf-8");
823 }
824 else {
825 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
826 return NULL;
827 s = view.buf;
828 slen = view.len;
829 }
830
831 while (slen > MAX_CHUNK_SIZE) {
832 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
833 if (!rc)
834 goto done;
835 s += MAX_CHUNK_SIZE;
836 slen -= MAX_CHUNK_SIZE;
837 }
Christian Heimesba723202013-11-22 00:46:18 +0100838 assert(MAX_CHUNK_SIZE < INT_MAX && slen < INT_MAX);
839 rc = XML_Parse(self->itself, s, (int)slen, isFinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200840
841done:
842 if (view.buf != NULL)
843 PyBuffer_Release(&view);
844 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000845}
846
Fred Drakeca1f4262000-09-21 20:10:23 +0000847/* File reading copied from cPickle */
848
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000849#define BUF_SIZE 2048
850
Fred Drake0582df92000-07-12 04:49:00 +0000851static int
852readinst(char *buf, int buf_size, PyObject *meth)
853{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000854 PyObject *str;
855 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000856 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000857
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000858 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000859 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000860 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000861
Christian Heimes72b710a2008-05-26 13:28:38 +0000862 if (PyBytes_Check(str))
863 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000864 else if (PyByteArray_Check(str))
865 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000866 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000867 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000868 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000869 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000870 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000871 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000872 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000873 if (len > buf_size) {
874 PyErr_Format(PyExc_ValueError,
875 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000876 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000877 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000878 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000879 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000880 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000881 Py_DECREF(str);
882 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000883 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000884
885error:
886 Py_XDECREF(str);
887 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000888}
889
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000890PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000891"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000892Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000893
894static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000895xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000896{
Fred Drake0582df92000-07-12 04:49:00 +0000897 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000898 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200899 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000900
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200901 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000902 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000903 PyErr_SetString(PyExc_TypeError,
904 "argument must have 'read' attribute");
905 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000906 }
907 for (;;) {
908 int bytes_read;
909 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000910 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000911 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000912 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000913 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000914
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000915 bytes_read = readinst(buf, BUF_SIZE, readmethod);
916 if (bytes_read < 0) {
917 Py_DECREF(readmethod);
918 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000919 }
920 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000921 if (PyErr_Occurred()) {
922 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000923 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000924 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000925
Fred Drake0582df92000-07-12 04:49:00 +0000926 if (!rv || bytes_read == 0)
927 break;
928 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000929 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000930 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000931}
932
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000933PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000934"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000935Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000936
937static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000938xmlparse_SetBase(xmlparseobject *self, PyObject *args)
939{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000940 char *base;
941
Fred Drake0582df92000-07-12 04:49:00 +0000942 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000943 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000944 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000946 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000947 Py_INCREF(Py_None);
948 return Py_None;
949}
950
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000951PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000952"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000953Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000954
955static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000956xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000957{
Fred Drake0582df92000-07-12 04:49:00 +0000958 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000959}
960
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000961PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000962"GetInputContext() -> string\n\
963Return the untranslated text of the input that caused the current event.\n\
964If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000965for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000966
967static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000968xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000969{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000970 if (self->in_callback) {
971 int offset, size;
972 const char *buffer
973 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000974
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000975 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000976 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000977 size - offset);
978 else
979 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000980 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000981 else
982 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000983}
Fred Drakebd6101c2001-02-14 18:29:45 +0000984
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000985PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000986"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000987Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000988information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000989
990static PyObject *
991xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
992{
993 char *context;
994 char *encoding = NULL;
995 xmlparseobject *new_parser;
996 int i;
997
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000998 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000999 &context, &encoding)) {
1000 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001001 }
1002
Martin v. Löwis894258c2001-09-23 10:20:10 +00001003 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +00001004 if (new_parser == NULL)
1005 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001006 new_parser->buffer_size = self->buffer_size;
1007 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001008 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001009 new_parser->ordered_attributes = self->ordered_attributes;
1010 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001011 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001012 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001013 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001015 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001016 new_parser->intern = self->intern;
1017 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001018 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001019
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001020 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001021 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001022 if (new_parser->buffer == NULL) {
1023 Py_DECREF(new_parser);
1024 return PyErr_NoMemory();
1025 }
1026 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001027 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001028 Py_DECREF(new_parser);
1029 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001030 }
1031
1032 XML_SetUserData(new_parser->itself, (void *)new_parser);
1033
1034 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001035 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001036 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001037
Victor Stinnerb6404912013-07-07 16:21:41 +02001038 new_parser->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001039 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001040 Py_DECREF(new_parser);
1041 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001042 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001043 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001044
1045 /* then copy handlers from self */
1046 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001047 PyObject *handler = self->handlers[i];
1048 if (handler != NULL) {
1049 Py_INCREF(handler);
1050 new_parser->handlers[i] = handler;
1051 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001052 handler_info[i].handler);
1053 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001054 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001055 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001056}
1057
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001058PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001059"SetParamEntityParsing(flag) -> success\n\
1060Controls parsing of parameter entities (including the external DTD\n\
1061subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1062XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1063XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001064was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001065
1066static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001067xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001068{
Fred Drake85d835f2001-02-08 15:39:08 +00001069 int flag;
1070 if (!PyArg_ParseTuple(args, "i", &flag))
1071 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001072 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001073 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001074}
1075
Martin v. Löwisc847f402003-01-21 11:09:21 +00001076
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): pass the truth value of `flag' (default true) to
   XML_UseForeignDTD().  Returns None on success; otherwise returns the
   result of set_error() for the Expat error code. */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    int flag = 1;
    enum XML_Error err;

    if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag)) {
        return NULL;
    }

    err = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    if (err == XML_ERROR_NONE) {
        Py_RETURN_NONE;
    }
    return set_error(self, err);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001101
/* Forward declaration: referenced by the method table below. */
static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);

/* Method table for xmlparser objects.  Each entry gives the Python-visible
   name, the C implementation, its calling convention and its docstring.
   UseForeignDTD is only present when built against Expat >= 1.95.5. */
static struct PyMethodDef xmlparse_methods[] = {
    {"Parse",     (PyCFunction)xmlparse_Parse,
                  METH_VARARGS, xmlparse_Parse__doc__},
    {"ParseFile", (PyCFunction)xmlparse_ParseFile,
                  METH_O,       xmlparse_ParseFile__doc__},
    {"SetBase",   (PyCFunction)xmlparse_SetBase,
                  METH_VARARGS, xmlparse_SetBase__doc__},
    {"GetBase",   (PyCFunction)xmlparse_GetBase,
                  METH_NOARGS, xmlparse_GetBase__doc__},
    {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
                  METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
    {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
                  METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
    {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
                  METH_NOARGS, xmlparse_GetInputContext__doc__},
#if XML_COMBINED_VERSION >= 19505
    {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
                  METH_VARARGS, xmlparse_UseForeignDTD__doc__},
#endif
    {"__dir__", xmlparse_dir, METH_NOARGS},
    {NULL,          NULL}           /* sentinel */
};
1126
1127/* ---------- */
1128
1129
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001130
Fred Drake71b63ff2002-06-28 22:29:01 +00001131/* pyexpat international encoding support.
1132 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001133*/
1134
Fred Drake71b63ff2002-06-28 22:29:01 +00001135static int
1136PyUnknownEncodingHandler(void *encodingHandlerData,
1137 const XML_Char *name,
1138 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001139{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001140 static unsigned char template_buffer[256] = {0};
1141 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001142 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001143 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001144 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001145
Victor Stinner9e09c262013-07-18 23:17:01 +02001146 if (PyErr_Occurred())
1147 return XML_STATUS_ERROR;
1148
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001149 if (template_buffer[1] == 0) {
1150 for (i = 0; i < 256; i++)
1151 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001152 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001153
1154 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001155 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001156 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001157 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001158 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001159
1160 if (PyUnicode_GET_LENGTH(u) != 256) {
1161 Py_DECREF(u);
1162 PyErr_SetString(PyExc_ValueError,
1163 "multi-byte encodings are not supported");
1164 return XML_STATUS_ERROR;
1165 }
1166
1167 kind = PyUnicode_KIND(u);
1168 data = PyUnicode_DATA(u);
1169 for (i = 0; i < 256; i++) {
1170 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1171 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1172 info->map[i] = ch;
1173 else
1174 info->map[i] = -1;
1175 }
1176
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001177 info->data = NULL;
1178 info->convert = NULL;
1179 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001180 Py_DECREF(u);
1181
1182 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001183}
1184
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001185
1186static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001187newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001188{
1189 int i;
1190 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001191
Martin v. Löwis894258c2001-09-23 10:20:10 +00001192 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001193 if (self == NULL)
1194 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001195
Fred Drake2a3d7db2002-06-28 22:56:48 +00001196 self->buffer = NULL;
1197 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1198 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001199 self->ordered_attributes = 0;
1200 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001201 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001202 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001203 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001204 self->intern = intern;
1205 Py_XINCREF(self->intern);
1206 PyObject_GC_Track(self);
1207
Christian Heimesfa535f52013-07-07 17:35:11 +02001208 /* namespace_separator is either NULL or contains one char + \0 */
1209 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1210 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001211 if (self->itself == NULL) {
1212 PyErr_SetString(PyExc_RuntimeError,
1213 "XML_ParserCreate failed");
1214 Py_DECREF(self);
1215 return NULL;
1216 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001217#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1218 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1219 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1220 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001221 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001222 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001223#endif
Fred Drake0582df92000-07-12 04:49:00 +00001224 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001225 XML_SetUnknownEncodingHandler(self->itself,
1226 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001227
Fred Drake2a3d7db2002-06-28 22:56:48 +00001228 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001229 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001230
Victor Stinnerb6404912013-07-07 16:21:41 +02001231 self->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001232 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001233 Py_DECREF(self);
1234 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001235 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001236 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001237
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001238 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001239}
1240
1241
1242static void
Fred Drake0582df92000-07-12 04:49:00 +00001243xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001244{
Fred Drake0582df92000-07-12 04:49:00 +00001245 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001246 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001247 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001248 XML_ParserFree(self->itself);
1249 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001250
Fred Drake85d835f2001-02-08 15:39:08 +00001251 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001252 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001253 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001254 temp = self->handlers[i];
1255 self->handlers[i] = NULL;
1256 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001257 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001258 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001259 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001260 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001261 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001262 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001263 self->buffer = NULL;
1264 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001265 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001266 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001267}
1268
Fred Drake0582df92000-07-12 04:49:00 +00001269static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001270handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001271{
1272 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001273 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001274 if (PyUnicode_CompareWithASCIIString(
1275 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001276 return i;
1277 }
1278 }
1279 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001280}
1281
1282static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001283get_pybool(int istrue)
1284{
1285 PyObject *result = istrue ? Py_True : Py_False;
1286 Py_INCREF(result);
1287 return result;
1288}
1289
1290static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001291xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001292{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001293 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001294 int handlernum = -1;
1295
Alexander Belopolskye239d232010-12-08 23:31:48 +00001296 if (!PyUnicode_Check(nameobj))
1297 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001298 if (PyUnicode_READY(nameobj))
1299 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300
Alexander Belopolskye239d232010-12-08 23:31:48 +00001301 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001302
1303 if (handlernum != -1) {
1304 PyObject *result = self->handlers[handlernum];
1305 if (result == NULL)
1306 result = Py_None;
1307 Py_INCREF(result);
1308 return result;
1309 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001310
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001311 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1312 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001313 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001314 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001315 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001316 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001317 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001318 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001319 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001320 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001321 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001322 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001323 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001324 XML_GetErrorByteIndex(self->itself));
1325 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001326 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001327 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001328 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001329 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001330 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001331 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001332 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001333 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001334 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001335 XML_GetCurrentByteIndex(self->itself));
1336 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001337 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001338 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001339 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001340 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001341 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001342 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001343 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001344 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001345 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001346 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001347 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001348 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001349 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001350 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001351 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001352 if (self->intern == NULL) {
1353 Py_INCREF(Py_None);
1354 return Py_None;
1355 }
1356 else {
1357 Py_INCREF(self->intern);
1358 return self->intern;
1359 }
1360 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001361 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001362 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001363}
1364
1365static PyObject *
1366xmlparse_dir(PyObject *self, PyObject* noargs)
1367{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368#define APPEND(list, str) \
1369 do { \
1370 PyObject *o = PyUnicode_FromString(str); \
1371 if (o != NULL) \
1372 PyList_Append(list, o); \
1373 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001374 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001375
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001376 int i;
1377 PyObject *rc = PyList_New(0);
1378 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001380 for (i = 0; handler_info[i].name != NULL; i++) {
1381 PyObject *o = get_handler_name(&handler_info[i]);
1382 if (o != NULL)
1383 PyList_Append(rc, o);
1384 Py_XDECREF(o);
1385 }
1386 APPEND(rc, "ErrorCode");
1387 APPEND(rc, "ErrorLineNumber");
1388 APPEND(rc, "ErrorColumnNumber");
1389 APPEND(rc, "ErrorByteIndex");
1390 APPEND(rc, "CurrentLineNumber");
1391 APPEND(rc, "CurrentColumnNumber");
1392 APPEND(rc, "CurrentByteIndex");
1393 APPEND(rc, "buffer_size");
1394 APPEND(rc, "buffer_text");
1395 APPEND(rc, "buffer_used");
1396 APPEND(rc, "namespace_prefixes");
1397 APPEND(rc, "ordered_attributes");
1398 APPEND(rc, "specified_attributes");
1399 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001400
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001401#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001402
1403 if (PyErr_Occurred()) {
1404 Py_DECREF(rc);
1405 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001406 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001407
1408 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001409}
1410
Fred Drake6f987622000-08-25 18:03:30 +00001411static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001412sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001413{
1414 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001415 if (handlernum >= 0) {
1416 xmlhandler c_handler = NULL;
1417 PyObject *temp = self->handlers[handlernum];
1418
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001419 if (v == Py_None) {
1420 /* If this is the character data handler, and a character
1421 data handler is already active, we need to be more
1422 careful. What we can safely do is replace the existing
1423 character data handler callback function with a no-op
1424 function that will refuse to call Python. The downside
1425 is that this doesn't completely remove the character
1426 data handler from the C layer if there's any callback
1427 active, so Expat does a little more work than it
1428 otherwise would, but that's really an odd case. A more
1429 elaborate system of handlers and state could remove the
1430 C handler more effectively. */
1431 if (handlernum == CharacterData && self->in_callback)
1432 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001433 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001434 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001435 else if (v != NULL) {
1436 Py_INCREF(v);
1437 c_handler = handler_info[handlernum].handler;
1438 }
Fred Drake0582df92000-07-12 04:49:00 +00001439 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001440 Py_XDECREF(temp);
1441 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001442 return 1;
1443 }
1444 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001445}
1446
1447static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001448xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001449{
Fred Drake6f987622000-08-25 18:03:30 +00001450 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001451 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001452 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1453 return -1;
1454 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001455 assert(PyUnicode_Check(name));
1456 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001457 int b = PyObject_IsTrue(v);
1458 if (b < 0)
1459 return -1;
1460 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001461 if (self->buffer == NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001462 self->buffer = PyMem_Malloc(self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001463 if (self->buffer == NULL) {
1464 PyErr_NoMemory();
1465 return -1;
1466 }
1467 self->buffer_used = 0;
1468 }
1469 }
1470 else if (self->buffer != NULL) {
1471 if (flush_character_buffer(self) < 0)
1472 return -1;
Victor Stinnerb6404912013-07-07 16:21:41 +02001473 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001474 self->buffer = NULL;
1475 }
1476 return 0;
1477 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001478 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001479 int b = PyObject_IsTrue(v);
1480 if (b < 0)
1481 return -1;
1482 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001483 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1484 return 0;
1485 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001486 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001487 int b = PyObject_IsTrue(v);
1488 if (b < 0)
1489 return -1;
1490 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001491 return 0;
1492 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001493 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001494 int b = PyObject_IsTrue(v);
1495 if (b < 0)
1496 return -1;
1497 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001498 return 0;
1499 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001500
Alexander Belopolskye239d232010-12-08 23:31:48 +00001501 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001502 long new_buffer_size;
1503 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1505 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001506 }
1507
1508 new_buffer_size=PyLong_AS_LONG(v);
1509 /* trivial case -- no change */
1510 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001512 }
1513
1514 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1516 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001517 }
1518
1519 /* check maximum */
1520 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 char errmsg[100];
1522 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1523 PyErr_SetString(PyExc_ValueError, errmsg);
1524 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001525 }
1526
1527 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001528 /* there is already a buffer */
1529 if (self->buffer_used != 0) {
Christian Heimes09994a92013-07-20 22:41:58 +02001530 if (flush_character_buffer(self) < 0) {
1531 return -1;
1532 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 }
1534 /* free existing buffer */
Victor Stinnerb6404912013-07-07 16:21:41 +02001535 PyMem_Free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001536 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001537 self->buffer = PyMem_Malloc(new_buffer_size);
Christian Heimes2380ac72008-01-09 00:17:24 +00001538 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001539 PyErr_NoMemory();
1540 return -1;
1541 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001542 self->buffer_size = new_buffer_size;
1543 return 0;
1544 }
1545
Alexander Belopolskye239d232010-12-08 23:31:48 +00001546 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001547 /* If we're changing the character data handler, flush all
1548 * cached data with the old handler. Not sure there's a
1549 * "right" thing to do, though, but this probably won't
1550 * happen.
1551 */
1552 if (flush_character_buffer(self) < 0)
1553 return -1;
1554 }
Fred Drake6f987622000-08-25 18:03:30 +00001555 if (sethandler(self, name, v)) {
1556 return 0;
1557 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001558 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001559 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001560}
1561
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001562static int
1563xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1564{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001565 int i;
1566 for (i = 0; handler_info[i].name != NULL; i++)
1567 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001568 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001569}
1570
1571static int
1572xmlparse_clear(xmlparseobject *op)
1573{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001574 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001575 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001576 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001577}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001578
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001579PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001580
1581static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 PyVarObject_HEAD_INIT(NULL, 0)
1583 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001584 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 0, /*tp_itemsize*/
1586 /* methods */
1587 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1588 (printfunc)0, /*tp_print*/
1589 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001590 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001591 0, /*tp_reserved*/
1592 (reprfunc)0, /*tp_repr*/
1593 0, /*tp_as_number*/
1594 0, /*tp_as_sequence*/
1595 0, /*tp_as_mapping*/
1596 (hashfunc)0, /*tp_hash*/
1597 (ternaryfunc)0, /*tp_call*/
1598 (reprfunc)0, /*tp_str*/
1599 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001600 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1604 (traverseproc)xmlparse_traverse, /* tp_traverse */
1605 (inquiry)xmlparse_clear, /* tp_clear */
1606 0, /* tp_richcompare */
1607 0, /* tp_weaklistoffset */
1608 0, /* tp_iter */
1609 0, /* tp_iternext */
1610 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001611};
1612
1613/* End of code for xmlparser objects */
1614/* -------------------------------------------------------- */
1615
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001616PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001617"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001618Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001619
1620static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001621pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1622{
Fred Drakecde79132001-04-25 16:01:30 +00001623 char *encoding = NULL;
1624 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001625 PyObject *intern = NULL;
1626 PyObject *result;
1627 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001628 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001629 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001630
Fred Drakeb91a36b2002-06-27 19:40:48 +00001631 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1632 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001633 return NULL;
1634 if (namespace_separator != NULL
1635 && strlen(namespace_separator) > 1) {
1636 PyErr_SetString(PyExc_ValueError,
1637 "namespace_separator must be at most one"
1638 " character, omitted, or None");
1639 return NULL;
1640 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001641 /* Explicitly passing None means no interning is desired.
1642 Not passing anything means that a new dictionary is used. */
1643 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001645 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 intern = PyDict_New();
1647 if (!intern)
1648 return NULL;
1649 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001650 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001651 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1653 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001654 }
1655
1656 result = newxmlparseobject(encoding, namespace_separator, intern);
1657 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001659 }
1660 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001661}
1662
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001663PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001664"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001665Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001666
1667static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001668pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001669{
Fred Drake0582df92000-07-12 04:49:00 +00001670 long code = 0;
1671
1672 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1673 return NULL;
1674 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001675}
1676
1677/* List of methods defined in the module */
1678
1679static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001681 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1683 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001686};
1687
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001688/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001689
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001690PyDoc_STRVAR(pyexpat_module_documentation,
1691"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001692
Fred Drakecde79132001-04-25 16:01:30 +00001693/* Initialization function for the module */
1694
1695#ifndef MODULE_NAME
1696#define MODULE_NAME "pyexpat"
1697#endif
1698
1699#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001700#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001701#endif
1702
Martin v. Löwis069dde22003-01-21 10:58:18 +00001703#ifndef PyMODINIT_FUNC
1704# ifdef MS_WINDOWS
1705# define PyMODINIT_FUNC __declspec(dllexport) void
1706# else
1707# define PyMODINIT_FUNC void
1708# endif
1709#endif
1710
Mark Hammond8235ea12002-07-19 06:55:41 +00001711PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001712
Martin v. Löwis1a214512008-06-11 05:26:20 +00001713static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001714 PyModuleDef_HEAD_INIT,
1715 MODULE_NAME,
1716 pyexpat_module_documentation,
1717 -1,
1718 pyexpat_methods,
1719 NULL,
1720 NULL,
1721 NULL,
1722 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001723};
1724
Martin v. Löwis069dde22003-01-21 10:58:18 +00001725PyMODINIT_FUNC
1726MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001727{
1728 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001729 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001730 PyObject *errors_module;
1731 PyObject *modelmod_name;
1732 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001733 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001734 PyObject *tmpnum, *tmpstr;
1735 PyObject *codes_dict;
1736 PyObject *rev_codes_dict;
1737 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001738 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001739 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001740
Fred Drake6f987622000-08-25 18:03:30 +00001741 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001742 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001743 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001744 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001745 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001746
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001747 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001749
Fred Drake0582df92000-07-12 04:49:00 +00001750 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001751 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001752 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001753 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001754
Fred Drake0582df92000-07-12 04:49:00 +00001755 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001756 if (ErrorObject == NULL) {
1757 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001758 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001759 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001760 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001761 }
1762 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001763 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001764 Py_INCREF(ErrorObject);
1765 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001766 Py_INCREF(&Xmlparsetype);
1767 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001768
Fred Drake738293d2000-12-21 17:25:07 +00001769 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1770 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001771 {
1772 XML_Expat_Version info = XML_ExpatVersionInfo();
1773 PyModule_AddObject(m, "version_info",
1774 Py_BuildValue("(iii)", info.major,
1775 info.minor, info.micro));
1776 }
Fred Drake0582df92000-07-12 04:49:00 +00001777 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001778 compiled, this should check and set native_encoding
1779 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001780 */
Fred Drake93adb692000-09-23 04:55:48 +00001781 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001782
Fred Drake85d835f2001-02-08 15:39:08 +00001783 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001784 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001785 errors_module = PyDict_GetItem(d, errmod_name);
1786 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001787 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001788 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001789 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001790 /* gives away the reference to errors_module */
1791 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001792 }
1793 }
Fred Drake6f987622000-08-25 18:03:30 +00001794 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001795 model_module = PyDict_GetItem(d, modelmod_name);
1796 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001797 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001798 if (model_module != NULL) {
1799 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1800 /* gives away the reference to model_module */
1801 PyModule_AddObject(m, "model", model_module);
1802 }
1803 }
1804 Py_DECREF(modelmod_name);
1805 if (errors_module == NULL || model_module == NULL)
1806 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001807 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001808
Martin v. Löwisc847f402003-01-21 11:09:21 +00001809#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001810 {
1811 const XML_Feature *features = XML_GetFeatureList();
1812 PyObject *list = PyList_New(0);
1813 if (list == NULL)
1814 /* just ignore it */
1815 PyErr_Clear();
1816 else {
1817 int i = 0;
1818 for (; features[i].feature != XML_FEATURE_END; ++i) {
1819 int ok;
1820 PyObject *item = Py_BuildValue("si", features[i].name,
1821 features[i].value);
1822 if (item == NULL) {
1823 Py_DECREF(list);
1824 list = NULL;
1825 break;
1826 }
1827 ok = PyList_Append(list, item);
1828 Py_DECREF(item);
1829 if (ok < 0) {
1830 PyErr_Clear();
1831 break;
1832 }
1833 }
1834 if (list != NULL)
1835 PyModule_AddObject(m, "features", list);
1836 }
1837 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001838#endif
Fred Drake6f987622000-08-25 18:03:30 +00001839
Georg Brandlb4dac712010-10-15 14:46:48 +00001840 codes_dict = PyDict_New();
1841 rev_codes_dict = PyDict_New();
1842 if (codes_dict == NULL || rev_codes_dict == NULL) {
1843 Py_XDECREF(codes_dict);
1844 Py_XDECREF(rev_codes_dict);
1845 return NULL;
1846 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001847
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001848#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001849 if (PyModule_AddStringConstant(errors_module, #name, \
1850 (char *)XML_ErrorString(name)) < 0) \
1851 return NULL; \
1852 tmpnum = PyLong_FromLong(name); \
1853 if (tmpnum == NULL) return NULL; \
1854 res = PyDict_SetItemString(codes_dict, \
1855 XML_ErrorString(name), tmpnum); \
1856 if (res < 0) return NULL; \
1857 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1858 if (tmpstr == NULL) return NULL; \
1859 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1860 Py_DECREF(tmpstr); \
1861 Py_DECREF(tmpnum); \
1862 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001863
Fred Drake0582df92000-07-12 04:49:00 +00001864 MYCONST(XML_ERROR_NO_MEMORY);
1865 MYCONST(XML_ERROR_SYNTAX);
1866 MYCONST(XML_ERROR_NO_ELEMENTS);
1867 MYCONST(XML_ERROR_INVALID_TOKEN);
1868 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1869 MYCONST(XML_ERROR_PARTIAL_CHAR);
1870 MYCONST(XML_ERROR_TAG_MISMATCH);
1871 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1872 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1873 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1874 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1875 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1876 MYCONST(XML_ERROR_ASYNC_ENTITY);
1877 MYCONST(XML_ERROR_BAD_CHAR_REF);
1878 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1879 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1880 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1881 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1882 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001883 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1884 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1885 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001886 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1887 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1888 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1889 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1890 /* Added in Expat 1.95.7. */
1891 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1892 /* Added in Expat 1.95.8. */
1893 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1894 MYCONST(XML_ERROR_INCOMPLETE_PE);
1895 MYCONST(XML_ERROR_XML_DECL);
1896 MYCONST(XML_ERROR_TEXT_DECL);
1897 MYCONST(XML_ERROR_PUBLICID);
1898 MYCONST(XML_ERROR_SUSPENDED);
1899 MYCONST(XML_ERROR_NOT_SUSPENDED);
1900 MYCONST(XML_ERROR_ABORTED);
1901 MYCONST(XML_ERROR_FINISHED);
1902 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001903
Georg Brandlb4dac712010-10-15 14:46:48 +00001904 if (PyModule_AddStringConstant(errors_module, "__doc__",
1905 "Constants used to describe "
1906 "error conditions.") < 0)
1907 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001908
Georg Brandlb4dac712010-10-15 14:46:48 +00001909 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1910 return NULL;
1911 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1912 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001913
Fred Drake93adb692000-09-23 04:55:48 +00001914#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001915
Fred Drake85d835f2001-02-08 15:39:08 +00001916#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001917 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1918 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1919 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001920#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001921
Fred Drake85d835f2001-02-08 15:39:08 +00001922#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1923 PyModule_AddStringConstant(model_module, "__doc__",
1924 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001925
Fred Drake85d835f2001-02-08 15:39:08 +00001926 MYCONST(XML_CTYPE_EMPTY);
1927 MYCONST(XML_CTYPE_ANY);
1928 MYCONST(XML_CTYPE_MIXED);
1929 MYCONST(XML_CTYPE_NAME);
1930 MYCONST(XML_CTYPE_CHOICE);
1931 MYCONST(XML_CTYPE_SEQ);
1932
1933 MYCONST(XML_CQUANT_NONE);
1934 MYCONST(XML_CQUANT_OPT);
1935 MYCONST(XML_CQUANT_REP);
1936 MYCONST(XML_CQUANT_PLUS);
1937#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001938
1939 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001940 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001941 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001942 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1943 capi.MINOR_VERSION = XML_MINOR_VERSION;
1944 capi.MICRO_VERSION = XML_MICRO_VERSION;
1945 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001946 capi.GetErrorCode = XML_GetErrorCode;
1947 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1948 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001949 capi.Parse = XML_Parse;
1950 capi.ParserCreate_MM = XML_ParserCreate_MM;
1951 capi.ParserFree = XML_ParserFree;
1952 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1953 capi.SetCommentHandler = XML_SetCommentHandler;
1954 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1955 capi.SetElementHandler = XML_SetElementHandler;
1956 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1957 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1958 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1959 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001960 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001961 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001962 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001963
Benjamin Petersonb173f782009-05-05 22:31:58 +00001964 /* export using capsule */
1965 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001966 if (capi_object)
1967 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001968 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001969}
1970
Fred Drake6f987622000-08-25 18:03:30 +00001971static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001972clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001973{
Fred Drakecde79132001-04-25 16:01:30 +00001974 int i = 0;
1975 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001976
Fred Drake71b63ff2002-06-28 22:29:01 +00001977 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001978 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 self->handlers[i] = NULL;
1980 else {
Fred Drakecde79132001-04-25 16:01:30 +00001981 temp = self->handlers[i];
1982 self->handlers[i] = NULL;
1983 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001985 }
Fred Drakecde79132001-04-25 16:01:30 +00001986 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001987}
1988
Tim Peters0c322792002-07-17 16:49:03 +00001989static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001990 {"StartElementHandler",
1991 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001992 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001993 {"EndElementHandler",
1994 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001995 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001996 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001997 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1998 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001999 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002000 (xmlhandlersetter)XML_SetCharacterDataHandler,
2001 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002002 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002003 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002004 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002005 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002006 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002007 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002008 {"StartNamespaceDeclHandler",
2009 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002010 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002011 {"EndNamespaceDeclHandler",
2012 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002013 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002014 {"CommentHandler",
2015 (xmlhandlersetter)XML_SetCommentHandler,
2016 (xmlhandler)my_CommentHandler},
2017 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002018 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002019 (xmlhandler)my_StartCdataSectionHandler},
2020 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002021 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002022 (xmlhandler)my_EndCdataSectionHandler},
2023 {"DefaultHandler",
2024 (xmlhandlersetter)XML_SetDefaultHandler,
2025 (xmlhandler)my_DefaultHandler},
2026 {"DefaultHandlerExpand",
2027 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2028 (xmlhandler)my_DefaultHandlerExpandHandler},
2029 {"NotStandaloneHandler",
2030 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2031 (xmlhandler)my_NotStandaloneHandler},
2032 {"ExternalEntityRefHandler",
2033 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002034 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002035 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002036 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002037 (xmlhandler)my_StartDoctypeDeclHandler},
2038 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002039 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002040 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002041 {"EntityDeclHandler",
2042 (xmlhandlersetter)XML_SetEntityDeclHandler,
2043 (xmlhandler)my_EntityDeclHandler},
2044 {"XmlDeclHandler",
2045 (xmlhandlersetter)XML_SetXmlDeclHandler,
2046 (xmlhandler)my_XmlDeclHandler},
2047 {"ElementDeclHandler",
2048 (xmlhandlersetter)XML_SetElementDeclHandler,
2049 (xmlhandler)my_ElementDeclHandler},
2050 {"AttlistDeclHandler",
2051 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2052 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002053#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002054 {"SkippedEntityHandler",
2055 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2056 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002057#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002058
Fred Drake0582df92000-07-12 04:49:00 +00002059 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002060};