blob: 156dbf13ba30498fb3d9374f9ec1c2c5ec89db4c [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
/* Expat version folded into one comparable integer:
 * major * 10000 + minor * 100 + micro.
 */
#define XML_COMBINED_VERSION \
    (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* Compiles in trace_frame()/trace_frame_exc() and their calls from
 * call_with_frame().
 */
#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
Christian Heimesfa535f52013-07-07 17:35:11 +020013static XML_Memory_Handling_Suite ExpatMemoryHandler = {
14 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
15
/* Slot numbers for the supported callbacks.  The values index
 * xmlparseobject.handlers and handler_info, so the order of the
 * enumerators matters.
 */
enum HandlerTypes {
    /* element structure and content */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    /* declarations and namespaces */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    /* comments and CDATA markers */
    Comment,
    StartCdataSection,
    EndCdataSection,
    /* default handlers */
    Default,
    DefaultHandlerExpand,
    /* document type handling */
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* trailing marker; always the last enumerator */
    _DummyDecl
};
43
44static PyObject *ErrorObject;
45
46/* ----------------------------------------------------- */
47
48/* Declarations for objects of type xmlparser */
49
50typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000051 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000052
Fred Drake0582df92000-07-12 04:49:00 +000053 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000054 int ordered_attributes; /* Return attributes as a list. */
55 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000056 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000057 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000058 XML_Char *buffer; /* Buffer used when accumulating characters */
59 /* NULL if not enabled */
60 int buffer_size; /* Size of buffer, in XML_Char units */
61 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000062 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000063 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000064} xmlparseobject;
65
Fred Drake2a3d7db2002-06-28 22:56:48 +000066#define CHARACTER_DATA_BUFFER_SIZE 8192
67
Jeremy Hylton938ace62002-07-17 16:30:39 +000068static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000069
Fred Drake117ac852002-09-24 16:24:54 +000070typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000071typedef void* xmlhandler;
72
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000073struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000074 const char *name;
75 xmlhandlersetter setter;
76 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000077 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000078 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079};
80
Jeremy Hylton938ace62002-07-17 16:30:39 +000081static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000082
Fred Drakebd6101c2001-02-14 18:29:45 +000083/* Set an integer attribute on the error object; return true on success,
84 * false on an exception.
85 */
86static int
87set_error_attr(PyObject *err, char *name, int value)
88{
Christian Heimes217cfd12007-12-02 14:31:20 +000089 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000090
Neal Norwitz2f5e9902006-03-08 06:36:45 +000091 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
92 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 0;
94 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000095 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000096 return 1;
97}
98
99/* Build and set an Expat exception, including positioning
100 * information. Always returns NULL.
101 */
Fred Drake85d835f2001-02-08 15:39:08 +0000102static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000103set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000104{
105 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100106 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000107 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000108 int lineno = XML_GetErrorLineNumber(parser);
109 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000110
Victor Stinner499dfcf2011-03-21 13:26:24 +0100111 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
112 XML_ErrorString(code), lineno, column);
113 if (buffer == NULL)
114 return NULL;
115 err = PyObject_CallFunction(ErrorObject, "O", buffer);
116 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000117 if ( err != NULL
118 && set_error_attr(err, "code", code)
119 && set_error_attr(err, "offset", column)
120 && set_error_attr(err, "lineno", lineno)) {
121 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000122 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000123 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000124 return NULL;
125}
126
Fred Drake71b63ff2002-06-28 22:29:01 +0000127static int
128have_handler(xmlparseobject *self, int type)
129{
130 PyObject *handler = self->handlers[type];
131 return handler != NULL;
132}
133
134static PyObject *
135get_handler_name(struct HandlerInfo *hinfo)
136{
137 PyObject *name = hinfo->nameobj;
138 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000139 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000140 hinfo->nameobj = name;
141 }
142 Py_XINCREF(name);
143 return name;
144}
145
Fred Drake85d835f2001-02-08 15:39:08 +0000146
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000147/* Convert a string of XML_Chars into a Unicode string.
148 Returns None if str is a null pointer. */
149
Fred Drake0582df92000-07-12 04:49:00 +0000150static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000151conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000152{
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000154 and hence in UTF-8. */
155 /* UTF-8 from Expat, Unicode desired */
156 if (str == NULL) {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000160 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000161}
162
Fred Drake0582df92000-07-12 04:49:00 +0000163static PyObject *
164conv_string_len_to_unicode(const XML_Char *str, int len)
165{
Fred Drake71b63ff2002-06-28 22:29:01 +0000166 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000167 and hence in UTF-8. */
168 /* UTF-8 from Expat, Unicode desired */
169 if (str == NULL) {
170 Py_INCREF(Py_None);
171 return Py_None;
172 }
Fred Drake6f987622000-08-25 18:03:30 +0000173 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000174}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176/* Callback routines */
177
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000178static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000179
Martin v. Löwis069dde22003-01-21 10:58:18 +0000180/* This handler is used when an error has been detected, in the hope
181 that actual parsing can be terminated early. This will only help
182 if an external entity reference is encountered. */
183static int
184error_external_entity_ref_handler(XML_Parser parser,
185 const XML_Char *context,
186 const XML_Char *base,
187 const XML_Char *systemId,
188 const XML_Char *publicId)
189{
190 return 0;
191}
192
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193/* Dummy character data handler used when an error (exception) has
194 been detected, and the actual parsing can be terminated early.
195 This is needed since character data handler can't be safely removed
196 from within the character data handler, but can be replaced. It is
197 used only from the character data handler trampoline, and must be
198 used right after `flag_error()` is called. */
199static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000201{
202 /* Do nothing. */
203}
204
Fred Drake6f987622000-08-25 18:03:30 +0000205static void
206flag_error(xmlparseobject *self)
207{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000208 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000209 XML_SetExternalEntityRefHandler(self->itself,
210 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000211}
212
213static PyCodeObject*
214getcode(enum HandlerTypes slot, char* func_name, int lineno)
215{
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000217 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000218 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000219 }
220 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000221}
222
Jeremy Hylton9263f572003-06-27 16:13:17 +0000223#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000224static int
225trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
226{
227 int result = 0;
228 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000230 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 tstate->tracing++;
232 result = tstate->c_profilefunc(tstate->c_profileobj,
233 f, code , val);
234 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
235 || (tstate->c_profilefunc != NULL));
236 tstate->tracing--;
237 if (result)
238 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000239 }
240 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 tstate->tracing++;
242 result = tstate->c_tracefunc(tstate->c_traceobj,
243 f, code , val);
244 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
245 || (tstate->c_profilefunc != NULL));
246 tstate->tracing--;
247 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000248 return result;
249}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000250
251static int
252trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
253{
254 PyObject *type, *value, *traceback, *arg;
255 int err;
256
257 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000259
260 PyErr_Fetch(&type, &value, &traceback);
261 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 value = Py_None;
263 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000264 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000265 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 PyErr_Restore(type, value, traceback);
268 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000269 }
270 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
271 Py_DECREF(arg);
272 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000274 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 Py_XDECREF(type);
276 Py_XDECREF(value);
277 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000278 }
279 return err;
280}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000281#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000282
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000284call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
285 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000286{
Fred Drakebd6101c2001-02-14 18:29:45 +0000287 PyThreadState *tstate = PyThreadState_GET();
288 PyFrameObject *f;
Christian Heimesa6404ad2013-07-20 22:54:25 +0200289 PyObject *res, *globals;
Fred Drakebd6101c2001-02-14 18:29:45 +0000290
291 if (c == NULL)
292 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293
Christian Heimesa6404ad2013-07-20 22:54:25 +0200294 globals = PyEval_GetGlobals();
295 if (globals == NULL) {
296 return NULL;
297 }
298
299 f = PyFrame_New(tstate, c, globals, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000300 if (f == NULL)
301 return NULL;
302 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000303#ifdef FIX_TRACE
304 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000306 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000307#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000308 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000309 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 if (tstate->curexc_traceback == NULL)
311 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000312 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000313#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 if (trace_frame_exc(tstate, f) < 0) {
315 return NULL;
316 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000317 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000318 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
320 Py_XDECREF(res);
321 res = NULL;
322 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000323 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000324#else
325 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000326#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000327 tstate->frame = f->f_back;
328 Py_DECREF(f);
329 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000330}
331
Fred Drakeb91a36b2002-06-27 19:40:48 +0000332static PyObject*
333string_intern(xmlparseobject *self, const char* str)
334{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000335 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000336 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000337 /* result can be NULL if the unicode conversion failed. */
338 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000340 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000342 value = PyDict_GetItem(self->intern, result);
343 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000345 return result;
346 else
347 return NULL;
348 }
349 Py_INCREF(value);
350 Py_DECREF(result);
351 return value;
352}
353
Fred Drake2a3d7db2002-06-28 22:56:48 +0000354/* Return 0 on success, -1 on exception.
355 * flag_error() will be called before return if needed.
356 */
357static int
358call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
359{
360 PyObject *args;
361 PyObject *temp;
362
Georg Brandlc01537f2010-10-15 16:26:08 +0000363 if (!have_handler(self, CharacterData))
364 return -1;
365
Fred Drake2a3d7db2002-06-28 22:56:48 +0000366 args = PyTuple_New(1);
367 if (args == NULL)
368 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000369 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000370 if (temp == NULL) {
371 Py_DECREF(args);
372 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000373 XML_SetCharacterDataHandler(self->itself,
374 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000375 return -1;
376 }
377 PyTuple_SET_ITEM(args, 0, temp);
378 /* temp is now a borrowed reference; consider it unused. */
379 self->in_callback = 1;
380 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000381 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000382 /* temp is an owned reference again, or NULL */
383 self->in_callback = 0;
384 Py_DECREF(args);
385 if (temp == NULL) {
386 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000387 XML_SetCharacterDataHandler(self->itself,
388 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000389 return -1;
390 }
391 Py_DECREF(temp);
392 return 0;
393}
394
395static int
396flush_character_buffer(xmlparseobject *self)
397{
398 int rc;
399 if (self->buffer == NULL || self->buffer_used == 0)
400 return 0;
401 rc = call_character_handler(self, self->buffer, self->buffer_used);
402 self->buffer_used = 0;
403 return rc;
404}
405
406static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000407my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000408{
409 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200410
411 if (PyErr_Occurred())
412 return;
413
Fred Drake2a3d7db2002-06-28 22:56:48 +0000414 if (self->buffer == NULL)
415 call_character_handler(self, data, len);
416 else {
417 if ((self->buffer_used + len) > self->buffer_size) {
418 if (flush_character_buffer(self) < 0)
419 return;
420 /* handler might have changed; drop the rest on the floor
421 * if there isn't a handler anymore
422 */
423 if (!have_handler(self, CharacterData))
424 return;
425 }
426 if (len > self->buffer_size) {
427 call_character_handler(self, data, len);
428 self->buffer_used = 0;
429 }
430 else {
431 memcpy(self->buffer + self->buffer_used,
432 data, len * sizeof(XML_Char));
433 self->buffer_used += len;
434 }
435 }
436}
437
Fred Drake85d835f2001-02-08 15:39:08 +0000438static void
439my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000440 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000441{
442 xmlparseobject *self = (xmlparseobject *)userData;
443
Fred Drake71b63ff2002-06-28 22:29:01 +0000444 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000445 PyObject *container, *rv, *args;
446 int i, max;
447
Victor Stinner9e09c262013-07-18 23:17:01 +0200448 if (PyErr_Occurred())
449 return;
450
Fred Drake2a3d7db2002-06-28 22:56:48 +0000451 if (flush_character_buffer(self) < 0)
452 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000453 /* Set max to the number of slots filled in atts[]; max/2 is
454 * the number of attributes we need to process.
455 */
456 if (self->specified_attributes) {
457 max = XML_GetSpecifiedAttributeCount(self->itself);
458 }
459 else {
460 max = 0;
461 while (atts[max] != NULL)
462 max += 2;
463 }
464 /* Build the container. */
465 if (self->ordered_attributes)
466 container = PyList_New(max);
467 else
468 container = PyDict_New();
469 if (container == NULL) {
470 flag_error(self);
471 return;
472 }
473 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000474 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000475 PyObject *v;
476 if (n == NULL) {
477 flag_error(self);
478 Py_DECREF(container);
479 return;
480 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000481 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000482 if (v == NULL) {
483 flag_error(self);
484 Py_DECREF(container);
485 Py_DECREF(n);
486 return;
487 }
488 if (self->ordered_attributes) {
489 PyList_SET_ITEM(container, i, n);
490 PyList_SET_ITEM(container, i+1, v);
491 }
492 else if (PyDict_SetItem(container, n, v)) {
493 flag_error(self);
494 Py_DECREF(n);
495 Py_DECREF(v);
496 return;
497 }
498 else {
499 Py_DECREF(n);
500 Py_DECREF(v);
501 }
502 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000503 args = string_intern(self, name);
504 if (args != NULL)
505 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000506 if (args == NULL) {
507 Py_DECREF(container);
508 return;
509 }
510 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000511 self->in_callback = 1;
512 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000513 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000514 self->in_callback = 0;
515 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000516 if (rv == NULL) {
517 flag_error(self);
518 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000519 }
Fred Drake85d835f2001-02-08 15:39:08 +0000520 Py_DECREF(rv);
521 }
522}
523
/* Generate the trampoline my_<NAME>Handler.
 *
 *   RC           - return type of the generated function
 *   NAME         - enum HandlerTypes member; also the name passed to getcode()
 *   PARAMS       - parenthesized parameter list of the generated function
 *   INIT         - extra declaration(s) placed after the local variables
 *   PARAM_FORMAT - parenthesized argument list for Py_BuildValue
 *   CONVERSION   - statement(s) run on the handler result `rv`
 *   RETURN       - expression returned on every path
 *   GETUSERDATA  - expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when no handler is set or an
 * exception is pending; otherwise it flushes the character buffer,
 * builds the arguments and calls the Python handler, calling
 * flag_error() when building the arguments or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS { \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ; \
        if (args == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME, #NAME, __LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
554
/* Trampoline returning void; `userData` is the xmlparseobject. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* Trampoline returning int: the handler result converted with
 * PyLong_AsLong(), or 0 when the handler was not called or failed.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000563
Fred Drake71b63ff2002-06-28 22:29:01 +0000564VOID_HANDLER(EndElement,
565 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000566 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000567
Fred Drake6f987622000-08-25 18:03:30 +0000568VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000569 (void *userData,
570 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000571 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000572 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000573
Fred Drake6f987622000-08-25 18:03:30 +0000574VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000575 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000576 const XML_Char *entityName,
577 const XML_Char *base,
578 const XML_Char *systemId,
579 const XML_Char *publicId,
580 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000581 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000582 string_intern(self, entityName), string_intern(self, base),
583 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000584 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000585
Fred Drake85d835f2001-02-08 15:39:08 +0000586VOID_HANDLER(EntityDecl,
587 (void *userData,
588 const XML_Char *entityName,
589 int is_parameter_entity,
590 const XML_Char *value,
591 int value_length,
592 const XML_Char *base,
593 const XML_Char *systemId,
594 const XML_Char *publicId,
595 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000596 ("NiNNNNN",
597 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000598 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000599 string_intern(self, base), string_intern(self, systemId),
600 string_intern(self, publicId),
601 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000602
603VOID_HANDLER(XmlDecl,
604 (void *userData,
605 const XML_Char *version,
606 const XML_Char *encoding,
607 int standalone),
608 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000609 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000610 standalone))
611
612static PyObject *
613conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000614 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000615{
616 PyObject *result = NULL;
617 PyObject *children = PyTuple_New(model->numchildren);
618 int i;
619
620 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000621 assert(model->numchildren < INT_MAX);
622 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000623 PyObject *child = conv_content_model(&model->children[i],
624 conv_string);
625 if (child == NULL) {
626 Py_XDECREF(children);
627 return NULL;
628 }
629 PyTuple_SET_ITEM(children, i, child);
630 }
631 result = Py_BuildValue("(iiO&N)",
632 model->type, model->quant,
633 conv_string,model->name, children);
634 }
635 return result;
636}
637
Fred Drake06dd8cf2003-02-02 03:54:17 +0000638static void
639my_ElementDeclHandler(void *userData,
640 const XML_Char *name,
641 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000642{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000643 xmlparseobject *self = (xmlparseobject *)userData;
644 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000645
Fred Drake06dd8cf2003-02-02 03:54:17 +0000646 if (have_handler(self, ElementDecl)) {
647 PyObject *rv = NULL;
648 PyObject *modelobj, *nameobj;
649
Victor Stinner9e09c262013-07-18 23:17:01 +0200650 if (PyErr_Occurred())
651 return;
652
Fred Drake06dd8cf2003-02-02 03:54:17 +0000653 if (flush_character_buffer(self) < 0)
654 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000655 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000656 if (modelobj == NULL) {
657 flag_error(self);
658 goto finally;
659 }
660 nameobj = string_intern(self, name);
661 if (nameobj == NULL) {
662 Py_DECREF(modelobj);
663 flag_error(self);
664 goto finally;
665 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000666 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000667 if (args == NULL) {
668 Py_DECREF(modelobj);
669 flag_error(self);
670 goto finally;
671 }
672 self->in_callback = 1;
673 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000674 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000675 self->in_callback = 0;
676 if (rv == NULL) {
677 flag_error(self);
678 goto finally;
679 }
680 Py_DECREF(rv);
681 }
682 finally:
683 Py_XDECREF(args);
684 XML_FreeContentModel(self->itself, model);
685 return;
686}
Fred Drake85d835f2001-02-08 15:39:08 +0000687
688VOID_HANDLER(AttlistDecl,
689 (void *userData,
690 const XML_Char *elname,
691 const XML_Char *attname,
692 const XML_Char *att_type,
693 const XML_Char *dflt,
694 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000695 ("(NNO&O&i)",
696 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000697 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000698 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000699
Martin v. Löwisc847f402003-01-21 11:09:21 +0000700#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000701VOID_HANDLER(SkippedEntity,
702 (void *userData,
703 const XML_Char *entityName,
704 int is_parameter_entity),
705 ("Ni",
706 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000707#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000708
Fred Drake71b63ff2002-06-28 22:29:01 +0000709VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000710 (void *userData,
711 const XML_Char *notationName,
712 const XML_Char *base,
713 const XML_Char *systemId,
714 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000715 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 string_intern(self, notationName), string_intern(self, base),
717 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000718
Fred Drake6f987622000-08-25 18:03:30 +0000719VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 (void *userData,
721 const XML_Char *prefix,
722 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000723 ("(NN)",
724 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000725
Fred Drake6f987622000-08-25 18:03:30 +0000726VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 (void *userData,
728 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000729 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000730
Fred Drake6f987622000-08-25 18:03:30 +0000731VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000732 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000733 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000734
Fred Drake6f987622000-08-25 18:03:30 +0000735VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000736 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000738
Fred Drake6f987622000-08-25 18:03:30 +0000739VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000740 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000742
Fred Drake6f987622000-08-25 18:03:30 +0000743VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 (void *userData, const XML_Char *s, int len),
745 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000746
Fred Drake6f987622000-08-25 18:03:30 +0000747VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000748 (void *userData, const XML_Char *s, int len),
749 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000750
Fred Drake71b63ff2002-06-28 22:29:01 +0000751INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 (void *userData),
753 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000754
Fred Drake6f987622000-08-25 18:03:30 +0000755RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 (XML_Parser parser,
757 const XML_Char *context,
758 const XML_Char *base,
759 const XML_Char *systemId,
760 const XML_Char *publicId),
761 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000762 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 conv_string_to_unicode ,context, string_intern(self, base),
764 string_intern(self, systemId), string_intern(self, publicId)),
765 rc = PyLong_AsLong(rv);, rc,
766 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000767
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000768/* XXX UnknownEncodingHandler */
769
Fred Drake85d835f2001-02-08 15:39:08 +0000770VOID_HANDLER(StartDoctypeDecl,
771 (void *userData, const XML_Char *doctypeName,
772 const XML_Char *sysid, const XML_Char *pubid,
773 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000774 ("(NNNi)", string_intern(self, doctypeName),
775 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000776 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000777
778VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000780/* ---------------------------------------------------------------- */
781
Fred Drake71b63ff2002-06-28 22:29:01 +0000782static PyObject *
783get_parse_result(xmlparseobject *self, int rv)
784{
785 if (PyErr_Occurred()) {
786 return NULL;
787 }
788 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000789 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000790 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000791 if (flush_character_buffer(self) < 0) {
792 return NULL;
793 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000794 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000795}
796
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000797PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000798"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000799Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000800
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200801#define MAX_CHUNK_SIZE (1 << 20)
802
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000803static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000804xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200806 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000807 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200808 const char *s;
809 Py_ssize_t slen;
810 Py_buffer view;
811 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000812
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200813 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000814 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000815
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200816 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200817 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200818 s = PyUnicode_AsUTF8AndSize(data, &slen);
819 if (s == NULL)
820 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200821 /* Explicitly set UTF-8 encoding. Return code ignored. */
822 (void)XML_SetEncoding(self->itself, "utf-8");
823 }
824 else {
825 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
826 return NULL;
827 s = view.buf;
828 slen = view.len;
829 }
830
831 while (slen > MAX_CHUNK_SIZE) {
832 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
833 if (!rc)
834 goto done;
835 s += MAX_CHUNK_SIZE;
836 slen -= MAX_CHUNK_SIZE;
837 }
838 rc = XML_Parse(self->itself, s, slen, isFinal);
839
840done:
841 if (view.buf != NULL)
842 PyBuffer_Release(&view);
843 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000844}
845
Fred Drakeca1f4262000-09-21 20:10:23 +0000846/* File reading copied from cPickle */
847
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000848#define BUF_SIZE 2048
849
Fred Drake0582df92000-07-12 04:49:00 +0000850static int
851readinst(char *buf, int buf_size, PyObject *meth)
852{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000853 PyObject *str;
854 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000855 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000856
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000857 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000858 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000859 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000860
Christian Heimes72b710a2008-05-26 13:28:38 +0000861 if (PyBytes_Check(str))
862 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000863 else if (PyByteArray_Check(str))
864 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000865 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000866 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000867 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000868 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000869 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000870 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000871 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000872 if (len > buf_size) {
873 PyErr_Format(PyExc_ValueError,
874 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000875 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000876 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000877 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000878 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000879 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000880 Py_DECREF(str);
881 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000882 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000883
884error:
885 Py_XDECREF(str);
886 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000887}
888
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000889PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000890"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000891Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000892
893static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000894xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000895{
Fred Drake0582df92000-07-12 04:49:00 +0000896 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000897 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200898 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000899
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200900 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000901 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000902 PyErr_SetString(PyExc_TypeError,
903 "argument must have 'read' attribute");
904 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000905 }
906 for (;;) {
907 int bytes_read;
908 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000909 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000910 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000911 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000912 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000913
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000914 bytes_read = readinst(buf, BUF_SIZE, readmethod);
915 if (bytes_read < 0) {
916 Py_DECREF(readmethod);
917 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000918 }
919 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000920 if (PyErr_Occurred()) {
921 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000922 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000923 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000924
Fred Drake0582df92000-07-12 04:49:00 +0000925 if (!rv || bytes_read == 0)
926 break;
927 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000928 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000929 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000930}
931
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000932PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000933"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000934Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000935
936static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000937xmlparse_SetBase(xmlparseobject *self, PyObject *args)
938{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000939 char *base;
940
Fred Drake0582df92000-07-12 04:49:00 +0000941 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000942 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000943 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000945 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000946 Py_INCREF(Py_None);
947 return Py_None;
948}
949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000950PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000951"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000952Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000953
954static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000955xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000956{
Fred Drake0582df92000-07-12 04:49:00 +0000957 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000958}
959
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000960PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000961"GetInputContext() -> string\n\
962Return the untranslated text of the input that caused the current event.\n\
963If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000964for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000965
966static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000967xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000968{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000969 if (self->in_callback) {
970 int offset, size;
971 const char *buffer
972 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000973
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000974 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000975 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000976 size - offset);
977 else
978 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000979 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000980 else
981 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000982}
Fred Drakebd6101c2001-02-14 18:29:45 +0000983
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000984PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000985"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000986Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000987information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000988
989static PyObject *
990xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
991{
992 char *context;
993 char *encoding = NULL;
994 xmlparseobject *new_parser;
995 int i;
996
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000997 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000998 &context, &encoding)) {
999 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001000 }
1001
Martin v. Löwis894258c2001-09-23 10:20:10 +00001002 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +00001003 if (new_parser == NULL)
1004 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001005 new_parser->buffer_size = self->buffer_size;
1006 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001007 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001008 new_parser->ordered_attributes = self->ordered_attributes;
1009 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001010 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001011 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001012 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001014 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001015 new_parser->intern = self->intern;
1016 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001017 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001018
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001019 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001020 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001021 if (new_parser->buffer == NULL) {
1022 Py_DECREF(new_parser);
1023 return PyErr_NoMemory();
1024 }
1025 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001026 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001027 Py_DECREF(new_parser);
1028 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001029 }
1030
1031 XML_SetUserData(new_parser->itself, (void *)new_parser);
1032
1033 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001034 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001035 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001036
Victor Stinnerb6404912013-07-07 16:21:41 +02001037 new_parser->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001038 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001039 Py_DECREF(new_parser);
1040 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001041 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001042 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001043
1044 /* then copy handlers from self */
1045 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001046 PyObject *handler = self->handlers[i];
1047 if (handler != NULL) {
1048 Py_INCREF(handler);
1049 new_parser->handlers[i] = handler;
1050 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001051 handler_info[i].handler);
1052 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001053 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001054 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001055}
1056
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001057PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001058"SetParamEntityParsing(flag) -> success\n\
1059Controls parsing of parameter entities (including the external DTD\n\
1060subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1061XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1062XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001063was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001064
1065static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001066xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001067{
Fred Drake85d835f2001-02-08 15:39:08 +00001068 int flag;
1069 if (!PyArg_ParseTuple(args, "i", &flag))
1070 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001071 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001072 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001073}
1074
Martin v. Löwisc847f402003-01-21 11:09:21 +00001075
1076#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001077PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1078"UseForeignDTD([flag])\n\
1079Allows the application to provide an artificial external subset if one is\n\
1080not specified as part of the document instance. This readily allows the\n\
1081use of a 'default' document type controlled by the application, while still\n\
1082getting the advantage of providing document type information to the parser.\n\
1083'flag' defaults to True if not provided.");
1084
1085static PyObject *
1086xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1087{
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001088 int flag = 1;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001089 enum XML_Error rc;
Georg Brandld37b9d72012-09-24 13:41:52 +02001090 if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag))
Martin v. Löwis069dde22003-01-21 10:58:18 +00001091 return NULL;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001092 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001093 if (rc != XML_ERROR_NONE) {
1094 return set_error(self, rc);
1095 }
1096 Py_INCREF(Py_None);
1097 return Py_None;
1098}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001099#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001100
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001101static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1102
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001103static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 {"Parse", (PyCFunction)xmlparse_Parse,
1105 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001106 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001108 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001110 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001112 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001113 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001114 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001116 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001118#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001119 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001121#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001122 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001124};
1125
1126/* ---------- */
1127
1128
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001129
Fred Drake71b63ff2002-06-28 22:29:01 +00001130/* pyexpat international encoding support.
1131 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001132*/
1133
Fred Drake71b63ff2002-06-28 22:29:01 +00001134static int
1135PyUnknownEncodingHandler(void *encodingHandlerData,
1136 const XML_Char *name,
1137 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001138{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001139 static unsigned char template_buffer[256] = {0};
1140 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001141 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001143 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001144
Victor Stinner9e09c262013-07-18 23:17:01 +02001145 if (PyErr_Occurred())
1146 return XML_STATUS_ERROR;
1147
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001148 if (template_buffer[1] == 0) {
1149 for (i = 0; i < 256; i++)
1150 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001151 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001152
1153 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001154 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001155 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001156 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001157 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001158
1159 if (PyUnicode_GET_LENGTH(u) != 256) {
1160 Py_DECREF(u);
1161 PyErr_SetString(PyExc_ValueError,
1162 "multi-byte encodings are not supported");
1163 return XML_STATUS_ERROR;
1164 }
1165
1166 kind = PyUnicode_KIND(u);
1167 data = PyUnicode_DATA(u);
1168 for (i = 0; i < 256; i++) {
1169 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1170 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1171 info->map[i] = ch;
1172 else
1173 info->map[i] = -1;
1174 }
1175
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001176 info->data = NULL;
1177 info->convert = NULL;
1178 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001179 Py_DECREF(u);
1180
1181 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182}
1183
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001184
1185static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001186newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001187{
1188 int i;
1189 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001190
Martin v. Löwis894258c2001-09-23 10:20:10 +00001191 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001192 if (self == NULL)
1193 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001194
Fred Drake2a3d7db2002-06-28 22:56:48 +00001195 self->buffer = NULL;
1196 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1197 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001198 self->ordered_attributes = 0;
1199 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001200 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001201 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001202 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001203 self->intern = intern;
1204 Py_XINCREF(self->intern);
1205 PyObject_GC_Track(self);
1206
Christian Heimesfa535f52013-07-07 17:35:11 +02001207 /* namespace_separator is either NULL or contains one char + \0 */
1208 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1209 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001210 if (self->itself == NULL) {
1211 PyErr_SetString(PyExc_RuntimeError,
1212 "XML_ParserCreate failed");
1213 Py_DECREF(self);
1214 return NULL;
1215 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001216#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1217 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1218 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1219 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001220 XML_SetHashSalt(self->itself,
1221 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smith25227712012-03-14 18:10:37 -07001222#endif
Fred Drake0582df92000-07-12 04:49:00 +00001223 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001224 XML_SetUnknownEncodingHandler(self->itself,
1225 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001226
Fred Drake2a3d7db2002-06-28 22:56:48 +00001227 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001228 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001229
Victor Stinnerb6404912013-07-07 16:21:41 +02001230 self->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001231 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001232 Py_DECREF(self);
1233 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001234 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001235 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001236
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001237 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001238}
1239
1240
1241static void
Fred Drake0582df92000-07-12 04:49:00 +00001242xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001243{
Fred Drake0582df92000-07-12 04:49:00 +00001244 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001245 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001246 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001247 XML_ParserFree(self->itself);
1248 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001249
Fred Drake85d835f2001-02-08 15:39:08 +00001250 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001251 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001252 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001253 temp = self->handlers[i];
1254 self->handlers[i] = NULL;
1255 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001256 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001257 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001258 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001259 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001260 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001261 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001262 self->buffer = NULL;
1263 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001264 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001265 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001266}
1267
Fred Drake0582df92000-07-12 04:49:00 +00001268static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001269handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001270{
1271 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001272 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001273 if (PyUnicode_CompareWithASCIIString(
1274 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001275 return i;
1276 }
1277 }
1278 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001279}
1280
1281static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001282get_pybool(int istrue)
1283{
1284 PyObject *result = istrue ? Py_True : Py_False;
1285 Py_INCREF(result);
1286 return result;
1287}
1288
1289static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001290xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001291{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001292 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001293 int handlernum = -1;
1294
Alexander Belopolskye239d232010-12-08 23:31:48 +00001295 if (!PyUnicode_Check(nameobj))
1296 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001297 if (PyUnicode_READY(nameobj))
1298 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299
Alexander Belopolskye239d232010-12-08 23:31:48 +00001300 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001301
1302 if (handlernum != -1) {
1303 PyObject *result = self->handlers[handlernum];
1304 if (result == NULL)
1305 result = Py_None;
1306 Py_INCREF(result);
1307 return result;
1308 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001309
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001310 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1311 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001312 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001313 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001314 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001315 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001316 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001317 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001318 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001319 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001320 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001321 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001322 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001323 XML_GetErrorByteIndex(self->itself));
1324 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001325 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001326 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001327 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001328 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001329 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001330 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001331 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001332 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001333 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001334 XML_GetCurrentByteIndex(self->itself));
1335 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001336 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001337 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001338 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001339 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001340 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001341 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001342 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001343 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001344 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001345 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001346 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001347 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001348 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001349 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001350 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001351 if (self->intern == NULL) {
1352 Py_INCREF(Py_None);
1353 return Py_None;
1354 }
1355 else {
1356 Py_INCREF(self->intern);
1357 return self->intern;
1358 }
1359 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001360 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001361 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001362}
1363
1364static PyObject *
1365xmlparse_dir(PyObject *self, PyObject* noargs)
1366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367#define APPEND(list, str) \
1368 do { \
1369 PyObject *o = PyUnicode_FromString(str); \
1370 if (o != NULL) \
1371 PyList_Append(list, o); \
1372 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001373 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001374
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001375 int i;
1376 PyObject *rc = PyList_New(0);
1377 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001379 for (i = 0; handler_info[i].name != NULL; i++) {
1380 PyObject *o = get_handler_name(&handler_info[i]);
1381 if (o != NULL)
1382 PyList_Append(rc, o);
1383 Py_XDECREF(o);
1384 }
1385 APPEND(rc, "ErrorCode");
1386 APPEND(rc, "ErrorLineNumber");
1387 APPEND(rc, "ErrorColumnNumber");
1388 APPEND(rc, "ErrorByteIndex");
1389 APPEND(rc, "CurrentLineNumber");
1390 APPEND(rc, "CurrentColumnNumber");
1391 APPEND(rc, "CurrentByteIndex");
1392 APPEND(rc, "buffer_size");
1393 APPEND(rc, "buffer_text");
1394 APPEND(rc, "buffer_used");
1395 APPEND(rc, "namespace_prefixes");
1396 APPEND(rc, "ordered_attributes");
1397 APPEND(rc, "specified_attributes");
1398 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001399
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001400#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001401
1402 if (PyErr_Occurred()) {
1403 Py_DECREF(rc);
1404 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001405 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001406
1407 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001408}
1409
Fred Drake6f987622000-08-25 18:03:30 +00001410static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001411sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001412{
1413 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001414 if (handlernum >= 0) {
1415 xmlhandler c_handler = NULL;
1416 PyObject *temp = self->handlers[handlernum];
1417
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001418 if (v == Py_None) {
1419 /* If this is the character data handler, and a character
1420 data handler is already active, we need to be more
1421 careful. What we can safely do is replace the existing
1422 character data handler callback function with a no-op
1423 function that will refuse to call Python. The downside
1424 is that this doesn't completely remove the character
1425 data handler from the C layer if there's any callback
1426 active, so Expat does a little more work than it
1427 otherwise would, but that's really an odd case. A more
1428 elaborate system of handlers and state could remove the
1429 C handler more effectively. */
1430 if (handlernum == CharacterData && self->in_callback)
1431 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001432 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001433 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001434 else if (v != NULL) {
1435 Py_INCREF(v);
1436 c_handler = handler_info[handlernum].handler;
1437 }
Fred Drake0582df92000-07-12 04:49:00 +00001438 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001439 Py_XDECREF(temp);
1440 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001441 return 1;
1442 }
1443 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001444}
1445
1446static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001447xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001448{
Fred Drake6f987622000-08-25 18:03:30 +00001449 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001450 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001451 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1452 return -1;
1453 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001454 assert(PyUnicode_Check(name));
1455 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001456 int b = PyObject_IsTrue(v);
1457 if (b < 0)
1458 return -1;
1459 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001460 if (self->buffer == NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001461 self->buffer = PyMem_Malloc(self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001462 if (self->buffer == NULL) {
1463 PyErr_NoMemory();
1464 return -1;
1465 }
1466 self->buffer_used = 0;
1467 }
1468 }
1469 else if (self->buffer != NULL) {
1470 if (flush_character_buffer(self) < 0)
1471 return -1;
Victor Stinnerb6404912013-07-07 16:21:41 +02001472 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001473 self->buffer = NULL;
1474 }
1475 return 0;
1476 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001477 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001478 int b = PyObject_IsTrue(v);
1479 if (b < 0)
1480 return -1;
1481 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001482 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1483 return 0;
1484 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001485 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001486 int b = PyObject_IsTrue(v);
1487 if (b < 0)
1488 return -1;
1489 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001490 return 0;
1491 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001492 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001493 int b = PyObject_IsTrue(v);
1494 if (b < 0)
1495 return -1;
1496 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001497 return 0;
1498 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001499
Alexander Belopolskye239d232010-12-08 23:31:48 +00001500 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001501 long new_buffer_size;
1502 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1504 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001505 }
1506
1507 new_buffer_size=PyLong_AS_LONG(v);
1508 /* trivial case -- no change */
1509 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001511 }
1512
1513 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1515 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001516 }
1517
1518 /* check maximum */
1519 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 char errmsg[100];
1521 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1522 PyErr_SetString(PyExc_ValueError, errmsg);
1523 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001524 }
1525
1526 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 /* there is already a buffer */
1528 if (self->buffer_used != 0) {
Christian Heimes09994a92013-07-20 22:41:58 +02001529 if (flush_character_buffer(self) < 0) {
1530 return -1;
1531 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 }
1533 /* free existing buffer */
Victor Stinnerb6404912013-07-07 16:21:41 +02001534 PyMem_Free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001535 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001536 self->buffer = PyMem_Malloc(new_buffer_size);
Christian Heimes2380ac72008-01-09 00:17:24 +00001537 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 PyErr_NoMemory();
1539 return -1;
1540 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001541 self->buffer_size = new_buffer_size;
1542 return 0;
1543 }
1544
Alexander Belopolskye239d232010-12-08 23:31:48 +00001545 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001546 /* If we're changing the character data handler, flush all
1547 * cached data with the old handler. Not sure there's a
1548 * "right" thing to do, though, but this probably won't
1549 * happen.
1550 */
1551 if (flush_character_buffer(self) < 0)
1552 return -1;
1553 }
Fred Drake6f987622000-08-25 18:03:30 +00001554 if (sethandler(self, name, v)) {
1555 return 0;
1556 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001557 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001558 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001559}
1560
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001561static int
1562xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1563{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001564 int i;
1565 for (i = 0; handler_info[i].name != NULL; i++)
1566 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001567 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001568}
1569
1570static int
1571xmlparse_clear(xmlparseobject *op)
1572{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001573 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001574 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001575 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001576}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001577
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001579
1580static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 PyVarObject_HEAD_INIT(NULL, 0)
1582 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001583 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 0, /*tp_itemsize*/
1585 /* methods */
1586 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1587 (printfunc)0, /*tp_print*/
1588 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001589 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 0, /*tp_reserved*/
1591 (reprfunc)0, /*tp_repr*/
1592 0, /*tp_as_number*/
1593 0, /*tp_as_sequence*/
1594 0, /*tp_as_mapping*/
1595 (hashfunc)0, /*tp_hash*/
1596 (ternaryfunc)0, /*tp_call*/
1597 (reprfunc)0, /*tp_str*/
1598 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001599 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1603 (traverseproc)xmlparse_traverse, /* tp_traverse */
1604 (inquiry)xmlparse_clear, /* tp_clear */
1605 0, /* tp_richcompare */
1606 0, /* tp_weaklistoffset */
1607 0, /* tp_iter */
1608 0, /* tp_iternext */
1609 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001610};
1611
1612/* End of code for xmlparser objects */
1613/* -------------------------------------------------------- */
1614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001615PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001616"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001617Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001618
1619static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001620pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1621{
Fred Drakecde79132001-04-25 16:01:30 +00001622 char *encoding = NULL;
1623 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001624 PyObject *intern = NULL;
1625 PyObject *result;
1626 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001627 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001628 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001629
Fred Drakeb91a36b2002-06-27 19:40:48 +00001630 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1631 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001632 return NULL;
1633 if (namespace_separator != NULL
1634 && strlen(namespace_separator) > 1) {
1635 PyErr_SetString(PyExc_ValueError,
1636 "namespace_separator must be at most one"
1637 " character, omitted, or None");
1638 return NULL;
1639 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001640 /* Explicitly passing None means no interning is desired.
1641 Not passing anything means that a new dictionary is used. */
1642 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001644 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 intern = PyDict_New();
1646 if (!intern)
1647 return NULL;
1648 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001649 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001650 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1652 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001653 }
1654
1655 result = newxmlparseobject(encoding, namespace_separator, intern);
1656 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001658 }
1659 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001660}
1661
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001662PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001663"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001664Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001665
1666static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001667pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001668{
Fred Drake0582df92000-07-12 04:49:00 +00001669 long code = 0;
1670
1671 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1672 return NULL;
1673 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001674}
1675
1676/* List of methods defined in the module */
1677
1678static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001680 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1682 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001685};
1686
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001687/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001688
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001689PyDoc_STRVAR(pyexpat_module_documentation,
1690"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001691
Fred Drakecde79132001-04-25 16:01:30 +00001692/* Initialization function for the module */
1693
1694#ifndef MODULE_NAME
1695#define MODULE_NAME "pyexpat"
1696#endif
1697
1698#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001699#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001700#endif
1701
Martin v. Löwis069dde22003-01-21 10:58:18 +00001702#ifndef PyMODINIT_FUNC
1703# ifdef MS_WINDOWS
1704# define PyMODINIT_FUNC __declspec(dllexport) void
1705# else
1706# define PyMODINIT_FUNC void
1707# endif
1708#endif
1709
Mark Hammond8235ea12002-07-19 06:55:41 +00001710PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001711
Martin v. Löwis1a214512008-06-11 05:26:20 +00001712static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 PyModuleDef_HEAD_INIT,
1714 MODULE_NAME,
1715 pyexpat_module_documentation,
1716 -1,
1717 pyexpat_methods,
1718 NULL,
1719 NULL,
1720 NULL,
1721 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001722};
1723
Martin v. Löwis069dde22003-01-21 10:58:18 +00001724PyMODINIT_FUNC
1725MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001726{
1727 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001728 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001729 PyObject *errors_module;
1730 PyObject *modelmod_name;
1731 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001732 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001733 PyObject *tmpnum, *tmpstr;
1734 PyObject *codes_dict;
1735 PyObject *rev_codes_dict;
1736 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001737 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001738 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001739
Fred Drake6f987622000-08-25 18:03:30 +00001740 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001741 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001742 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001743 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001744 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001745
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001746 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001748
Fred Drake0582df92000-07-12 04:49:00 +00001749 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001750 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001751 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001753
Fred Drake0582df92000-07-12 04:49:00 +00001754 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001755 if (ErrorObject == NULL) {
1756 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001757 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001758 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001759 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001760 }
1761 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001762 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001763 Py_INCREF(ErrorObject);
1764 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001765 Py_INCREF(&Xmlparsetype);
1766 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001767
Fred Drake738293d2000-12-21 17:25:07 +00001768 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1769 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001770 {
1771 XML_Expat_Version info = XML_ExpatVersionInfo();
1772 PyModule_AddObject(m, "version_info",
1773 Py_BuildValue("(iii)", info.major,
1774 info.minor, info.micro));
1775 }
Fred Drake0582df92000-07-12 04:49:00 +00001776 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001777 compiled, this should check and set native_encoding
1778 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001779 */
Fred Drake93adb692000-09-23 04:55:48 +00001780 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001781
Fred Drake85d835f2001-02-08 15:39:08 +00001782 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001783 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001784 errors_module = PyDict_GetItem(d, errmod_name);
1785 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001786 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001787 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001788 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001789 /* gives away the reference to errors_module */
1790 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001791 }
1792 }
Fred Drake6f987622000-08-25 18:03:30 +00001793 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001794 model_module = PyDict_GetItem(d, modelmod_name);
1795 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001796 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001797 if (model_module != NULL) {
1798 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1799 /* gives away the reference to model_module */
1800 PyModule_AddObject(m, "model", model_module);
1801 }
1802 }
1803 Py_DECREF(modelmod_name);
1804 if (errors_module == NULL || model_module == NULL)
1805 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001806 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807
Martin v. Löwisc847f402003-01-21 11:09:21 +00001808#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001809 {
1810 const XML_Feature *features = XML_GetFeatureList();
1811 PyObject *list = PyList_New(0);
1812 if (list == NULL)
1813 /* just ignore it */
1814 PyErr_Clear();
1815 else {
1816 int i = 0;
1817 for (; features[i].feature != XML_FEATURE_END; ++i) {
1818 int ok;
1819 PyObject *item = Py_BuildValue("si", features[i].name,
1820 features[i].value);
1821 if (item == NULL) {
1822 Py_DECREF(list);
1823 list = NULL;
1824 break;
1825 }
1826 ok = PyList_Append(list, item);
1827 Py_DECREF(item);
1828 if (ok < 0) {
1829 PyErr_Clear();
1830 break;
1831 }
1832 }
1833 if (list != NULL)
1834 PyModule_AddObject(m, "features", list);
1835 }
1836 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001837#endif
Fred Drake6f987622000-08-25 18:03:30 +00001838
Georg Brandlb4dac712010-10-15 14:46:48 +00001839 codes_dict = PyDict_New();
1840 rev_codes_dict = PyDict_New();
1841 if (codes_dict == NULL || rev_codes_dict == NULL) {
1842 Py_XDECREF(codes_dict);
1843 Py_XDECREF(rev_codes_dict);
1844 return NULL;
1845 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001846
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001847#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001848 if (PyModule_AddStringConstant(errors_module, #name, \
1849 (char *)XML_ErrorString(name)) < 0) \
1850 return NULL; \
1851 tmpnum = PyLong_FromLong(name); \
1852 if (tmpnum == NULL) return NULL; \
1853 res = PyDict_SetItemString(codes_dict, \
1854 XML_ErrorString(name), tmpnum); \
1855 if (res < 0) return NULL; \
1856 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1857 if (tmpstr == NULL) return NULL; \
1858 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1859 Py_DECREF(tmpstr); \
1860 Py_DECREF(tmpnum); \
1861 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001862
Fred Drake0582df92000-07-12 04:49:00 +00001863 MYCONST(XML_ERROR_NO_MEMORY);
1864 MYCONST(XML_ERROR_SYNTAX);
1865 MYCONST(XML_ERROR_NO_ELEMENTS);
1866 MYCONST(XML_ERROR_INVALID_TOKEN);
1867 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1868 MYCONST(XML_ERROR_PARTIAL_CHAR);
1869 MYCONST(XML_ERROR_TAG_MISMATCH);
1870 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1871 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1872 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1873 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1874 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1875 MYCONST(XML_ERROR_ASYNC_ENTITY);
1876 MYCONST(XML_ERROR_BAD_CHAR_REF);
1877 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1878 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1879 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1880 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1881 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001882 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1883 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1884 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001885 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1886 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1887 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1888 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1889 /* Added in Expat 1.95.7. */
1890 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1891 /* Added in Expat 1.95.8. */
1892 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1893 MYCONST(XML_ERROR_INCOMPLETE_PE);
1894 MYCONST(XML_ERROR_XML_DECL);
1895 MYCONST(XML_ERROR_TEXT_DECL);
1896 MYCONST(XML_ERROR_PUBLICID);
1897 MYCONST(XML_ERROR_SUSPENDED);
1898 MYCONST(XML_ERROR_NOT_SUSPENDED);
1899 MYCONST(XML_ERROR_ABORTED);
1900 MYCONST(XML_ERROR_FINISHED);
1901 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001902
Georg Brandlb4dac712010-10-15 14:46:48 +00001903 if (PyModule_AddStringConstant(errors_module, "__doc__",
1904 "Constants used to describe "
1905 "error conditions.") < 0)
1906 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001907
Georg Brandlb4dac712010-10-15 14:46:48 +00001908 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1909 return NULL;
1910 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1911 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001912
Fred Drake93adb692000-09-23 04:55:48 +00001913#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001914
Fred Drake85d835f2001-02-08 15:39:08 +00001915#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001916 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1917 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1918 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001919#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001920
Fred Drake85d835f2001-02-08 15:39:08 +00001921#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1922 PyModule_AddStringConstant(model_module, "__doc__",
1923 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001924
Fred Drake85d835f2001-02-08 15:39:08 +00001925 MYCONST(XML_CTYPE_EMPTY);
1926 MYCONST(XML_CTYPE_ANY);
1927 MYCONST(XML_CTYPE_MIXED);
1928 MYCONST(XML_CTYPE_NAME);
1929 MYCONST(XML_CTYPE_CHOICE);
1930 MYCONST(XML_CTYPE_SEQ);
1931
1932 MYCONST(XML_CQUANT_NONE);
1933 MYCONST(XML_CQUANT_OPT);
1934 MYCONST(XML_CQUANT_REP);
1935 MYCONST(XML_CQUANT_PLUS);
1936#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001937
1938 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001939 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001940 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001941 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1942 capi.MINOR_VERSION = XML_MINOR_VERSION;
1943 capi.MICRO_VERSION = XML_MICRO_VERSION;
1944 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001945 capi.GetErrorCode = XML_GetErrorCode;
1946 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1947 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001948 capi.Parse = XML_Parse;
1949 capi.ParserCreate_MM = XML_ParserCreate_MM;
1950 capi.ParserFree = XML_ParserFree;
1951 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1952 capi.SetCommentHandler = XML_SetCommentHandler;
1953 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1954 capi.SetElementHandler = XML_SetElementHandler;
1955 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1956 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1957 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1958 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001959 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001960 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001961 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962
Benjamin Petersonb173f782009-05-05 22:31:58 +00001963 /* export using capsule */
1964 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001965 if (capi_object)
1966 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001967 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001968}
1969
Fred Drake6f987622000-08-25 18:03:30 +00001970static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001971clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001972{
Fred Drakecde79132001-04-25 16:01:30 +00001973 int i = 0;
1974 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001975
Fred Drake71b63ff2002-06-28 22:29:01 +00001976 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001977 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 self->handlers[i] = NULL;
1979 else {
Fred Drakecde79132001-04-25 16:01:30 +00001980 temp = self->handlers[i];
1981 self->handlers[i] = NULL;
1982 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001984 }
Fred Drakecde79132001-04-25 16:01:30 +00001985 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001986}
1987
Tim Peters0c322792002-07-17 16:49:03 +00001988static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001989 {"StartElementHandler",
1990 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001991 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001992 {"EndElementHandler",
1993 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001994 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001995 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001996 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1997 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001998 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001999 (xmlhandlersetter)XML_SetCharacterDataHandler,
2000 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002001 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002002 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002003 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002004 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002005 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002006 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002007 {"StartNamespaceDeclHandler",
2008 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002009 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002010 {"EndNamespaceDeclHandler",
2011 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002012 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002013 {"CommentHandler",
2014 (xmlhandlersetter)XML_SetCommentHandler,
2015 (xmlhandler)my_CommentHandler},
2016 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002017 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002018 (xmlhandler)my_StartCdataSectionHandler},
2019 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002020 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002021 (xmlhandler)my_EndCdataSectionHandler},
2022 {"DefaultHandler",
2023 (xmlhandlersetter)XML_SetDefaultHandler,
2024 (xmlhandler)my_DefaultHandler},
2025 {"DefaultHandlerExpand",
2026 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2027 (xmlhandler)my_DefaultHandlerExpandHandler},
2028 {"NotStandaloneHandler",
2029 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2030 (xmlhandler)my_NotStandaloneHandler},
2031 {"ExternalEntityRefHandler",
2032 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002033 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002034 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002035 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002036 (xmlhandler)my_StartDoctypeDeclHandler},
2037 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002038 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002039 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002040 {"EntityDeclHandler",
2041 (xmlhandlersetter)XML_SetEntityDeclHandler,
2042 (xmlhandler)my_EntityDeclHandler},
2043 {"XmlDeclHandler",
2044 (xmlhandlersetter)XML_SetXmlDeclHandler,
2045 (xmlhandler)my_XmlDeclHandler},
2046 {"ElementDeclHandler",
2047 (xmlhandlersetter)XML_SetElementDeclHandler,
2048 (xmlhandler)my_ElementDeclHandler},
2049 {"AttlistDeclHandler",
2050 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2051 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002052#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002053 {"SkippedEntityHandler",
2054 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2055 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002056#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002057
Fred Drake0582df92000-07-12 04:49:00 +00002058 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002059};