blob: 97f2b5677171a90da060ffbe3a6a46ebb06ed035 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
/* Expat version folded into one comparable integer:
 * 10000*major + 100*minor + micro.  It is tested with #if further down to
 * guard code that depends on the Expat release.
 */
#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)

/* When defined, the trace_frame()/trace_frame_exc() helpers are compiled
 * and call_with_frame() reports call/return/exception events through them.
 */
#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* Allocator table built from the PyObject_Malloc family.
 * NOTE(review): presumably handed to Expat when a parser is created; the
 * call site is not in this part of the file -- confirm.
 */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
15
/* Slot numbers for the supported callbacks.  A value is used as an index
 * into xmlparseobject.handlers and into the handler_info table.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* NOTE(review): looks like an end-of-list marker; no use of it is
       visible in this part of the file -- confirm. */
    _DummyDecl
};
43
/* Exception class that set_error() instantiates and raises. */
static PyObject *ErrorObject;
45
46/* ----------------------------------------------------- */
47
48/* Declarations for objects of type xmlparser */
49
/* Per-instance state of an xmlparser object. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* Expat parser passed to the XML_* calls */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Handler objects indexed by enum
                                   HandlerTypes; a NULL entry means that
                                   no handler is installed */
} xmlparseobject;
65
/* NOTE(review): presumably the default size of xmlparseobject.buffer, in
 * XML_Char units; the allocation is not in this part of the file -- confirm.
 */
#define CHARACTER_DATA_BUFFER_SIZE 8192

/* Type object for xmlparser instances.
 * NOTE(review): no initializer here; it is expected later in the file.
 */
static PyTypeObject Xmlparsetype;

/* Signature of a function that installs the C callback `meth` on a parser. */
typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
/* Untyped pointer to a C callback. */
typedef void* xmlhandler;
72
/* Description of one callback slot. */
struct HandlerInfo {
    const char *name;           /* C name; source text for nameobj */
    xmlhandlersetter setter;    /* NOTE(review): presumably the Expat
                                   registration function -- confirm */
    xmlhandler handler;         /* NOTE(review): presumably the matching
                                   my_*Handler trampoline -- confirm */
    PyCodeObject *tb_code;      /* Created on first use by getcode() */
    PyObject *nameobj;          /* Unicode form of name, created on first
                                   use by get_handler_name() */
};

/* Slot descriptions; getcode() indexes this with an enum HandlerTypes. */
static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000082
Fred Drakebd6101c2001-02-14 18:29:45 +000083/* Set an integer attribute on the error object; return true on success,
84 * false on an exception.
85 */
86static int
87set_error_attr(PyObject *err, char *name, int value)
88{
Christian Heimes217cfd12007-12-02 14:31:20 +000089 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000090
Neal Norwitz2f5e9902006-03-08 06:36:45 +000091 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
92 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 0;
94 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000095 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000096 return 1;
97}
98
99/* Build and set an Expat exception, including positioning
100 * information. Always returns NULL.
101 */
Fred Drake85d835f2001-02-08 15:39:08 +0000102static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000103set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000104{
105 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100106 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000107 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000108 int lineno = XML_GetErrorLineNumber(parser);
109 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000110
Victor Stinner499dfcf2011-03-21 13:26:24 +0100111 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
112 XML_ErrorString(code), lineno, column);
113 if (buffer == NULL)
114 return NULL;
115 err = PyObject_CallFunction(ErrorObject, "O", buffer);
116 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000117 if ( err != NULL
118 && set_error_attr(err, "code", code)
119 && set_error_attr(err, "offset", column)
120 && set_error_attr(err, "lineno", lineno)) {
121 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000122 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000123 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000124 return NULL;
125}
126
Fred Drake71b63ff2002-06-28 22:29:01 +0000127static int
128have_handler(xmlparseobject *self, int type)
129{
130 PyObject *handler = self->handlers[type];
131 return handler != NULL;
132}
133
134static PyObject *
135get_handler_name(struct HandlerInfo *hinfo)
136{
137 PyObject *name = hinfo->nameobj;
138 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000139 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000140 hinfo->nameobj = name;
141 }
142 Py_XINCREF(name);
143 return name;
144}
145
Fred Drake85d835f2001-02-08 15:39:08 +0000146
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000147/* Convert a string of XML_Chars into a Unicode string.
148 Returns None if str is a null pointer. */
149
Fred Drake0582df92000-07-12 04:49:00 +0000150static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000151conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000152{
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000154 and hence in UTF-8. */
155 /* UTF-8 from Expat, Unicode desired */
156 if (str == NULL) {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000160 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000161}
162
Fred Drake0582df92000-07-12 04:49:00 +0000163static PyObject *
164conv_string_len_to_unicode(const XML_Char *str, int len)
165{
Fred Drake71b63ff2002-06-28 22:29:01 +0000166 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000167 and hence in UTF-8. */
168 /* UTF-8 from Expat, Unicode desired */
169 if (str == NULL) {
170 Py_INCREF(Py_None);
171 return Py_None;
172 }
Fred Drake6f987622000-08-25 18:03:30 +0000173 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000174}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176/* Callback routines */
177
/* Forward declaration; the definition is not in this part of the file.
   flag_error() calls it with initial == 0. */
static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000179
Martin v. Löwis069dde22003-01-21 10:58:18 +0000180/* This handler is used when an error has been detected, in the hope
181 that actual parsing can be terminated early. This will only help
182 if an external entity reference is encountered. */
183static int
184error_external_entity_ref_handler(XML_Parser parser,
185 const XML_Char *context,
186 const XML_Char *base,
187 const XML_Char *systemId,
188 const XML_Char *publicId)
189{
190 return 0;
191}
192
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193/* Dummy character data handler used when an error (exception) has
194 been detected, and the actual parsing can be terminated early.
195 This is needed since character data handler can't be safely removed
196 from within the character data handler, but can be replaced. It is
197 used only from the character data handler trampoline, and must be
198 used right after `flag_error()` is called. */
199static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000201{
202 /* Do nothing. */
203}
204
Fred Drake6f987622000-08-25 18:03:30 +0000205static void
206flag_error(xmlparseobject *self)
207{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000208 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000209 XML_SetExternalEntityRefHandler(self->itself,
210 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000211}
212
213static PyCodeObject*
214getcode(enum HandlerTypes slot, char* func_name, int lineno)
215{
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000217 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000218 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000219 }
220 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000221}
222
Jeremy Hylton9263f572003-06-27 16:13:17 +0000223#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000224static int
225trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
226{
227 int result = 0;
228 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000230 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 tstate->tracing++;
232 result = tstate->c_profilefunc(tstate->c_profileobj,
233 f, code , val);
234 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
235 || (tstate->c_profilefunc != NULL));
236 tstate->tracing--;
237 if (result)
238 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000239 }
240 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 tstate->tracing++;
242 result = tstate->c_tracefunc(tstate->c_traceobj,
243 f, code , val);
244 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
245 || (tstate->c_profilefunc != NULL));
246 tstate->tracing--;
247 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000248 return result;
249}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000250
251static int
252trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
253{
254 PyObject *type, *value, *traceback, *arg;
255 int err;
256
257 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000259
260 PyErr_Fetch(&type, &value, &traceback);
261 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 value = Py_None;
263 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000264 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000265 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 PyErr_Restore(type, value, traceback);
268 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000269 }
270 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
271 Py_DECREF(arg);
272 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000274 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 Py_XDECREF(type);
276 Py_XDECREF(value);
277 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000278 }
279 return err;
280}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000281#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000282
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000284call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
285 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000286{
Fred Drakebd6101c2001-02-14 18:29:45 +0000287 PyThreadState *tstate = PyThreadState_GET();
288 PyFrameObject *f;
Christian Heimesa6404ad2013-07-20 22:54:25 +0200289 PyObject *res, *globals;
Fred Drakebd6101c2001-02-14 18:29:45 +0000290
291 if (c == NULL)
292 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293
Christian Heimesa6404ad2013-07-20 22:54:25 +0200294 globals = PyEval_GetGlobals();
295 if (globals == NULL) {
296 return NULL;
297 }
298
299 f = PyFrame_New(tstate, c, globals, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000300 if (f == NULL)
301 return NULL;
302 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000303#ifdef FIX_TRACE
304 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000306 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000307#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000308 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000309 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 if (tstate->curexc_traceback == NULL)
311 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000312 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000313#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 if (trace_frame_exc(tstate, f) < 0) {
315 return NULL;
316 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000317 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000318 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
Serhiy Storchaka505ff752014-02-09 13:33:53 +0200320 Py_CLEAR(res);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000322 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000323#else
324 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000325#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000326 tstate->frame = f->f_back;
327 Py_DECREF(f);
328 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000329}
330
Fred Drakeb91a36b2002-06-27 19:40:48 +0000331static PyObject*
332string_intern(xmlparseobject *self, const char* str)
333{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000334 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000335 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000336 /* result can be NULL if the unicode conversion failed. */
337 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000339 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000341 value = PyDict_GetItem(self->intern, result);
342 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000343 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000344 return result;
345 else
346 return NULL;
347 }
348 Py_INCREF(value);
349 Py_DECREF(result);
350 return value;
351}
352
Fred Drake2a3d7db2002-06-28 22:56:48 +0000353/* Return 0 on success, -1 on exception.
354 * flag_error() will be called before return if needed.
355 */
356static int
357call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
358{
359 PyObject *args;
360 PyObject *temp;
361
Georg Brandlc01537f2010-10-15 16:26:08 +0000362 if (!have_handler(self, CharacterData))
363 return -1;
364
Fred Drake2a3d7db2002-06-28 22:56:48 +0000365 args = PyTuple_New(1);
366 if (args == NULL)
367 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000368 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000369 if (temp == NULL) {
370 Py_DECREF(args);
371 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000372 XML_SetCharacterDataHandler(self->itself,
373 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000374 return -1;
375 }
376 PyTuple_SET_ITEM(args, 0, temp);
377 /* temp is now a borrowed reference; consider it unused. */
378 self->in_callback = 1;
379 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000380 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000381 /* temp is an owned reference again, or NULL */
382 self->in_callback = 0;
383 Py_DECREF(args);
384 if (temp == NULL) {
385 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000386 XML_SetCharacterDataHandler(self->itself,
387 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000388 return -1;
389 }
390 Py_DECREF(temp);
391 return 0;
392}
393
394static int
395flush_character_buffer(xmlparseobject *self)
396{
397 int rc;
398 if (self->buffer == NULL || self->buffer_used == 0)
399 return 0;
400 rc = call_character_handler(self, self->buffer, self->buffer_used);
401 self->buffer_used = 0;
402 return rc;
403}
404
405static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000406my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000407{
408 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200409
410 if (PyErr_Occurred())
411 return;
412
Fred Drake2a3d7db2002-06-28 22:56:48 +0000413 if (self->buffer == NULL)
414 call_character_handler(self, data, len);
415 else {
416 if ((self->buffer_used + len) > self->buffer_size) {
417 if (flush_character_buffer(self) < 0)
418 return;
419 /* handler might have changed; drop the rest on the floor
420 * if there isn't a handler anymore
421 */
422 if (!have_handler(self, CharacterData))
423 return;
424 }
425 if (len > self->buffer_size) {
426 call_character_handler(self, data, len);
427 self->buffer_used = 0;
428 }
429 else {
430 memcpy(self->buffer + self->buffer_used,
431 data, len * sizeof(XML_Char));
432 self->buffer_used += len;
433 }
434 }
435}
436
Fred Drake85d835f2001-02-08 15:39:08 +0000437static void
438my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000439 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000440{
441 xmlparseobject *self = (xmlparseobject *)userData;
442
Fred Drake71b63ff2002-06-28 22:29:01 +0000443 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000444 PyObject *container, *rv, *args;
445 int i, max;
446
Victor Stinner9e09c262013-07-18 23:17:01 +0200447 if (PyErr_Occurred())
448 return;
449
Fred Drake2a3d7db2002-06-28 22:56:48 +0000450 if (flush_character_buffer(self) < 0)
451 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000452 /* Set max to the number of slots filled in atts[]; max/2 is
453 * the number of attributes we need to process.
454 */
455 if (self->specified_attributes) {
456 max = XML_GetSpecifiedAttributeCount(self->itself);
457 }
458 else {
459 max = 0;
460 while (atts[max] != NULL)
461 max += 2;
462 }
463 /* Build the container. */
464 if (self->ordered_attributes)
465 container = PyList_New(max);
466 else
467 container = PyDict_New();
468 if (container == NULL) {
469 flag_error(self);
470 return;
471 }
472 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000473 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000474 PyObject *v;
475 if (n == NULL) {
476 flag_error(self);
477 Py_DECREF(container);
478 return;
479 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000480 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000481 if (v == NULL) {
482 flag_error(self);
483 Py_DECREF(container);
484 Py_DECREF(n);
485 return;
486 }
487 if (self->ordered_attributes) {
488 PyList_SET_ITEM(container, i, n);
489 PyList_SET_ITEM(container, i+1, v);
490 }
491 else if (PyDict_SetItem(container, n, v)) {
492 flag_error(self);
493 Py_DECREF(n);
494 Py_DECREF(v);
495 return;
496 }
497 else {
498 Py_DECREF(n);
499 Py_DECREF(v);
500 }
501 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000502 args = string_intern(self, name);
503 if (args != NULL)
504 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000505 if (args == NULL) {
506 Py_DECREF(container);
507 return;
508 }
509 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000510 self->in_callback = 1;
511 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000512 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000513 self->in_callback = 0;
514 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000515 if (rv == NULL) {
516 flag_error(self);
517 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000518 }
Fred Drake85d835f2001-02-08 15:39:08 +0000519 Py_DECREF(rv);
520 }
521}
522
/* Generate a trampoline `my_<NAME>Handler` that forwards an Expat
 * callback to the Python handler stored in slot NAME.
 *
 *   RC           - C return type of the generated function
 *   NAME         - enum HandlerTypes member; also the function name infix
 *   PARAMS       - parenthesized C parameter list
 *   INIT         - extra declarations placed after the locals
 *   PARAM_FORMAT - parenthesized Py_BuildValue() argument list; it may
 *                  refer to `self` and to the parameters
 *   CONVERSION   - statement(s) run after a successful call while `rv`
 *                  is still referenced
 *   RETURN       - expression returned on every exit path
 *   GETUSERDATA  - expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing when the slot is empty, or when it
 * is filled but an exception is already pending.  Buffered character data
 * is flushed before the call.  flag_error() is called when the arguments
 * cannot be built or the handler call returns NULL.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
553
/* RC_HANDLER for callbacks that return nothing: INIT, CONVERSION and
 * RETURN are empty, and the parser object is taken from the `userData`
 * parameter, which PARAMS must therefore declare.
 */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000557
/* RC_HANDLER for callbacks that return int.  The result starts at 0 and
 * is replaced by PyLong_AsLong() of the Python handler's return value
 * after a successful call.  The parser object is taken from the
 * `userData` parameter, which PARAMS must therefore declare.
 */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000562
/* my_EndElementHandler: the Python handler receives (name), with the
   name passed through string_intern(). */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* my_ProcessingInstructionHandler: the Python handler receives
   (target, data); only the target is interned. */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* my_UnparsedEntityDeclHandler: the Python handler receives
   (entityName, base, systemId, publicId, notationName), all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_EntityDeclHandler: the Python handler receives
   (entityName, is_parameter_entity, value, base, systemId, publicId,
   notationName).  The value is converted with its explicit length; the
   other strings are interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_XmlDeclHandler: the Python handler receives
   (version, encoding, standalone); the strings are not interned. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
610
611static PyObject *
612conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000613 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000614{
615 PyObject *result = NULL;
616 PyObject *children = PyTuple_New(model->numchildren);
617 int i;
618
619 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000620 assert(model->numchildren < INT_MAX);
621 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000622 PyObject *child = conv_content_model(&model->children[i],
623 conv_string);
624 if (child == NULL) {
625 Py_XDECREF(children);
626 return NULL;
627 }
628 PyTuple_SET_ITEM(children, i, child);
629 }
630 result = Py_BuildValue("(iiO&N)",
631 model->type, model->quant,
632 conv_string,model->name, children);
633 }
634 return result;
635}
636
Fred Drake06dd8cf2003-02-02 03:54:17 +0000637static void
638my_ElementDeclHandler(void *userData,
639 const XML_Char *name,
640 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000641{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000642 xmlparseobject *self = (xmlparseobject *)userData;
643 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000644
Fred Drake06dd8cf2003-02-02 03:54:17 +0000645 if (have_handler(self, ElementDecl)) {
646 PyObject *rv = NULL;
647 PyObject *modelobj, *nameobj;
648
Victor Stinner9e09c262013-07-18 23:17:01 +0200649 if (PyErr_Occurred())
650 return;
651
Fred Drake06dd8cf2003-02-02 03:54:17 +0000652 if (flush_character_buffer(self) < 0)
653 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000654 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000655 if (modelobj == NULL) {
656 flag_error(self);
657 goto finally;
658 }
659 nameobj = string_intern(self, name);
660 if (nameobj == NULL) {
661 Py_DECREF(modelobj);
662 flag_error(self);
663 goto finally;
664 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000665 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000666 if (args == NULL) {
667 Py_DECREF(modelobj);
668 flag_error(self);
669 goto finally;
670 }
671 self->in_callback = 1;
672 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000673 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000674 self->in_callback = 0;
675 if (rv == NULL) {
676 flag_error(self);
677 goto finally;
678 }
679 Py_DECREF(rv);
680 }
681 finally:
682 Py_XDECREF(args);
683 XML_FreeContentModel(self->itself, model);
684 return;
685}
Fred Drake85d835f2001-02-08 15:39:08 +0000686
/* my_AttlistDeclHandler: the Python handler receives
   (elname, attname, att_type, dflt, isrequired); only the element and
   attribute names are interned. */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

#if XML_COMBINED_VERSION >= 19504
/* my_SkippedEntityHandler: the Python handler receives
   (entityName, is_parameter_entity).  Only compiled for Expat versions
   whose combined version number is at least 19504. */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* my_NotationDeclHandler: the Python handler receives
   (notationName, base, systemId, publicId), all interned. */
VOID_HANDLER(NotationDecl,
             (void *userData,
              const XML_Char *notationName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId),
             ("(NNNN)",
              string_intern(self, notationName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId)))

/* my_StartNamespaceDeclHandler: the Python handler receives
   (prefix, uri), both interned. */
VOID_HANDLER(StartNamespaceDecl,
             (void *userData,
              const XML_Char *prefix,
              const XML_Char *uri),
             ("(NN)",
              string_intern(self, prefix), string_intern(self, uri)))

/* my_EndNamespaceDeclHandler: the Python handler receives (prefix). */
VOID_HANDLER(EndNamespaceDecl,
             (void *userData,
              const XML_Char *prefix),
             ("(N)", string_intern(self, prefix)))

/* my_CommentHandler: the Python handler receives (data), not interned. */
VOID_HANDLER(Comment,
             (void *userData, const XML_Char *data),
             ("(O&)", conv_string_to_unicode ,data))

/* my_StartCdataSectionHandler: the Python handler receives no arguments. */
VOID_HANDLER(StartCdataSection,
             (void *userData),
             ("()"))

/* my_EndCdataSectionHandler: the Python handler receives no arguments. */
VOID_HANDLER(EndCdataSection,
             (void *userData),
             ("()"))

/* my_DefaultHandler: the Python handler receives the `len` XML_Chars at
   `s` as one string. */
VOID_HANDLER(Default,
             (void *userData, const XML_Char *s, int len),
             ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_DefaultHandlerExpandHandler: same arguments as my_DefaultHandler,
   dispatched to the DefaultHandlerExpand slot. */
VOID_HANDLER(DefaultHandlerExpand,
             (void *userData, const XML_Char *s, int len),
             ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_NotStandaloneHandler: the Python handler receives no arguments; its
   return value, converted with PyLong_AsLong(), is returned to Expat.
   0 is returned when the handler is not called or fails. */
INT_HANDLER(NotStandalone,
            (void *userData),
            ("()"))

/* my_ExternalEntityRefHandler: the Python handler receives
   (context, base, systemId, publicId); its return value, converted with
   PyLong_AsLong(), is returned to Expat.  This callback is given the
   parser instead of the user data, so the parser object is looked up
   with XML_GetUserData(). */
RC_HANDLER(int, ExternalEntityRef,
           (XML_Parser parser,
            const XML_Char *context,
            const XML_Char *base,
            const XML_Char *systemId,
            const XML_Char *publicId),
           int rc=0;,
           ("(O&NNN)",
            conv_string_to_unicode ,context, string_intern(self, base),
            string_intern(self, systemId), string_intern(self, publicId)),
           rc = PyLong_AsLong(rv);, rc,
           XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* my_StartDoctypeDeclHandler: the Python handler receives
   (doctypeName, sysid, pubid, has_internal_subset); the strings are
   interned. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* my_EndDoctypeDeclHandler: the Python handler receives no arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000778
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779/* ---------------------------------------------------------------- */
780
Fred Drake71b63ff2002-06-28 22:29:01 +0000781static PyObject *
782get_parse_result(xmlparseobject *self, int rv)
783{
784 if (PyErr_Occurred()) {
785 return NULL;
786 }
787 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000788 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000789 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000790 if (flush_character_buffer(self) < 0) {
791 return NULL;
792 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000793 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000794}
795
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000796PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000797"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000798Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000799
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200800#define MAX_CHUNK_SIZE (1 << 20)
801
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000802static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000803xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000804{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200805 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000806 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200807 const char *s;
808 Py_ssize_t slen;
809 Py_buffer view;
810 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000811
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200812 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000813 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000814
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200815 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200816 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200817 s = PyUnicode_AsUTF8AndSize(data, &slen);
818 if (s == NULL)
819 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200820 /* Explicitly set UTF-8 encoding. Return code ignored. */
821 (void)XML_SetEncoding(self->itself, "utf-8");
822 }
823 else {
824 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
825 return NULL;
826 s = view.buf;
827 slen = view.len;
828 }
829
830 while (slen > MAX_CHUNK_SIZE) {
831 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
832 if (!rc)
833 goto done;
834 s += MAX_CHUNK_SIZE;
835 slen -= MAX_CHUNK_SIZE;
836 }
Christian Heimesba723202013-11-22 00:46:18 +0100837 assert(MAX_CHUNK_SIZE < INT_MAX && slen < INT_MAX);
838 rc = XML_Parse(self->itself, s, (int)slen, isFinal);
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200839
840done:
841 if (view.buf != NULL)
842 PyBuffer_Release(&view);
843 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000844}
845
Fred Drakeca1f4262000-09-21 20:10:23 +0000846/* File reading copied from cPickle */
847
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000848#define BUF_SIZE 2048
849
Fred Drake0582df92000-07-12 04:49:00 +0000850static int
851readinst(char *buf, int buf_size, PyObject *meth)
852{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000853 PyObject *str;
854 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000855 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000856
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000857 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000858 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000859 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000860
Christian Heimes72b710a2008-05-26 13:28:38 +0000861 if (PyBytes_Check(str))
862 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000863 else if (PyByteArray_Check(str))
864 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000865 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000866 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000867 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000868 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000869 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000870 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000871 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000872 if (len > buf_size) {
873 PyErr_Format(PyExc_ValueError,
874 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000875 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000876 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000877 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000878 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000879 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000880 Py_DECREF(str);
881 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000882 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000883
884error:
885 Py_XDECREF(str);
886 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000887}
888
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000889PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000890"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000891Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000892
893static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000894xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000895{
Fred Drake0582df92000-07-12 04:49:00 +0000896 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000897 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200898 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000899
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200900 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000901 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000902 PyErr_SetString(PyExc_TypeError,
903 "argument must have 'read' attribute");
904 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000905 }
906 for (;;) {
907 int bytes_read;
908 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000909 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000910 Py_XDECREF(readmethod);
Ned Deilye7d532f2014-03-27 16:39:58 -0700911 return get_parse_result(self, 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000912 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000913
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000914 bytes_read = readinst(buf, BUF_SIZE, readmethod);
915 if (bytes_read < 0) {
916 Py_DECREF(readmethod);
917 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000918 }
919 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000920 if (PyErr_Occurred()) {
921 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000922 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000923 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000924
Fred Drake0582df92000-07-12 04:49:00 +0000925 if (!rv || bytes_read == 0)
926 break;
927 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000928 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000929 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000930}
931
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000932PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000933"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000934Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000935
936static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000937xmlparse_SetBase(xmlparseobject *self, PyObject *args)
938{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000939 char *base;
940
Fred Drake0582df92000-07-12 04:49:00 +0000941 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000942 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000943 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000945 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000946 Py_INCREF(Py_None);
947 return Py_None;
948}
949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000950PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000951"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000952Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000953
954static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000955xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000956{
Fred Drake0582df92000-07-12 04:49:00 +0000957 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000958}
959
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000960PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000961"GetInputContext() -> string\n\
962Return the untranslated text of the input that caused the current event.\n\
963If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000964for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000965
966static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000967xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000968{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000969 if (self->in_callback) {
970 int offset, size;
971 const char *buffer
972 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000973
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000974 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000975 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000976 size - offset);
977 else
978 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000979 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000980 else
981 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000982}
Fred Drakebd6101c2001-02-14 18:29:45 +0000983
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000984PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000985"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000986Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000987information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000988
989static PyObject *
990xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
991{
992 char *context;
993 char *encoding = NULL;
994 xmlparseobject *new_parser;
995 int i;
996
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000997 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000998 &context, &encoding)) {
999 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001000 }
1001
Martin v. Löwis894258c2001-09-23 10:20:10 +00001002 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +00001003 if (new_parser == NULL)
1004 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001005 new_parser->buffer_size = self->buffer_size;
1006 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001007 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001008 new_parser->ordered_attributes = self->ordered_attributes;
1009 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001010 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001011 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001012 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001014 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001015 new_parser->intern = self->intern;
1016 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001017 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001018
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001019 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001020 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001021 if (new_parser->buffer == NULL) {
1022 Py_DECREF(new_parser);
1023 return PyErr_NoMemory();
1024 }
1025 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001026 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001027 Py_DECREF(new_parser);
1028 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001029 }
1030
1031 XML_SetUserData(new_parser->itself, (void *)new_parser);
1032
1033 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001034 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001035 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001036
Victor Stinnerb6404912013-07-07 16:21:41 +02001037 new_parser->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001038 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001039 Py_DECREF(new_parser);
1040 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001041 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001042 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001043
1044 /* then copy handlers from self */
1045 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001046 PyObject *handler = self->handlers[i];
1047 if (handler != NULL) {
1048 Py_INCREF(handler);
1049 new_parser->handlers[i] = handler;
1050 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001051 handler_info[i].handler);
1052 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001053 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001054 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001055}
1056
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001057PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001058"SetParamEntityParsing(flag) -> success\n\
1059Controls parsing of parameter entities (including the external DTD\n\
1060subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1061XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1062XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001063was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001064
1065static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001066xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001067{
Fred Drake85d835f2001-02-08 15:39:08 +00001068 int flag;
1069 if (!PyArg_ParseTuple(args, "i", &flag))
1070 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001071 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001072 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001073}
1074
Martin v. Löwisc847f402003-01-21 11:09:21 +00001075
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): pass the truth value of flag (default true) to
 * XML_UseForeignDTD().
 *
 * Returns None when expat reports XML_ERROR_NONE; any other code is handed
 * to set_error() and its result returned.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    int flag = 1;
    enum XML_Error rc;

    if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag)) {
        return NULL;
    }
    rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    if (rc == XML_ERROR_NONE) {
        Py_RETURN_NONE;
    }
    return set_error(self, rc);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001100
/* Forward declaration: xmlparse_dir is referenced by the method table below
   before its definition appears. */
static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);

/* Method table for xmlparser objects.  Each entry gives the Python-visible
   name, the C implementation, its calling convention and its docstring. */
static struct PyMethodDef xmlparse_methods[] = {
    {"Parse",     (PyCFunction)xmlparse_Parse,
                  METH_VARARGS, xmlparse_Parse__doc__},
    /* METH_O: the file object arrives directly as the single argument. */
    {"ParseFile", (PyCFunction)xmlparse_ParseFile,
                  METH_O, xmlparse_ParseFile__doc__},
    {"SetBase",   (PyCFunction)xmlparse_SetBase,
                  METH_VARARGS, xmlparse_SetBase__doc__},
    {"GetBase",   (PyCFunction)xmlparse_GetBase,
                  METH_NOARGS, xmlparse_GetBase__doc__},
    {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
                  METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
    {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
                  METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
    {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
                  METH_NOARGS, xmlparse_GetInputContext__doc__},
    /* Listed only under the same version guard as the function itself. */
#if XML_COMBINED_VERSION >= 19505
    {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
                  METH_VARARGS, xmlparse_UseForeignDTD__doc__},
#endif
    /* No docstring is supplied for __dir__. */
    {"__dir__", xmlparse_dir, METH_NOARGS},
    {NULL, NULL}  /* sentinel */
};
1125
1126/* ---------- */
1127
1128
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001129
Fred Drake71b63ff2002-06-28 22:29:01 +00001130/* pyexpat international encoding support.
1131 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001132*/
1133
Fred Drake71b63ff2002-06-28 22:29:01 +00001134static int
1135PyUnknownEncodingHandler(void *encodingHandlerData,
1136 const XML_Char *name,
1137 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001138{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001139 static unsigned char template_buffer[256] = {0};
1140 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001141 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001142 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001143 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001144
Victor Stinner9e09c262013-07-18 23:17:01 +02001145 if (PyErr_Occurred())
1146 return XML_STATUS_ERROR;
1147
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001148 if (template_buffer[1] == 0) {
1149 for (i = 0; i < 256; i++)
1150 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001151 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001152
1153 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001154 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001155 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001156 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001157 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001158
1159 if (PyUnicode_GET_LENGTH(u) != 256) {
1160 Py_DECREF(u);
1161 PyErr_SetString(PyExc_ValueError,
1162 "multi-byte encodings are not supported");
1163 return XML_STATUS_ERROR;
1164 }
1165
1166 kind = PyUnicode_KIND(u);
1167 data = PyUnicode_DATA(u);
1168 for (i = 0; i < 256; i++) {
1169 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1170 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1171 info->map[i] = ch;
1172 else
1173 info->map[i] = -1;
1174 }
1175
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001176 info->data = NULL;
1177 info->convert = NULL;
1178 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001179 Py_DECREF(u);
1180
1181 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182}
1183
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001184
1185static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001186newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001187{
1188 int i;
1189 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001190
Martin v. Löwis894258c2001-09-23 10:20:10 +00001191 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001192 if (self == NULL)
1193 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001194
Fred Drake2a3d7db2002-06-28 22:56:48 +00001195 self->buffer = NULL;
1196 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1197 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001198 self->ordered_attributes = 0;
1199 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001200 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001201 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001202 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001203 self->intern = intern;
1204 Py_XINCREF(self->intern);
1205 PyObject_GC_Track(self);
1206
Christian Heimesfa535f52013-07-07 17:35:11 +02001207 /* namespace_separator is either NULL or contains one char + \0 */
1208 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1209 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001210 if (self->itself == NULL) {
1211 PyErr_SetString(PyExc_RuntimeError,
1212 "XML_ParserCreate failed");
1213 Py_DECREF(self);
1214 return NULL;
1215 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001216#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1217 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1218 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1219 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001220 XML_SetHashSalt(self->itself,
Christian Heimes985ecdc2013-11-20 11:46:18 +01001221 (unsigned long)_Py_HashSecret.expat.hashsalt);
Gregory P. Smith25227712012-03-14 18:10:37 -07001222#endif
Fred Drake0582df92000-07-12 04:49:00 +00001223 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001224 XML_SetUnknownEncodingHandler(self->itself,
1225 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001226
Fred Drake2a3d7db2002-06-28 22:56:48 +00001227 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001228 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001229
Victor Stinnerb6404912013-07-07 16:21:41 +02001230 self->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001231 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001232 Py_DECREF(self);
1233 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001234 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001235 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001236
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001237 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001238}
1239
1240
1241static void
Fred Drake0582df92000-07-12 04:49:00 +00001242xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001243{
Fred Drake0582df92000-07-12 04:49:00 +00001244 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001245 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001246 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001247 XML_ParserFree(self->itself);
1248 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001249
Fred Drake85d835f2001-02-08 15:39:08 +00001250 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001251 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001252 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001253 temp = self->handlers[i];
1254 self->handlers[i] = NULL;
1255 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001256 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001257 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001258 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001259 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001260 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001261 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001262 self->buffer = NULL;
1263 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001264 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001265 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001266}
1267
Fred Drake0582df92000-07-12 04:49:00 +00001268static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001269handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001270{
1271 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001272 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001273 if (PyUnicode_CompareWithASCIIString(
1274 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001275 return i;
1276 }
1277 }
1278 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001279}
1280
1281static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001282get_pybool(int istrue)
1283{
1284 PyObject *result = istrue ? Py_True : Py_False;
1285 Py_INCREF(result);
1286 return result;
1287}
1288
1289static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001290xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001291{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001292 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001293 int handlernum = -1;
1294
Alexander Belopolskye239d232010-12-08 23:31:48 +00001295 if (!PyUnicode_Check(nameobj))
1296 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001297 if (PyUnicode_READY(nameobj))
1298 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299
Alexander Belopolskye239d232010-12-08 23:31:48 +00001300 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001301
1302 if (handlernum != -1) {
1303 PyObject *result = self->handlers[handlernum];
1304 if (result == NULL)
1305 result = Py_None;
1306 Py_INCREF(result);
1307 return result;
1308 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001309
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001310 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1311 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001312 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001313 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001314 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001315 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001316 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001317 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001318 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001319 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001320 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001321 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001322 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001323 XML_GetErrorByteIndex(self->itself));
1324 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001325 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001326 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001327 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001328 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001329 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001330 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001331 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001332 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001333 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001334 XML_GetCurrentByteIndex(self->itself));
1335 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001336 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001337 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001338 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001339 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001340 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001341 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001342 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001343 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001344 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001345 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001346 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001347 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001348 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001349 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001350 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001351 if (self->intern == NULL) {
1352 Py_INCREF(Py_None);
1353 return Py_None;
1354 }
1355 else {
1356 Py_INCREF(self->intern);
1357 return self->intern;
1358 }
1359 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001360 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001361 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001362}
1363
1364static PyObject *
1365xmlparse_dir(PyObject *self, PyObject* noargs)
1366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367#define APPEND(list, str) \
1368 do { \
1369 PyObject *o = PyUnicode_FromString(str); \
1370 if (o != NULL) \
1371 PyList_Append(list, o); \
1372 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001373 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001374
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001375 int i;
1376 PyObject *rc = PyList_New(0);
1377 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001379 for (i = 0; handler_info[i].name != NULL; i++) {
1380 PyObject *o = get_handler_name(&handler_info[i]);
1381 if (o != NULL)
1382 PyList_Append(rc, o);
1383 Py_XDECREF(o);
1384 }
1385 APPEND(rc, "ErrorCode");
1386 APPEND(rc, "ErrorLineNumber");
1387 APPEND(rc, "ErrorColumnNumber");
1388 APPEND(rc, "ErrorByteIndex");
1389 APPEND(rc, "CurrentLineNumber");
1390 APPEND(rc, "CurrentColumnNumber");
1391 APPEND(rc, "CurrentByteIndex");
1392 APPEND(rc, "buffer_size");
1393 APPEND(rc, "buffer_text");
1394 APPEND(rc, "buffer_used");
1395 APPEND(rc, "namespace_prefixes");
1396 APPEND(rc, "ordered_attributes");
1397 APPEND(rc, "specified_attributes");
1398 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001399
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001400#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001401
1402 if (PyErr_Occurred()) {
1403 Py_DECREF(rc);
1404 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001405 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001406
1407 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001408}
1409
Fred Drake6f987622000-08-25 18:03:30 +00001410static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001411sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001412{
1413 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001414 if (handlernum >= 0) {
1415 xmlhandler c_handler = NULL;
1416 PyObject *temp = self->handlers[handlernum];
1417
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001418 if (v == Py_None) {
1419 /* If this is the character data handler, and a character
1420 data handler is already active, we need to be more
1421 careful. What we can safely do is replace the existing
1422 character data handler callback function with a no-op
1423 function that will refuse to call Python. The downside
1424 is that this doesn't completely remove the character
1425 data handler from the C layer if there's any callback
1426 active, so Expat does a little more work than it
1427 otherwise would, but that's really an odd case. A more
1428 elaborate system of handlers and state could remove the
1429 C handler more effectively. */
1430 if (handlernum == CharacterData && self->in_callback)
1431 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001432 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001433 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001434 else if (v != NULL) {
1435 Py_INCREF(v);
1436 c_handler = handler_info[handlernum].handler;
1437 }
Fred Drake0582df92000-07-12 04:49:00 +00001438 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001439 Py_XDECREF(temp);
1440 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001441 return 1;
1442 }
1443 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001444}
1445
1446static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001447xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001448{
Fred Drake6f987622000-08-25 18:03:30 +00001449 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001450 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001451 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1452 return -1;
1453 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001454 assert(PyUnicode_Check(name));
1455 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001456 int b = PyObject_IsTrue(v);
1457 if (b < 0)
1458 return -1;
1459 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001460 if (self->buffer == NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001461 self->buffer = PyMem_Malloc(self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001462 if (self->buffer == NULL) {
1463 PyErr_NoMemory();
1464 return -1;
1465 }
1466 self->buffer_used = 0;
1467 }
1468 }
1469 else if (self->buffer != NULL) {
1470 if (flush_character_buffer(self) < 0)
1471 return -1;
Victor Stinnerb6404912013-07-07 16:21:41 +02001472 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001473 self->buffer = NULL;
1474 }
1475 return 0;
1476 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001477 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001478 int b = PyObject_IsTrue(v);
1479 if (b < 0)
1480 return -1;
1481 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001482 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1483 return 0;
1484 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001485 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001486 int b = PyObject_IsTrue(v);
1487 if (b < 0)
1488 return -1;
1489 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001490 return 0;
1491 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001492 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001493 int b = PyObject_IsTrue(v);
1494 if (b < 0)
1495 return -1;
1496 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001497 return 0;
1498 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001499
Alexander Belopolskye239d232010-12-08 23:31:48 +00001500 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001501 long new_buffer_size;
1502 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001503 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1504 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001505 }
1506
1507 new_buffer_size=PyLong_AS_LONG(v);
1508 /* trivial case -- no change */
1509 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001511 }
1512
1513 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1515 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001516 }
1517
1518 /* check maximum */
1519 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001520 char errmsg[100];
1521 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1522 PyErr_SetString(PyExc_ValueError, errmsg);
1523 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001524 }
1525
1526 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 /* there is already a buffer */
1528 if (self->buffer_used != 0) {
Christian Heimes09994a92013-07-20 22:41:58 +02001529 if (flush_character_buffer(self) < 0) {
1530 return -1;
1531 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001532 }
1533 /* free existing buffer */
Victor Stinnerb6404912013-07-07 16:21:41 +02001534 PyMem_Free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001535 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001536 self->buffer = PyMem_Malloc(new_buffer_size);
Christian Heimes2380ac72008-01-09 00:17:24 +00001537 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 PyErr_NoMemory();
1539 return -1;
1540 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001541 self->buffer_size = new_buffer_size;
1542 return 0;
1543 }
1544
Alexander Belopolskye239d232010-12-08 23:31:48 +00001545 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001546 /* If we're changing the character data handler, flush all
1547 * cached data with the old handler. Not sure there's a
1548 * "right" thing to do, though, but this probably won't
1549 * happen.
1550 */
1551 if (flush_character_buffer(self) < 0)
1552 return -1;
1553 }
Fred Drake6f987622000-08-25 18:03:30 +00001554 if (sethandler(self, name, v)) {
1555 return 0;
1556 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001557 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001558 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001559}
1560
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001561static int
1562xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1563{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001564 int i;
1565 for (i = 0; handler_info[i].name != NULL; i++)
1566 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001567 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001568}
1569
1570static int
1571xmlparse_clear(xmlparseobject *op)
1572{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001573 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001574 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001575 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001576}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001577
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001579
1580static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 PyVarObject_HEAD_INIT(NULL, 0)
1582 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001583 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 0, /*tp_itemsize*/
1585 /* methods */
1586 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1587 (printfunc)0, /*tp_print*/
1588 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001589 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 0, /*tp_reserved*/
1591 (reprfunc)0, /*tp_repr*/
1592 0, /*tp_as_number*/
1593 0, /*tp_as_sequence*/
1594 0, /*tp_as_mapping*/
1595 (hashfunc)0, /*tp_hash*/
1596 (ternaryfunc)0, /*tp_call*/
1597 (reprfunc)0, /*tp_str*/
1598 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001599 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001601 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1603 (traverseproc)xmlparse_traverse, /* tp_traverse */
1604 (inquiry)xmlparse_clear, /* tp_clear */
1605 0, /* tp_richcompare */
1606 0, /* tp_weaklistoffset */
1607 0, /* tp_iter */
1608 0, /* tp_iternext */
1609 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001610};
1611
1612/* End of code for xmlparser objects */
1613/* -------------------------------------------------------- */
1614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001615PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001616"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001617Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001618
1619static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001620pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1621{
Fred Drakecde79132001-04-25 16:01:30 +00001622 char *encoding = NULL;
1623 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001624 PyObject *intern = NULL;
1625 PyObject *result;
1626 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001627 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001628 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001629
Fred Drakeb91a36b2002-06-27 19:40:48 +00001630 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1631 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001632 return NULL;
1633 if (namespace_separator != NULL
1634 && strlen(namespace_separator) > 1) {
1635 PyErr_SetString(PyExc_ValueError,
1636 "namespace_separator must be at most one"
1637 " character, omitted, or None");
1638 return NULL;
1639 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001640 /* Explicitly passing None means no interning is desired.
1641 Not passing anything means that a new dictionary is used. */
1642 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001644 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 intern = PyDict_New();
1646 if (!intern)
1647 return NULL;
1648 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001649 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001650 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1652 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001653 }
1654
1655 result = newxmlparseobject(encoding, namespace_separator, intern);
1656 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001658 }
1659 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001660}
1661
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001662PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001663"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001664Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001665
1666static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001667pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001668{
Fred Drake0582df92000-07-12 04:49:00 +00001669 long code = 0;
1670
1671 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1672 return NULL;
1673 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001674}
1675
1676/* List of methods defined in the module */
1677
1678static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001680 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1682 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001685};
1686
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001687/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001688
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001689PyDoc_STRVAR(pyexpat_module_documentation,
1690"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001691
Fred Drakecde79132001-04-25 16:01:30 +00001692/* Initialization function for the module */
1693
1694#ifndef MODULE_NAME
1695#define MODULE_NAME "pyexpat"
1696#endif
1697
1698#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001699#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001700#endif
1701
Martin v. Löwis069dde22003-01-21 10:58:18 +00001702#ifndef PyMODINIT_FUNC
1703# ifdef MS_WINDOWS
1704# define PyMODINIT_FUNC __declspec(dllexport) void
1705# else
1706# define PyMODINIT_FUNC void
1707# endif
1708#endif
1709
Mark Hammond8235ea12002-07-19 06:55:41 +00001710PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001711
Martin v. Löwis1a214512008-06-11 05:26:20 +00001712static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 PyModuleDef_HEAD_INIT,
1714 MODULE_NAME,
1715 pyexpat_module_documentation,
1716 -1,
1717 pyexpat_methods,
1718 NULL,
1719 NULL,
1720 NULL,
1721 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001722};
1723
Martin v. Löwis069dde22003-01-21 10:58:18 +00001724PyMODINIT_FUNC
1725MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001726{
1727 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001728 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001729 PyObject *errors_module;
1730 PyObject *modelmod_name;
1731 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001732 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001733 PyObject *tmpnum, *tmpstr;
1734 PyObject *codes_dict;
1735 PyObject *rev_codes_dict;
1736 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001737 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001738 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001739
Fred Drake6f987622000-08-25 18:03:30 +00001740 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001741 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001742 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001743 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001744 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001745
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001746 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001747 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001748
Fred Drake0582df92000-07-12 04:49:00 +00001749 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001750 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001751 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001753
Fred Drake0582df92000-07-12 04:49:00 +00001754 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001755 if (ErrorObject == NULL) {
1756 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001757 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001758 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001759 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001760 }
1761 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001762 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001763 Py_INCREF(ErrorObject);
1764 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001765 Py_INCREF(&Xmlparsetype);
1766 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001767
Fred Drake738293d2000-12-21 17:25:07 +00001768 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1769 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001770 {
1771 XML_Expat_Version info = XML_ExpatVersionInfo();
1772 PyModule_AddObject(m, "version_info",
1773 Py_BuildValue("(iii)", info.major,
1774 info.minor, info.micro));
1775 }
Fred Drake0582df92000-07-12 04:49:00 +00001776 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001777 compiled, this should check and set native_encoding
1778 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001779 */
Fred Drake93adb692000-09-23 04:55:48 +00001780 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001781
Fred Drake85d835f2001-02-08 15:39:08 +00001782 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001783 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001784 errors_module = PyDict_GetItem(d, errmod_name);
1785 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001786 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001787 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001788 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001789 /* gives away the reference to errors_module */
1790 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001791 }
1792 }
Fred Drake6f987622000-08-25 18:03:30 +00001793 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001794 model_module = PyDict_GetItem(d, modelmod_name);
1795 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001796 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001797 if (model_module != NULL) {
1798 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1799 /* gives away the reference to model_module */
1800 PyModule_AddObject(m, "model", model_module);
1801 }
1802 }
1803 Py_DECREF(modelmod_name);
1804 if (errors_module == NULL || model_module == NULL)
1805 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001806 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001807
Martin v. Löwisc847f402003-01-21 11:09:21 +00001808#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001809 {
1810 const XML_Feature *features = XML_GetFeatureList();
1811 PyObject *list = PyList_New(0);
1812 if (list == NULL)
1813 /* just ignore it */
1814 PyErr_Clear();
1815 else {
1816 int i = 0;
1817 for (; features[i].feature != XML_FEATURE_END; ++i) {
1818 int ok;
1819 PyObject *item = Py_BuildValue("si", features[i].name,
1820 features[i].value);
1821 if (item == NULL) {
1822 Py_DECREF(list);
1823 list = NULL;
1824 break;
1825 }
1826 ok = PyList_Append(list, item);
1827 Py_DECREF(item);
1828 if (ok < 0) {
1829 PyErr_Clear();
1830 break;
1831 }
1832 }
1833 if (list != NULL)
1834 PyModule_AddObject(m, "features", list);
1835 }
1836 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001837#endif
Fred Drake6f987622000-08-25 18:03:30 +00001838
Georg Brandlb4dac712010-10-15 14:46:48 +00001839 codes_dict = PyDict_New();
1840 rev_codes_dict = PyDict_New();
1841 if (codes_dict == NULL || rev_codes_dict == NULL) {
1842 Py_XDECREF(codes_dict);
1843 Py_XDECREF(rev_codes_dict);
1844 return NULL;
1845 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001846
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001847#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001848 if (PyModule_AddStringConstant(errors_module, #name, \
1849 (char *)XML_ErrorString(name)) < 0) \
1850 return NULL; \
1851 tmpnum = PyLong_FromLong(name); \
1852 if (tmpnum == NULL) return NULL; \
1853 res = PyDict_SetItemString(codes_dict, \
1854 XML_ErrorString(name), tmpnum); \
1855 if (res < 0) return NULL; \
1856 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1857 if (tmpstr == NULL) return NULL; \
1858 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1859 Py_DECREF(tmpstr); \
1860 Py_DECREF(tmpnum); \
1861 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001862
Fred Drake0582df92000-07-12 04:49:00 +00001863 MYCONST(XML_ERROR_NO_MEMORY);
1864 MYCONST(XML_ERROR_SYNTAX);
1865 MYCONST(XML_ERROR_NO_ELEMENTS);
1866 MYCONST(XML_ERROR_INVALID_TOKEN);
1867 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1868 MYCONST(XML_ERROR_PARTIAL_CHAR);
1869 MYCONST(XML_ERROR_TAG_MISMATCH);
1870 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1871 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1872 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1873 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1874 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1875 MYCONST(XML_ERROR_ASYNC_ENTITY);
1876 MYCONST(XML_ERROR_BAD_CHAR_REF);
1877 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1878 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1879 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1880 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1881 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001882 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1883 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1884 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001885 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1886 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1887 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1888 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1889 /* Added in Expat 1.95.7. */
1890 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1891 /* Added in Expat 1.95.8. */
1892 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1893 MYCONST(XML_ERROR_INCOMPLETE_PE);
1894 MYCONST(XML_ERROR_XML_DECL);
1895 MYCONST(XML_ERROR_TEXT_DECL);
1896 MYCONST(XML_ERROR_PUBLICID);
1897 MYCONST(XML_ERROR_SUSPENDED);
1898 MYCONST(XML_ERROR_NOT_SUSPENDED);
1899 MYCONST(XML_ERROR_ABORTED);
1900 MYCONST(XML_ERROR_FINISHED);
1901 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001902
Georg Brandlb4dac712010-10-15 14:46:48 +00001903 if (PyModule_AddStringConstant(errors_module, "__doc__",
1904 "Constants used to describe "
1905 "error conditions.") < 0)
1906 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001907
Georg Brandlb4dac712010-10-15 14:46:48 +00001908 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1909 return NULL;
1910 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1911 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001912
Fred Drake93adb692000-09-23 04:55:48 +00001913#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001914
Fred Drake85d835f2001-02-08 15:39:08 +00001915#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001916 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1917 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1918 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001919#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001920
Fred Drake85d835f2001-02-08 15:39:08 +00001921#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1922 PyModule_AddStringConstant(model_module, "__doc__",
1923 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001924
Fred Drake85d835f2001-02-08 15:39:08 +00001925 MYCONST(XML_CTYPE_EMPTY);
1926 MYCONST(XML_CTYPE_ANY);
1927 MYCONST(XML_CTYPE_MIXED);
1928 MYCONST(XML_CTYPE_NAME);
1929 MYCONST(XML_CTYPE_CHOICE);
1930 MYCONST(XML_CTYPE_SEQ);
1931
1932 MYCONST(XML_CQUANT_NONE);
1933 MYCONST(XML_CQUANT_OPT);
1934 MYCONST(XML_CQUANT_REP);
1935 MYCONST(XML_CQUANT_PLUS);
1936#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001937
1938 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001939 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001940 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001941 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1942 capi.MINOR_VERSION = XML_MINOR_VERSION;
1943 capi.MICRO_VERSION = XML_MICRO_VERSION;
1944 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001945 capi.GetErrorCode = XML_GetErrorCode;
1946 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1947 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001948 capi.Parse = XML_Parse;
1949 capi.ParserCreate_MM = XML_ParserCreate_MM;
1950 capi.ParserFree = XML_ParserFree;
1951 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1952 capi.SetCommentHandler = XML_SetCommentHandler;
1953 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1954 capi.SetElementHandler = XML_SetElementHandler;
1955 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1956 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1957 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1958 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001959 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001960 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001961 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962
Benjamin Petersonb173f782009-05-05 22:31:58 +00001963 /* export using capsule */
1964 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001965 if (capi_object)
1966 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001967 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001968}
1969
Fred Drake6f987622000-08-25 18:03:30 +00001970static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001971clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001972{
Fred Drakecde79132001-04-25 16:01:30 +00001973 int i = 0;
1974 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001975
Fred Drake71b63ff2002-06-28 22:29:01 +00001976 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001977 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001978 self->handlers[i] = NULL;
1979 else {
Fred Drakecde79132001-04-25 16:01:30 +00001980 temp = self->handlers[i];
1981 self->handlers[i] = NULL;
1982 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001983 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001984 }
Fred Drakecde79132001-04-25 16:01:30 +00001985 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001986}
1987
Tim Peters0c322792002-07-17 16:49:03 +00001988static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001989 {"StartElementHandler",
1990 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001991 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001992 {"EndElementHandler",
1993 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001994 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001995 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001996 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1997 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001998 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001999 (xmlhandlersetter)XML_SetCharacterDataHandler,
2000 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002001 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002002 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002003 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002004 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002005 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002006 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002007 {"StartNamespaceDeclHandler",
2008 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002009 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002010 {"EndNamespaceDeclHandler",
2011 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002012 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002013 {"CommentHandler",
2014 (xmlhandlersetter)XML_SetCommentHandler,
2015 (xmlhandler)my_CommentHandler},
2016 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002017 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002018 (xmlhandler)my_StartCdataSectionHandler},
2019 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002020 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002021 (xmlhandler)my_EndCdataSectionHandler},
2022 {"DefaultHandler",
2023 (xmlhandlersetter)XML_SetDefaultHandler,
2024 (xmlhandler)my_DefaultHandler},
2025 {"DefaultHandlerExpand",
2026 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2027 (xmlhandler)my_DefaultHandlerExpandHandler},
2028 {"NotStandaloneHandler",
2029 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2030 (xmlhandler)my_NotStandaloneHandler},
2031 {"ExternalEntityRefHandler",
2032 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002033 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002034 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002035 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002036 (xmlhandler)my_StartDoctypeDeclHandler},
2037 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002038 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002039 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002040 {"EntityDeclHandler",
2041 (xmlhandlersetter)XML_SetEntityDeclHandler,
2042 (xmlhandler)my_EntityDeclHandler},
2043 {"XmlDeclHandler",
2044 (xmlhandlersetter)XML_SetXmlDeclHandler,
2045 (xmlhandler)my_XmlDeclHandler},
2046 {"ElementDeclHandler",
2047 (xmlhandlersetter)XML_SetElementDeclHandler,
2048 (xmlhandler)my_ElementDeclHandler},
2049 {"AttlistDeclHandler",
2050 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2051 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002052#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002053 {"SkippedEntityHandler",
2054 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2055 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002056#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002057
Fred Drake0582df92000-07-12 04:49:00 +00002058 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002059};