blob: 8c9a07bc6004182995091b7c65bf6ea364498cbc [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Jeremy Hylton9263f572003-06-27 16:13:17 +000011#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
Christian Heimesfa535f52013-07-07 17:35:11 +020013static XML_Memory_Handling_Suite ExpatMemoryHandler = {
14 PyObject_Malloc, PyObject_Realloc, PyObject_Free};
15
/* Slot numbers for the supported callbacks.  The values index both
   xmlparseobject.handlers and the handler_info table, so the order of
   the enumerators must not change. */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,          /* only with Expat 1.95.4 or newer */
#endif
    _DummyDecl              /* trailing sentinel; keep last */
};
43
44static PyObject *ErrorObject;
45
46/* ----------------------------------------------------- */
47
48/* Declarations for objects of type xmlparser */
49
50typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000051 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000052
Fred Drake0582df92000-07-12 04:49:00 +000053 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000054 int ordered_attributes; /* Return attributes as a list. */
55 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000056 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000057 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000058 XML_Char *buffer; /* Buffer used when accumulating characters */
59 /* NULL if not enabled */
60 int buffer_size; /* Size of buffer, in XML_Char units */
61 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000062 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000063 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000064} xmlparseobject;
65
Fred Drake2a3d7db2002-06-28 22:56:48 +000066#define CHARACTER_DATA_BUFFER_SIZE 8192
67
Jeremy Hylton938ace62002-07-17 16:30:39 +000068static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000069
Fred Drake117ac852002-09-24 16:24:54 +000070typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000071typedef void* xmlhandler;
72
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000073struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000074 const char *name;
75 xmlhandlersetter setter;
76 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000077 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000078 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079};
80
Jeremy Hylton938ace62002-07-17 16:30:39 +000081static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000082
Fred Drakebd6101c2001-02-14 18:29:45 +000083/* Set an integer attribute on the error object; return true on success,
84 * false on an exception.
85 */
86static int
87set_error_attr(PyObject *err, char *name, int value)
88{
Christian Heimes217cfd12007-12-02 14:31:20 +000089 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000090
Neal Norwitz2f5e9902006-03-08 06:36:45 +000091 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
92 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 0;
94 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000095 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000096 return 1;
97}
98
99/* Build and set an Expat exception, including positioning
100 * information. Always returns NULL.
101 */
Fred Drake85d835f2001-02-08 15:39:08 +0000102static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000103set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000104{
105 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100106 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000107 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000108 int lineno = XML_GetErrorLineNumber(parser);
109 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000110
Victor Stinner499dfcf2011-03-21 13:26:24 +0100111 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
112 XML_ErrorString(code), lineno, column);
113 if (buffer == NULL)
114 return NULL;
115 err = PyObject_CallFunction(ErrorObject, "O", buffer);
116 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000117 if ( err != NULL
118 && set_error_attr(err, "code", code)
119 && set_error_attr(err, "offset", column)
120 && set_error_attr(err, "lineno", lineno)) {
121 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000122 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000123 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000124 return NULL;
125}
126
Fred Drake71b63ff2002-06-28 22:29:01 +0000127static int
128have_handler(xmlparseobject *self, int type)
129{
130 PyObject *handler = self->handlers[type];
131 return handler != NULL;
132}
133
134static PyObject *
135get_handler_name(struct HandlerInfo *hinfo)
136{
137 PyObject *name = hinfo->nameobj;
138 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000139 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000140 hinfo->nameobj = name;
141 }
142 Py_XINCREF(name);
143 return name;
144}
145
Fred Drake85d835f2001-02-08 15:39:08 +0000146
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000147/* Convert a string of XML_Chars into a Unicode string.
148 Returns None if str is a null pointer. */
149
Fred Drake0582df92000-07-12 04:49:00 +0000150static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000151conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000152{
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000154 and hence in UTF-8. */
155 /* UTF-8 from Expat, Unicode desired */
156 if (str == NULL) {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000160 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000161}
162
Fred Drake0582df92000-07-12 04:49:00 +0000163static PyObject *
164conv_string_len_to_unicode(const XML_Char *str, int len)
165{
Fred Drake71b63ff2002-06-28 22:29:01 +0000166 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000167 and hence in UTF-8. */
168 /* UTF-8 from Expat, Unicode desired */
169 if (str == NULL) {
170 Py_INCREF(Py_None);
171 return Py_None;
172 }
Fred Drake6f987622000-08-25 18:03:30 +0000173 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000174}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176/* Callback routines */
177
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000178static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000179
Martin v. Löwis069dde22003-01-21 10:58:18 +0000180/* This handler is used when an error has been detected, in the hope
181 that actual parsing can be terminated early. This will only help
182 if an external entity reference is encountered. */
183static int
184error_external_entity_ref_handler(XML_Parser parser,
185 const XML_Char *context,
186 const XML_Char *base,
187 const XML_Char *systemId,
188 const XML_Char *publicId)
189{
190 return 0;
191}
192
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000193/* Dummy character data handler used when an error (exception) has
194 been detected, and the actual parsing can be terminated early.
195 This is needed since character data handler can't be safely removed
196 from within the character data handler, but can be replaced. It is
197 used only from the character data handler trampoline, and must be
198 used right after `flag_error()` is called. */
199static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000201{
202 /* Do nothing. */
203}
204
Fred Drake6f987622000-08-25 18:03:30 +0000205static void
206flag_error(xmlparseobject *self)
207{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000208 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000209 XML_SetExternalEntityRefHandler(self->itself,
210 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000211}
212
213static PyCodeObject*
214getcode(enum HandlerTypes slot, char* func_name, int lineno)
215{
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000217 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000218 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000219 }
220 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000221}
222
Jeremy Hylton9263f572003-06-27 16:13:17 +0000223#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000224static int
225trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
226{
227 int result = 0;
228 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000230 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000231 tstate->tracing++;
232 result = tstate->c_profilefunc(tstate->c_profileobj,
233 f, code , val);
234 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
235 || (tstate->c_profilefunc != NULL));
236 tstate->tracing--;
237 if (result)
238 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000239 }
240 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 tstate->tracing++;
242 result = tstate->c_tracefunc(tstate->c_traceobj,
243 f, code , val);
244 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
245 || (tstate->c_profilefunc != NULL));
246 tstate->tracing--;
247 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000248 return result;
249}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000250
251static int
252trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
253{
254 PyObject *type, *value, *traceback, *arg;
255 int err;
256
257 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000259
260 PyErr_Fetch(&type, &value, &traceback);
261 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 value = Py_None;
263 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000264 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000265 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 PyErr_Restore(type, value, traceback);
268 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000269 }
270 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
271 Py_DECREF(arg);
272 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000274 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 Py_XDECREF(type);
276 Py_XDECREF(value);
277 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000278 }
279 return err;
280}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000281#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000282
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000284call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
285 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000286{
Fred Drakebd6101c2001-02-14 18:29:45 +0000287 PyThreadState *tstate = PyThreadState_GET();
288 PyFrameObject *f;
289 PyObject *res;
290
291 if (c == NULL)
292 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293
Jeremy Hylton9263f572003-06-27 16:13:17 +0000294 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000295 if (f == NULL)
296 return NULL;
297 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000298#ifdef FIX_TRACE
299 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000301 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000302#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000303 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000304 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 if (tstate->curexc_traceback == NULL)
306 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000307 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000308#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 if (trace_frame_exc(tstate, f) < 0) {
310 return NULL;
311 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000312 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000313 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
315 Py_XDECREF(res);
316 res = NULL;
317 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000318 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000319#else
320 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000321#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000322 tstate->frame = f->f_back;
323 Py_DECREF(f);
324 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000325}
326
Fred Drakeb91a36b2002-06-27 19:40:48 +0000327static PyObject*
328string_intern(xmlparseobject *self, const char* str)
329{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000330 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000331 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000332 /* result can be NULL if the unicode conversion failed. */
333 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000335 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000336 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000337 value = PyDict_GetItem(self->intern, result);
338 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000340 return result;
341 else
342 return NULL;
343 }
344 Py_INCREF(value);
345 Py_DECREF(result);
346 return value;
347}
348
Fred Drake2a3d7db2002-06-28 22:56:48 +0000349/* Return 0 on success, -1 on exception.
350 * flag_error() will be called before return if needed.
351 */
352static int
353call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
354{
355 PyObject *args;
356 PyObject *temp;
357
Georg Brandlc01537f2010-10-15 16:26:08 +0000358 if (!have_handler(self, CharacterData))
359 return -1;
360
Fred Drake2a3d7db2002-06-28 22:56:48 +0000361 args = PyTuple_New(1);
362 if (args == NULL)
363 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000364 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000365 if (temp == NULL) {
366 Py_DECREF(args);
367 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000368 XML_SetCharacterDataHandler(self->itself,
369 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000370 return -1;
371 }
372 PyTuple_SET_ITEM(args, 0, temp);
373 /* temp is now a borrowed reference; consider it unused. */
374 self->in_callback = 1;
375 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000376 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000377 /* temp is an owned reference again, or NULL */
378 self->in_callback = 0;
379 Py_DECREF(args);
380 if (temp == NULL) {
381 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000382 XML_SetCharacterDataHandler(self->itself,
383 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000384 return -1;
385 }
386 Py_DECREF(temp);
387 return 0;
388}
389
390static int
391flush_character_buffer(xmlparseobject *self)
392{
393 int rc;
394 if (self->buffer == NULL || self->buffer_used == 0)
395 return 0;
396 rc = call_character_handler(self, self->buffer, self->buffer_used);
397 self->buffer_used = 0;
398 return rc;
399}
400
401static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000403{
404 xmlparseobject *self = (xmlparseobject *) userData;
405 if (self->buffer == NULL)
406 call_character_handler(self, data, len);
407 else {
408 if ((self->buffer_used + len) > self->buffer_size) {
409 if (flush_character_buffer(self) < 0)
410 return;
411 /* handler might have changed; drop the rest on the floor
412 * if there isn't a handler anymore
413 */
414 if (!have_handler(self, CharacterData))
415 return;
416 }
417 if (len > self->buffer_size) {
418 call_character_handler(self, data, len);
419 self->buffer_used = 0;
420 }
421 else {
422 memcpy(self->buffer + self->buffer_used,
423 data, len * sizeof(XML_Char));
424 self->buffer_used += len;
425 }
426 }
427}
428
Fred Drake85d835f2001-02-08 15:39:08 +0000429static void
430my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000431 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000432{
433 xmlparseobject *self = (xmlparseobject *)userData;
434
Fred Drake71b63ff2002-06-28 22:29:01 +0000435 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000436 PyObject *container, *rv, *args;
437 int i, max;
438
Fred Drake2a3d7db2002-06-28 22:56:48 +0000439 if (flush_character_buffer(self) < 0)
440 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000441 /* Set max to the number of slots filled in atts[]; max/2 is
442 * the number of attributes we need to process.
443 */
444 if (self->specified_attributes) {
445 max = XML_GetSpecifiedAttributeCount(self->itself);
446 }
447 else {
448 max = 0;
449 while (atts[max] != NULL)
450 max += 2;
451 }
452 /* Build the container. */
453 if (self->ordered_attributes)
454 container = PyList_New(max);
455 else
456 container = PyDict_New();
457 if (container == NULL) {
458 flag_error(self);
459 return;
460 }
461 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000462 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000463 PyObject *v;
464 if (n == NULL) {
465 flag_error(self);
466 Py_DECREF(container);
467 return;
468 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000469 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000470 if (v == NULL) {
471 flag_error(self);
472 Py_DECREF(container);
473 Py_DECREF(n);
474 return;
475 }
476 if (self->ordered_attributes) {
477 PyList_SET_ITEM(container, i, n);
478 PyList_SET_ITEM(container, i+1, v);
479 }
480 else if (PyDict_SetItem(container, n, v)) {
481 flag_error(self);
482 Py_DECREF(n);
483 Py_DECREF(v);
484 return;
485 }
486 else {
487 Py_DECREF(n);
488 Py_DECREF(v);
489 }
490 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000491 args = string_intern(self, name);
492 if (args != NULL)
493 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000494 if (args == NULL) {
495 Py_DECREF(container);
496 return;
497 }
498 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000499 self->in_callback = 1;
500 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000501 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000502 self->in_callback = 0;
503 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000504 if (rv == NULL) {
505 flag_error(self);
506 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000507 }
Fred Drake85d835f2001-02-08 15:39:08 +0000508 Py_DECREF(rv);
509 }
510}
511
/* Define the Expat trampoline my_<NAME>Handler.

   RC            C return type of the trampoline.
   NAME          enum HandlerTypes member; also used for the function
                 name and the code object name.
   PARAMS        parenthesised C parameter list.
   INIT          extra declarations placed after the standard locals.
   PARAM_FORMAT  parenthesised argument list for Py_BuildValue(); may
                 refer to `self` and to the parameters.
   CONVERSION    statement(s) run on the callback result `rv` before it
                 is released.
   RETURN        expression returned on every path.
   GETUSERDATA   expression yielding the xmlparseobject pointer.

   The generated function does nothing but return RETURN when no
   callback is installed.  Otherwise it flushes buffered character
   data, builds the argument tuple, calls the callback through
   call_with_frame() and calls flag_error() on failure. */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS { \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
540
/* Trampoline returning void; the callback result is discarded and the
   parser object is taken from the userData parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* Trampoline returning int; the callback result is converted with
   PyLong_AsLong() and returned (0 if no callback ran successfully). */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000549
Fred Drake71b63ff2002-06-28 22:29:01 +0000550VOID_HANDLER(EndElement,
551 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000552 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000553
Fred Drake6f987622000-08-25 18:03:30 +0000554VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000555 (void *userData,
556 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000557 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000558 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000559
Fred Drake6f987622000-08-25 18:03:30 +0000560VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000561 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000562 const XML_Char *entityName,
563 const XML_Char *base,
564 const XML_Char *systemId,
565 const XML_Char *publicId,
566 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000567 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000568 string_intern(self, entityName), string_intern(self, base),
569 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000570 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000571
Fred Drake85d835f2001-02-08 15:39:08 +0000572VOID_HANDLER(EntityDecl,
573 (void *userData,
574 const XML_Char *entityName,
575 int is_parameter_entity,
576 const XML_Char *value,
577 int value_length,
578 const XML_Char *base,
579 const XML_Char *systemId,
580 const XML_Char *publicId,
581 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000582 ("NiNNNNN",
583 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000584 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000585 string_intern(self, base), string_intern(self, systemId),
586 string_intern(self, publicId),
587 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000588
589VOID_HANDLER(XmlDecl,
590 (void *userData,
591 const XML_Char *version,
592 const XML_Char *encoding,
593 int standalone),
594 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000595 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000596 standalone))
597
598static PyObject *
599conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000600 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000601{
602 PyObject *result = NULL;
603 PyObject *children = PyTuple_New(model->numchildren);
604 int i;
605
606 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000607 assert(model->numchildren < INT_MAX);
608 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000609 PyObject *child = conv_content_model(&model->children[i],
610 conv_string);
611 if (child == NULL) {
612 Py_XDECREF(children);
613 return NULL;
614 }
615 PyTuple_SET_ITEM(children, i, child);
616 }
617 result = Py_BuildValue("(iiO&N)",
618 model->type, model->quant,
619 conv_string,model->name, children);
620 }
621 return result;
622}
623
Fred Drake06dd8cf2003-02-02 03:54:17 +0000624static void
625my_ElementDeclHandler(void *userData,
626 const XML_Char *name,
627 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000628{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000629 xmlparseobject *self = (xmlparseobject *)userData;
630 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000631
Fred Drake06dd8cf2003-02-02 03:54:17 +0000632 if (have_handler(self, ElementDecl)) {
633 PyObject *rv = NULL;
634 PyObject *modelobj, *nameobj;
635
636 if (flush_character_buffer(self) < 0)
637 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000638 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000639 if (modelobj == NULL) {
640 flag_error(self);
641 goto finally;
642 }
643 nameobj = string_intern(self, name);
644 if (nameobj == NULL) {
645 Py_DECREF(modelobj);
646 flag_error(self);
647 goto finally;
648 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000649 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000650 if (args == NULL) {
651 Py_DECREF(modelobj);
652 flag_error(self);
653 goto finally;
654 }
655 self->in_callback = 1;
656 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000657 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000658 self->in_callback = 0;
659 if (rv == NULL) {
660 flag_error(self);
661 goto finally;
662 }
663 Py_DECREF(rv);
664 }
665 finally:
666 Py_XDECREF(args);
667 XML_FreeContentModel(self->itself, model);
668 return;
669}
Fred Drake85d835f2001-02-08 15:39:08 +0000670
671VOID_HANDLER(AttlistDecl,
672 (void *userData,
673 const XML_Char *elname,
674 const XML_Char *attname,
675 const XML_Char *att_type,
676 const XML_Char *dflt,
677 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000678 ("(NNO&O&i)",
679 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000680 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000681 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000682
Martin v. Löwisc847f402003-01-21 11:09:21 +0000683#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000684VOID_HANDLER(SkippedEntity,
685 (void *userData,
686 const XML_Char *entityName,
687 int is_parameter_entity),
688 ("Ni",
689 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000690#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000691
Fred Drake71b63ff2002-06-28 22:29:01 +0000692VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 (void *userData,
694 const XML_Char *notationName,
695 const XML_Char *base,
696 const XML_Char *systemId,
697 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000698 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 string_intern(self, notationName), string_intern(self, base),
700 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000701
Fred Drake6f987622000-08-25 18:03:30 +0000702VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000703 (void *userData,
704 const XML_Char *prefix,
705 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000706 ("(NN)",
707 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000708
Fred Drake6f987622000-08-25 18:03:30 +0000709VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000710 (void *userData,
711 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000712 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000713
Fred Drake6f987622000-08-25 18:03:30 +0000714VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000715 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000716 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000717
Fred Drake6f987622000-08-25 18:03:30 +0000718VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000719 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000721
Fred Drake6f987622000-08-25 18:03:30 +0000722VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000723 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000725
Fred Drake6f987622000-08-25 18:03:30 +0000726VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 (void *userData, const XML_Char *s, int len),
728 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000729
Fred Drake6f987622000-08-25 18:03:30 +0000730VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 (void *userData, const XML_Char *s, int len),
732 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000733
Fred Drake71b63ff2002-06-28 22:29:01 +0000734INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 (void *userData),
736 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000737
Fred Drake6f987622000-08-25 18:03:30 +0000738RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 (XML_Parser parser,
740 const XML_Char *context,
741 const XML_Char *base,
742 const XML_Char *systemId,
743 const XML_Char *publicId),
744 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000745 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 conv_string_to_unicode ,context, string_intern(self, base),
747 string_intern(self, systemId), string_intern(self, publicId)),
748 rc = PyLong_AsLong(rv);, rc,
749 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000750
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000751/* XXX UnknownEncodingHandler */
752
Fred Drake85d835f2001-02-08 15:39:08 +0000753VOID_HANDLER(StartDoctypeDecl,
754 (void *userData, const XML_Char *doctypeName,
755 const XML_Char *sysid, const XML_Char *pubid,
756 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000757 ("(NNNi)", string_intern(self, doctypeName),
758 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000759 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000760
761VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000762
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000763/* ---------------------------------------------------------------- */
764
Fred Drake71b63ff2002-06-28 22:29:01 +0000765static PyObject *
766get_parse_result(xmlparseobject *self, int rv)
767{
768 if (PyErr_Occurred()) {
769 return NULL;
770 }
771 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000772 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000773 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000774 if (flush_character_buffer(self) < 0) {
775 return NULL;
776 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000777 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000778}
779
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000780PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000781"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000782Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000783
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200784#define MAX_CHUNK_SIZE (1 << 20)
785
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000786static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000787xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000788{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200789 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000790 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200791 const char *s;
792 Py_ssize_t slen;
793 Py_buffer view;
794 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000795
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200796 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000797 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000798
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200799 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200800 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200801 s = PyUnicode_AsUTF8AndSize(data, &slen);
802 if (s == NULL)
803 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200804 /* Explicitly set UTF-8 encoding. Return code ignored. */
805 (void)XML_SetEncoding(self->itself, "utf-8");
806 }
807 else {
808 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
809 return NULL;
810 s = view.buf;
811 slen = view.len;
812 }
813
814 while (slen > MAX_CHUNK_SIZE) {
815 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
816 if (!rc)
817 goto done;
818 s += MAX_CHUNK_SIZE;
819 slen -= MAX_CHUNK_SIZE;
820 }
821 rc = XML_Parse(self->itself, s, slen, isFinal);
822
823done:
824 if (view.buf != NULL)
825 PyBuffer_Release(&view);
826 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000827}
828
Fred Drakeca1f4262000-09-21 20:10:23 +0000829/* File reading copied from cPickle */
830
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000831#define BUF_SIZE 2048
832
Fred Drake0582df92000-07-12 04:49:00 +0000833static int
834readinst(char *buf, int buf_size, PyObject *meth)
835{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000836 PyObject *str;
837 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000838 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000839
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000840 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000841 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000842 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000843
Christian Heimes72b710a2008-05-26 13:28:38 +0000844 if (PyBytes_Check(str))
845 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000846 else if (PyByteArray_Check(str))
847 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000848 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000849 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000850 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000851 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000852 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000853 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000854 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000855 if (len > buf_size) {
856 PyErr_Format(PyExc_ValueError,
857 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000858 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000859 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000860 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000861 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000862 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000863 Py_DECREF(str);
864 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000865 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000866
867error:
868 Py_XDECREF(str);
869 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000870}
871
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000872PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000873"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000874Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000875
876static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000877xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878{
Fred Drake0582df92000-07-12 04:49:00 +0000879 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000880 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200881 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000882
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200883 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000884 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000885 PyErr_SetString(PyExc_TypeError,
886 "argument must have 'read' attribute");
887 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000888 }
889 for (;;) {
890 int bytes_read;
891 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000892 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000893 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000894 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000895 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000896
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000897 bytes_read = readinst(buf, BUF_SIZE, readmethod);
898 if (bytes_read < 0) {
899 Py_DECREF(readmethod);
900 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000901 }
902 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000903 if (PyErr_Occurred()) {
904 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000905 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000906 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000907
Fred Drake0582df92000-07-12 04:49:00 +0000908 if (!rv || bytes_read == 0)
909 break;
910 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000911 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000912 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000913}
914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000915PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000916"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000917Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000918
919static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000920xmlparse_SetBase(xmlparseobject *self, PyObject *args)
921{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000922 char *base;
923
Fred Drake0582df92000-07-12 04:49:00 +0000924 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000925 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000926 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000928 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000929 Py_INCREF(Py_None);
930 return Py_None;
931}
932
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000933PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000934"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000935Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000936
937static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000938xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000939{
Fred Drake0582df92000-07-12 04:49:00 +0000940 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000941}
942
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000943PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000944"GetInputContext() -> string\n\
945Return the untranslated text of the input that caused the current event.\n\
946If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000947for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000948
949static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000950xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000951{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000952 if (self->in_callback) {
953 int offset, size;
954 const char *buffer
955 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000956
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000957 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000958 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000959 size - offset);
960 else
961 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000962 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000963 else
964 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000965}
Fred Drakebd6101c2001-02-14 18:29:45 +0000966
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000967PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000968"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000969Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000970information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000971
972static PyObject *
973xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
974{
975 char *context;
976 char *encoding = NULL;
977 xmlparseobject *new_parser;
978 int i;
979
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000980 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000981 &context, &encoding)) {
982 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000983 }
984
Martin v. Löwis894258c2001-09-23 10:20:10 +0000985 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000986 if (new_parser == NULL)
987 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000988 new_parser->buffer_size = self->buffer_size;
989 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000990 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000991 new_parser->ordered_attributes = self->ordered_attributes;
992 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000993 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000994 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000995 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000997 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000998 new_parser->intern = self->intern;
999 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001000 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001001
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001002 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001003 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001004 if (new_parser->buffer == NULL) {
1005 Py_DECREF(new_parser);
1006 return PyErr_NoMemory();
1007 }
1008 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001009 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001010 Py_DECREF(new_parser);
1011 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001012 }
1013
1014 XML_SetUserData(new_parser->itself, (void *)new_parser);
1015
1016 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001017 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001018 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001019
Victor Stinnerb6404912013-07-07 16:21:41 +02001020 new_parser->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001021 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001022 Py_DECREF(new_parser);
1023 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001024 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001025 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001026
1027 /* then copy handlers from self */
1028 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001029 PyObject *handler = self->handlers[i];
1030 if (handler != NULL) {
1031 Py_INCREF(handler);
1032 new_parser->handlers[i] = handler;
1033 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001034 handler_info[i].handler);
1035 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001036 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001037 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001038}
1039
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001040PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001041"SetParamEntityParsing(flag) -> success\n\
1042Controls parsing of parameter entities (including the external DTD\n\
1043subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1044XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1045XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001046was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001047
1048static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001049xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001050{
Fred Drake85d835f2001-02-08 15:39:08 +00001051 int flag;
1052 if (!PyArg_ParseTuple(args, "i", &flag))
1053 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001054 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001055 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001056}
1057
Martin v. Löwisc847f402003-01-21 11:09:21 +00001058
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): forward the truth value of flag (default true) to
   XML_UseForeignDTD().  Returns None on XML_ERROR_NONE; any other code is
   reported through set_error(). */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    enum XML_Error err;
    int flag = 1;

    if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag))
        return NULL;

    err = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    if (err == XML_ERROR_NONE) {
        Py_RETURN_NONE;
    }
    return set_error(self, err);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001083
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001084static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1085
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001086static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 {"Parse", (PyCFunction)xmlparse_Parse,
1088 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001089 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001091 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001093 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001095 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001097 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001099 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001101#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001102 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001104#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001105 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001107};
1108
1109/* ---------- */
1110
1111
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001112
Fred Drake71b63ff2002-06-28 22:29:01 +00001113/* pyexpat international encoding support.
1114 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001115*/
1116
Fred Drake71b63ff2002-06-28 22:29:01 +00001117static int
1118PyUnknownEncodingHandler(void *encodingHandlerData,
1119 const XML_Char *name,
1120 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001121{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001122 static unsigned char template_buffer[256] = {0};
1123 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001124 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001125 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001126 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001127
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001128 if (template_buffer[1] == 0) {
1129 for (i = 0; i < 256; i++)
1130 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001131 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001132
1133 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001134 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001135 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001136 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001137 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001138
1139 if (PyUnicode_GET_LENGTH(u) != 256) {
1140 Py_DECREF(u);
1141 PyErr_SetString(PyExc_ValueError,
1142 "multi-byte encodings are not supported");
1143 return XML_STATUS_ERROR;
1144 }
1145
1146 kind = PyUnicode_KIND(u);
1147 data = PyUnicode_DATA(u);
1148 for (i = 0; i < 256; i++) {
1149 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1150 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1151 info->map[i] = ch;
1152 else
1153 info->map[i] = -1;
1154 }
1155
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001156 info->data = NULL;
1157 info->convert = NULL;
1158 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001159 Py_DECREF(u);
1160
1161 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001162}
1163
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001164
1165static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001166newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001167{
1168 int i;
1169 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001170
Martin v. Löwis894258c2001-09-23 10:20:10 +00001171 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001172 if (self == NULL)
1173 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001174
Fred Drake2a3d7db2002-06-28 22:56:48 +00001175 self->buffer = NULL;
1176 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1177 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001178 self->ordered_attributes = 0;
1179 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001180 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001181 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001183 self->intern = intern;
1184 Py_XINCREF(self->intern);
1185 PyObject_GC_Track(self);
1186
Christian Heimesfa535f52013-07-07 17:35:11 +02001187 /* namespace_separator is either NULL or contains one char + \0 */
1188 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1189 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001190 if (self->itself == NULL) {
1191 PyErr_SetString(PyExc_RuntimeError,
1192 "XML_ParserCreate failed");
1193 Py_DECREF(self);
1194 return NULL;
1195 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001196#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1197 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1198 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1199 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001200 XML_SetHashSalt(self->itself,
1201 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smith25227712012-03-14 18:10:37 -07001202#endif
Fred Drake0582df92000-07-12 04:49:00 +00001203 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001204 XML_SetUnknownEncodingHandler(self->itself,
1205 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001206
Fred Drake2a3d7db2002-06-28 22:56:48 +00001207 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001208 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001209
Victor Stinnerb6404912013-07-07 16:21:41 +02001210 self->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001211 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001212 Py_DECREF(self);
1213 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001214 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001215 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001216
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001217 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001218}
1219
1220
1221static void
Fred Drake0582df92000-07-12 04:49:00 +00001222xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001223{
Fred Drake0582df92000-07-12 04:49:00 +00001224 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001225 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001226 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001227 XML_ParserFree(self->itself);
1228 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001229
Fred Drake85d835f2001-02-08 15:39:08 +00001230 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001231 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001232 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001233 temp = self->handlers[i];
1234 self->handlers[i] = NULL;
1235 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001236 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001237 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001238 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001239 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001240 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001241 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001242 self->buffer = NULL;
1243 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001244 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001245 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001246}
1247
Fred Drake0582df92000-07-12 04:49:00 +00001248static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001249handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001250{
1251 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001252 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001253 if (PyUnicode_CompareWithASCIIString(
1254 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001255 return i;
1256 }
1257 }
1258 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001259}
1260
1261static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001262get_pybool(int istrue)
1263{
1264 PyObject *result = istrue ? Py_True : Py_False;
1265 Py_INCREF(result);
1266 return result;
1267}
1268
1269static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001270xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001271{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001272 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001273 int handlernum = -1;
1274
Alexander Belopolskye239d232010-12-08 23:31:48 +00001275 if (!PyUnicode_Check(nameobj))
1276 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001277 if (PyUnicode_READY(nameobj))
1278 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279
Alexander Belopolskye239d232010-12-08 23:31:48 +00001280 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001281
1282 if (handlernum != -1) {
1283 PyObject *result = self->handlers[handlernum];
1284 if (result == NULL)
1285 result = Py_None;
1286 Py_INCREF(result);
1287 return result;
1288 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001289
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001290 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1291 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001292 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001293 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001294 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001295 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001296 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001297 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001298 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001299 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001300 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001301 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001302 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001303 XML_GetErrorByteIndex(self->itself));
1304 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001305 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001306 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001307 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001308 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001309 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001310 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001311 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001312 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001313 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001314 XML_GetCurrentByteIndex(self->itself));
1315 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001316 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001317 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001318 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001319 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001320 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001321 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001322 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001323 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001324 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001325 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001326 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001327 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001328 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001329 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001330 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001331 if (self->intern == NULL) {
1332 Py_INCREF(Py_None);
1333 return Py_None;
1334 }
1335 else {
1336 Py_INCREF(self->intern);
1337 return self->intern;
1338 }
1339 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001340 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001341 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001342}
1343
1344static PyObject *
1345xmlparse_dir(PyObject *self, PyObject* noargs)
1346{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347#define APPEND(list, str) \
1348 do { \
1349 PyObject *o = PyUnicode_FromString(str); \
1350 if (o != NULL) \
1351 PyList_Append(list, o); \
1352 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001353 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001354
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001355 int i;
1356 PyObject *rc = PyList_New(0);
1357 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001359 for (i = 0; handler_info[i].name != NULL; i++) {
1360 PyObject *o = get_handler_name(&handler_info[i]);
1361 if (o != NULL)
1362 PyList_Append(rc, o);
1363 Py_XDECREF(o);
1364 }
1365 APPEND(rc, "ErrorCode");
1366 APPEND(rc, "ErrorLineNumber");
1367 APPEND(rc, "ErrorColumnNumber");
1368 APPEND(rc, "ErrorByteIndex");
1369 APPEND(rc, "CurrentLineNumber");
1370 APPEND(rc, "CurrentColumnNumber");
1371 APPEND(rc, "CurrentByteIndex");
1372 APPEND(rc, "buffer_size");
1373 APPEND(rc, "buffer_text");
1374 APPEND(rc, "buffer_used");
1375 APPEND(rc, "namespace_prefixes");
1376 APPEND(rc, "ordered_attributes");
1377 APPEND(rc, "specified_attributes");
1378 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001379
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001380#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001381
1382 if (PyErr_Occurred()) {
1383 Py_DECREF(rc);
1384 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001385 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001386
1387 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001388}
1389
Fred Drake6f987622000-08-25 18:03:30 +00001390static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001391sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001392{
1393 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001394 if (handlernum >= 0) {
1395 xmlhandler c_handler = NULL;
1396 PyObject *temp = self->handlers[handlernum];
1397
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001398 if (v == Py_None) {
1399 /* If this is the character data handler, and a character
1400 data handler is already active, we need to be more
1401 careful. What we can safely do is replace the existing
1402 character data handler callback function with a no-op
1403 function that will refuse to call Python. The downside
1404 is that this doesn't completely remove the character
1405 data handler from the C layer if there's any callback
1406 active, so Expat does a little more work than it
1407 otherwise would, but that's really an odd case. A more
1408 elaborate system of handlers and state could remove the
1409 C handler more effectively. */
1410 if (handlernum == CharacterData && self->in_callback)
1411 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001412 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001413 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001414 else if (v != NULL) {
1415 Py_INCREF(v);
1416 c_handler = handler_info[handlernum].handler;
1417 }
Fred Drake0582df92000-07-12 04:49:00 +00001418 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001419 Py_XDECREF(temp);
1420 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001421 return 1;
1422 }
1423 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001424}
1425
1426static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001427xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001428{
Fred Drake6f987622000-08-25 18:03:30 +00001429 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001430 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001431 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1432 return -1;
1433 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001434 assert(PyUnicode_Check(name));
1435 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001436 int b = PyObject_IsTrue(v);
1437 if (b < 0)
1438 return -1;
1439 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001440 if (self->buffer == NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001441 self->buffer = PyMem_Malloc(self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001442 if (self->buffer == NULL) {
1443 PyErr_NoMemory();
1444 return -1;
1445 }
1446 self->buffer_used = 0;
1447 }
1448 }
1449 else if (self->buffer != NULL) {
1450 if (flush_character_buffer(self) < 0)
1451 return -1;
Victor Stinnerb6404912013-07-07 16:21:41 +02001452 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001453 self->buffer = NULL;
1454 }
1455 return 0;
1456 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001457 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001458 int b = PyObject_IsTrue(v);
1459 if (b < 0)
1460 return -1;
1461 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001462 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1463 return 0;
1464 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001465 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001466 int b = PyObject_IsTrue(v);
1467 if (b < 0)
1468 return -1;
1469 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001470 return 0;
1471 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001472 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001473 int b = PyObject_IsTrue(v);
1474 if (b < 0)
1475 return -1;
1476 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001477 return 0;
1478 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001479
Alexander Belopolskye239d232010-12-08 23:31:48 +00001480 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001481 long new_buffer_size;
1482 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1484 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001485 }
1486
1487 new_buffer_size=PyLong_AS_LONG(v);
1488 /* trivial case -- no change */
1489 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001491 }
1492
1493 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1495 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001496 }
1497
1498 /* check maximum */
1499 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 char errmsg[100];
1501 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1502 PyErr_SetString(PyExc_ValueError, errmsg);
1503 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001504 }
1505
1506 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 /* there is already a buffer */
1508 if (self->buffer_used != 0) {
1509 flush_character_buffer(self);
1510 }
1511 /* free existing buffer */
Victor Stinnerb6404912013-07-07 16:21:41 +02001512 PyMem_Free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001513 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001514 self->buffer = PyMem_Malloc(new_buffer_size);
Christian Heimes2380ac72008-01-09 00:17:24 +00001515 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 PyErr_NoMemory();
1517 return -1;
1518 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001519 self->buffer_size = new_buffer_size;
1520 return 0;
1521 }
1522
Alexander Belopolskye239d232010-12-08 23:31:48 +00001523 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001524 /* If we're changing the character data handler, flush all
1525 * cached data with the old handler. Not sure there's a
1526 * "right" thing to do, though, but this probably won't
1527 * happen.
1528 */
1529 if (flush_character_buffer(self) < 0)
1530 return -1;
1531 }
Fred Drake6f987622000-08-25 18:03:30 +00001532 if (sethandler(self, name, v)) {
1533 return 0;
1534 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001535 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001536 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001537}
1538
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001539static int
1540xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1541{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001542 int i;
1543 for (i = 0; handler_info[i].name != NULL; i++)
1544 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001545 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001546}
1547
1548static int
1549xmlparse_clear(xmlparseobject *op)
1550{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001551 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001552 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001553 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001554}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001555
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001556PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001557
1558static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 PyVarObject_HEAD_INIT(NULL, 0)
1560 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001561 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001562 0, /*tp_itemsize*/
1563 /* methods */
1564 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1565 (printfunc)0, /*tp_print*/
1566 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001567 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001568 0, /*tp_reserved*/
1569 (reprfunc)0, /*tp_repr*/
1570 0, /*tp_as_number*/
1571 0, /*tp_as_sequence*/
1572 0, /*tp_as_mapping*/
1573 (hashfunc)0, /*tp_hash*/
1574 (ternaryfunc)0, /*tp_call*/
1575 (reprfunc)0, /*tp_str*/
1576 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001577 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1581 (traverseproc)xmlparse_traverse, /* tp_traverse */
1582 (inquiry)xmlparse_clear, /* tp_clear */
1583 0, /* tp_richcompare */
1584 0, /* tp_weaklistoffset */
1585 0, /* tp_iter */
1586 0, /* tp_iternext */
1587 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001588};
1589
1590/* End of code for xmlparser objects */
1591/* -------------------------------------------------------- */
1592
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001593PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001594"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001596
1597static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001598pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1599{
Fred Drakecde79132001-04-25 16:01:30 +00001600 char *encoding = NULL;
1601 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001602 PyObject *intern = NULL;
1603 PyObject *result;
1604 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001605 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001606 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001607
Fred Drakeb91a36b2002-06-27 19:40:48 +00001608 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1609 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001610 return NULL;
1611 if (namespace_separator != NULL
1612 && strlen(namespace_separator) > 1) {
1613 PyErr_SetString(PyExc_ValueError,
1614 "namespace_separator must be at most one"
1615 " character, omitted, or None");
1616 return NULL;
1617 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001618 /* Explicitly passing None means no interning is desired.
1619 Not passing anything means that a new dictionary is used. */
1620 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001621 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001622 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 intern = PyDict_New();
1624 if (!intern)
1625 return NULL;
1626 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001627 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001628 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1630 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001631 }
1632
1633 result = newxmlparseobject(encoding, namespace_separator, intern);
1634 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001636 }
1637 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001638}
1639
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001640PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001641"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001642Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001643
1644static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001645pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001646{
Fred Drake0582df92000-07-12 04:49:00 +00001647 long code = 0;
1648
1649 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1650 return NULL;
1651 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001652}
1653
1654/* List of methods defined in the module */
1655
1656static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001658 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1660 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001663};
1664
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001665/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001666
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001667PyDoc_STRVAR(pyexpat_module_documentation,
1668"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001669
Fred Drakecde79132001-04-25 16:01:30 +00001670/* Initialization function for the module */
1671
1672#ifndef MODULE_NAME
1673#define MODULE_NAME "pyexpat"
1674#endif
1675
1676#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001677#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001678#endif
1679
Martin v. Löwis069dde22003-01-21 10:58:18 +00001680#ifndef PyMODINIT_FUNC
1681# ifdef MS_WINDOWS
1682# define PyMODINIT_FUNC __declspec(dllexport) void
1683# else
1684# define PyMODINIT_FUNC void
1685# endif
1686#endif
1687
Mark Hammond8235ea12002-07-19 06:55:41 +00001688PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001689
Martin v. Löwis1a214512008-06-11 05:26:20 +00001690static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 PyModuleDef_HEAD_INIT,
1692 MODULE_NAME,
1693 pyexpat_module_documentation,
1694 -1,
1695 pyexpat_methods,
1696 NULL,
1697 NULL,
1698 NULL,
1699 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001700};
1701
Martin v. Löwis069dde22003-01-21 10:58:18 +00001702PyMODINIT_FUNC
1703MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001704{
1705 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001706 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001707 PyObject *errors_module;
1708 PyObject *modelmod_name;
1709 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001710 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001711 PyObject *tmpnum, *tmpstr;
1712 PyObject *codes_dict;
1713 PyObject *rev_codes_dict;
1714 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001715 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001716 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001717
Fred Drake6f987622000-08-25 18:03:30 +00001718 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001719 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001720 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001721 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001722 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001723
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001724 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001726
Fred Drake0582df92000-07-12 04:49:00 +00001727 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001728 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001729 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001731
Fred Drake0582df92000-07-12 04:49:00 +00001732 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001733 if (ErrorObject == NULL) {
1734 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001735 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001736 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001737 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001738 }
1739 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001740 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001741 Py_INCREF(ErrorObject);
1742 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001743 Py_INCREF(&Xmlparsetype);
1744 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001745
Fred Drake738293d2000-12-21 17:25:07 +00001746 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1747 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001748 {
1749 XML_Expat_Version info = XML_ExpatVersionInfo();
1750 PyModule_AddObject(m, "version_info",
1751 Py_BuildValue("(iii)", info.major,
1752 info.minor, info.micro));
1753 }
Fred Drake0582df92000-07-12 04:49:00 +00001754 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001755 compiled, this should check and set native_encoding
1756 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001757 */
Fred Drake93adb692000-09-23 04:55:48 +00001758 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001759
Fred Drake85d835f2001-02-08 15:39:08 +00001760 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001761 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001762 errors_module = PyDict_GetItem(d, errmod_name);
1763 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001764 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001765 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001766 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001767 /* gives away the reference to errors_module */
1768 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001769 }
1770 }
Fred Drake6f987622000-08-25 18:03:30 +00001771 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001772 model_module = PyDict_GetItem(d, modelmod_name);
1773 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001774 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001775 if (model_module != NULL) {
1776 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1777 /* gives away the reference to model_module */
1778 PyModule_AddObject(m, "model", model_module);
1779 }
1780 }
1781 Py_DECREF(modelmod_name);
1782 if (errors_module == NULL || model_module == NULL)
1783 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001784 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785
Martin v. Löwisc847f402003-01-21 11:09:21 +00001786#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001787 {
1788 const XML_Feature *features = XML_GetFeatureList();
1789 PyObject *list = PyList_New(0);
1790 if (list == NULL)
1791 /* just ignore it */
1792 PyErr_Clear();
1793 else {
1794 int i = 0;
1795 for (; features[i].feature != XML_FEATURE_END; ++i) {
1796 int ok;
1797 PyObject *item = Py_BuildValue("si", features[i].name,
1798 features[i].value);
1799 if (item == NULL) {
1800 Py_DECREF(list);
1801 list = NULL;
1802 break;
1803 }
1804 ok = PyList_Append(list, item);
1805 Py_DECREF(item);
1806 if (ok < 0) {
1807 PyErr_Clear();
1808 break;
1809 }
1810 }
1811 if (list != NULL)
1812 PyModule_AddObject(m, "features", list);
1813 }
1814 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001815#endif
Fred Drake6f987622000-08-25 18:03:30 +00001816
Georg Brandlb4dac712010-10-15 14:46:48 +00001817 codes_dict = PyDict_New();
1818 rev_codes_dict = PyDict_New();
1819 if (codes_dict == NULL || rev_codes_dict == NULL) {
1820 Py_XDECREF(codes_dict);
1821 Py_XDECREF(rev_codes_dict);
1822 return NULL;
1823 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001824
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001825#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001826 if (PyModule_AddStringConstant(errors_module, #name, \
1827 (char *)XML_ErrorString(name)) < 0) \
1828 return NULL; \
1829 tmpnum = PyLong_FromLong(name); \
1830 if (tmpnum == NULL) return NULL; \
1831 res = PyDict_SetItemString(codes_dict, \
1832 XML_ErrorString(name), tmpnum); \
1833 if (res < 0) return NULL; \
1834 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1835 if (tmpstr == NULL) return NULL; \
1836 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1837 Py_DECREF(tmpstr); \
1838 Py_DECREF(tmpnum); \
1839 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001840
Fred Drake0582df92000-07-12 04:49:00 +00001841 MYCONST(XML_ERROR_NO_MEMORY);
1842 MYCONST(XML_ERROR_SYNTAX);
1843 MYCONST(XML_ERROR_NO_ELEMENTS);
1844 MYCONST(XML_ERROR_INVALID_TOKEN);
1845 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1846 MYCONST(XML_ERROR_PARTIAL_CHAR);
1847 MYCONST(XML_ERROR_TAG_MISMATCH);
1848 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1849 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1850 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1851 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1852 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1853 MYCONST(XML_ERROR_ASYNC_ENTITY);
1854 MYCONST(XML_ERROR_BAD_CHAR_REF);
1855 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1856 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1857 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1858 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1859 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001860 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1861 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1862 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001863 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1864 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1865 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1866 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1867 /* Added in Expat 1.95.7. */
1868 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1869 /* Added in Expat 1.95.8. */
1870 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1871 MYCONST(XML_ERROR_INCOMPLETE_PE);
1872 MYCONST(XML_ERROR_XML_DECL);
1873 MYCONST(XML_ERROR_TEXT_DECL);
1874 MYCONST(XML_ERROR_PUBLICID);
1875 MYCONST(XML_ERROR_SUSPENDED);
1876 MYCONST(XML_ERROR_NOT_SUSPENDED);
1877 MYCONST(XML_ERROR_ABORTED);
1878 MYCONST(XML_ERROR_FINISHED);
1879 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001880
Georg Brandlb4dac712010-10-15 14:46:48 +00001881 if (PyModule_AddStringConstant(errors_module, "__doc__",
1882 "Constants used to describe "
1883 "error conditions.") < 0)
1884 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001885
Georg Brandlb4dac712010-10-15 14:46:48 +00001886 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1887 return NULL;
1888 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1889 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001890
Fred Drake93adb692000-09-23 04:55:48 +00001891#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001892
Fred Drake85d835f2001-02-08 15:39:08 +00001893#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001894 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1895 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1896 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001897#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001898
Fred Drake85d835f2001-02-08 15:39:08 +00001899#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1900 PyModule_AddStringConstant(model_module, "__doc__",
1901 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001902
Fred Drake85d835f2001-02-08 15:39:08 +00001903 MYCONST(XML_CTYPE_EMPTY);
1904 MYCONST(XML_CTYPE_ANY);
1905 MYCONST(XML_CTYPE_MIXED);
1906 MYCONST(XML_CTYPE_NAME);
1907 MYCONST(XML_CTYPE_CHOICE);
1908 MYCONST(XML_CTYPE_SEQ);
1909
1910 MYCONST(XML_CQUANT_NONE);
1911 MYCONST(XML_CQUANT_OPT);
1912 MYCONST(XML_CQUANT_REP);
1913 MYCONST(XML_CQUANT_PLUS);
1914#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001915
1916 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001917 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001918 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001919 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1920 capi.MINOR_VERSION = XML_MINOR_VERSION;
1921 capi.MICRO_VERSION = XML_MICRO_VERSION;
1922 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001923 capi.GetErrorCode = XML_GetErrorCode;
1924 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1925 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001926 capi.Parse = XML_Parse;
1927 capi.ParserCreate_MM = XML_ParserCreate_MM;
1928 capi.ParserFree = XML_ParserFree;
1929 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1930 capi.SetCommentHandler = XML_SetCommentHandler;
1931 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1932 capi.SetElementHandler = XML_SetElementHandler;
1933 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1934 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1935 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1936 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001937 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001938 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001939 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001940
Benjamin Petersonb173f782009-05-05 22:31:58 +00001941 /* export using capsule */
1942 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001943 if (capi_object)
1944 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001945 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001946}
1947
Fred Drake6f987622000-08-25 18:03:30 +00001948static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001949clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001950{
Fred Drakecde79132001-04-25 16:01:30 +00001951 int i = 0;
1952 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001953
Fred Drake71b63ff2002-06-28 22:29:01 +00001954 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001955 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 self->handlers[i] = NULL;
1957 else {
Fred Drakecde79132001-04-25 16:01:30 +00001958 temp = self->handlers[i];
1959 self->handlers[i] = NULL;
1960 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001961 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001962 }
Fred Drakecde79132001-04-25 16:01:30 +00001963 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001964}
1965
Tim Peters0c322792002-07-17 16:49:03 +00001966static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001967 {"StartElementHandler",
1968 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001969 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001970 {"EndElementHandler",
1971 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001972 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001973 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001974 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1975 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001976 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001977 (xmlhandlersetter)XML_SetCharacterDataHandler,
1978 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001979 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001980 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001981 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001982 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001983 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001984 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001985 {"StartNamespaceDeclHandler",
1986 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001987 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001988 {"EndNamespaceDeclHandler",
1989 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001990 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001991 {"CommentHandler",
1992 (xmlhandlersetter)XML_SetCommentHandler,
1993 (xmlhandler)my_CommentHandler},
1994 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001995 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001996 (xmlhandler)my_StartCdataSectionHandler},
1997 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00001998 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001999 (xmlhandler)my_EndCdataSectionHandler},
2000 {"DefaultHandler",
2001 (xmlhandlersetter)XML_SetDefaultHandler,
2002 (xmlhandler)my_DefaultHandler},
2003 {"DefaultHandlerExpand",
2004 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2005 (xmlhandler)my_DefaultHandlerExpandHandler},
2006 {"NotStandaloneHandler",
2007 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2008 (xmlhandler)my_NotStandaloneHandler},
2009 {"ExternalEntityRefHandler",
2010 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002011 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002012 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002013 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002014 (xmlhandler)my_StartDoctypeDeclHandler},
2015 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002016 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002017 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002018 {"EntityDeclHandler",
2019 (xmlhandlersetter)XML_SetEntityDeclHandler,
2020 (xmlhandler)my_EntityDeclHandler},
2021 {"XmlDeclHandler",
2022 (xmlhandlersetter)XML_SetXmlDeclHandler,
2023 (xmlhandler)my_XmlDeclHandler},
2024 {"ElementDeclHandler",
2025 (xmlhandlersetter)XML_SetElementDeclHandler,
2026 (xmlhandler)my_ElementDeclHandler},
2027 {"AttlistDeclHandler",
2028 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2029 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002030#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002031 {"SkippedEntityHandler",
2032 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2033 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002034#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002035
Fred Drake0582df92000-07-12 04:49:00 +00002036 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002037};