blob: 82d9d6a9160d17a1833bdeb9c12d4245432a6457 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Jeremy Hylton9263f572003-06-27 16:13:17 +000011#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* Expat memory-handling suite filled in with the PyObject_Malloc /
   PyObject_Realloc / PyObject_Free allocator family. */
static XML_Memory_Handling_Suite ExpatMemoryHandler = {
    PyObject_Malloc, PyObject_Realloc, PyObject_Free};
15
/* Handler slot identifiers.  Within this file the values are used as
   indices into xmlparseobject.handlers[] (see have_handler()) and into
   handler_info[] (see getcode()). */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
    /* Only present when building against Expat 1.95.4 or newer. */
#if XML_COMBINED_VERSION >= 19504
    SkippedEntity,
#endif
    /* Last enumerator; no handler is generated for it in this file. */
    _DummyDecl
};
43
44static PyObject *ErrorObject;
45
46/* ----------------------------------------------------- */
47
48/* Declarations for objects of type xmlparser */
49
/* Instance layout for objects of type xmlparser. */
typedef struct {
    PyObject_HEAD

    XML_Parser itself;          /* The wrapped Expat parser handle */
    int ordered_attributes;     /* Return attributes as a list. */
    int specified_attributes;   /* Report only specified attributes. */
    int in_callback;            /* Is a callback active? */
    int ns_prefixes;            /* Namespace-triplets mode? */
    XML_Char *buffer;           /* Buffer used when accumulating characters */
                                /* NULL if not enabled */
    int buffer_size;            /* Size of buffer, in XML_Char units */
    int buffer_used;            /* Buffer units in use */
    PyObject *intern;           /* Dictionary to intern strings */
    PyObject **handlers;        /* Handler objects indexed by enum HandlerTypes;
                                   a NULL entry means "no handler set" */
} xmlparseobject;
65
Fred Drake2a3d7db2002-06-28 22:56:48 +000066#define CHARACTER_DATA_BUFFER_SIZE 8192
67
Jeremy Hylton938ace62002-07-17 16:30:39 +000068static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000069
Fred Drake117ac852002-09-24 16:24:54 +000070typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000071typedef void* xmlhandler;
72
/* Per-slot bookkeeping; handler_info[] holds one entry per handler slot. */
struct HandlerInfo {
    const char *name;           /* Handler name as a C string */
    xmlhandlersetter setter;    /* NOTE(review): presumably the Expat
                                   XML_Set...Handler function -- confirm */
    xmlhandler handler;         /* NOTE(review): presumably the C trampoline
                                   registered through setter -- confirm */
    PyCodeObject *tb_code;      /* Code object created on demand by getcode() */
    PyObject *nameobj;          /* Unicode form of name, created on demand by
                                   get_handler_name() */
};
80
Jeremy Hylton938ace62002-07-17 16:30:39 +000081static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000082
Fred Drakebd6101c2001-02-14 18:29:45 +000083/* Set an integer attribute on the error object; return true on success,
84 * false on an exception.
85 */
86static int
87set_error_attr(PyObject *err, char *name, int value)
88{
Christian Heimes217cfd12007-12-02 14:31:20 +000089 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000090
Neal Norwitz2f5e9902006-03-08 06:36:45 +000091 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
92 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 0;
94 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000095 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000096 return 1;
97}
98
99/* Build and set an Expat exception, including positioning
100 * information. Always returns NULL.
101 */
Fred Drake85d835f2001-02-08 15:39:08 +0000102static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000103set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000104{
105 PyObject *err;
Victor Stinner499dfcf2011-03-21 13:26:24 +0100106 PyObject *buffer;
Fred Drake85d835f2001-02-08 15:39:08 +0000107 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000108 int lineno = XML_GetErrorLineNumber(parser);
109 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000110
Victor Stinner499dfcf2011-03-21 13:26:24 +0100111 buffer = PyUnicode_FromFormat("%s: line %i, column %i",
112 XML_ErrorString(code), lineno, column);
113 if (buffer == NULL)
114 return NULL;
115 err = PyObject_CallFunction(ErrorObject, "O", buffer);
116 Py_DECREF(buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000117 if ( err != NULL
118 && set_error_attr(err, "code", code)
119 && set_error_attr(err, "offset", column)
120 && set_error_attr(err, "lineno", lineno)) {
121 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000122 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000123 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000124 return NULL;
125}
126
Fred Drake71b63ff2002-06-28 22:29:01 +0000127static int
128have_handler(xmlparseobject *self, int type)
129{
130 PyObject *handler = self->handlers[type];
131 return handler != NULL;
132}
133
134static PyObject *
135get_handler_name(struct HandlerInfo *hinfo)
136{
137 PyObject *name = hinfo->nameobj;
138 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000139 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000140 hinfo->nameobj = name;
141 }
142 Py_XINCREF(name);
143 return name;
144}
145
Fred Drake85d835f2001-02-08 15:39:08 +0000146
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000147/* Convert a string of XML_Chars into a Unicode string.
148 Returns None if str is a null pointer. */
149
Fred Drake0582df92000-07-12 04:49:00 +0000150static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000151conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000152{
Fred Drake71b63ff2002-06-28 22:29:01 +0000153 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000154 and hence in UTF-8. */
155 /* UTF-8 from Expat, Unicode desired */
156 if (str == NULL) {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000160 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000161}
162
Fred Drake0582df92000-07-12 04:49:00 +0000163static PyObject *
164conv_string_len_to_unicode(const XML_Char *str, int len)
165{
Fred Drake71b63ff2002-06-28 22:29:01 +0000166 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000167 and hence in UTF-8. */
168 /* UTF-8 from Expat, Unicode desired */
169 if (str == NULL) {
170 Py_INCREF(Py_None);
171 return Py_None;
172 }
Fred Drake6f987622000-08-25 18:03:30 +0000173 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000174}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000175
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000176/* Callback routines */
177
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000178static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000179
/* This handler is used when an error has been detected, in the hope
   that actual parsing can be terminated early.  This will only help
   if an external entity reference is encountered.

   All arguments are ignored; the handler unconditionally returns 0.
   It is installed by flag_error(). */
static int
error_external_entity_ref_handler(XML_Parser parser,
                                  const XML_Char *context,
                                  const XML_Char *base,
                                  const XML_Char *systemId,
                                  const XML_Char *publicId)
{
    return 0;
}
192
/* Dummy character data handler used when an error (exception) has
   been detected, and the actual parsing can be terminated early.
   This is needed since the character data handler can't be safely
   removed from within the character data handler, but can be replaced.
   It is used only from the character data handler trampoline, and must
   be installed right after `flag_error()` is called.

   All three arguments are ignored. */
static void
noop_character_data_handler(void *userData, const XML_Char *data, int len)
{
    /* Do nothing. */
}
204
/* Put the parser into its "error detected" state: call
   clear_handlers(self, 0) and replace Expat's external entity reference
   handler with error_external_entity_ref_handler, which always
   returns 0. */
static void
flag_error(xmlparseobject *self)
{
    clear_handlers(self, 0);
    XML_SetExternalEntityRefHandler(self->itself,
                                    error_external_entity_ref_handler);
}
212
213static PyCodeObject*
214getcode(enum HandlerTypes slot, char* func_name, int lineno)
215{
Fred Drakebd6101c2001-02-14 18:29:45 +0000216 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000217 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000218 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000219 }
220 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000221}
222
Jeremy Hylton9263f572003-06-27 16:13:17 +0000223#ifdef FIX_TRACE
/* Deliver trace event `code` with argument `val` for frame `f` to the
   thread's C-level profile function and then to its C-level trace
   function.

   Nothing is called (and 0 is returned) when tracing is switched off for
   the thread or a trace call is already in progress.  A non-zero result
   from the profile function is returned immediately without calling the
   trace function; otherwise the result of the last function called is
   returned. */
static int
trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
{
    int result = 0;
    if (!tstate->use_tracing || tstate->tracing)
        return 0;
    if (tstate->c_profilefunc != NULL) {
        /* tstate->tracing is raised around the call so that a nested
           trace_frame() takes the early return above. */
        tstate->tracing++;
        result = tstate->c_profilefunc(tstate->c_profileobj,
                                       f, code , val);
        /* Recompute use_tracing from the function pointers as they are
           after the call. */
        tstate->use_tracing = ((tstate->c_tracefunc != NULL)
                               || (tstate->c_profilefunc != NULL));
        tstate->tracing--;
        if (result)
            return result;
    }
    if (tstate->c_tracefunc != NULL) {
        tstate->tracing++;
        result = tstate->c_tracefunc(tstate->c_traceobj,
                                     f, code , val);
        tstate->use_tracing = ((tstate->c_tracefunc != NULL)
                               || (tstate->c_profilefunc != NULL));
        tstate->tracing--;
    }
    return result;
}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000250
251static int
252trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
253{
254 PyObject *type, *value, *traceback, *arg;
255 int err;
256
257 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000259
260 PyErr_Fetch(&type, &value, &traceback);
261 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 value = Py_None;
263 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000264 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000265 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 PyErr_Restore(type, value, traceback);
268 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000269 }
270 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
271 Py_DECREF(arg);
272 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000274 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 Py_XDECREF(type);
276 Py_XDECREF(value);
277 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000278 }
279 return err;
280}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000281#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000282
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000284call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
285 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000286{
Fred Drakebd6101c2001-02-14 18:29:45 +0000287 PyThreadState *tstate = PyThreadState_GET();
288 PyFrameObject *f;
289 PyObject *res;
290
291 if (c == NULL)
292 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293
Jeremy Hylton9263f572003-06-27 16:13:17 +0000294 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000295 if (f == NULL)
296 return NULL;
297 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000298#ifdef FIX_TRACE
299 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000301 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000302#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000303 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000304 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 if (tstate->curexc_traceback == NULL)
306 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000307 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000308#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 if (trace_frame_exc(tstate, f) < 0) {
310 return NULL;
311 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000312 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000313 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
315 Py_XDECREF(res);
316 res = NULL;
317 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000318 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000319#else
320 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000321#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000322 tstate->frame = f->f_back;
323 Py_DECREF(f);
324 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000325}
326
Fred Drakeb91a36b2002-06-27 19:40:48 +0000327static PyObject*
328string_intern(xmlparseobject *self, const char* str)
329{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000330 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000331 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000332 /* result can be NULL if the unicode conversion failed. */
333 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000335 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000336 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000337 value = PyDict_GetItem(self->intern, result);
338 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000340 return result;
341 else
342 return NULL;
343 }
344 Py_INCREF(value);
345 Py_DECREF(result);
346 return value;
347}
348
Fred Drake2a3d7db2002-06-28 22:56:48 +0000349/* Return 0 on success, -1 on exception.
350 * flag_error() will be called before return if needed.
351 */
352static int
353call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
354{
355 PyObject *args;
356 PyObject *temp;
357
Georg Brandlc01537f2010-10-15 16:26:08 +0000358 if (!have_handler(self, CharacterData))
359 return -1;
360
Fred Drake2a3d7db2002-06-28 22:56:48 +0000361 args = PyTuple_New(1);
362 if (args == NULL)
363 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000364 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000365 if (temp == NULL) {
366 Py_DECREF(args);
367 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000368 XML_SetCharacterDataHandler(self->itself,
369 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000370 return -1;
371 }
372 PyTuple_SET_ITEM(args, 0, temp);
373 /* temp is now a borrowed reference; consider it unused. */
374 self->in_callback = 1;
375 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000376 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000377 /* temp is an owned reference again, or NULL */
378 self->in_callback = 0;
379 Py_DECREF(args);
380 if (temp == NULL) {
381 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000382 XML_SetCharacterDataHandler(self->itself,
383 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000384 return -1;
385 }
386 Py_DECREF(temp);
387 return 0;
388}
389
390static int
391flush_character_buffer(xmlparseobject *self)
392{
393 int rc;
394 if (self->buffer == NULL || self->buffer_used == 0)
395 return 0;
396 rc = call_character_handler(self, self->buffer, self->buffer_used);
397 self->buffer_used = 0;
398 return rc;
399}
400
401static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000402my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000403{
404 xmlparseobject *self = (xmlparseobject *) userData;
Victor Stinner9e09c262013-07-18 23:17:01 +0200405
406 if (PyErr_Occurred())
407 return;
408
Fred Drake2a3d7db2002-06-28 22:56:48 +0000409 if (self->buffer == NULL)
410 call_character_handler(self, data, len);
411 else {
412 if ((self->buffer_used + len) > self->buffer_size) {
413 if (flush_character_buffer(self) < 0)
414 return;
415 /* handler might have changed; drop the rest on the floor
416 * if there isn't a handler anymore
417 */
418 if (!have_handler(self, CharacterData))
419 return;
420 }
421 if (len > self->buffer_size) {
422 call_character_handler(self, data, len);
423 self->buffer_used = 0;
424 }
425 else {
426 memcpy(self->buffer + self->buffer_used,
427 data, len * sizeof(XML_Char));
428 self->buffer_used += len;
429 }
430 }
431}
432
Fred Drake85d835f2001-02-08 15:39:08 +0000433static void
434my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000435 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000436{
437 xmlparseobject *self = (xmlparseobject *)userData;
438
Fred Drake71b63ff2002-06-28 22:29:01 +0000439 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000440 PyObject *container, *rv, *args;
441 int i, max;
442
Victor Stinner9e09c262013-07-18 23:17:01 +0200443 if (PyErr_Occurred())
444 return;
445
Fred Drake2a3d7db2002-06-28 22:56:48 +0000446 if (flush_character_buffer(self) < 0)
447 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000448 /* Set max to the number of slots filled in atts[]; max/2 is
449 * the number of attributes we need to process.
450 */
451 if (self->specified_attributes) {
452 max = XML_GetSpecifiedAttributeCount(self->itself);
453 }
454 else {
455 max = 0;
456 while (atts[max] != NULL)
457 max += 2;
458 }
459 /* Build the container. */
460 if (self->ordered_attributes)
461 container = PyList_New(max);
462 else
463 container = PyDict_New();
464 if (container == NULL) {
465 flag_error(self);
466 return;
467 }
468 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000469 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000470 PyObject *v;
471 if (n == NULL) {
472 flag_error(self);
473 Py_DECREF(container);
474 return;
475 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000476 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000477 if (v == NULL) {
478 flag_error(self);
479 Py_DECREF(container);
480 Py_DECREF(n);
481 return;
482 }
483 if (self->ordered_attributes) {
484 PyList_SET_ITEM(container, i, n);
485 PyList_SET_ITEM(container, i+1, v);
486 }
487 else if (PyDict_SetItem(container, n, v)) {
488 flag_error(self);
489 Py_DECREF(n);
490 Py_DECREF(v);
491 return;
492 }
493 else {
494 Py_DECREF(n);
495 Py_DECREF(v);
496 }
497 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000498 args = string_intern(self, name);
499 if (args != NULL)
500 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000501 if (args == NULL) {
502 Py_DECREF(container);
503 return;
504 }
505 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000506 self->in_callback = 1;
507 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000508 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000509 self->in_callback = 0;
510 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000511 if (rv == NULL) {
512 flag_error(self);
513 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000514 }
Fred Drake85d835f2001-02-08 15:39:08 +0000515 Py_DECREF(rv);
516 }
517}
518
/* RC_HANDLER defines a static callback named my_<NAME>Handler.
 *
 *   RC            return type of the generated function
 *   NAME          enum HandlerTypes member; selects the handler slot
 *   PARAMS        parenthesised parameter list of the generated function
 *   INIT          extra declarations placed at the top of the body
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue
 *   CONVERSION    statement(s) run on the handler result `rv` on success
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * When a handler is set, the generated function returns RETURN at once
 * if an exception is pending or flushing buffered character data fails.
 * Otherwise it builds the arguments, calls the handler through
 * call_with_frame() with in_callback set, and calls flag_error() when
 * building the arguments or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
        RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (PyErr_Occurred()) \
            return RETURN; \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { flag_error(self); return RETURN;} \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}

/* Callback returning void whose first parameter is `void *userData`;
   the handler's result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
        RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;,\
        (xmlparseobject *)userData)

/* Callback returning int: the handler's result is converted with
   PyLong_AsLong() and returned; 0 is returned on the early-exit paths.
   NOTE(review): the PyLong_AsLong() result is not checked for errors. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT)\
        RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
                        rc = PyLong_AsLong(rv);, rc, \
        (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000558
/* my_EndElementHandler: handler receives (name), with name interned. */
VOID_HANDLER(EndElement,
             (void *userData, const XML_Char *name),
             ("(N)", string_intern(self, name)))

/* my_ProcessingInstructionHandler: handler receives (target, data);
   target is interned, data is decoded (None for a null pointer). */
VOID_HANDLER(ProcessingInstruction,
             (void *userData,
              const XML_Char *target,
              const XML_Char *data),
             ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))

/* my_UnparsedEntityDeclHandler: handler receives (entityName, base,
   systemId, publicId, notationName), all interned. */
VOID_HANDLER(UnparsedEntityDecl,
             (void *userData,
              const XML_Char *entityName,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("(NNNNN)",
              string_intern(self, entityName), string_intern(self, base),
              string_intern(self, systemId), string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_EntityDeclHandler: handler receives (entityName,
   is_parameter_entity, value, base, systemId, publicId, notationName).
   value is decoded from value_length XML_Chars; the other strings are
   interned. */
VOID_HANDLER(EntityDecl,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity,
              const XML_Char *value,
              int value_length,
              const XML_Char *base,
              const XML_Char *systemId,
              const XML_Char *publicId,
              const XML_Char *notationName),
             ("NiNNNNN",
              string_intern(self, entityName), is_parameter_entity,
              (conv_string_len_to_unicode(value, value_length)),
              string_intern(self, base), string_intern(self, systemId),
              string_intern(self, publicId),
              string_intern(self, notationName)))

/* my_XmlDeclHandler: handler receives (version, encoding, standalone);
   the two strings are decoded without interning. */
VOID_HANDLER(XmlDecl,
             (void *userData,
              const XML_Char *version,
              const XML_Char *encoding,
              int standalone),
             ("(O&O&i)",
              conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
              standalone))
606
607static PyObject *
608conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000609 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000610{
611 PyObject *result = NULL;
612 PyObject *children = PyTuple_New(model->numchildren);
613 int i;
614
615 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000616 assert(model->numchildren < INT_MAX);
617 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000618 PyObject *child = conv_content_model(&model->children[i],
619 conv_string);
620 if (child == NULL) {
621 Py_XDECREF(children);
622 return NULL;
623 }
624 PyTuple_SET_ITEM(children, i, child);
625 }
626 result = Py_BuildValue("(iiO&N)",
627 model->type, model->quant,
628 conv_string,model->name, children);
629 }
630 return result;
631}
632
Fred Drake06dd8cf2003-02-02 03:54:17 +0000633static void
634my_ElementDeclHandler(void *userData,
635 const XML_Char *name,
636 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000637{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000638 xmlparseobject *self = (xmlparseobject *)userData;
639 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000640
Fred Drake06dd8cf2003-02-02 03:54:17 +0000641 if (have_handler(self, ElementDecl)) {
642 PyObject *rv = NULL;
643 PyObject *modelobj, *nameobj;
644
Victor Stinner9e09c262013-07-18 23:17:01 +0200645 if (PyErr_Occurred())
646 return;
647
Fred Drake06dd8cf2003-02-02 03:54:17 +0000648 if (flush_character_buffer(self) < 0)
649 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000650 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000651 if (modelobj == NULL) {
652 flag_error(self);
653 goto finally;
654 }
655 nameobj = string_intern(self, name);
656 if (nameobj == NULL) {
657 Py_DECREF(modelobj);
658 flag_error(self);
659 goto finally;
660 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000661 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000662 if (args == NULL) {
663 Py_DECREF(modelobj);
664 flag_error(self);
665 goto finally;
666 }
667 self->in_callback = 1;
668 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000669 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000670 self->in_callback = 0;
671 if (rv == NULL) {
672 flag_error(self);
673 goto finally;
674 }
675 Py_DECREF(rv);
676 }
677 finally:
678 Py_XDECREF(args);
679 XML_FreeContentModel(self->itself, model);
680 return;
681}
Fred Drake85d835f2001-02-08 15:39:08 +0000682
/* my_AttlistDeclHandler: handler receives (elname, attname, att_type,
   dflt, isrequired); the two names are interned, att_type and dflt are
   decoded (None for a null pointer). */
VOID_HANDLER(AttlistDecl,
             (void *userData,
              const XML_Char *elname,
              const XML_Char *attname,
              const XML_Char *att_type,
              const XML_Char *dflt,
              int isrequired),
             ("(NNO&O&i)",
              string_intern(self, elname), string_intern(self, attname),
              conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
              isrequired))

/* my_SkippedEntityHandler (Expat 1.95.4 or newer only): handler receives
   (entityName, is_parameter_entity). */
#if XML_COMBINED_VERSION >= 19504
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName), is_parameter_entity))
#endif

/* my_NotationDeclHandler: handler receives (notationName, base,
   systemId, publicId), all interned. */
VOID_HANDLER(NotationDecl,
        (void *userData,
            const XML_Char *notationName,
            const XML_Char *base,
            const XML_Char *systemId,
            const XML_Char *publicId),
                ("(NNNN)",
         string_intern(self, notationName), string_intern(self, base),
         string_intern(self, systemId), string_intern(self, publicId)))

/* my_StartNamespaceDeclHandler: handler receives (prefix, uri), both
   interned. */
VOID_HANDLER(StartNamespaceDecl,
        (void *userData,
              const XML_Char *prefix,
              const XML_Char *uri),
                ("(NN)",
                 string_intern(self, prefix), string_intern(self, uri)))

/* my_EndNamespaceDeclHandler: handler receives (prefix), interned. */
VOID_HANDLER(EndNamespaceDecl,
        (void *userData,
            const XML_Char *prefix),
                ("(N)", string_intern(self, prefix)))

/* my_CommentHandler: handler receives (data), decoded without
   interning. */
VOID_HANDLER(Comment,
               (void *userData, const XML_Char *data),
                ("(O&)", conv_string_to_unicode ,data))

/* my_StartCdataSectionHandler: handler is called with no arguments. */
VOID_HANDLER(StartCdataSection,
               (void *userData),
        ("()"))

/* my_EndCdataSectionHandler: handler is called with no arguments. */
VOID_HANDLER(EndCdataSection,
               (void *userData),
        ("()"))

/* my_DefaultHandler: handler receives (s) decoded from len XML_Chars. */
VOID_HANDLER(Default,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_DefaultHandlerExpandHandler: same arguments as my_DefaultHandler. */
VOID_HANDLER(DefaultHandlerExpand,
              (void *userData, const XML_Char *s, int len),
              ("(N)", (conv_string_len_to_unicode(s,len))))

/* my_NotStandaloneHandler: handler is called with no arguments; its
   result is converted to an int and returned to Expat. */
INT_HANDLER(NotStandalone,
                (void *userData),
                ("()"))

/* my_ExternalEntityRefHandler: the first parameter is the XML_Parser
   rather than the user data, so the xmlparseobject is obtained through
   XML_GetUserData().  Handler receives (context, base, systemId,
   publicId); its result is converted to an int and returned to Expat. */
RC_HANDLER(int, ExternalEntityRef,
                (XML_Parser parser,
                    const XML_Char *context,
                    const XML_Char *base,
                    const XML_Char *systemId,
                    const XML_Char *publicId),
                int rc=0;,
                ("(O&NNN)",
                 conv_string_to_unicode ,context, string_intern(self, base),
                 string_intern(self, systemId), string_intern(self, publicId)),
                rc = PyLong_AsLong(rv);, rc,
                XML_GetUserData(parser))

/* XXX UnknownEncodingHandler */

/* my_StartDoctypeDeclHandler: handler receives (doctypeName, sysid,
   pubid, has_internal_subset); the strings are interned. */
VOID_HANDLER(StartDoctypeDecl,
             (void *userData, const XML_Char *doctypeName,
              const XML_Char *sysid, const XML_Char *pubid,
              int has_internal_subset),
             ("(NNNi)", string_intern(self, doctypeName),
              string_intern(self, sysid), string_intern(self, pubid),
              has_internal_subset))

/* my_EndDoctypeDeclHandler: handler is called with no arguments. */
VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000774
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000775/* ---------------------------------------------------------------- */
776
Fred Drake71b63ff2002-06-28 22:29:01 +0000777static PyObject *
778get_parse_result(xmlparseobject *self, int rv)
779{
780 if (PyErr_Occurred()) {
781 return NULL;
782 }
783 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000784 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000785 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000786 if (flush_character_buffer(self) < 0) {
787 return NULL;
788 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000789 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000790}
791
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000792PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000793"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000794Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000795
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200796#define MAX_CHUNK_SIZE (1 << 20)
797
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000798static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000799xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000800{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200801 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000802 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200803 const char *s;
804 Py_ssize_t slen;
805 Py_buffer view;
806 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000807
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200808 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000809 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000810
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200811 if (PyUnicode_Check(data)) {
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200812 view.buf = NULL;
Serhiy Storchaka36b365c2013-02-04 18:28:01 +0200813 s = PyUnicode_AsUTF8AndSize(data, &slen);
814 if (s == NULL)
815 return NULL;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200816 /* Explicitly set UTF-8 encoding. Return code ignored. */
817 (void)XML_SetEncoding(self->itself, "utf-8");
818 }
819 else {
820 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
821 return NULL;
822 s = view.buf;
823 slen = view.len;
824 }
825
826 while (slen > MAX_CHUNK_SIZE) {
827 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
828 if (!rc)
829 goto done;
830 s += MAX_CHUNK_SIZE;
831 slen -= MAX_CHUNK_SIZE;
832 }
833 rc = XML_Parse(self->itself, s, slen, isFinal);
834
835done:
836 if (view.buf != NULL)
837 PyBuffer_Release(&view);
838 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000839}
840
Fred Drakeca1f4262000-09-21 20:10:23 +0000841/* File reading copied from cPickle */
842
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000843#define BUF_SIZE 2048
844
Fred Drake0582df92000-07-12 04:49:00 +0000845static int
846readinst(char *buf, int buf_size, PyObject *meth)
847{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000848 PyObject *str;
849 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000850 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000851
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000852 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000853 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000854 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000855
Christian Heimes72b710a2008-05-26 13:28:38 +0000856 if (PyBytes_Check(str))
857 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000858 else if (PyByteArray_Check(str))
859 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000860 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000861 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000862 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000863 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000864 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000865 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000866 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000867 if (len > buf_size) {
868 PyErr_Format(PyExc_ValueError,
869 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000870 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000871 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000872 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000873 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000874 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000875 Py_DECREF(str);
876 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000877 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000878
879error:
880 Py_XDECREF(str);
881 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000882}
883
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000884PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000885"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000886Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000887
888static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000889xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000890{
Fred Drake0582df92000-07-12 04:49:00 +0000891 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000892 PyObject *readmethod = NULL;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200893 _Py_IDENTIFIER(read);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000894
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200895 readmethod = _PyObject_GetAttrId(f, &PyId_read);
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000896 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000897 PyErr_SetString(PyExc_TypeError,
898 "argument must have 'read' attribute");
899 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000900 }
901 for (;;) {
902 int bytes_read;
903 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000904 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000905 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000906 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000907 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000908
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000909 bytes_read = readinst(buf, BUF_SIZE, readmethod);
910 if (bytes_read < 0) {
911 Py_DECREF(readmethod);
912 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000913 }
914 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000915 if (PyErr_Occurred()) {
916 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000917 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000918 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000919
Fred Drake0582df92000-07-12 04:49:00 +0000920 if (!rv || bytes_read == 0)
921 break;
922 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000923 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000924 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000925}
926
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000927PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000928"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000929Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000930
931static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000932xmlparse_SetBase(xmlparseobject *self, PyObject *args)
933{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000934 char *base;
935
Fred Drake0582df92000-07-12 04:49:00 +0000936 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000937 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000938 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000940 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000941 Py_INCREF(Py_None);
942 return Py_None;
943}
944
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000945PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000946"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000947Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000948
949static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000950xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000951{
Fred Drake0582df92000-07-12 04:49:00 +0000952 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000953}
954
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000955PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000956"GetInputContext() -> string\n\
957Return the untranslated text of the input that caused the current event.\n\
958If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000959for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000960
961static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000962xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000963{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000964 if (self->in_callback) {
965 int offset, size;
966 const char *buffer
967 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000968
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000969 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000970 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000971 size - offset);
972 else
973 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000974 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000975 else
976 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000977}
Fred Drakebd6101c2001-02-14 18:29:45 +0000978
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000979PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000980"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000981Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000982information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000983
984static PyObject *
985xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
986{
987 char *context;
988 char *encoding = NULL;
989 xmlparseobject *new_parser;
990 int i;
991
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000992 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000993 &context, &encoding)) {
994 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000995 }
996
Martin v. Löwis894258c2001-09-23 10:20:10 +0000997 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000998 if (new_parser == NULL)
999 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001000 new_parser->buffer_size = self->buffer_size;
1001 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001002 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001003 new_parser->ordered_attributes = self->ordered_attributes;
1004 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001005 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001006 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001007 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001009 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001010 new_parser->intern = self->intern;
1011 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001012 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001013
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001014 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001015 new_parser->buffer = PyMem_Malloc(new_parser->buffer_size);
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001016 if (new_parser->buffer == NULL) {
1017 Py_DECREF(new_parser);
1018 return PyErr_NoMemory();
1019 }
1020 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001021 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001022 Py_DECREF(new_parser);
1023 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001024 }
1025
1026 XML_SetUserData(new_parser->itself, (void *)new_parser);
1027
1028 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001029 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001030 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001031
Victor Stinnerb6404912013-07-07 16:21:41 +02001032 new_parser->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001033 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001034 Py_DECREF(new_parser);
1035 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001036 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001037 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001038
1039 /* then copy handlers from self */
1040 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001041 PyObject *handler = self->handlers[i];
1042 if (handler != NULL) {
1043 Py_INCREF(handler);
1044 new_parser->handlers[i] = handler;
1045 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001046 handler_info[i].handler);
1047 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001048 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001049 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001050}
1051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001052PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001053"SetParamEntityParsing(flag) -> success\n\
1054Controls parsing of parameter entities (including the external DTD\n\
1055subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1056XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1057XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001058was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001059
1060static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001061xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001062{
Fred Drake85d835f2001-02-08 15:39:08 +00001063 int flag;
1064 if (!PyArg_ParseTuple(args, "i", &flag))
1065 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001066 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001067 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001068}
1069
Martin v. Löwisc847f402003-01-21 11:09:21 +00001070
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): call XML_UseForeignDTD() with XML_TRUE or
   XML_FALSE according to the truth value of `flag` (default: true).
   Returns None on XML_ERROR_NONE, otherwise the result of set_error(). */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    int flag = 1;
    enum XML_Error rc;

    if (!PyArg_ParseTuple(args, "|p:UseForeignDTD", &flag)) {
        return NULL;
    }

    rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
    if (rc == XML_ERROR_NONE) {
        Py_RETURN_NONE;
    }
    return set_error(self, rc);
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001095
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001096static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1097
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001098static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 {"Parse", (PyCFunction)xmlparse_Parse,
1100 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001101 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001103 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001105 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001107 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001109 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001111 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001113#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001114 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001116#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001117 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001119};
1120
1121/* ---------- */
1122
1123
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001124
Fred Drake71b63ff2002-06-28 22:29:01 +00001125/* pyexpat international encoding support.
1126 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001127*/
1128
Fred Drake71b63ff2002-06-28 22:29:01 +00001129static int
1130PyUnknownEncodingHandler(void *encodingHandlerData,
1131 const XML_Char *name,
1132 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001133{
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001134 static unsigned char template_buffer[256] = {0};
1135 PyObject* u;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001136 int i;
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001137 void *data;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001138 unsigned int kind;
Fred Drake71b63ff2002-06-28 22:29:01 +00001139
Victor Stinner9e09c262013-07-18 23:17:01 +02001140 if (PyErr_Occurred())
1141 return XML_STATUS_ERROR;
1142
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001143 if (template_buffer[1] == 0) {
1144 for (i = 0; i < 256; i++)
1145 template_buffer[i] = i;
Tim Peters63cb99e2001-02-17 18:12:50 +00001146 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001147
1148 u = PyUnicode_Decode((char*) template_buffer, 256, name, "replace");
Christian Heimesb5821552013-06-29 20:43:13 +02001149 if (u == NULL || PyUnicode_READY(u)) {
Christian Heimes72172422013-06-29 21:49:27 +02001150 Py_XDECREF(u);
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001151 return XML_STATUS_ERROR;
Christian Heimesb5821552013-06-29 20:43:13 +02001152 }
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001153
1154 if (PyUnicode_GET_LENGTH(u) != 256) {
1155 Py_DECREF(u);
1156 PyErr_SetString(PyExc_ValueError,
1157 "multi-byte encodings are not supported");
1158 return XML_STATUS_ERROR;
1159 }
1160
1161 kind = PyUnicode_KIND(u);
1162 data = PyUnicode_DATA(u);
1163 for (i = 0; i < 256; i++) {
1164 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
1165 if (ch != Py_UNICODE_REPLACEMENT_CHARACTER)
1166 info->map[i] = ch;
1167 else
1168 info->map[i] = -1;
1169 }
1170
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001171 info->data = NULL;
1172 info->convert = NULL;
1173 info->release = NULL;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001174 Py_DECREF(u);
1175
1176 return XML_STATUS_OK;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001177}
1178
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001179
1180static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001181newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001182{
1183 int i;
1184 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001185
Martin v. Löwis894258c2001-09-23 10:20:10 +00001186 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001187 if (self == NULL)
1188 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001189
Fred Drake2a3d7db2002-06-28 22:56:48 +00001190 self->buffer = NULL;
1191 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1192 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001193 self->ordered_attributes = 0;
1194 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001195 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001196 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001197 self->handlers = NULL;
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001198 self->intern = intern;
1199 Py_XINCREF(self->intern);
1200 PyObject_GC_Track(self);
1201
Christian Heimesfa535f52013-07-07 17:35:11 +02001202 /* namespace_separator is either NULL or contains one char + \0 */
1203 self->itself = XML_ParserCreate_MM(encoding, &ExpatMemoryHandler,
1204 namespace_separator);
Victor Stinner54b2d2e2013-07-15 17:15:57 +02001205 if (self->itself == NULL) {
1206 PyErr_SetString(PyExc_RuntimeError,
1207 "XML_ParserCreate failed");
1208 Py_DECREF(self);
1209 return NULL;
1210 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001211#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1212 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1213 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1214 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001215 XML_SetHashSalt(self->itself,
1216 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smith25227712012-03-14 18:10:37 -07001217#endif
Fred Drake0582df92000-07-12 04:49:00 +00001218 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001219 XML_SetUnknownEncodingHandler(self->itself,
1220 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001221
Fred Drake2a3d7db2002-06-28 22:56:48 +00001222 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001223 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001224
Victor Stinnerb6404912013-07-07 16:21:41 +02001225 self->handlers = PyMem_Malloc(sizeof(PyObject *) * i);
Fred Drake7c75bf22002-07-01 14:02:31 +00001226 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001227 Py_DECREF(self);
1228 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001229 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001230 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001231
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001232 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001233}
1234
1235
1236static void
Fred Drake0582df92000-07-12 04:49:00 +00001237xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001238{
Fred Drake0582df92000-07-12 04:49:00 +00001239 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001240 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001241 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001242 XML_ParserFree(self->itself);
1243 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001244
Fred Drake85d835f2001-02-08 15:39:08 +00001245 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001246 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001247 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001248 temp = self->handlers[i];
1249 self->handlers[i] = NULL;
1250 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001251 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001252 PyMem_Free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001253 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001254 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001255 if (self->buffer != NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001256 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001257 self->buffer = NULL;
1258 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001259 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001260 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001261}
1262
Fred Drake0582df92000-07-12 04:49:00 +00001263static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001264handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001265{
1266 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001267 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001268 if (PyUnicode_CompareWithASCIIString(
1269 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001270 return i;
1271 }
1272 }
1273 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001274}
1275
1276static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001277get_pybool(int istrue)
1278{
1279 PyObject *result = istrue ? Py_True : Py_False;
1280 Py_INCREF(result);
1281 return result;
1282}
1283
1284static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001285xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001286{
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001287 Py_UCS4 first_char;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001288 int handlernum = -1;
1289
Alexander Belopolskye239d232010-12-08 23:31:48 +00001290 if (!PyUnicode_Check(nameobj))
1291 goto generic;
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001292 if (PyUnicode_READY(nameobj))
1293 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294
Alexander Belopolskye239d232010-12-08 23:31:48 +00001295 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001296
1297 if (handlernum != -1) {
1298 PyObject *result = self->handlers[handlernum];
1299 if (result == NULL)
1300 result = Py_None;
1301 Py_INCREF(result);
1302 return result;
1303 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001304
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001305 first_char = PyUnicode_READ_CHAR(nameobj, 0);
1306 if (first_char == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001307 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001308 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001309 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001310 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001311 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001312 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001313 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001314 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001315 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001316 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001317 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001318 XML_GetErrorByteIndex(self->itself));
1319 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001320 if (first_char == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001321 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001322 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001323 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001324 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001325 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001326 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001327 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001328 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001329 XML_GetCurrentByteIndex(self->itself));
1330 }
Victor Stinner9e5bd6c2011-10-01 01:05:40 +02001331 if (first_char == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001332 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001333 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001334 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001335 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001336 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001337 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001338 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001339 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001340 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001341 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001342 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001343 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001344 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001345 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001346 if (self->intern == NULL) {
1347 Py_INCREF(Py_None);
1348 return Py_None;
1349 }
1350 else {
1351 Py_INCREF(self->intern);
1352 return self->intern;
1353 }
1354 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001355 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001356 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001357}
1358
1359static PyObject *
1360xmlparse_dir(PyObject *self, PyObject* noargs)
1361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362#define APPEND(list, str) \
1363 do { \
1364 PyObject *o = PyUnicode_FromString(str); \
1365 if (o != NULL) \
1366 PyList_Append(list, o); \
1367 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001368 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001369
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001370 int i;
1371 PyObject *rc = PyList_New(0);
1372 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001374 for (i = 0; handler_info[i].name != NULL; i++) {
1375 PyObject *o = get_handler_name(&handler_info[i]);
1376 if (o != NULL)
1377 PyList_Append(rc, o);
1378 Py_XDECREF(o);
1379 }
1380 APPEND(rc, "ErrorCode");
1381 APPEND(rc, "ErrorLineNumber");
1382 APPEND(rc, "ErrorColumnNumber");
1383 APPEND(rc, "ErrorByteIndex");
1384 APPEND(rc, "CurrentLineNumber");
1385 APPEND(rc, "CurrentColumnNumber");
1386 APPEND(rc, "CurrentByteIndex");
1387 APPEND(rc, "buffer_size");
1388 APPEND(rc, "buffer_text");
1389 APPEND(rc, "buffer_used");
1390 APPEND(rc, "namespace_prefixes");
1391 APPEND(rc, "ordered_attributes");
1392 APPEND(rc, "specified_attributes");
1393 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001394
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001395#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001396
1397 if (PyErr_Occurred()) {
1398 Py_DECREF(rc);
1399 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001400 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001401
1402 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001403}
1404
Fred Drake6f987622000-08-25 18:03:30 +00001405static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001406sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001407{
1408 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001409 if (handlernum >= 0) {
1410 xmlhandler c_handler = NULL;
1411 PyObject *temp = self->handlers[handlernum];
1412
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001413 if (v == Py_None) {
1414 /* If this is the character data handler, and a character
1415 data handler is already active, we need to be more
1416 careful. What we can safely do is replace the existing
1417 character data handler callback function with a no-op
1418 function that will refuse to call Python. The downside
1419 is that this doesn't completely remove the character
1420 data handler from the C layer if there's any callback
1421 active, so Expat does a little more work than it
1422 otherwise would, but that's really an odd case. A more
1423 elaborate system of handlers and state could remove the
1424 C handler more effectively. */
1425 if (handlernum == CharacterData && self->in_callback)
1426 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001427 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001428 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001429 else if (v != NULL) {
1430 Py_INCREF(v);
1431 c_handler = handler_info[handlernum].handler;
1432 }
Fred Drake0582df92000-07-12 04:49:00 +00001433 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001434 Py_XDECREF(temp);
1435 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001436 return 1;
1437 }
1438 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001439}
1440
1441static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001442xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001443{
Fred Drake6f987622000-08-25 18:03:30 +00001444 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001445 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001446 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1447 return -1;
1448 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001449 assert(PyUnicode_Check(name));
1450 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001451 int b = PyObject_IsTrue(v);
1452 if (b < 0)
1453 return -1;
1454 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001455 if (self->buffer == NULL) {
Victor Stinnerb6404912013-07-07 16:21:41 +02001456 self->buffer = PyMem_Malloc(self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001457 if (self->buffer == NULL) {
1458 PyErr_NoMemory();
1459 return -1;
1460 }
1461 self->buffer_used = 0;
1462 }
1463 }
1464 else if (self->buffer != NULL) {
1465 if (flush_character_buffer(self) < 0)
1466 return -1;
Victor Stinnerb6404912013-07-07 16:21:41 +02001467 PyMem_Free(self->buffer);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001468 self->buffer = NULL;
1469 }
1470 return 0;
1471 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001472 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001473 int b = PyObject_IsTrue(v);
1474 if (b < 0)
1475 return -1;
1476 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001477 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1478 return 0;
1479 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001480 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001481 int b = PyObject_IsTrue(v);
1482 if (b < 0)
1483 return -1;
1484 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001485 return 0;
1486 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001487 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001488 int b = PyObject_IsTrue(v);
1489 if (b < 0)
1490 return -1;
1491 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001492 return 0;
1493 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001494
Alexander Belopolskye239d232010-12-08 23:31:48 +00001495 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001496 long new_buffer_size;
1497 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1499 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001500 }
1501
1502 new_buffer_size=PyLong_AS_LONG(v);
1503 /* trivial case -- no change */
1504 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001506 }
1507
1508 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1510 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001511 }
1512
1513 /* check maximum */
1514 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 char errmsg[100];
1516 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1517 PyErr_SetString(PyExc_ValueError, errmsg);
1518 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001519 }
1520
1521 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 /* there is already a buffer */
1523 if (self->buffer_used != 0) {
1524 flush_character_buffer(self);
1525 }
1526 /* free existing buffer */
Victor Stinnerb6404912013-07-07 16:21:41 +02001527 PyMem_Free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001528 }
Victor Stinnerb6404912013-07-07 16:21:41 +02001529 self->buffer = PyMem_Malloc(new_buffer_size);
Christian Heimes2380ac72008-01-09 00:17:24 +00001530 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 PyErr_NoMemory();
1532 return -1;
1533 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001534 self->buffer_size = new_buffer_size;
1535 return 0;
1536 }
1537
Alexander Belopolskye239d232010-12-08 23:31:48 +00001538 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001539 /* If we're changing the character data handler, flush all
1540 * cached data with the old handler. Not sure there's a
1541 * "right" thing to do, though, but this probably won't
1542 * happen.
1543 */
1544 if (flush_character_buffer(self) < 0)
1545 return -1;
1546 }
Fred Drake6f987622000-08-25 18:03:30 +00001547 if (sethandler(self, name, v)) {
1548 return 0;
1549 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001550 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001551 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001552}
1553
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001554static int
1555xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1556{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001557 int i;
1558 for (i = 0; handler_info[i].name != NULL; i++)
1559 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001560 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001561}
1562
1563static int
1564xmlparse_clear(xmlparseobject *op)
1565{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001566 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001567 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001568 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001569}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001570
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001571PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001572
1573static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 PyVarObject_HEAD_INIT(NULL, 0)
1575 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001576 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 0, /*tp_itemsize*/
1578 /* methods */
1579 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1580 (printfunc)0, /*tp_print*/
1581 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001582 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 0, /*tp_reserved*/
1584 (reprfunc)0, /*tp_repr*/
1585 0, /*tp_as_number*/
1586 0, /*tp_as_sequence*/
1587 0, /*tp_as_mapping*/
1588 (hashfunc)0, /*tp_hash*/
1589 (ternaryfunc)0, /*tp_call*/
1590 (reprfunc)0, /*tp_str*/
1591 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001592 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001593 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001595 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1596 (traverseproc)xmlparse_traverse, /* tp_traverse */
1597 (inquiry)xmlparse_clear, /* tp_clear */
1598 0, /* tp_richcompare */
1599 0, /* tp_weaklistoffset */
1600 0, /* tp_iter */
1601 0, /* tp_iternext */
1602 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001603};
1604
1605/* End of code for xmlparser objects */
1606/* -------------------------------------------------------- */
1607
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001608PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001609"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001611
1612static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001613pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1614{
Fred Drakecde79132001-04-25 16:01:30 +00001615 char *encoding = NULL;
1616 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001617 PyObject *intern = NULL;
1618 PyObject *result;
1619 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001620 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001621 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001622
Fred Drakeb91a36b2002-06-27 19:40:48 +00001623 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1624 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001625 return NULL;
1626 if (namespace_separator != NULL
1627 && strlen(namespace_separator) > 1) {
1628 PyErr_SetString(PyExc_ValueError,
1629 "namespace_separator must be at most one"
1630 " character, omitted, or None");
1631 return NULL;
1632 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001633 /* Explicitly passing None means no interning is desired.
1634 Not passing anything means that a new dictionary is used. */
1635 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001637 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 intern = PyDict_New();
1639 if (!intern)
1640 return NULL;
1641 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001642 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001643 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1645 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001646 }
1647
1648 result = newxmlparseobject(encoding, namespace_separator, intern);
1649 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001651 }
1652 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001653}
1654
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001655PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001656"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001657Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001658
1659static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001660pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001661{
Fred Drake0582df92000-07-12 04:49:00 +00001662 long code = 0;
1663
1664 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1665 return NULL;
1666 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001667}
1668
1669/* List of methods defined in the module */
1670
1671static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001673 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1675 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001678};
1679
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001680/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001681
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001682PyDoc_STRVAR(pyexpat_module_documentation,
1683"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001684
Fred Drakecde79132001-04-25 16:01:30 +00001685/* Initialization function for the module */
1686
1687#ifndef MODULE_NAME
1688#define MODULE_NAME "pyexpat"
1689#endif
1690
1691#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001692#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001693#endif
1694
Martin v. Löwis069dde22003-01-21 10:58:18 +00001695#ifndef PyMODINIT_FUNC
1696# ifdef MS_WINDOWS
1697# define PyMODINIT_FUNC __declspec(dllexport) void
1698# else
1699# define PyMODINIT_FUNC void
1700# endif
1701#endif
1702
Mark Hammond8235ea12002-07-19 06:55:41 +00001703PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001704
Martin v. Löwis1a214512008-06-11 05:26:20 +00001705static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 PyModuleDef_HEAD_INIT,
1707 MODULE_NAME,
1708 pyexpat_module_documentation,
1709 -1,
1710 pyexpat_methods,
1711 NULL,
1712 NULL,
1713 NULL,
1714 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001715};
1716
Martin v. Löwis069dde22003-01-21 10:58:18 +00001717PyMODINIT_FUNC
1718MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001719{
1720 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001721 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001722 PyObject *errors_module;
1723 PyObject *modelmod_name;
1724 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001725 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001726 PyObject *tmpnum, *tmpstr;
1727 PyObject *codes_dict;
1728 PyObject *rev_codes_dict;
1729 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001730 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001731 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001732
Fred Drake6f987622000-08-25 18:03:30 +00001733 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001734 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001735 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001736 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001737 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001738
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001739 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001740 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001741
Fred Drake0582df92000-07-12 04:49:00 +00001742 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001743 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001744 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001745 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001746
Fred Drake0582df92000-07-12 04:49:00 +00001747 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001748 if (ErrorObject == NULL) {
1749 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001750 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001751 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001752 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001753 }
1754 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001755 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001756 Py_INCREF(ErrorObject);
1757 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001758 Py_INCREF(&Xmlparsetype);
1759 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001760
Fred Drake738293d2000-12-21 17:25:07 +00001761 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1762 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001763 {
1764 XML_Expat_Version info = XML_ExpatVersionInfo();
1765 PyModule_AddObject(m, "version_info",
1766 Py_BuildValue("(iii)", info.major,
1767 info.minor, info.micro));
1768 }
Fred Drake0582df92000-07-12 04:49:00 +00001769 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001770 compiled, this should check and set native_encoding
1771 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001772 */
Fred Drake93adb692000-09-23 04:55:48 +00001773 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001774
Fred Drake85d835f2001-02-08 15:39:08 +00001775 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001776 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001777 errors_module = PyDict_GetItem(d, errmod_name);
1778 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001779 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001780 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001781 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001782 /* gives away the reference to errors_module */
1783 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001784 }
1785 }
Fred Drake6f987622000-08-25 18:03:30 +00001786 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001787 model_module = PyDict_GetItem(d, modelmod_name);
1788 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001789 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001790 if (model_module != NULL) {
1791 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1792 /* gives away the reference to model_module */
1793 PyModule_AddObject(m, "model", model_module);
1794 }
1795 }
1796 Py_DECREF(modelmod_name);
1797 if (errors_module == NULL || model_module == NULL)
1798 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001799 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001800
Martin v. Löwisc847f402003-01-21 11:09:21 +00001801#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001802 {
1803 const XML_Feature *features = XML_GetFeatureList();
1804 PyObject *list = PyList_New(0);
1805 if (list == NULL)
1806 /* just ignore it */
1807 PyErr_Clear();
1808 else {
1809 int i = 0;
1810 for (; features[i].feature != XML_FEATURE_END; ++i) {
1811 int ok;
1812 PyObject *item = Py_BuildValue("si", features[i].name,
1813 features[i].value);
1814 if (item == NULL) {
1815 Py_DECREF(list);
1816 list = NULL;
1817 break;
1818 }
1819 ok = PyList_Append(list, item);
1820 Py_DECREF(item);
1821 if (ok < 0) {
1822 PyErr_Clear();
1823 break;
1824 }
1825 }
1826 if (list != NULL)
1827 PyModule_AddObject(m, "features", list);
1828 }
1829 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001830#endif
Fred Drake6f987622000-08-25 18:03:30 +00001831
Georg Brandlb4dac712010-10-15 14:46:48 +00001832 codes_dict = PyDict_New();
1833 rev_codes_dict = PyDict_New();
1834 if (codes_dict == NULL || rev_codes_dict == NULL) {
1835 Py_XDECREF(codes_dict);
1836 Py_XDECREF(rev_codes_dict);
1837 return NULL;
1838 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001839
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001840#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001841 if (PyModule_AddStringConstant(errors_module, #name, \
1842 (char *)XML_ErrorString(name)) < 0) \
1843 return NULL; \
1844 tmpnum = PyLong_FromLong(name); \
1845 if (tmpnum == NULL) return NULL; \
1846 res = PyDict_SetItemString(codes_dict, \
1847 XML_ErrorString(name), tmpnum); \
1848 if (res < 0) return NULL; \
1849 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1850 if (tmpstr == NULL) return NULL; \
1851 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1852 Py_DECREF(tmpstr); \
1853 Py_DECREF(tmpnum); \
1854 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001855
Fred Drake0582df92000-07-12 04:49:00 +00001856 MYCONST(XML_ERROR_NO_MEMORY);
1857 MYCONST(XML_ERROR_SYNTAX);
1858 MYCONST(XML_ERROR_NO_ELEMENTS);
1859 MYCONST(XML_ERROR_INVALID_TOKEN);
1860 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1861 MYCONST(XML_ERROR_PARTIAL_CHAR);
1862 MYCONST(XML_ERROR_TAG_MISMATCH);
1863 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1864 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1865 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1866 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1867 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1868 MYCONST(XML_ERROR_ASYNC_ENTITY);
1869 MYCONST(XML_ERROR_BAD_CHAR_REF);
1870 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1871 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1872 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1873 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1874 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001875 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1876 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1877 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001878 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1879 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1880 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1881 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1882 /* Added in Expat 1.95.7. */
1883 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1884 /* Added in Expat 1.95.8. */
1885 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1886 MYCONST(XML_ERROR_INCOMPLETE_PE);
1887 MYCONST(XML_ERROR_XML_DECL);
1888 MYCONST(XML_ERROR_TEXT_DECL);
1889 MYCONST(XML_ERROR_PUBLICID);
1890 MYCONST(XML_ERROR_SUSPENDED);
1891 MYCONST(XML_ERROR_NOT_SUSPENDED);
1892 MYCONST(XML_ERROR_ABORTED);
1893 MYCONST(XML_ERROR_FINISHED);
1894 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001895
Georg Brandlb4dac712010-10-15 14:46:48 +00001896 if (PyModule_AddStringConstant(errors_module, "__doc__",
1897 "Constants used to describe "
1898 "error conditions.") < 0)
1899 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001900
Georg Brandlb4dac712010-10-15 14:46:48 +00001901 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1902 return NULL;
1903 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1904 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001905
Fred Drake93adb692000-09-23 04:55:48 +00001906#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001907
Fred Drake85d835f2001-02-08 15:39:08 +00001908#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001909 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1910 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1911 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001912#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001913
Fred Drake85d835f2001-02-08 15:39:08 +00001914#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1915 PyModule_AddStringConstant(model_module, "__doc__",
1916 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001917
Fred Drake85d835f2001-02-08 15:39:08 +00001918 MYCONST(XML_CTYPE_EMPTY);
1919 MYCONST(XML_CTYPE_ANY);
1920 MYCONST(XML_CTYPE_MIXED);
1921 MYCONST(XML_CTYPE_NAME);
1922 MYCONST(XML_CTYPE_CHOICE);
1923 MYCONST(XML_CTYPE_SEQ);
1924
1925 MYCONST(XML_CQUANT_NONE);
1926 MYCONST(XML_CQUANT_OPT);
1927 MYCONST(XML_CQUANT_REP);
1928 MYCONST(XML_CQUANT_PLUS);
1929#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001930
1931 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001932 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001933 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001934 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1935 capi.MINOR_VERSION = XML_MINOR_VERSION;
1936 capi.MICRO_VERSION = XML_MICRO_VERSION;
1937 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001938 capi.GetErrorCode = XML_GetErrorCode;
1939 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1940 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001941 capi.Parse = XML_Parse;
1942 capi.ParserCreate_MM = XML_ParserCreate_MM;
1943 capi.ParserFree = XML_ParserFree;
1944 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1945 capi.SetCommentHandler = XML_SetCommentHandler;
1946 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1947 capi.SetElementHandler = XML_SetElementHandler;
1948 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1949 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1950 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1951 capi.SetUserData = XML_SetUserData;
Eli Bendersky2b6b73e2012-06-01 11:32:34 +03001952 capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
Serhiy Storchaka66d53fa2013-05-22 17:07:51 +03001953 capi.SetEncoding = XML_SetEncoding;
Eli Bendersky6dc32b32013-05-25 05:25:48 -07001954 capi.DefaultUnknownEncodingHandler = PyUnknownEncodingHandler;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001955
Benjamin Petersonb173f782009-05-05 22:31:58 +00001956 /* export using capsule */
1957 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001958 if (capi_object)
1959 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001960 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001961}
1962
Fred Drake6f987622000-08-25 18:03:30 +00001963static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001964clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001965{
Fred Drakecde79132001-04-25 16:01:30 +00001966 int i = 0;
1967 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001968
Fred Drake71b63ff2002-06-28 22:29:01 +00001969 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001970 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 self->handlers[i] = NULL;
1972 else {
Fred Drakecde79132001-04-25 16:01:30 +00001973 temp = self->handlers[i];
1974 self->handlers[i] = NULL;
1975 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001976 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001977 }
Fred Drakecde79132001-04-25 16:01:30 +00001978 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001979}
1980
Tim Peters0c322792002-07-17 16:49:03 +00001981static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001982 {"StartElementHandler",
1983 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001984 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001985 {"EndElementHandler",
1986 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001987 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001988 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001989 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1990 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001991 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001992 (xmlhandlersetter)XML_SetCharacterDataHandler,
1993 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001994 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001995 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001996 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001997 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001998 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001999 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002000 {"StartNamespaceDeclHandler",
2001 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002002 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002003 {"EndNamespaceDeclHandler",
2004 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002005 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002006 {"CommentHandler",
2007 (xmlhandlersetter)XML_SetCommentHandler,
2008 (xmlhandler)my_CommentHandler},
2009 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002010 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002011 (xmlhandler)my_StartCdataSectionHandler},
2012 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002013 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002014 (xmlhandler)my_EndCdataSectionHandler},
2015 {"DefaultHandler",
2016 (xmlhandlersetter)XML_SetDefaultHandler,
2017 (xmlhandler)my_DefaultHandler},
2018 {"DefaultHandlerExpand",
2019 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2020 (xmlhandler)my_DefaultHandlerExpandHandler},
2021 {"NotStandaloneHandler",
2022 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2023 (xmlhandler)my_NotStandaloneHandler},
2024 {"ExternalEntityRefHandler",
2025 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002026 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002027 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002028 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002029 (xmlhandler)my_StartDoctypeDeclHandler},
2030 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002031 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002032 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002033 {"EntityDeclHandler",
2034 (xmlhandlersetter)XML_SetEntityDeclHandler,
2035 (xmlhandler)my_EntityDeclHandler},
2036 {"XmlDeclHandler",
2037 (xmlhandlersetter)XML_SetXmlDeclHandler,
2038 (xmlhandler)my_XmlDeclHandler},
2039 {"ElementDeclHandler",
2040 (xmlhandlersetter)XML_SetElementDeclHandler,
2041 (xmlhandler)my_ElementDeclHandler},
2042 {"AttlistDeclHandler",
2043 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2044 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002045#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002046 {"SkippedEntityHandler",
2047 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2048 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002049#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002050
Fred Drake0582df92000-07-12 04:49:00 +00002051 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002052};