blob: 9d22d3a0518e6aab7bf77da05201309b8adc1b38 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Jeremy Hylton9263f572003-06-27 16:13:17 +000011#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* Index of each callback slot.  The values are used to subscript both
 * the per-parser `handlers` array and the global `handler_info` table,
 * so the order here must not change.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    /* Trailing placeholder entry; keeps the enumerator list well-formed
       whether or not the conditional entry above is compiled in. */
    _DummyDecl
};
40
41static PyObject *ErrorObject;
42
43/* ----------------------------------------------------- */
44
45/* Declarations for objects of type xmlparser */
46
47typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000048 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000049
Fred Drake0582df92000-07-12 04:49:00 +000050 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000051 int ordered_attributes; /* Return attributes as a list. */
52 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000053 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000054 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000055 XML_Char *buffer; /* Buffer used when accumulating characters */
56 /* NULL if not enabled */
57 int buffer_size; /* Size of buffer, in XML_Char units */
58 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000059 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000060 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000061} xmlparseobject;
62
Fred Drake2a3d7db2002-06-28 22:56:48 +000063#define CHARACTER_DATA_BUFFER_SIZE 8192
64
Jeremy Hylton938ace62002-07-17 16:30:39 +000065static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake117ac852002-09-24 16:24:54 +000067typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068typedef void* xmlhandler;
69
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000070struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000071 const char *name;
72 xmlhandlersetter setter;
73 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000074 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000075 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076};
77
Jeremy Hylton938ace62002-07-17 16:30:39 +000078static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079
Fred Drakebd6101c2001-02-14 18:29:45 +000080/* Set an integer attribute on the error object; return true on success,
81 * false on an exception.
82 */
83static int
84set_error_attr(PyObject *err, char *name, int value)
85{
Christian Heimes217cfd12007-12-02 14:31:20 +000086 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000087
Neal Norwitz2f5e9902006-03-08 06:36:45 +000088 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
89 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000090 return 0;
91 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000092 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 1;
94}
95
96/* Build and set an Expat exception, including positioning
97 * information. Always returns NULL.
98 */
Fred Drake85d835f2001-02-08 15:39:08 +000099static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000100set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000101{
102 PyObject *err;
103 char buffer[256];
104 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000105 int lineno = XML_GetErrorLineNumber(parser);
106 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000108 /* There is no risk of overflowing this buffer, since
109 even for 64-bit integers, there is sufficient space. */
110 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000111 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000112 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000113 if ( err != NULL
114 && set_error_attr(err, "code", code)
115 && set_error_attr(err, "offset", column)
116 && set_error_attr(err, "lineno", lineno)) {
117 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000118 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000119 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000120 return NULL;
121}
122
Fred Drake71b63ff2002-06-28 22:29:01 +0000123static int
124have_handler(xmlparseobject *self, int type)
125{
126 PyObject *handler = self->handlers[type];
127 return handler != NULL;
128}
129
130static PyObject *
131get_handler_name(struct HandlerInfo *hinfo)
132{
133 PyObject *name = hinfo->nameobj;
134 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000135 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000136 hinfo->nameobj = name;
137 }
138 Py_XINCREF(name);
139 return name;
140}
141
Fred Drake85d835f2001-02-08 15:39:08 +0000142
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000143/* Convert a string of XML_Chars into a Unicode string.
144 Returns None if str is a null pointer. */
145
Fred Drake0582df92000-07-12 04:49:00 +0000146static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000147conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000148{
Fred Drake71b63ff2002-06-28 22:29:01 +0000149 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000150 and hence in UTF-8. */
151 /* UTF-8 from Expat, Unicode desired */
152 if (str == NULL) {
153 Py_INCREF(Py_None);
154 return Py_None;
155 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000156 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000157}
158
Fred Drake0582df92000-07-12 04:49:00 +0000159static PyObject *
160conv_string_len_to_unicode(const XML_Char *str, int len)
161{
Fred Drake71b63ff2002-06-28 22:29:01 +0000162 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000163 and hence in UTF-8. */
164 /* UTF-8 from Expat, Unicode desired */
165 if (str == NULL) {
166 Py_INCREF(Py_None);
167 return Py_None;
168 }
Fred Drake6f987622000-08-25 18:03:30 +0000169 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000170}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000172/* Callback routines */
173
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000174static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000175
Martin v. Löwis069dde22003-01-21 10:58:18 +0000176/* This handler is used when an error has been detected, in the hope
177 that actual parsing can be terminated early. This will only help
178 if an external entity reference is encountered. */
179static int
180error_external_entity_ref_handler(XML_Parser parser,
181 const XML_Char *context,
182 const XML_Char *base,
183 const XML_Char *systemId,
184 const XML_Char *publicId)
185{
186 return 0;
187}
188
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000189/* Dummy character data handler used when an error (exception) has
190 been detected, and the actual parsing can be terminated early.
191 This is needed since character data handler can't be safely removed
192 from within the character data handler, but can be replaced. It is
193 used only from the character data handler trampoline, and must be
194 used right after `flag_error()` is called. */
195static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000197{
198 /* Do nothing. */
199}
200
Fred Drake6f987622000-08-25 18:03:30 +0000201static void
202flag_error(xmlparseobject *self)
203{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000204 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000205 XML_SetExternalEntityRefHandler(self->itself,
206 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000207}
208
209static PyCodeObject*
210getcode(enum HandlerTypes slot, char* func_name, int lineno)
211{
Fred Drakebd6101c2001-02-14 18:29:45 +0000212 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000213 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000214 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000215 }
216 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000217}
218
Jeremy Hylton9263f572003-06-27 16:13:17 +0000219#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000220static int
221trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
222{
223 int result = 0;
224 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000225 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000226 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 tstate->tracing++;
228 result = tstate->c_profilefunc(tstate->c_profileobj,
229 f, code , val);
230 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
231 || (tstate->c_profilefunc != NULL));
232 tstate->tracing--;
233 if (result)
234 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000235 }
236 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 tstate->tracing++;
238 result = tstate->c_tracefunc(tstate->c_traceobj,
239 f, code , val);
240 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
241 || (tstate->c_profilefunc != NULL));
242 tstate->tracing--;
243 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000244 return result;
245}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000246
247static int
248trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
249{
250 PyObject *type, *value, *traceback, *arg;
251 int err;
252
253 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000255
256 PyErr_Fetch(&type, &value, &traceback);
257 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 value = Py_None;
259 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000260 }
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000261 arg = PyTuple_Pack(3, type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000262 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000263 PyErr_Restore(type, value, traceback);
264 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000265 }
266 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
267 Py_DECREF(arg);
268 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000270 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 Py_XDECREF(type);
272 Py_XDECREF(value);
273 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000274 }
275 return err;
276}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000277#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000278
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000279static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000280call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
281 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000282{
Fred Drakebd6101c2001-02-14 18:29:45 +0000283 PyThreadState *tstate = PyThreadState_GET();
284 PyFrameObject *f;
285 PyObject *res;
286
287 if (c == NULL)
288 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289
Jeremy Hylton9263f572003-06-27 16:13:17 +0000290 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000291 if (f == NULL)
292 return NULL;
293 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000294#ifdef FIX_TRACE
295 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000297 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000298#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000299 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000300 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000301 if (tstate->curexc_traceback == NULL)
302 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000303 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000304#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 if (trace_frame_exc(tstate, f) < 0) {
306 return NULL;
307 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000308 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000309 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
311 Py_XDECREF(res);
312 res = NULL;
313 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000314 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000315#else
316 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000317#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000318 tstate->frame = f->f_back;
319 Py_DECREF(f);
320 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000321}
322
Fred Drakeb91a36b2002-06-27 19:40:48 +0000323static PyObject*
324string_intern(xmlparseobject *self, const char* str)
325{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000326 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000327 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000328 /* result can be NULL if the unicode conversion failed. */
329 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000330 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000331 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000333 value = PyDict_GetItem(self->intern, result);
334 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000336 return result;
337 else
338 return NULL;
339 }
340 Py_INCREF(value);
341 Py_DECREF(result);
342 return value;
343}
344
Fred Drake2a3d7db2002-06-28 22:56:48 +0000345/* Return 0 on success, -1 on exception.
346 * flag_error() will be called before return if needed.
347 */
348static int
349call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
350{
351 PyObject *args;
352 PyObject *temp;
353
Georg Brandlc01537f2010-10-15 16:26:08 +0000354 if (!have_handler(self, CharacterData))
355 return -1;
356
Fred Drake2a3d7db2002-06-28 22:56:48 +0000357 args = PyTuple_New(1);
358 if (args == NULL)
359 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000360 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000361 if (temp == NULL) {
362 Py_DECREF(args);
363 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000364 XML_SetCharacterDataHandler(self->itself,
365 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000366 return -1;
367 }
368 PyTuple_SET_ITEM(args, 0, temp);
369 /* temp is now a borrowed reference; consider it unused. */
370 self->in_callback = 1;
371 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000372 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000373 /* temp is an owned reference again, or NULL */
374 self->in_callback = 0;
375 Py_DECREF(args);
376 if (temp == NULL) {
377 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000378 XML_SetCharacterDataHandler(self->itself,
379 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000380 return -1;
381 }
382 Py_DECREF(temp);
383 return 0;
384}
385
386static int
387flush_character_buffer(xmlparseobject *self)
388{
389 int rc;
390 if (self->buffer == NULL || self->buffer_used == 0)
391 return 0;
392 rc = call_character_handler(self, self->buffer, self->buffer_used);
393 self->buffer_used = 0;
394 return rc;
395}
396
397static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000399{
400 xmlparseobject *self = (xmlparseobject *) userData;
401 if (self->buffer == NULL)
402 call_character_handler(self, data, len);
403 else {
404 if ((self->buffer_used + len) > self->buffer_size) {
405 if (flush_character_buffer(self) < 0)
406 return;
407 /* handler might have changed; drop the rest on the floor
408 * if there isn't a handler anymore
409 */
410 if (!have_handler(self, CharacterData))
411 return;
412 }
413 if (len > self->buffer_size) {
414 call_character_handler(self, data, len);
415 self->buffer_used = 0;
416 }
417 else {
418 memcpy(self->buffer + self->buffer_used,
419 data, len * sizeof(XML_Char));
420 self->buffer_used += len;
421 }
422 }
423}
424
Fred Drake85d835f2001-02-08 15:39:08 +0000425static void
426my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000427 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000428{
429 xmlparseobject *self = (xmlparseobject *)userData;
430
Fred Drake71b63ff2002-06-28 22:29:01 +0000431 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000432 PyObject *container, *rv, *args;
433 int i, max;
434
Fred Drake2a3d7db2002-06-28 22:56:48 +0000435 if (flush_character_buffer(self) < 0)
436 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000437 /* Set max to the number of slots filled in atts[]; max/2 is
438 * the number of attributes we need to process.
439 */
440 if (self->specified_attributes) {
441 max = XML_GetSpecifiedAttributeCount(self->itself);
442 }
443 else {
444 max = 0;
445 while (atts[max] != NULL)
446 max += 2;
447 }
448 /* Build the container. */
449 if (self->ordered_attributes)
450 container = PyList_New(max);
451 else
452 container = PyDict_New();
453 if (container == NULL) {
454 flag_error(self);
455 return;
456 }
457 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000458 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000459 PyObject *v;
460 if (n == NULL) {
461 flag_error(self);
462 Py_DECREF(container);
463 return;
464 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000465 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000466 if (v == NULL) {
467 flag_error(self);
468 Py_DECREF(container);
469 Py_DECREF(n);
470 return;
471 }
472 if (self->ordered_attributes) {
473 PyList_SET_ITEM(container, i, n);
474 PyList_SET_ITEM(container, i+1, v);
475 }
476 else if (PyDict_SetItem(container, n, v)) {
477 flag_error(self);
478 Py_DECREF(n);
479 Py_DECREF(v);
480 return;
481 }
482 else {
483 Py_DECREF(n);
484 Py_DECREF(v);
485 }
486 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000487 args = string_intern(self, name);
488 if (args != NULL)
489 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000490 if (args == NULL) {
491 Py_DECREF(container);
492 return;
493 }
494 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000495 self->in_callback = 1;
496 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000497 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000498 self->in_callback = 0;
499 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000500 if (rv == NULL) {
501 flag_error(self);
502 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000503 }
Fred Drake85d835f2001-02-08 15:39:08 +0000504 Py_DECREF(rv);
505 }
506}
507
/* Generate the C trampoline my_<NAME>Handler for callback slot NAME.
 *
 *   RC            return type of the trampoline
 *   PARAMS        parenthesized parameter list
 *   INIT          declarations/statements run on entry
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue()
 *   CONVERSION    statements run on the callback result `rv`
 *   RETURN        expression returned on every path (may be empty)
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * When a callback is registered, the trampoline flushes buffered
 * character data, builds the arguments, calls the callback through
 * call_with_frame(), and calls flag_error() if building the arguments
 * or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (!have_handler(self, NAME)) { \
        return RETURN; \
    } \
    if (flush_character_buffer(self) < 0) { \
        return RETURN; \
    } \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
536
/* Trampoline returning void; the parser object is passed as userData
   and the callback result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000540
/* Trampoline returning int; the callback result is converted with
   PyLong_AsLong(), and 0 is returned when the callback is not run or
   fails. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000545
Fred Drake71b63ff2002-06-28 22:29:01 +0000546VOID_HANDLER(EndElement,
547 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000548 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000549
Fred Drake6f987622000-08-25 18:03:30 +0000550VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000551 (void *userData,
552 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000553 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000554 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000555
Fred Drake6f987622000-08-25 18:03:30 +0000556VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000557 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000558 const XML_Char *entityName,
559 const XML_Char *base,
560 const XML_Char *systemId,
561 const XML_Char *publicId,
562 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000563 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000564 string_intern(self, entityName), string_intern(self, base),
565 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000566 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000567
Fred Drake85d835f2001-02-08 15:39:08 +0000568VOID_HANDLER(EntityDecl,
569 (void *userData,
570 const XML_Char *entityName,
571 int is_parameter_entity,
572 const XML_Char *value,
573 int value_length,
574 const XML_Char *base,
575 const XML_Char *systemId,
576 const XML_Char *publicId,
577 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000578 ("NiNNNNN",
579 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000580 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000581 string_intern(self, base), string_intern(self, systemId),
582 string_intern(self, publicId),
583 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000584
585VOID_HANDLER(XmlDecl,
586 (void *userData,
587 const XML_Char *version,
588 const XML_Char *encoding,
589 int standalone),
590 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000591 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000592 standalone))
593
594static PyObject *
595conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000596 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000597{
598 PyObject *result = NULL;
599 PyObject *children = PyTuple_New(model->numchildren);
600 int i;
601
602 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000603 assert(model->numchildren < INT_MAX);
604 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000605 PyObject *child = conv_content_model(&model->children[i],
606 conv_string);
607 if (child == NULL) {
608 Py_XDECREF(children);
609 return NULL;
610 }
611 PyTuple_SET_ITEM(children, i, child);
612 }
613 result = Py_BuildValue("(iiO&N)",
614 model->type, model->quant,
615 conv_string,model->name, children);
616 }
617 return result;
618}
619
Fred Drake06dd8cf2003-02-02 03:54:17 +0000620static void
621my_ElementDeclHandler(void *userData,
622 const XML_Char *name,
623 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000624{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000625 xmlparseobject *self = (xmlparseobject *)userData;
626 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000627
Fred Drake06dd8cf2003-02-02 03:54:17 +0000628 if (have_handler(self, ElementDecl)) {
629 PyObject *rv = NULL;
630 PyObject *modelobj, *nameobj;
631
632 if (flush_character_buffer(self) < 0)
633 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000634 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000635 if (modelobj == NULL) {
636 flag_error(self);
637 goto finally;
638 }
639 nameobj = string_intern(self, name);
640 if (nameobj == NULL) {
641 Py_DECREF(modelobj);
642 flag_error(self);
643 goto finally;
644 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000645 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000646 if (args == NULL) {
647 Py_DECREF(modelobj);
648 flag_error(self);
649 goto finally;
650 }
651 self->in_callback = 1;
652 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000653 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000654 self->in_callback = 0;
655 if (rv == NULL) {
656 flag_error(self);
657 goto finally;
658 }
659 Py_DECREF(rv);
660 }
661 finally:
662 Py_XDECREF(args);
663 XML_FreeContentModel(self->itself, model);
664 return;
665}
Fred Drake85d835f2001-02-08 15:39:08 +0000666
667VOID_HANDLER(AttlistDecl,
668 (void *userData,
669 const XML_Char *elname,
670 const XML_Char *attname,
671 const XML_Char *att_type,
672 const XML_Char *dflt,
673 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000674 ("(NNO&O&i)",
675 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000676 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000677 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000678
Martin v. Löwisc847f402003-01-21 11:09:21 +0000679#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000680VOID_HANDLER(SkippedEntity,
681 (void *userData,
682 const XML_Char *entityName,
683 int is_parameter_entity),
684 ("Ni",
685 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000686#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000687
Fred Drake71b63ff2002-06-28 22:29:01 +0000688VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 (void *userData,
690 const XML_Char *notationName,
691 const XML_Char *base,
692 const XML_Char *systemId,
693 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000694 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000695 string_intern(self, notationName), string_intern(self, base),
696 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000697
Fred Drake6f987622000-08-25 18:03:30 +0000698VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 (void *userData,
700 const XML_Char *prefix,
701 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000702 ("(NN)",
703 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000704
Fred Drake6f987622000-08-25 18:03:30 +0000705VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 (void *userData,
707 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000708 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000709
Fred Drake6f987622000-08-25 18:03:30 +0000710VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000711 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000712 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000713
Fred Drake6f987622000-08-25 18:03:30 +0000714VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000715 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000717
Fred Drake6f987622000-08-25 18:03:30 +0000718VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000719 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000721
Fred Drake6f987622000-08-25 18:03:30 +0000722VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 (void *userData, const XML_Char *s, int len),
724 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000725
Fred Drake6f987622000-08-25 18:03:30 +0000726VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 (void *userData, const XML_Char *s, int len),
728 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000729
Fred Drake71b63ff2002-06-28 22:29:01 +0000730INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 (void *userData),
732 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000733
Fred Drake6f987622000-08-25 18:03:30 +0000734RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 (XML_Parser parser,
736 const XML_Char *context,
737 const XML_Char *base,
738 const XML_Char *systemId,
739 const XML_Char *publicId),
740 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000741 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000742 conv_string_to_unicode ,context, string_intern(self, base),
743 string_intern(self, systemId), string_intern(self, publicId)),
744 rc = PyLong_AsLong(rv);, rc,
745 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000746
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000747/* XXX UnknownEncodingHandler */
748
Fred Drake85d835f2001-02-08 15:39:08 +0000749VOID_HANDLER(StartDoctypeDecl,
750 (void *userData, const XML_Char *doctypeName,
751 const XML_Char *sysid, const XML_Char *pubid,
752 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000753 ("(NNNi)", string_intern(self, doctypeName),
754 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000755 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000756
757VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000758
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000759/* ---------------------------------------------------------------- */
760
Fred Drake71b63ff2002-06-28 22:29:01 +0000761static PyObject *
762get_parse_result(xmlparseobject *self, int rv)
763{
764 if (PyErr_Occurred()) {
765 return NULL;
766 }
767 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000768 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000769 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000770 if (flush_character_buffer(self) < 0) {
771 return NULL;
772 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000773 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000774}
775
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000776PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000777"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000778Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200780#define MAX_CHUNK_SIZE (1 << 20)
781
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000782static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000783xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000784{
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200785 PyObject *data;
Fred Drake0582df92000-07-12 04:49:00 +0000786 int isFinal = 0;
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200787 const char *s;
788 Py_ssize_t slen;
789 Py_buffer view;
790 int rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000791
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200792 if (!PyArg_ParseTuple(args, "O|i:Parse", &data, &isFinal))
Fred Drake0582df92000-07-12 04:49:00 +0000793 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000794
Serhiy Storchaka43536e92013-02-04 18:26:15 +0200795 if (PyUnicode_Check(data)) {
796 PyObject *bytes;
797 bytes = PyUnicode_AsUTF8String(data);
798 if (bytes == NULL)
799 return NULL;
800 view.buf = NULL;
801 s = PyBytes_AS_STRING(bytes);
802 slen = PyBytes_GET_SIZE(bytes);
803 /* Explicitly set UTF-8 encoding. Return code ignored. */
804 (void)XML_SetEncoding(self->itself, "utf-8");
805 }
806 else {
807 if (PyObject_GetBuffer(data, &view, PyBUF_SIMPLE) < 0)
808 return NULL;
809 s = view.buf;
810 slen = view.len;
811 }
812
813 while (slen > MAX_CHUNK_SIZE) {
814 rc = XML_Parse(self->itself, s, MAX_CHUNK_SIZE, 0);
815 if (!rc)
816 goto done;
817 s += MAX_CHUNK_SIZE;
818 slen -= MAX_CHUNK_SIZE;
819 }
820 rc = XML_Parse(self->itself, s, slen, isFinal);
821
822done:
823 if (view.buf != NULL)
824 PyBuffer_Release(&view);
825 return get_parse_result(self, rc);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000826}
827
Fred Drakeca1f4262000-09-21 20:10:23 +0000828/* File reading copied from cPickle */
829
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000830#define BUF_SIZE 2048
831
Fred Drake0582df92000-07-12 04:49:00 +0000832static int
833readinst(char *buf, int buf_size, PyObject *meth)
834{
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000835 PyObject *str;
836 Py_ssize_t len;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000837 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000838
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000839 str = PyObject_CallFunction(meth, "n", buf_size);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000840 if (str == NULL)
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000841 goto error;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000842
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 if (PyBytes_Check(str))
844 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000845 else if (PyByteArray_Check(str))
846 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000847 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000848 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000849 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000850 Py_TYPE(str)->tp_name);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000851 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000852 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000853 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000854 if (len > buf_size) {
855 PyErr_Format(PyExc_ValueError,
856 "read() returned too much data: "
Victor Stinner9d6f9362011-01-04 22:00:04 +0000857 "%i bytes requested, %zd returned",
Fred Drake0582df92000-07-12 04:49:00 +0000858 buf_size, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000859 goto error;
Fred Drake0582df92000-07-12 04:49:00 +0000860 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000861 memcpy(buf, ptr, len);
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000862 Py_DECREF(str);
863 /* len <= buf_size <= INT_MAX */
Victor Stinner0fcab4a2011-01-04 12:59:15 +0000864 return (int)len;
Victor Stinner95f1dfc2011-01-10 23:00:36 +0000865
866error:
867 Py_XDECREF(str);
868 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000869}
870
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000871PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000872"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000873Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000874
875static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000876xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000877{
Fred Drake0582df92000-07-12 04:49:00 +0000878 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000879 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000880
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000881
882 readmethod = PyObject_GetAttrString(f, "read");
883 if (readmethod == NULL) {
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000884 PyErr_SetString(PyExc_TypeError,
885 "argument must have 'read' attribute");
886 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000887 }
888 for (;;) {
889 int bytes_read;
890 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000891 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000892 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000893 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000894 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000895
Benjamin Peterson4e7f2852010-08-08 16:54:58 +0000896 bytes_read = readinst(buf, BUF_SIZE, readmethod);
897 if (bytes_read < 0) {
898 Py_DECREF(readmethod);
899 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000900 }
901 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000902 if (PyErr_Occurred()) {
903 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000904 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000905 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000906
Fred Drake0582df92000-07-12 04:49:00 +0000907 if (!rv || bytes_read == 0)
908 break;
909 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000910 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000911 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000912}
913
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000914PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000915"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000916Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000917
918static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000919xmlparse_SetBase(xmlparseobject *self, PyObject *args)
920{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000921 char *base;
922
Fred Drake0582df92000-07-12 04:49:00 +0000923 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000924 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000925 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000927 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000928 Py_INCREF(Py_None);
929 return Py_None;
930}
931
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000932PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000933"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000934Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000935
936static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000937xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000938{
Fred Drake0582df92000-07-12 04:49:00 +0000939 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000940}
941
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000942PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000943"GetInputContext() -> string\n\
944Return the untranslated text of the input that caused the current event.\n\
945If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000946for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000947
948static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000949xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000950{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000951 if (self->in_callback) {
952 int offset, size;
953 const char *buffer
954 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000955
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000956 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000957 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000958 size - offset);
959 else
960 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000961 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000962 else
963 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000964}
Fred Drakebd6101c2001-02-14 18:29:45 +0000965
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000966PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000967"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000968Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000969information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000970
971static PyObject *
972xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
973{
974 char *context;
975 char *encoding = NULL;
976 xmlparseobject *new_parser;
977 int i;
978
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000979 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000980 &context, &encoding)) {
981 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000982 }
983
Martin v. Löwis894258c2001-09-23 10:20:10 +0000984 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake85d835f2001-02-08 15:39:08 +0000985 if (new_parser == NULL)
986 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000987 new_parser->buffer_size = self->buffer_size;
988 new_parser->buffer_used = 0;
Victor Stinnerb4ba9862010-09-10 22:25:19 +0000989 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000990 new_parser->ordered_attributes = self->ordered_attributes;
991 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +0000992 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +0000993 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000994 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000996 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000997 new_parser->intern = self->intern;
998 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +0000999 PyObject_GC_Track(new_parser);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001000
Victor Stinnerb4ba9862010-09-10 22:25:19 +00001001 if (self->buffer != NULL) {
1002 new_parser->buffer = malloc(new_parser->buffer_size);
1003 if (new_parser->buffer == NULL) {
1004 Py_DECREF(new_parser);
1005 return PyErr_NoMemory();
1006 }
1007 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001008 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001009 Py_DECREF(new_parser);
1010 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001011 }
1012
1013 XML_SetUserData(new_parser->itself, (void *)new_parser);
1014
1015 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001016 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001017 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001018
Fred Drake2a3d7db2002-06-28 22:56:48 +00001019 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001020 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001021 Py_DECREF(new_parser);
1022 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001023 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001024 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001025
1026 /* then copy handlers from self */
1027 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001028 PyObject *handler = self->handlers[i];
1029 if (handler != NULL) {
1030 Py_INCREF(handler);
1031 new_parser->handlers[i] = handler;
1032 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001033 handler_info[i].handler);
1034 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001035 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001036 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001037}
1038
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001039PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001040"SetParamEntityParsing(flag) -> success\n\
1041Controls parsing of parameter entities (including the external DTD\n\
1042subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1043XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1044XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001045was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001046
1047static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001048xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001049{
Fred Drake85d835f2001-02-08 15:39:08 +00001050 int flag;
1051 if (!PyArg_ParseTuple(args, "i", &flag))
1052 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001053 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001054 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001055}
1056
Martin v. Löwisc847f402003-01-21 11:09:21 +00001057
1058#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001059PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
1060"UseForeignDTD([flag])\n\
1061Allows the application to provide an artificial external subset if one is\n\
1062not specified as part of the document instance. This readily allows the\n\
1063use of a 'default' document type controlled by the application, while still\n\
1064getting the advantage of providing document type information to the parser.\n\
1065'flag' defaults to True if not provided.");
1066
1067static PyObject *
1068xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
1069{
1070 PyObject *flagobj = NULL;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001071 int flag = 1;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001072 enum XML_Error rc;
Christian Heimese26d3af2012-09-24 13:17:08 +02001073 if (!PyArg_ParseTuple(args, "|O:UseForeignDTD", &flagobj))
Martin v. Löwis069dde22003-01-21 10:58:18 +00001074 return NULL;
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001075 if (flagobj != NULL) {
1076 flag = PyObject_IsTrue(flagobj);
1077 if (flag < 0)
1078 return NULL;
1079 }
1080 rc = XML_UseForeignDTD(self->itself, flag ? XML_TRUE : XML_FALSE);
Martin v. Löwis069dde22003-01-21 10:58:18 +00001081 if (rc != XML_ERROR_NONE) {
1082 return set_error(self, rc);
1083 }
1084 Py_INCREF(Py_None);
1085 return Py_None;
1086}
Martin v. Löwisc847f402003-01-21 11:09:21 +00001087#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001088
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001089static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1090
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001091static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 {"Parse", (PyCFunction)xmlparse_Parse,
1093 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001094 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001096 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001098 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001100 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001102 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001104 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001106#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001107 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001109#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001110 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001112};
1113
1114/* ---------- */
1115
1116
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001117
Fred Drake71b63ff2002-06-28 22:29:01 +00001118/* pyexpat international encoding support.
1119 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001120*/
1121
/* Byte values 0..255 in order, followed by a terminating 0. */
static char template_buffer[257];

/* Fill template_buffer so that entry i holds byte value i, then
   NUL-terminate it. */
static void
init_template_buffer(void)
{
    char *slot = template_buffer;
    int value;

    for (value = 0; value < 256; ++value)
        *slot++ = value;
    *slot = 0;
}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001133
Fred Drake71b63ff2002-06-28 22:29:01 +00001134static int
1135PyUnknownEncodingHandler(void *encodingHandlerData,
1136 const XML_Char *name,
1137 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001138{
Fred Drakebb66a202001-03-01 20:48:17 +00001139 PyUnicodeObject *_u_string = NULL;
1140 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001141 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001142
Fred Drakebb66a202001-03-01 20:48:17 +00001143 /* Yes, supports only 8bit encodings */
1144 _u_string = (PyUnicodeObject *)
1145 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001146
Fred Drakebb66a202001-03-01 20:48:17 +00001147 if (_u_string == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001149
Fred Drakebb66a202001-03-01 20:48:17 +00001150 for (i = 0; i < 256; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 /* Stupid to access directly, but fast */
1152 Py_UNICODE c = _u_string->str[i];
1153 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1154 info->map[i] = -1;
1155 else
1156 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001157 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001158 info->data = NULL;
1159 info->convert = NULL;
1160 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001161 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001162 Py_DECREF(_u_string);
1163 return result;
1164}
1165
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001166
1167static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001168newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001169{
1170 int i;
1171 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001172
Martin v. Löwis894258c2001-09-23 10:20:10 +00001173 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
Fred Drake0582df92000-07-12 04:49:00 +00001174 if (self == NULL)
1175 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001176
Fred Drake2a3d7db2002-06-28 22:56:48 +00001177 self->buffer = NULL;
1178 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1179 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001180 self->ordered_attributes = 0;
1181 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001182 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001183 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001184 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001185 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001186 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1187 }
Fred Drake85d835f2001-02-08 15:39:08 +00001188 else {
Fred Drake0582df92000-07-12 04:49:00 +00001189 self->itself = XML_ParserCreate(encoding);
1190 }
Gregory P. Smith25227712012-03-14 18:10:37 -07001191#if ((XML_MAJOR_VERSION >= 2) && (XML_MINOR_VERSION >= 1)) || defined(XML_HAS_SET_HASH_SALT)
1192 /* This feature was added upstream in libexpat 2.1.0. Our expat copy
1193 * has a backport of this feature where we also define XML_HAS_SET_HASH_SALT
1194 * to indicate that we can still use it. */
Gregory P. Smith8e91cf62012-03-14 14:26:55 -07001195 XML_SetHashSalt(self->itself,
1196 (unsigned long)_Py_HashSecret.prefix);
Gregory P. Smith25227712012-03-14 18:10:37 -07001197#endif
Fred Drakeb91a36b2002-06-27 19:40:48 +00001198 self->intern = intern;
1199 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001200 PyObject_GC_Track(self);
Fred Drake0582df92000-07-12 04:49:00 +00001201 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001202 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001203 "XML_ParserCreate failed");
1204 Py_DECREF(self);
1205 return NULL;
1206 }
1207 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001208 XML_SetUnknownEncodingHandler(self->itself,
1209 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001210
Fred Drake2a3d7db2002-06-28 22:56:48 +00001211 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001212 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001213
Fred Drake7c75bf22002-07-01 14:02:31 +00001214 self->handlers = malloc(sizeof(PyObject *) * i);
1215 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001216 Py_DECREF(self);
1217 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001218 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001219 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001220
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001221 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001222}
1223
1224
1225static void
Fred Drake0582df92000-07-12 04:49:00 +00001226xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001227{
Fred Drake0582df92000-07-12 04:49:00 +00001228 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001229 PyObject_GC_UnTrack(self);
Fred Drake85d835f2001-02-08 15:39:08 +00001230 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001231 XML_ParserFree(self->itself);
1232 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001233
Fred Drake85d835f2001-02-08 15:39:08 +00001234 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001235 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001236 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001237 temp = self->handlers[i];
1238 self->handlers[i] = NULL;
1239 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001240 }
1241 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001242 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001243 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001244 if (self->buffer != NULL) {
1245 free(self->buffer);
1246 self->buffer = NULL;
1247 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001248 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001249 PyObject_GC_Del(self);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001250}
1251
Fred Drake0582df92000-07-12 04:49:00 +00001252static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001253handlername2int(PyObject *name)
Fred Drake0582df92000-07-12 04:49:00 +00001254{
1255 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001256 for (i = 0; handler_info[i].name != NULL; i++) {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001257 if (PyUnicode_CompareWithASCIIString(
1258 name, handler_info[i].name) == 0) {
Fred Drake0582df92000-07-12 04:49:00 +00001259 return i;
1260 }
1261 }
1262 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001263}
1264
1265static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001266get_pybool(int istrue)
1267{
1268 PyObject *result = istrue ? Py_True : Py_False;
1269 Py_INCREF(result);
1270 return result;
1271}
1272
1273static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001274xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001275{
Alexander Belopolskye239d232010-12-08 23:31:48 +00001276 Py_UNICODE *name;
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001277 int handlernum = -1;
1278
Alexander Belopolskye239d232010-12-08 23:31:48 +00001279 if (!PyUnicode_Check(nameobj))
1280 goto generic;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281
Alexander Belopolskye239d232010-12-08 23:31:48 +00001282 handlernum = handlername2int(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001283
1284 if (handlernum != -1) {
1285 PyObject *result = self->handlers[handlernum];
1286 if (result == NULL)
1287 result = Py_None;
1288 Py_INCREF(result);
1289 return result;
1290 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001291
1292 name = PyUnicode_AS_UNICODE(nameobj);
Fred Drake71b63ff2002-06-28 22:29:01 +00001293 if (name[0] == 'E') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001294 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001295 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001296 XML_GetErrorCode(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001297 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001298 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001299 XML_GetErrorLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001300 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001301 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001302 XML_GetErrorColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001303 if (PyUnicode_CompareWithASCIIString(nameobj, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001304 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001305 XML_GetErrorByteIndex(self->itself));
1306 }
Dave Cole3203efb2004-08-26 00:37:31 +00001307 if (name[0] == 'C') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001308 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001309 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001310 XML_GetCurrentLineNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001311 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001312 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001313 XML_GetCurrentColumnNumber(self->itself));
Alexander Belopolskye239d232010-12-08 23:31:48 +00001314 if (PyUnicode_CompareWithASCIIString(nameobj, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001315 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001316 XML_GetCurrentByteIndex(self->itself));
1317 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001318 if (name[0] == 'b') {
Alexander Belopolskye239d232010-12-08 23:31:48 +00001319 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001320 return PyLong_FromLong((long) self->buffer_size);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001321 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_text") == 0)
Fred Drake2a3d7db2002-06-28 22:56:48 +00001322 return get_pybool(self->buffer != NULL);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001323 if (PyUnicode_CompareWithASCIIString(nameobj, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001324 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001325 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001326 if (PyUnicode_CompareWithASCIIString(nameobj, "namespace_prefixes") == 0)
Martin v. Löwis069dde22003-01-21 10:58:18 +00001327 return get_pybool(self->ns_prefixes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001328 if (PyUnicode_CompareWithASCIIString(nameobj, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001329 return get_pybool(self->ordered_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001330 if (PyUnicode_CompareWithASCIIString(nameobj, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001331 return get_pybool((long) self->specified_attributes);
Alexander Belopolskye239d232010-12-08 23:31:48 +00001332 if (PyUnicode_CompareWithASCIIString(nameobj, "intern") == 0) {
Fred Drakeb91a36b2002-06-27 19:40:48 +00001333 if (self->intern == NULL) {
1334 Py_INCREF(Py_None);
1335 return Py_None;
1336 }
1337 else {
1338 Py_INCREF(self->intern);
1339 return self->intern;
1340 }
1341 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001342 generic:
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001343 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001344}
1345
1346static PyObject *
1347xmlparse_dir(PyObject *self, PyObject* noargs)
1348{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349#define APPEND(list, str) \
1350 do { \
1351 PyObject *o = PyUnicode_FromString(str); \
1352 if (o != NULL) \
1353 PyList_Append(list, o); \
1354 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001355 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001356
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001357 int i;
1358 PyObject *rc = PyList_New(0);
1359 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001361 for (i = 0; handler_info[i].name != NULL; i++) {
1362 PyObject *o = get_handler_name(&handler_info[i]);
1363 if (o != NULL)
1364 PyList_Append(rc, o);
1365 Py_XDECREF(o);
1366 }
1367 APPEND(rc, "ErrorCode");
1368 APPEND(rc, "ErrorLineNumber");
1369 APPEND(rc, "ErrorColumnNumber");
1370 APPEND(rc, "ErrorByteIndex");
1371 APPEND(rc, "CurrentLineNumber");
1372 APPEND(rc, "CurrentColumnNumber");
1373 APPEND(rc, "CurrentByteIndex");
1374 APPEND(rc, "buffer_size");
1375 APPEND(rc, "buffer_text");
1376 APPEND(rc, "buffer_used");
1377 APPEND(rc, "namespace_prefixes");
1378 APPEND(rc, "ordered_attributes");
1379 APPEND(rc, "specified_attributes");
1380 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001381
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001382#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001383
1384 if (PyErr_Occurred()) {
1385 Py_DECREF(rc);
1386 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001387 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001388
1389 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001390}
1391
Fred Drake6f987622000-08-25 18:03:30 +00001392static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001393sethandler(xmlparseobject *self, PyObject *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001394{
1395 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001396 if (handlernum >= 0) {
1397 xmlhandler c_handler = NULL;
1398 PyObject *temp = self->handlers[handlernum];
1399
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001400 if (v == Py_None) {
1401 /* If this is the character data handler, and a character
1402 data handler is already active, we need to be more
1403 careful. What we can safely do is replace the existing
1404 character data handler callback function with a no-op
1405 function that will refuse to call Python. The downside
1406 is that this doesn't completely remove the character
1407 data handler from the C layer if there's any callback
1408 active, so Expat does a little more work than it
1409 otherwise would, but that's really an odd case. A more
1410 elaborate system of handlers and state could remove the
1411 C handler more effectively. */
1412 if (handlernum == CharacterData && self->in_callback)
1413 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001414 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001415 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001416 else if (v != NULL) {
1417 Py_INCREF(v);
1418 c_handler = handler_info[handlernum].handler;
1419 }
Fred Drake0582df92000-07-12 04:49:00 +00001420 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001421 Py_XDECREF(temp);
1422 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001423 return 1;
1424 }
1425 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001426}
1427
1428static int
Alexander Belopolskye239d232010-12-08 23:31:48 +00001429xmlparse_setattro(xmlparseobject *self, PyObject *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001430{
Fred Drake6f987622000-08-25 18:03:30 +00001431 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001432 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001433 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1434 return -1;
1435 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001436 assert(PyUnicode_Check(name));
1437 if (PyUnicode_CompareWithASCIIString(name, "buffer_text") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001438 int b = PyObject_IsTrue(v);
1439 if (b < 0)
1440 return -1;
1441 if (b) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001442 if (self->buffer == NULL) {
1443 self->buffer = malloc(self->buffer_size);
1444 if (self->buffer == NULL) {
1445 PyErr_NoMemory();
1446 return -1;
1447 }
1448 self->buffer_used = 0;
1449 }
1450 }
1451 else if (self->buffer != NULL) {
1452 if (flush_character_buffer(self) < 0)
1453 return -1;
1454 free(self->buffer);
1455 self->buffer = NULL;
1456 }
1457 return 0;
1458 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001459 if (PyUnicode_CompareWithASCIIString(name, "namespace_prefixes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001460 int b = PyObject_IsTrue(v);
1461 if (b < 0)
1462 return -1;
1463 self->ns_prefixes = b;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001464 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1465 return 0;
1466 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001467 if (PyUnicode_CompareWithASCIIString(name, "ordered_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001468 int b = PyObject_IsTrue(v);
1469 if (b < 0)
1470 return -1;
1471 self->ordered_attributes = b;
Fred Drake85d835f2001-02-08 15:39:08 +00001472 return 0;
1473 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001474 if (PyUnicode_CompareWithASCIIString(name, "specified_attributes") == 0) {
Antoine Pitrou6f430e42012-08-15 23:18:25 +02001475 int b = PyObject_IsTrue(v);
1476 if (b < 0)
1477 return -1;
1478 self->specified_attributes = b;
Fred Drake6f987622000-08-25 18:03:30 +00001479 return 0;
1480 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001481
Alexander Belopolskye239d232010-12-08 23:31:48 +00001482 if (PyUnicode_CompareWithASCIIString(name, "buffer_size") == 0) {
Christian Heimes2380ac72008-01-09 00:17:24 +00001483 long new_buffer_size;
1484 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1486 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001487 }
1488
1489 new_buffer_size=PyLong_AS_LONG(v);
1490 /* trivial case -- no change */
1491 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001493 }
1494
1495 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1497 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001498 }
1499
1500 /* check maximum */
1501 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001502 char errmsg[100];
1503 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1504 PyErr_SetString(PyExc_ValueError, errmsg);
1505 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001506 }
1507
1508 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001509 /* there is already a buffer */
1510 if (self->buffer_used != 0) {
1511 flush_character_buffer(self);
1512 }
1513 /* free existing buffer */
1514 free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001515 }
1516 self->buffer = malloc(new_buffer_size);
1517 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 PyErr_NoMemory();
1519 return -1;
1520 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001521 self->buffer_size = new_buffer_size;
1522 return 0;
1523 }
1524
Alexander Belopolskye239d232010-12-08 23:31:48 +00001525 if (PyUnicode_CompareWithASCIIString(name, "CharacterDataHandler") == 0) {
Fred Drake2a3d7db2002-06-28 22:56:48 +00001526 /* If we're changing the character data handler, flush all
1527 * cached data with the old handler. Not sure there's a
1528 * "right" thing to do, though, but this probably won't
1529 * happen.
1530 */
1531 if (flush_character_buffer(self) < 0)
1532 return -1;
1533 }
Fred Drake6f987622000-08-25 18:03:30 +00001534 if (sethandler(self, name, v)) {
1535 return 0;
1536 }
Alexander Belopolskye239d232010-12-08 23:31:48 +00001537 PyErr_SetObject(PyExc_AttributeError, name);
Fred Drake6f987622000-08-25 18:03:30 +00001538 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001539}
1540
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001541static int
1542xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1543{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001544 int i;
1545 for (i = 0; handler_info[i].name != NULL; i++)
1546 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001547 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001548}
1549
1550static int
1551xmlparse_clear(xmlparseobject *op)
1552{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001553 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001554 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001555 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001556}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001557
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001558PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001559
1560static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 PyVarObject_HEAD_INIT(NULL, 0)
1562 "pyexpat.xmlparser", /*tp_name*/
Antoine Pitrou23683ef2011-01-04 00:00:31 +00001563 sizeof(xmlparseobject), /*tp_basicsize*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 0, /*tp_itemsize*/
1565 /* methods */
1566 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1567 (printfunc)0, /*tp_print*/
1568 0, /*tp_getattr*/
Alexander Belopolskye239d232010-12-08 23:31:48 +00001569 0, /*tp_setattr*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001570 0, /*tp_reserved*/
1571 (reprfunc)0, /*tp_repr*/
1572 0, /*tp_as_number*/
1573 0, /*tp_as_sequence*/
1574 0, /*tp_as_mapping*/
1575 (hashfunc)0, /*tp_hash*/
1576 (ternaryfunc)0, /*tp_call*/
1577 (reprfunc)0, /*tp_str*/
1578 (getattrofunc)xmlparse_getattro, /* tp_getattro */
Alexander Belopolskye239d232010-12-08 23:31:48 +00001579 (setattrofunc)xmlparse_setattro, /* tp_setattro */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 0, /* tp_as_buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1583 (traverseproc)xmlparse_traverse, /* tp_traverse */
1584 (inquiry)xmlparse_clear, /* tp_clear */
1585 0, /* tp_richcompare */
1586 0, /* tp_weaklistoffset */
1587 0, /* tp_iter */
1588 0, /* tp_iternext */
1589 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001590};
1591
1592/* End of code for xmlparser objects */
1593/* -------------------------------------------------------- */
1594
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001596"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001597Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001598
1599static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001600pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1601{
Fred Drakecde79132001-04-25 16:01:30 +00001602 char *encoding = NULL;
1603 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001604 PyObject *intern = NULL;
1605 PyObject *result;
1606 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001607 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001608 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001609
Fred Drakeb91a36b2002-06-27 19:40:48 +00001610 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1611 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001612 return NULL;
1613 if (namespace_separator != NULL
1614 && strlen(namespace_separator) > 1) {
1615 PyErr_SetString(PyExc_ValueError,
1616 "namespace_separator must be at most one"
1617 " character, omitted, or None");
1618 return NULL;
1619 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001620 /* Explicitly passing None means no interning is desired.
1621 Not passing anything means that a new dictionary is used. */
1622 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001624 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 intern = PyDict_New();
1626 if (!intern)
1627 return NULL;
1628 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001629 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001630 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1632 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001633 }
1634
1635 result = newxmlparseobject(encoding, namespace_separator, intern);
1636 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001638 }
1639 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001640}
1641
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001642PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001643"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001644Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001645
1646static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001647pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001648{
Fred Drake0582df92000-07-12 04:49:00 +00001649 long code = 0;
1650
1651 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1652 return NULL;
1653 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001654}
1655
1656/* List of methods defined in the module */
1657
1658static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001660 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1662 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001665};
1666
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001667/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001668
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001669PyDoc_STRVAR(pyexpat_module_documentation,
1670"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001671
Fred Drakecde79132001-04-25 16:01:30 +00001672/* Initialization function for the module */
1673
1674#ifndef MODULE_NAME
1675#define MODULE_NAME "pyexpat"
1676#endif
1677
1678#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001679#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001680#endif
1681
Martin v. Löwis069dde22003-01-21 10:58:18 +00001682#ifndef PyMODINIT_FUNC
1683# ifdef MS_WINDOWS
1684# define PyMODINIT_FUNC __declspec(dllexport) void
1685# else
1686# define PyMODINIT_FUNC void
1687# endif
1688#endif
1689
Mark Hammond8235ea12002-07-19 06:55:41 +00001690PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001691
Martin v. Löwis1a214512008-06-11 05:26:20 +00001692static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001693 PyModuleDef_HEAD_INIT,
1694 MODULE_NAME,
1695 pyexpat_module_documentation,
1696 -1,
1697 pyexpat_methods,
1698 NULL,
1699 NULL,
1700 NULL,
1701 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001702};
1703
Martin v. Löwis069dde22003-01-21 10:58:18 +00001704PyMODINIT_FUNC
1705MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001706{
1707 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001708 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001709 PyObject *errors_module;
1710 PyObject *modelmod_name;
1711 PyObject *model_module;
Benjamin Petersona22c98d2011-05-31 18:59:49 -05001712 PyObject *version;
Fred Drake0582df92000-07-12 04:49:00 +00001713 PyObject *sys_modules;
Georg Brandlb4dac712010-10-15 14:46:48 +00001714 PyObject *tmpnum, *tmpstr;
1715 PyObject *codes_dict;
1716 PyObject *rev_codes_dict;
1717 int res;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001718 static struct PyExpat_CAPI capi;
Georg Brandlb4dac712010-10-15 14:46:48 +00001719 PyObject *capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001720
Fred Drake6f987622000-08-25 18:03:30 +00001721 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001722 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001723 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001724 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001725 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001726
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001727 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001729
Fred Drake0582df92000-07-12 04:49:00 +00001730 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001731 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001732 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001734
Fred Drake0582df92000-07-12 04:49:00 +00001735 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001736 if (ErrorObject == NULL) {
1737 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001738 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001739 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001740 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001741 }
1742 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001743 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001744 Py_INCREF(ErrorObject);
1745 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001746 Py_INCREF(&Xmlparsetype);
1747 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001748
Benjamin Petersona22c98d2011-05-31 18:59:49 -05001749 version = PyUnicode_FromString(PY_VERSION);
1750 if (!version)
Benjamin Peterson52e61442011-05-31 21:38:15 -05001751 return NULL;
Benjamin Petersona22c98d2011-05-31 18:59:49 -05001752 PyModule_AddObject(m, "__version__", version);
Fred Drake738293d2000-12-21 17:25:07 +00001753 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1754 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001755 {
1756 XML_Expat_Version info = XML_ExpatVersionInfo();
1757 PyModule_AddObject(m, "version_info",
1758 Py_BuildValue("(iii)", info.major,
1759 info.minor, info.micro));
1760 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001761 init_template_buffer();
Fred Drake0582df92000-07-12 04:49:00 +00001762 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001763 compiled, this should check and set native_encoding
1764 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001765 */
Fred Drake93adb692000-09-23 04:55:48 +00001766 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001767
Fred Drake85d835f2001-02-08 15:39:08 +00001768 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001769 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001770 errors_module = PyDict_GetItem(d, errmod_name);
1771 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001772 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001773 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001774 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001775 /* gives away the reference to errors_module */
1776 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001777 }
1778 }
Fred Drake6f987622000-08-25 18:03:30 +00001779 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001780 model_module = PyDict_GetItem(d, modelmod_name);
1781 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001782 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001783 if (model_module != NULL) {
1784 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1785 /* gives away the reference to model_module */
1786 PyModule_AddObject(m, "model", model_module);
1787 }
1788 }
1789 Py_DECREF(modelmod_name);
1790 if (errors_module == NULL || model_module == NULL)
1791 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001792 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793
Martin v. Löwisc847f402003-01-21 11:09:21 +00001794#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001795 {
1796 const XML_Feature *features = XML_GetFeatureList();
1797 PyObject *list = PyList_New(0);
1798 if (list == NULL)
1799 /* just ignore it */
1800 PyErr_Clear();
1801 else {
1802 int i = 0;
1803 for (; features[i].feature != XML_FEATURE_END; ++i) {
1804 int ok;
1805 PyObject *item = Py_BuildValue("si", features[i].name,
1806 features[i].value);
1807 if (item == NULL) {
1808 Py_DECREF(list);
1809 list = NULL;
1810 break;
1811 }
1812 ok = PyList_Append(list, item);
1813 Py_DECREF(item);
1814 if (ok < 0) {
1815 PyErr_Clear();
1816 break;
1817 }
1818 }
1819 if (list != NULL)
1820 PyModule_AddObject(m, "features", list);
1821 }
1822 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001823#endif
Fred Drake6f987622000-08-25 18:03:30 +00001824
Georg Brandlb4dac712010-10-15 14:46:48 +00001825 codes_dict = PyDict_New();
1826 rev_codes_dict = PyDict_New();
1827 if (codes_dict == NULL || rev_codes_dict == NULL) {
1828 Py_XDECREF(codes_dict);
1829 Py_XDECREF(rev_codes_dict);
1830 return NULL;
1831 }
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001832
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001833#define MYCONST(name) \
Georg Brandlb4dac712010-10-15 14:46:48 +00001834 if (PyModule_AddStringConstant(errors_module, #name, \
1835 (char *)XML_ErrorString(name)) < 0) \
1836 return NULL; \
1837 tmpnum = PyLong_FromLong(name); \
1838 if (tmpnum == NULL) return NULL; \
1839 res = PyDict_SetItemString(codes_dict, \
1840 XML_ErrorString(name), tmpnum); \
1841 if (res < 0) return NULL; \
1842 tmpstr = PyUnicode_FromString(XML_ErrorString(name)); \
1843 if (tmpstr == NULL) return NULL; \
1844 res = PyDict_SetItem(rev_codes_dict, tmpnum, tmpstr); \
1845 Py_DECREF(tmpstr); \
1846 Py_DECREF(tmpnum); \
1847 if (res < 0) return NULL; \
Fred Drake7bd9f412000-07-04 23:51:31 +00001848
Fred Drake0582df92000-07-12 04:49:00 +00001849 MYCONST(XML_ERROR_NO_MEMORY);
1850 MYCONST(XML_ERROR_SYNTAX);
1851 MYCONST(XML_ERROR_NO_ELEMENTS);
1852 MYCONST(XML_ERROR_INVALID_TOKEN);
1853 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1854 MYCONST(XML_ERROR_PARTIAL_CHAR);
1855 MYCONST(XML_ERROR_TAG_MISMATCH);
1856 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1857 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1858 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1859 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1860 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1861 MYCONST(XML_ERROR_ASYNC_ENTITY);
1862 MYCONST(XML_ERROR_BAD_CHAR_REF);
1863 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1864 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1865 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1866 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1867 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001868 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1869 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1870 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001871 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1872 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1873 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1874 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1875 /* Added in Expat 1.95.7. */
1876 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1877 /* Added in Expat 1.95.8. */
1878 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1879 MYCONST(XML_ERROR_INCOMPLETE_PE);
1880 MYCONST(XML_ERROR_XML_DECL);
1881 MYCONST(XML_ERROR_TEXT_DECL);
1882 MYCONST(XML_ERROR_PUBLICID);
1883 MYCONST(XML_ERROR_SUSPENDED);
1884 MYCONST(XML_ERROR_NOT_SUSPENDED);
1885 MYCONST(XML_ERROR_ABORTED);
1886 MYCONST(XML_ERROR_FINISHED);
1887 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001888
Georg Brandlb4dac712010-10-15 14:46:48 +00001889 if (PyModule_AddStringConstant(errors_module, "__doc__",
1890 "Constants used to describe "
1891 "error conditions.") < 0)
1892 return NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001893
Georg Brandlb4dac712010-10-15 14:46:48 +00001894 if (PyModule_AddObject(errors_module, "codes", codes_dict) < 0)
1895 return NULL;
1896 if (PyModule_AddObject(errors_module, "messages", rev_codes_dict) < 0)
1897 return NULL;
Victor Stinner0fcab4a2011-01-04 12:59:15 +00001898
Fred Drake93adb692000-09-23 04:55:48 +00001899#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001900
Fred Drake85d835f2001-02-08 15:39:08 +00001901#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001902 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1903 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1904 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001905#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001906
Fred Drake85d835f2001-02-08 15:39:08 +00001907#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1908 PyModule_AddStringConstant(model_module, "__doc__",
1909 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001910
Fred Drake85d835f2001-02-08 15:39:08 +00001911 MYCONST(XML_CTYPE_EMPTY);
1912 MYCONST(XML_CTYPE_ANY);
1913 MYCONST(XML_CTYPE_MIXED);
1914 MYCONST(XML_CTYPE_NAME);
1915 MYCONST(XML_CTYPE_CHOICE);
1916 MYCONST(XML_CTYPE_SEQ);
1917
1918 MYCONST(XML_CQUANT_NONE);
1919 MYCONST(XML_CQUANT_OPT);
1920 MYCONST(XML_CQUANT_REP);
1921 MYCONST(XML_CQUANT_PLUS);
1922#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001923
1924 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001925 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001926 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001927 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1928 capi.MINOR_VERSION = XML_MINOR_VERSION;
1929 capi.MICRO_VERSION = XML_MICRO_VERSION;
1930 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001931 capi.GetErrorCode = XML_GetErrorCode;
1932 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1933 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001934 capi.Parse = XML_Parse;
1935 capi.ParserCreate_MM = XML_ParserCreate_MM;
1936 capi.ParserFree = XML_ParserFree;
1937 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1938 capi.SetCommentHandler = XML_SetCommentHandler;
1939 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1940 capi.SetElementHandler = XML_SetElementHandler;
1941 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1942 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1943 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1944 capi.SetUserData = XML_SetUserData;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001945
Benjamin Petersonb173f782009-05-05 22:31:58 +00001946 /* export using capsule */
1947 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001948 if (capi_object)
1949 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001950 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001951}
1952
Fred Drake6f987622000-08-25 18:03:30 +00001953static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001954clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001955{
Fred Drakecde79132001-04-25 16:01:30 +00001956 int i = 0;
1957 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001958
Fred Drake71b63ff2002-06-28 22:29:01 +00001959 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001960 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001961 self->handlers[i] = NULL;
1962 else {
Fred Drakecde79132001-04-25 16:01:30 +00001963 temp = self->handlers[i];
1964 self->handlers[i] = NULL;
1965 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001967 }
Fred Drakecde79132001-04-25 16:01:30 +00001968 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001969}
1970
Tim Peters0c322792002-07-17 16:49:03 +00001971static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001972 {"StartElementHandler",
1973 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001974 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001975 {"EndElementHandler",
1976 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001977 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001978 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001979 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1980 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001981 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001982 (xmlhandlersetter)XML_SetCharacterDataHandler,
1983 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001984 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001985 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001986 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001987 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001988 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001989 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001990 {"StartNamespaceDeclHandler",
1991 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001992 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001993 {"EndNamespaceDeclHandler",
1994 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001995 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001996 {"CommentHandler",
1997 (xmlhandlersetter)XML_SetCommentHandler,
1998 (xmlhandler)my_CommentHandler},
1999 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002000 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002001 (xmlhandler)my_StartCdataSectionHandler},
2002 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002003 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002004 (xmlhandler)my_EndCdataSectionHandler},
2005 {"DefaultHandler",
2006 (xmlhandlersetter)XML_SetDefaultHandler,
2007 (xmlhandler)my_DefaultHandler},
2008 {"DefaultHandlerExpand",
2009 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2010 (xmlhandler)my_DefaultHandlerExpandHandler},
2011 {"NotStandaloneHandler",
2012 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2013 (xmlhandler)my_NotStandaloneHandler},
2014 {"ExternalEntityRefHandler",
2015 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002016 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002017 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002018 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002019 (xmlhandler)my_StartDoctypeDeclHandler},
2020 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002021 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002022 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002023 {"EntityDeclHandler",
2024 (xmlhandlersetter)XML_SetEntityDeclHandler,
2025 (xmlhandler)my_EntityDeclHandler},
2026 {"XmlDeclHandler",
2027 (xmlhandlersetter)XML_SetXmlDeclHandler,
2028 (xmlhandler)my_XmlDeclHandler},
2029 {"ElementDeclHandler",
2030 (xmlhandlersetter)XML_SetElementDeclHandler,
2031 (xmlhandler)my_ElementDeclHandler},
2032 {"AttlistDeclHandler",
2033 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2034 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002035#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002036 {"SkippedEntityHandler",
2037 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2038 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002039#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002040
Fred Drake0582df92000-07-12 04:49:00 +00002041 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002042};