blob: 20af5ed0b4c111fe5bd50da0a4ec443c00bcca06 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
/* Expat release folded into one comparable integer (1.95.4 -> 19504). */
#define XML_COMBINED_VERSION \
    (10000*XML_MAJOR_VERSION + 100*XML_MINOR_VERSION + XML_MICRO_VERSION)

/* When defined, the trace_frame()/trace_frame_exc() helpers are compiled in
   and call_with_frame() reports handler calls to them. */
#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* Slot numbers for the supported callbacks.  The values are used as
   indexes into xmlparseobject.handlers and handler_info[], so the order
   of the enumerators must not change. */
enum HandlerTypes {
    /* Document content. */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    /* Declarations and namespaces. */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,

    /* Comments, CDATA sections and default handlers. */
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,

    /* Document type handling. */
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when built against Expat 1.95.4 or later. */
    SkippedEntity,
#endif

    /* Always the last enumerator. */
    _DummyDecl
};
40
41static PyObject *ErrorObject;
42
43/* ----------------------------------------------------- */
44
45/* Declarations for objects of type xmlparser */
46
47typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000048 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000049
Fred Drake0582df92000-07-12 04:49:00 +000050 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000051 int ordered_attributes; /* Return attributes as a list. */
52 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000053 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000054 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000055 XML_Char *buffer; /* Buffer used when accumulating characters */
56 /* NULL if not enabled */
57 int buffer_size; /* Size of buffer, in XML_Char units */
58 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000059 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000060 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000061} xmlparseobject;
62
Fred Drake2a3d7db2002-06-28 22:56:48 +000063#define CHARACTER_DATA_BUFFER_SIZE 8192
64
Jeremy Hylton938ace62002-07-17 16:30:39 +000065static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake117ac852002-09-24 16:24:54 +000067typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068typedef void* xmlhandler;
69
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000070struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000071 const char *name;
72 xmlhandlersetter setter;
73 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000074 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000075 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076};
77
Jeremy Hylton938ace62002-07-17 16:30:39 +000078static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079
Fred Drakebd6101c2001-02-14 18:29:45 +000080/* Set an integer attribute on the error object; return true on success,
81 * false on an exception.
82 */
83static int
84set_error_attr(PyObject *err, char *name, int value)
85{
Christian Heimes217cfd12007-12-02 14:31:20 +000086 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000087
Neal Norwitz2f5e9902006-03-08 06:36:45 +000088 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
89 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000090 return 0;
91 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000092 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 1;
94}
95
96/* Build and set an Expat exception, including positioning
97 * information. Always returns NULL.
98 */
Fred Drake85d835f2001-02-08 15:39:08 +000099static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000100set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000101{
102 PyObject *err;
103 char buffer[256];
104 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000105 int lineno = XML_GetErrorLineNumber(parser);
106 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000108 /* There is no risk of overflowing this buffer, since
109 even for 64-bit integers, there is sufficient space. */
110 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000111 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000112 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000113 if ( err != NULL
114 && set_error_attr(err, "code", code)
115 && set_error_attr(err, "offset", column)
116 && set_error_attr(err, "lineno", lineno)) {
117 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000118 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000119 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000120 return NULL;
121}
122
Fred Drake71b63ff2002-06-28 22:29:01 +0000123static int
124have_handler(xmlparseobject *self, int type)
125{
126 PyObject *handler = self->handlers[type];
127 return handler != NULL;
128}
129
130static PyObject *
131get_handler_name(struct HandlerInfo *hinfo)
132{
133 PyObject *name = hinfo->nameobj;
134 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000135 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000136 hinfo->nameobj = name;
137 }
138 Py_XINCREF(name);
139 return name;
140}
141
Fred Drake85d835f2001-02-08 15:39:08 +0000142
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000143/* Convert a string of XML_Chars into a Unicode string.
144 Returns None if str is a null pointer. */
145
Fred Drake0582df92000-07-12 04:49:00 +0000146static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000147conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000148{
Fred Drake71b63ff2002-06-28 22:29:01 +0000149 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000150 and hence in UTF-8. */
151 /* UTF-8 from Expat, Unicode desired */
152 if (str == NULL) {
153 Py_INCREF(Py_None);
154 return Py_None;
155 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000156 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000157}
158
Fred Drake0582df92000-07-12 04:49:00 +0000159static PyObject *
160conv_string_len_to_unicode(const XML_Char *str, int len)
161{
Fred Drake71b63ff2002-06-28 22:29:01 +0000162 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000163 and hence in UTF-8. */
164 /* UTF-8 from Expat, Unicode desired */
165 if (str == NULL) {
166 Py_INCREF(Py_None);
167 return Py_None;
168 }
Fred Drake6f987622000-08-25 18:03:30 +0000169 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000170}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000172/* Callback routines */
173
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000174static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000175
Martin v. Löwis069dde22003-01-21 10:58:18 +0000176/* This handler is used when an error has been detected, in the hope
177 that actual parsing can be terminated early. This will only help
178 if an external entity reference is encountered. */
179static int
180error_external_entity_ref_handler(XML_Parser parser,
181 const XML_Char *context,
182 const XML_Char *base,
183 const XML_Char *systemId,
184 const XML_Char *publicId)
185{
186 return 0;
187}
188
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000189/* Dummy character data handler used when an error (exception) has
190 been detected, and the actual parsing can be terminated early.
191 This is needed since character data handler can't be safely removed
192 from within the character data handler, but can be replaced. It is
193 used only from the character data handler trampoline, and must be
194 used right after `flag_error()` is called. */
195static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000197{
198 /* Do nothing. */
199}
200
Fred Drake6f987622000-08-25 18:03:30 +0000201static void
202flag_error(xmlparseobject *self)
203{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000204 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000205 XML_SetExternalEntityRefHandler(self->itself,
206 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000207}
208
209static PyCodeObject*
210getcode(enum HandlerTypes slot, char* func_name, int lineno)
211{
Fred Drakebd6101c2001-02-14 18:29:45 +0000212 if (handler_info[slot].tb_code == NULL) {
Fred Drakebd6101c2001-02-14 18:29:45 +0000213 handler_info[slot].tb_code =
Alexandre Vassalotti7b82b402009-07-21 04:30:03 +0000214 PyCode_NewEmpty(__FILE__, func_name, lineno);
Fred Drakebd6101c2001-02-14 18:29:45 +0000215 }
216 return handler_info[slot].tb_code;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000217}
218
Jeremy Hylton9263f572003-06-27 16:13:17 +0000219#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000220static int
221trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
222{
223 int result = 0;
224 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000225 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000226 if (tstate->c_profilefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 tstate->tracing++;
228 result = tstate->c_profilefunc(tstate->c_profileobj,
229 f, code , val);
230 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
231 || (tstate->c_profilefunc != NULL));
232 tstate->tracing--;
233 if (result)
234 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000235 }
236 if (tstate->c_tracefunc != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 tstate->tracing++;
238 result = tstate->c_tracefunc(tstate->c_traceobj,
239 f, code , val);
240 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
241 || (tstate->c_profilefunc != NULL));
242 tstate->tracing--;
243 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000244 return result;
245}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000246
247static int
248trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
249{
250 PyObject *type, *value, *traceback, *arg;
251 int err;
252
253 if (tstate->c_tracefunc == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000255
256 PyErr_Fetch(&type, &value, &traceback);
257 if (value == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 value = Py_None;
259 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000260 }
Martin v. Löwis9171f022004-10-13 19:50:11 +0000261#if PY_VERSION_HEX < 0x02040000
262 arg = Py_BuildValue("(OOO)", type, value, traceback);
263#else
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000264 arg = PyTuple_Pack(3, type, value, traceback);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000265#endif
Jeremy Hylton9263f572003-06-27 16:13:17 +0000266 if (arg == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000267 PyErr_Restore(type, value, traceback);
268 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000269 }
270 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
271 Py_DECREF(arg);
272 if (err == 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000274 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 Py_XDECREF(type);
276 Py_XDECREF(value);
277 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000278 }
279 return err;
280}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000281#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000282
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000283static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000284call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
285 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000286{
Fred Drakebd6101c2001-02-14 18:29:45 +0000287 PyThreadState *tstate = PyThreadState_GET();
288 PyFrameObject *f;
289 PyObject *res;
290
291 if (c == NULL)
292 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293
Jeremy Hylton9263f572003-06-27 16:13:17 +0000294 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000295 if (f == NULL)
296 return NULL;
297 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000298#ifdef FIX_TRACE
299 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000301 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000302#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000303 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000304 if (res == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 if (tstate->curexc_traceback == NULL)
306 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000307 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000308#ifdef FIX_TRACE
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 if (trace_frame_exc(tstate, f) < 0) {
310 return NULL;
311 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000312 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000313 else {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
315 Py_XDECREF(res);
316 res = NULL;
317 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000318 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000319#else
320 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000321#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000322 tstate->frame = f->f_back;
323 Py_DECREF(f);
324 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000325}
326
Fred Drakeb91a36b2002-06-27 19:40:48 +0000327static PyObject*
328string_intern(xmlparseobject *self, const char* str)
329{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000330 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000331 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000332 /* result can be NULL if the unicode conversion failed. */
333 if (!result)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000335 if (!self->intern)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000336 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000337 value = PyDict_GetItem(self->intern, result);
338 if (!value) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000340 return result;
341 else
342 return NULL;
343 }
344 Py_INCREF(value);
345 Py_DECREF(result);
346 return value;
347}
348
Fred Drake2a3d7db2002-06-28 22:56:48 +0000349/* Return 0 on success, -1 on exception.
350 * flag_error() will be called before return if needed.
351 */
352static int
353call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
354{
355 PyObject *args;
356 PyObject *temp;
357
358 args = PyTuple_New(1);
359 if (args == NULL)
360 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000361 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000362 if (temp == NULL) {
363 Py_DECREF(args);
364 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000365 XML_SetCharacterDataHandler(self->itself,
366 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000367 return -1;
368 }
369 PyTuple_SET_ITEM(args, 0, temp);
370 /* temp is now a borrowed reference; consider it unused. */
371 self->in_callback = 1;
372 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000373 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000374 /* temp is an owned reference again, or NULL */
375 self->in_callback = 0;
376 Py_DECREF(args);
377 if (temp == NULL) {
378 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000379 XML_SetCharacterDataHandler(self->itself,
380 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000381 return -1;
382 }
383 Py_DECREF(temp);
384 return 0;
385}
386
387static int
388flush_character_buffer(xmlparseobject *self)
389{
390 int rc;
391 if (self->buffer == NULL || self->buffer_used == 0)
392 return 0;
393 rc = call_character_handler(self, self->buffer, self->buffer_used);
394 self->buffer_used = 0;
395 return rc;
396}
397
398static void
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000400{
401 xmlparseobject *self = (xmlparseobject *) userData;
402 if (self->buffer == NULL)
403 call_character_handler(self, data, len);
404 else {
405 if ((self->buffer_used + len) > self->buffer_size) {
406 if (flush_character_buffer(self) < 0)
407 return;
408 /* handler might have changed; drop the rest on the floor
409 * if there isn't a handler anymore
410 */
411 if (!have_handler(self, CharacterData))
412 return;
413 }
414 if (len > self->buffer_size) {
415 call_character_handler(self, data, len);
416 self->buffer_used = 0;
417 }
418 else {
419 memcpy(self->buffer + self->buffer_used,
420 data, len * sizeof(XML_Char));
421 self->buffer_used += len;
422 }
423 }
424}
425
Fred Drake85d835f2001-02-08 15:39:08 +0000426static void
427my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000428 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000429{
430 xmlparseobject *self = (xmlparseobject *)userData;
431
Fred Drake71b63ff2002-06-28 22:29:01 +0000432 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000433 PyObject *container, *rv, *args;
434 int i, max;
435
Fred Drake2a3d7db2002-06-28 22:56:48 +0000436 if (flush_character_buffer(self) < 0)
437 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000438 /* Set max to the number of slots filled in atts[]; max/2 is
439 * the number of attributes we need to process.
440 */
441 if (self->specified_attributes) {
442 max = XML_GetSpecifiedAttributeCount(self->itself);
443 }
444 else {
445 max = 0;
446 while (atts[max] != NULL)
447 max += 2;
448 }
449 /* Build the container. */
450 if (self->ordered_attributes)
451 container = PyList_New(max);
452 else
453 container = PyDict_New();
454 if (container == NULL) {
455 flag_error(self);
456 return;
457 }
458 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000459 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000460 PyObject *v;
461 if (n == NULL) {
462 flag_error(self);
463 Py_DECREF(container);
464 return;
465 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000466 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000467 if (v == NULL) {
468 flag_error(self);
469 Py_DECREF(container);
470 Py_DECREF(n);
471 return;
472 }
473 if (self->ordered_attributes) {
474 PyList_SET_ITEM(container, i, n);
475 PyList_SET_ITEM(container, i+1, v);
476 }
477 else if (PyDict_SetItem(container, n, v)) {
478 flag_error(self);
479 Py_DECREF(n);
480 Py_DECREF(v);
481 return;
482 }
483 else {
484 Py_DECREF(n);
485 Py_DECREF(v);
486 }
487 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000488 args = string_intern(self, name);
489 if (args != NULL)
490 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000491 if (args == NULL) {
492 Py_DECREF(container);
493 return;
494 }
495 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000496 self->in_callback = 1;
497 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000498 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000499 self->in_callback = 0;
500 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000501 if (rv == NULL) {
502 flag_error(self);
503 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000504 }
Fred Drake85d835f2001-02-08 15:39:08 +0000505 Py_DECREF(rv);
506 }
507}
508
/* Generate the C callback my_<NAME>Handler, which forwards an Expat event
 * to the Python handler stored in slot NAME.
 *
 *   RC            C return type of the callback
 *   NAME          enum HandlerTypes slot; also used to form the name
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declaration placed after the locals
 *   PARAM_FORMAT  parenthesized Py_BuildValue() arguments; may use `self`
 *   CONVERSION    statement run on the handler result `rv`
 *   RETURN        expression returned on every path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * Buffered character data is flushed before the handler is called, and
 * flag_error() is called if building the arguments or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    if (!have_handler(self, NAME)) \
        return RETURN; \
    if (flush_character_buffer(self) < 0) \
        return RETURN; \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME, #NAME, __LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}
537
/* RC_HANDLER for callbacks that return nothing and receive the parser
   object through a `userData` parameter. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000541
/* RC_HANDLER for callbacks that return an int: the handler result is
   converted with PyLong_AsLong(), and 0 is returned when no handler is
   set or a step fails. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000546
Fred Drake71b63ff2002-06-28 22:29:01 +0000547VOID_HANDLER(EndElement,
548 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000549 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000550
Fred Drake6f987622000-08-25 18:03:30 +0000551VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000552 (void *userData,
553 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000554 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000555 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000556
Fred Drake6f987622000-08-25 18:03:30 +0000557VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000558 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000559 const XML_Char *entityName,
560 const XML_Char *base,
561 const XML_Char *systemId,
562 const XML_Char *publicId,
563 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000564 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000565 string_intern(self, entityName), string_intern(self, base),
566 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000567 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000568
Fred Drake85d835f2001-02-08 15:39:08 +0000569VOID_HANDLER(EntityDecl,
570 (void *userData,
571 const XML_Char *entityName,
572 int is_parameter_entity,
573 const XML_Char *value,
574 int value_length,
575 const XML_Char *base,
576 const XML_Char *systemId,
577 const XML_Char *publicId,
578 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000579 ("NiNNNNN",
580 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000581 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000582 string_intern(self, base), string_intern(self, systemId),
583 string_intern(self, publicId),
584 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000585
586VOID_HANDLER(XmlDecl,
587 (void *userData,
588 const XML_Char *version,
589 const XML_Char *encoding,
590 int standalone),
591 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000592 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000593 standalone))
594
595static PyObject *
596conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000597 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000598{
599 PyObject *result = NULL;
600 PyObject *children = PyTuple_New(model->numchildren);
601 int i;
602
603 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000604 assert(model->numchildren < INT_MAX);
605 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000606 PyObject *child = conv_content_model(&model->children[i],
607 conv_string);
608 if (child == NULL) {
609 Py_XDECREF(children);
610 return NULL;
611 }
612 PyTuple_SET_ITEM(children, i, child);
613 }
614 result = Py_BuildValue("(iiO&N)",
615 model->type, model->quant,
616 conv_string,model->name, children);
617 }
618 return result;
619}
620
Fred Drake06dd8cf2003-02-02 03:54:17 +0000621static void
622my_ElementDeclHandler(void *userData,
623 const XML_Char *name,
624 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000625{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000626 xmlparseobject *self = (xmlparseobject *)userData;
627 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000628
Fred Drake06dd8cf2003-02-02 03:54:17 +0000629 if (have_handler(self, ElementDecl)) {
630 PyObject *rv = NULL;
631 PyObject *modelobj, *nameobj;
632
633 if (flush_character_buffer(self) < 0)
634 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000635 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000636 if (modelobj == NULL) {
637 flag_error(self);
638 goto finally;
639 }
640 nameobj = string_intern(self, name);
641 if (nameobj == NULL) {
642 Py_DECREF(modelobj);
643 flag_error(self);
644 goto finally;
645 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000646 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000647 if (args == NULL) {
648 Py_DECREF(modelobj);
649 flag_error(self);
650 goto finally;
651 }
652 self->in_callback = 1;
653 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000654 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000655 self->in_callback = 0;
656 if (rv == NULL) {
657 flag_error(self);
658 goto finally;
659 }
660 Py_DECREF(rv);
661 }
662 finally:
663 Py_XDECREF(args);
664 XML_FreeContentModel(self->itself, model);
665 return;
666}
Fred Drake85d835f2001-02-08 15:39:08 +0000667
668VOID_HANDLER(AttlistDecl,
669 (void *userData,
670 const XML_Char *elname,
671 const XML_Char *attname,
672 const XML_Char *att_type,
673 const XML_Char *dflt,
674 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000675 ("(NNO&O&i)",
676 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000677 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000678 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000679
Martin v. Löwisc847f402003-01-21 11:09:21 +0000680#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000681VOID_HANDLER(SkippedEntity,
682 (void *userData,
683 const XML_Char *entityName,
684 int is_parameter_entity),
685 ("Ni",
686 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000687#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000688
Fred Drake71b63ff2002-06-28 22:29:01 +0000689VOID_HANDLER(NotationDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 (void *userData,
691 const XML_Char *notationName,
692 const XML_Char *base,
693 const XML_Char *systemId,
694 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000695 ("(NNNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 string_intern(self, notationName), string_intern(self, base),
697 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000698
Fred Drake6f987622000-08-25 18:03:30 +0000699VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 (void *userData,
701 const XML_Char *prefix,
702 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000703 ("(NN)",
704 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000705
Fred Drake6f987622000-08-25 18:03:30 +0000706VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 (void *userData,
708 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000709 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000710
Fred Drake6f987622000-08-25 18:03:30 +0000711VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000712 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000713 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000714
Fred Drake6f987622000-08-25 18:03:30 +0000715VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000716 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000718
Fred Drake6f987622000-08-25 18:03:30 +0000719VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000720 (void *userData),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000722
Fred Drake6f987622000-08-25 18:03:30 +0000723VOID_HANDLER(Default,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 (void *userData, const XML_Char *s, int len),
725 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000726
Fred Drake6f987622000-08-25 18:03:30 +0000727VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000728 (void *userData, const XML_Char *s, int len),
729 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000730
Fred Drake71b63ff2002-06-28 22:29:01 +0000731INT_HANDLER(NotStandalone,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000732 (void *userData),
733 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000734
Fred Drake6f987622000-08-25 18:03:30 +0000735RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 (XML_Parser parser,
737 const XML_Char *context,
738 const XML_Char *base,
739 const XML_Char *systemId,
740 const XML_Char *publicId),
741 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000742 ("(O&NNN)",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 conv_string_to_unicode ,context, string_intern(self, base),
744 string_intern(self, systemId), string_intern(self, publicId)),
745 rc = PyLong_AsLong(rv);, rc,
746 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000747
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000748/* XXX UnknownEncodingHandler */
749
Fred Drake85d835f2001-02-08 15:39:08 +0000750VOID_HANDLER(StartDoctypeDecl,
751 (void *userData, const XML_Char *doctypeName,
752 const XML_Char *sysid, const XML_Char *pubid,
753 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000754 ("(NNNi)", string_intern(self, doctypeName),
755 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000756 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000757
758VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000759
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000760/* ---------------------------------------------------------------- */
761
Fred Drake71b63ff2002-06-28 22:29:01 +0000762static PyObject *
763get_parse_result(xmlparseobject *self, int rv)
764{
765 if (PyErr_Occurred()) {
766 return NULL;
767 }
768 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000769 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000770 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000771 if (flush_character_buffer(self) < 0) {
772 return NULL;
773 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000774 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000775}
776
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000777PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000778"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000779Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000780
781static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000782xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000783{
Fred Drake0582df92000-07-12 04:49:00 +0000784 char *s;
785 int slen;
786 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000787
Fred Drake0582df92000-07-12 04:49:00 +0000788 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
789 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000790
791 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000792}
793
Fred Drakeca1f4262000-09-21 20:10:23 +0000794/* File reading copied from cPickle */
795
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000796#define BUF_SIZE 2048
797
Fred Drake0582df92000-07-12 04:49:00 +0000798static int
799readinst(char *buf, int buf_size, PyObject *meth)
800{
801 PyObject *arg = NULL;
802 PyObject *bytes = NULL;
803 PyObject *str = NULL;
804 int len = -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000805 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000806
Christian Heimes217cfd12007-12-02 14:31:20 +0000807 if ((bytes = PyLong_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000808 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000809
Fred Drake7b6caff2003-07-21 17:05:56 +0000810 if ((arg = PyTuple_New(1)) == NULL) {
811 Py_DECREF(bytes);
Fred Drake0582df92000-07-12 04:49:00 +0000812 goto finally;
Fred Drake7b6caff2003-07-21 17:05:56 +0000813 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000814
Tim Peters954eef72000-09-22 06:01:11 +0000815 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000816
Martin v. Löwis9171f022004-10-13 19:50:11 +0000817#if PY_VERSION_HEX < 0x02020000
818 str = PyObject_CallObject(meth, arg);
819#else
820 str = PyObject_Call(meth, arg, NULL);
821#endif
822 if (str == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000823 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000824
Christian Heimes72b710a2008-05-26 13:28:38 +0000825 if (PyBytes_Check(str))
826 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000827 else if (PyByteArray_Check(str))
828 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000829 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000830 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000831 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000832 Py_TYPE(str)->tp_name);
Fred Drake0582df92000-07-12 04:49:00 +0000833 goto finally;
834 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000835 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000836 if (len > buf_size) {
837 PyErr_Format(PyExc_ValueError,
838 "read() returned too much data: "
839 "%i bytes requested, %i returned",
840 buf_size, len);
Fred Drake0582df92000-07-12 04:49:00 +0000841 goto finally;
842 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000843 memcpy(buf, ptr, len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000844finally:
Fred Drake0582df92000-07-12 04:49:00 +0000845 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000846 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000847 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000848}
849
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000850PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000851"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000852Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000853
854static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000855xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000856{
Fred Drake0582df92000-07-12 04:49:00 +0000857 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000858 FILE *fp;
859 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000860
Guido van Rossumda5b8f22007-06-12 23:30:11 +0000861 {
Fred Drake0582df92000-07-12 04:49:00 +0000862 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000863 readmethod = PyObject_GetAttrString(f, "read");
864 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000865 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000866 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000867 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000868 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000869 }
870 }
871 for (;;) {
872 int bytes_read;
873 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000874 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000875 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000876 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000877 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000878
Fred Drake0582df92000-07-12 04:49:00 +0000879 if (fp) {
880 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
881 if (bytes_read < 0) {
882 PyErr_SetFromErrno(PyExc_IOError);
883 return NULL;
884 }
885 }
886 else {
887 bytes_read = readinst(buf, BUF_SIZE, readmethod);
Fred Drake7b6caff2003-07-21 17:05:56 +0000888 if (bytes_read < 0) {
889 Py_DECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000890 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000891 }
Fred Drake0582df92000-07-12 04:49:00 +0000892 }
893 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000894 if (PyErr_Occurred()) {
895 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000896 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000897 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000898
Fred Drake0582df92000-07-12 04:49:00 +0000899 if (!rv || bytes_read == 0)
900 break;
901 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000902 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000903 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000904}
905
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000906PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000907"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000908Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000909
910static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000911xmlparse_SetBase(xmlparseobject *self, PyObject *args)
912{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000913 char *base;
914
Fred Drake0582df92000-07-12 04:49:00 +0000915 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000916 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000917 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000919 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000920 Py_INCREF(Py_None);
921 return Py_None;
922}
923
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000924PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000925"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000926Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000927
928static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000929xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000930{
Fred Drake0582df92000-07-12 04:49:00 +0000931 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000932}
933
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000934PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000935"GetInputContext() -> string\n\
936Return the untranslated text of the input that caused the current event.\n\
937If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000938for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000939
940static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000941xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000942{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000943 if (self->in_callback) {
944 int offset, size;
945 const char *buffer
946 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000947
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000948 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000949 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000950 size - offset);
951 else
952 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000953 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000954 else
955 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000956}
Fred Drakebd6101c2001-02-14 18:29:45 +0000957
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000958PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +0000959"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +0000960Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000961information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000962
963static PyObject *
964xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
965{
966 char *context;
967 char *encoding = NULL;
968 xmlparseobject *new_parser;
969 int i;
970
Martin v. Löwisc57428d2001-09-19 09:55:09 +0000971 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +0000972 &context, &encoding)) {
973 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000974 }
975
Martin v. Löwis894258c2001-09-23 10:20:10 +0000976#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +0000977 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +0000978 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +0000979#else
980 /* Python versions 2.2 and later */
981 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
982#endif
Fred Drake85d835f2001-02-08 15:39:08 +0000983
984 if (new_parser == NULL)
985 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +0000986 new_parser->buffer_size = self->buffer_size;
987 new_parser->buffer_used = 0;
988 if (self->buffer != NULL) {
989 new_parser->buffer = malloc(new_parser->buffer_size);
990 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +0000991#ifndef Py_TPFLAGS_HAVE_GC
992 /* Code for versions 2.0 and 2.1 */
993 PyObject_Del(new_parser);
994#else
995 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +0000996 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +0000997#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +0000998 return PyErr_NoMemory();
999 }
1000 }
1001 else
1002 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001003 new_parser->ordered_attributes = self->ordered_attributes;
1004 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001005 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001006 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001007 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001009 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001010 new_parser->intern = self->intern;
1011 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001012#ifdef Py_TPFLAGS_HAVE_GC
1013 PyObject_GC_Track(new_parser);
1014#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001015 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001016#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001017
1018 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001019 Py_DECREF(new_parser);
1020 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001021 }
1022
1023 XML_SetUserData(new_parser->itself, (void *)new_parser);
1024
1025 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001026 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001027 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001028
Fred Drake2a3d7db2002-06-28 22:56:48 +00001029 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001030 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001031 Py_DECREF(new_parser);
1032 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001033 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001034 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001035
1036 /* then copy handlers from self */
1037 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001038 PyObject *handler = self->handlers[i];
1039 if (handler != NULL) {
1040 Py_INCREF(handler);
1041 new_parser->handlers[i] = handler;
1042 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001043 handler_info[i].handler);
1044 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001045 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001046 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001047}
1048
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001049PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001050"SetParamEntityParsing(flag) -> success\n\
1051Controls parsing of parameter entities (including the external DTD\n\
1052subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1053XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1054XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001055was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001056
1057static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001058xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001059{
Fred Drake85d835f2001-02-08 15:39:08 +00001060 int flag;
1061 if (!PyArg_ParseTuple(args, "i", &flag))
1062 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001063 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001064 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001065}
1066
Martin v. Löwisc847f402003-01-21 11:09:21 +00001067
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): forward the truth value of `flag` (default true)
   to XML_UseForeignDTD().

   Returns None on success.  Returns NULL with an exception set if the
   arguments are invalid, if evaluating the truth of `flag` raises, or if
   expat reports an error (via set_error()). */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 when the object's truth test
           raises; that must not be mistaken for "true". */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001095
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001096static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1097
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001098static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 {"Parse", (PyCFunction)xmlparse_Parse,
1100 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001101 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001103 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001105 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001107 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001109 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001111 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001113#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001114 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001116#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001117 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001119};
1120
1121/* ---------- */
1122
1123
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001124
Fred Drake71b63ff2002-06-28 22:29:01 +00001125/* pyexpat international encoding support.
1126 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001127*/
1128
/* Holds each of the 256 possible byte values once, in order, followed by
   a terminating zero. */
static char template_buffer[257];

/* Fill template_buffer so that entry i holds byte value i for i in
   0..255, and entry 256 holds 0. */
static void
init_template_buffer(void)
{
    char *slot = template_buffer;
    int byte_value = 0;

    while (byte_value < 256) {
        *slot = byte_value;
        slot++;
        byte_value++;
    }
    *slot = 0;
}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001140
Fred Drake71b63ff2002-06-28 22:29:01 +00001141static int
1142PyUnknownEncodingHandler(void *encodingHandlerData,
1143 const XML_Char *name,
1144 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001145{
Fred Drakebb66a202001-03-01 20:48:17 +00001146 PyUnicodeObject *_u_string = NULL;
1147 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001148 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001149
Fred Drakebb66a202001-03-01 20:48:17 +00001150 /* Yes, supports only 8bit encodings */
1151 _u_string = (PyUnicodeObject *)
1152 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001153
Fred Drakebb66a202001-03-01 20:48:17 +00001154 if (_u_string == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001156
Fred Drakebb66a202001-03-01 20:48:17 +00001157 for (i = 0; i < 256; i++) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 /* Stupid to access directly, but fast */
1159 Py_UNICODE c = _u_string->str[i];
1160 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1161 info->map[i] = -1;
1162 else
1163 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001164 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001165 info->data = NULL;
1166 info->convert = NULL;
1167 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001168 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001169 Py_DECREF(_u_string);
1170 return result;
1171}
1172
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001173
1174static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001175newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001176{
1177 int i;
1178 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001179
Martin v. Löwis894258c2001-09-23 10:20:10 +00001180#ifdef Py_TPFLAGS_HAVE_GC
1181 /* Code for versions 2.2 and later */
1182 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1183#else
Fred Drake0582df92000-07-12 04:49:00 +00001184 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001185#endif
Fred Drake0582df92000-07-12 04:49:00 +00001186 if (self == NULL)
1187 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001188
Fred Drake2a3d7db2002-06-28 22:56:48 +00001189 self->buffer = NULL;
1190 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1191 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001192 self->ordered_attributes = 0;
1193 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001194 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001195 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001196 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001197 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001198 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1199 }
Fred Drake85d835f2001-02-08 15:39:08 +00001200 else {
Fred Drake0582df92000-07-12 04:49:00 +00001201 self->itself = XML_ParserCreate(encoding);
1202 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001203 self->intern = intern;
1204 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001205#ifdef Py_TPFLAGS_HAVE_GC
1206 PyObject_GC_Track(self);
1207#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001208 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001209#endif
Fred Drake0582df92000-07-12 04:49:00 +00001210 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001211 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001212 "XML_ParserCreate failed");
1213 Py_DECREF(self);
1214 return NULL;
1215 }
1216 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001217 XML_SetUnknownEncodingHandler(self->itself,
1218 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001219
Fred Drake2a3d7db2002-06-28 22:56:48 +00001220 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001221 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001222
Fred Drake7c75bf22002-07-01 14:02:31 +00001223 self->handlers = malloc(sizeof(PyObject *) * i);
1224 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001225 Py_DECREF(self);
1226 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001227 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001228 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001229
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001230 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001231}
1232
1233
1234static void
Fred Drake0582df92000-07-12 04:49:00 +00001235xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001236{
Fred Drake0582df92000-07-12 04:49:00 +00001237 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001238#ifdef Py_TPFLAGS_HAVE_GC
1239 PyObject_GC_UnTrack(self);
1240#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001241 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001242#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001243 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001244 XML_ParserFree(self->itself);
1245 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001246
Fred Drake85d835f2001-02-08 15:39:08 +00001247 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001248 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001249 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001250 temp = self->handlers[i];
1251 self->handlers[i] = NULL;
1252 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001253 }
1254 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001255 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001256 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001257 if (self->buffer != NULL) {
1258 free(self->buffer);
1259 self->buffer = NULL;
1260 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001261 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001262#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001263 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001264 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001265#else
1266 /* Code for versions 2.2 and later. */
1267 PyObject_GC_Del(self);
1268#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001269}
1270
Fred Drake0582df92000-07-12 04:49:00 +00001271static int
1272handlername2int(const char *name)
1273{
1274 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001275 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001276 if (strcmp(name, handler_info[i].name) == 0) {
1277 return i;
1278 }
1279 }
1280 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001281}
1282
1283static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001284get_pybool(int istrue)
1285{
1286 PyObject *result = istrue ? Py_True : Py_False;
1287 Py_INCREF(result);
1288 return result;
1289}
1290
1291static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001292xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001293{
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001294 char *name = "";
1295 int handlernum = -1;
1296
1297 if (PyUnicode_Check(nameobj))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 name = _PyUnicode_AsString(nameobj);
1299
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001300 handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001301
1302 if (handlernum != -1) {
1303 PyObject *result = self->handlers[handlernum];
1304 if (result == NULL)
1305 result = Py_None;
1306 Py_INCREF(result);
1307 return result;
1308 }
1309 if (name[0] == 'E') {
1310 if (strcmp(name, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001311 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001312 XML_GetErrorCode(self->itself));
1313 if (strcmp(name, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001314 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001315 XML_GetErrorLineNumber(self->itself));
1316 if (strcmp(name, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001317 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001318 XML_GetErrorColumnNumber(self->itself));
1319 if (strcmp(name, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001320 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001321 XML_GetErrorByteIndex(self->itself));
1322 }
Dave Cole3203efb2004-08-26 00:37:31 +00001323 if (name[0] == 'C') {
1324 if (strcmp(name, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001325 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001326 XML_GetCurrentLineNumber(self->itself));
1327 if (strcmp(name, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001328 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001329 XML_GetCurrentColumnNumber(self->itself));
1330 if (strcmp(name, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001331 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001332 XML_GetCurrentByteIndex(self->itself));
1333 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001334 if (name[0] == 'b') {
1335 if (strcmp(name, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001336 return PyLong_FromLong((long) self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001337 if (strcmp(name, "buffer_text") == 0)
1338 return get_pybool(self->buffer != NULL);
1339 if (strcmp(name, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001340 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001341 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001342 if (strcmp(name, "namespace_prefixes") == 0)
1343 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001344 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001345 return get_pybool(self->ordered_attributes);
Fred Drake85d835f2001-02-08 15:39:08 +00001346 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001347 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001348 if (strcmp(name, "intern") == 0) {
1349 if (self->intern == NULL) {
1350 Py_INCREF(Py_None);
1351 return Py_None;
1352 }
1353 else {
1354 Py_INCREF(self->intern);
1355 return self->intern;
1356 }
1357 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001358
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001359 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001360}
1361
1362static PyObject *
1363xmlparse_dir(PyObject *self, PyObject* noargs)
1364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365#define APPEND(list, str) \
1366 do { \
1367 PyObject *o = PyUnicode_FromString(str); \
1368 if (o != NULL) \
1369 PyList_Append(list, o); \
1370 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001371 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001372
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001373 int i;
1374 PyObject *rc = PyList_New(0);
1375 if (!rc)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001377 for (i = 0; handler_info[i].name != NULL; i++) {
1378 PyObject *o = get_handler_name(&handler_info[i]);
1379 if (o != NULL)
1380 PyList_Append(rc, o);
1381 Py_XDECREF(o);
1382 }
1383 APPEND(rc, "ErrorCode");
1384 APPEND(rc, "ErrorLineNumber");
1385 APPEND(rc, "ErrorColumnNumber");
1386 APPEND(rc, "ErrorByteIndex");
1387 APPEND(rc, "CurrentLineNumber");
1388 APPEND(rc, "CurrentColumnNumber");
1389 APPEND(rc, "CurrentByteIndex");
1390 APPEND(rc, "buffer_size");
1391 APPEND(rc, "buffer_text");
1392 APPEND(rc, "buffer_used");
1393 APPEND(rc, "namespace_prefixes");
1394 APPEND(rc, "ordered_attributes");
1395 APPEND(rc, "specified_attributes");
1396 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001397
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001398#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001399
1400 if (PyErr_Occurred()) {
1401 Py_DECREF(rc);
1402 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001403 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001404
1405 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001406}
1407
Fred Drake6f987622000-08-25 18:03:30 +00001408static int
1409sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001410{
1411 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001412 if (handlernum >= 0) {
1413 xmlhandler c_handler = NULL;
1414 PyObject *temp = self->handlers[handlernum];
1415
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001416 if (v == Py_None) {
1417 /* If this is the character data handler, and a character
1418 data handler is already active, we need to be more
1419 careful. What we can safely do is replace the existing
1420 character data handler callback function with a no-op
1421 function that will refuse to call Python. The downside
1422 is that this doesn't completely remove the character
1423 data handler from the C layer if there's any callback
1424 active, so Expat does a little more work than it
1425 otherwise would, but that's really an odd case. A more
1426 elaborate system of handlers and state could remove the
1427 C handler more effectively. */
1428 if (handlernum == CharacterData && self->in_callback)
1429 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001430 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001431 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001432 else if (v != NULL) {
1433 Py_INCREF(v);
1434 c_handler = handler_info[handlernum].handler;
1435 }
Fred Drake0582df92000-07-12 04:49:00 +00001436 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001437 Py_XDECREF(temp);
1438 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001439 return 1;
1440 }
1441 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001442}
1443
1444static int
Fred Drake6f987622000-08-25 18:03:30 +00001445xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001446{
Fred Drake6f987622000-08-25 18:03:30 +00001447 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001448 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001449 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1450 return -1;
1451 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001452 if (strcmp(name, "buffer_text") == 0) {
1453 if (PyObject_IsTrue(v)) {
1454 if (self->buffer == NULL) {
1455 self->buffer = malloc(self->buffer_size);
1456 if (self->buffer == NULL) {
1457 PyErr_NoMemory();
1458 return -1;
1459 }
1460 self->buffer_used = 0;
1461 }
1462 }
1463 else if (self->buffer != NULL) {
1464 if (flush_character_buffer(self) < 0)
1465 return -1;
1466 free(self->buffer);
1467 self->buffer = NULL;
1468 }
1469 return 0;
1470 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001471 if (strcmp(name, "namespace_prefixes") == 0) {
1472 if (PyObject_IsTrue(v))
1473 self->ns_prefixes = 1;
1474 else
1475 self->ns_prefixes = 0;
1476 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1477 return 0;
1478 }
Fred Drake85d835f2001-02-08 15:39:08 +00001479 if (strcmp(name, "ordered_attributes") == 0) {
1480 if (PyObject_IsTrue(v))
1481 self->ordered_attributes = 1;
1482 else
1483 self->ordered_attributes = 0;
1484 return 0;
1485 }
Fred Drake85d835f2001-02-08 15:39:08 +00001486 if (strcmp(name, "specified_attributes") == 0) {
1487 if (PyObject_IsTrue(v))
1488 self->specified_attributes = 1;
1489 else
1490 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001491 return 0;
1492 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001493
1494 if (strcmp(name, "buffer_size") == 0) {
1495 long new_buffer_size;
1496 if (!PyLong_Check(v)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1498 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001499 }
1500
1501 new_buffer_size=PyLong_AS_LONG(v);
1502 /* trivial case -- no change */
1503 if (new_buffer_size == self->buffer_size) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001505 }
1506
1507 if (new_buffer_size <= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1509 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001510 }
1511
1512 /* check maximum */
1513 if (new_buffer_size > INT_MAX) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001514 char errmsg[100];
1515 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1516 PyErr_SetString(PyExc_ValueError, errmsg);
1517 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001518 }
1519
1520 if (self->buffer != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 /* there is already a buffer */
1522 if (self->buffer_used != 0) {
1523 flush_character_buffer(self);
1524 }
1525 /* free existing buffer */
1526 free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001527 }
1528 self->buffer = malloc(new_buffer_size);
1529 if (self->buffer == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 PyErr_NoMemory();
1531 return -1;
1532 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001533 self->buffer_size = new_buffer_size;
1534 return 0;
1535 }
1536
Fred Drake2a3d7db2002-06-28 22:56:48 +00001537 if (strcmp(name, "CharacterDataHandler") == 0) {
1538 /* If we're changing the character data handler, flush all
1539 * cached data with the old handler. Not sure there's a
1540 * "right" thing to do, though, but this probably won't
1541 * happen.
1542 */
1543 if (flush_character_buffer(self) < 0)
1544 return -1;
1545 }
Fred Drake6f987622000-08-25 18:03:30 +00001546 if (sethandler(self, name, v)) {
1547 return 0;
1548 }
1549 PyErr_SetString(PyExc_AttributeError, name);
1550 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001551}
1552
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001553static int
1554xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1555{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001556 int i;
1557 for (i = 0; handler_info[i].name != NULL; i++)
1558 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001559 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001560}
1561
1562static int
1563xmlparse_clear(xmlparseobject *op)
1564{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001565 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001566 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001567 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001568}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001569
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001570PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001571
1572static PyTypeObject Xmlparsetype = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 PyVarObject_HEAD_INIT(NULL, 0)
1574 "pyexpat.xmlparser", /*tp_name*/
1575 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1576 0, /*tp_itemsize*/
1577 /* methods */
1578 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1579 (printfunc)0, /*tp_print*/
1580 0, /*tp_getattr*/
1581 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1582 0, /*tp_reserved*/
1583 (reprfunc)0, /*tp_repr*/
1584 0, /*tp_as_number*/
1585 0, /*tp_as_sequence*/
1586 0, /*tp_as_mapping*/
1587 (hashfunc)0, /*tp_hash*/
1588 (ternaryfunc)0, /*tp_call*/
1589 (reprfunc)0, /*tp_str*/
1590 (getattrofunc)xmlparse_getattro, /* tp_getattro */
1591 0, /* tp_setattro */
1592 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001593#ifdef Py_TPFLAGS_HAVE_GC
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001595#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001596 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001597#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1599 (traverseproc)xmlparse_traverse, /* tp_traverse */
1600 (inquiry)xmlparse_clear, /* tp_clear */
1601 0, /* tp_richcompare */
1602 0, /* tp_weaklistoffset */
1603 0, /* tp_iter */
1604 0, /* tp_iternext */
1605 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001606};
1607
1608/* End of code for xmlparser objects */
1609/* -------------------------------------------------------- */
1610
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001611PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001612"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001613Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001614
1615static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001616pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1617{
Fred Drakecde79132001-04-25 16:01:30 +00001618 char *encoding = NULL;
1619 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001620 PyObject *intern = NULL;
1621 PyObject *result;
1622 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001623 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001624 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001625
Fred Drakeb91a36b2002-06-27 19:40:48 +00001626 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1627 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001628 return NULL;
1629 if (namespace_separator != NULL
1630 && strlen(namespace_separator) > 1) {
1631 PyErr_SetString(PyExc_ValueError,
1632 "namespace_separator must be at most one"
1633 " character, omitted, or None");
1634 return NULL;
1635 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001636 /* Explicitly passing None means no interning is desired.
1637 Not passing anything means that a new dictionary is used. */
1638 if (intern == Py_None)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001640 else if (intern == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 intern = PyDict_New();
1642 if (!intern)
1643 return NULL;
1644 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001645 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001646 else if (!PyDict_Check(intern)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1648 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001649 }
1650
1651 result = newxmlparseobject(encoding, namespace_separator, intern);
1652 if (intern_decref) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001654 }
1655 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001656}
1657
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001658PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001659"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001660Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001661
1662static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001663pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001664{
Fred Drake0582df92000-07-12 04:49:00 +00001665 long code = 0;
1666
1667 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1668 return NULL;
1669 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001670}
1671
1672/* List of methods defined in the module */
1673
1674static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001676 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1678 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001681};
1682
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001683/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001685PyDoc_STRVAR(pyexpat_module_documentation,
1686"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001687
Fred Drake4113b132001-03-24 19:58:26 +00001688/* Return a Python string that represents the version number without the
1689 * extra cruft added by revision control, even if the right options were
1690 * given to the "cvs export" command to make it not include the extra
1691 * cruft.
1692 */
1693static PyObject *
1694get_version_string(void)
1695{
1696 static char *rcsid = "$Revision$";
1697 char *rev = rcsid;
1698 int i = 0;
1699
Neal Norwitz30b5c5d2005-12-19 06:05:18 +00001700 while (!isdigit(Py_CHARMASK(*rev)))
Fred Drake4113b132001-03-24 19:58:26 +00001701 ++rev;
1702 while (rev[i] != ' ' && rev[i] != '\0')
1703 ++i;
1704
Neal Norwitz392c5be2007-08-25 17:20:32 +00001705 return PyUnicode_FromStringAndSize(rev, i);
Fred Drake4113b132001-03-24 19:58:26 +00001706}
1707
Fred Drakecde79132001-04-25 16:01:30 +00001708/* Initialization function for the module */
1709
1710#ifndef MODULE_NAME
1711#define MODULE_NAME "pyexpat"
1712#endif
1713
1714#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001715#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001716#endif
1717
Martin v. Löwis069dde22003-01-21 10:58:18 +00001718#ifndef PyMODINIT_FUNC
1719# ifdef MS_WINDOWS
1720# define PyMODINIT_FUNC __declspec(dllexport) void
1721# else
1722# define PyMODINIT_FUNC void
1723# endif
1724#endif
1725
Mark Hammond8235ea12002-07-19 06:55:41 +00001726PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001727
Martin v. Löwis1a214512008-06-11 05:26:20 +00001728static struct PyModuleDef pyexpatmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 PyModuleDef_HEAD_INIT,
1730 MODULE_NAME,
1731 pyexpat_module_documentation,
1732 -1,
1733 pyexpat_methods,
1734 NULL,
1735 NULL,
1736 NULL,
1737 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001738};
1739
Martin v. Löwis069dde22003-01-21 10:58:18 +00001740PyMODINIT_FUNC
1741MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001742{
1743 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001744 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001745 PyObject *errors_module;
1746 PyObject *modelmod_name;
1747 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001748 PyObject *sys_modules;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001749 static struct PyExpat_CAPI capi;
1750 PyObject* capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001751
Fred Drake6f987622000-08-25 18:03:30 +00001752 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001753 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001754 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001755 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001756 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001757
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001758 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001760
Fred Drake0582df92000-07-12 04:49:00 +00001761 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001762 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001763 if (m == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001765
Fred Drake0582df92000-07-12 04:49:00 +00001766 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001767 if (ErrorObject == NULL) {
1768 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001769 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001770 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001771 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001772 }
1773 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001774 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001775 Py_INCREF(ErrorObject);
1776 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001777 Py_INCREF(&Xmlparsetype);
1778 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001779
Fred Drake4113b132001-03-24 19:58:26 +00001780 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001781 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1782 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001783 {
1784 XML_Expat_Version info = XML_ExpatVersionInfo();
1785 PyModule_AddObject(m, "version_info",
1786 Py_BuildValue("(iii)", info.major,
1787 info.minor, info.micro));
1788 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001789 init_template_buffer();
Fred Drake0582df92000-07-12 04:49:00 +00001790 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001791 compiled, this should check and set native_encoding
1792 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001793 */
Fred Drake93adb692000-09-23 04:55:48 +00001794 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001795
Fred Drake85d835f2001-02-08 15:39:08 +00001796 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001797 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001798 errors_module = PyDict_GetItem(d, errmod_name);
1799 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001800 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001801 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001802 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001803 /* gives away the reference to errors_module */
1804 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001805 }
1806 }
Fred Drake6f987622000-08-25 18:03:30 +00001807 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001808 model_module = PyDict_GetItem(d, modelmod_name);
1809 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001810 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001811 if (model_module != NULL) {
1812 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1813 /* gives away the reference to model_module */
1814 PyModule_AddObject(m, "model", model_module);
1815 }
1816 }
1817 Py_DECREF(modelmod_name);
1818 if (errors_module == NULL || model_module == NULL)
1819 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001820 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821
Martin v. Löwisc847f402003-01-21 11:09:21 +00001822#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001823 {
1824 const XML_Feature *features = XML_GetFeatureList();
1825 PyObject *list = PyList_New(0);
1826 if (list == NULL)
1827 /* just ignore it */
1828 PyErr_Clear();
1829 else {
1830 int i = 0;
1831 for (; features[i].feature != XML_FEATURE_END; ++i) {
1832 int ok;
1833 PyObject *item = Py_BuildValue("si", features[i].name,
1834 features[i].value);
1835 if (item == NULL) {
1836 Py_DECREF(list);
1837 list = NULL;
1838 break;
1839 }
1840 ok = PyList_Append(list, item);
1841 Py_DECREF(item);
1842 if (ok < 0) {
1843 PyErr_Clear();
1844 break;
1845 }
1846 }
1847 if (list != NULL)
1848 PyModule_AddObject(m, "features", list);
1849 }
1850 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001851#endif
Fred Drake6f987622000-08-25 18:03:30 +00001852
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001853#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001854 PyModule_AddStringConstant(errors_module, #name, \
1855 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001856
Fred Drake0582df92000-07-12 04:49:00 +00001857 MYCONST(XML_ERROR_NO_MEMORY);
1858 MYCONST(XML_ERROR_SYNTAX);
1859 MYCONST(XML_ERROR_NO_ELEMENTS);
1860 MYCONST(XML_ERROR_INVALID_TOKEN);
1861 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1862 MYCONST(XML_ERROR_PARTIAL_CHAR);
1863 MYCONST(XML_ERROR_TAG_MISMATCH);
1864 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1865 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1866 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1867 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1868 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1869 MYCONST(XML_ERROR_ASYNC_ENTITY);
1870 MYCONST(XML_ERROR_BAD_CHAR_REF);
1871 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1872 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1873 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1874 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1875 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001876 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1877 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1878 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001879 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1880 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1881 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1882 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1883 /* Added in Expat 1.95.7. */
1884 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1885 /* Added in Expat 1.95.8. */
1886 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1887 MYCONST(XML_ERROR_INCOMPLETE_PE);
1888 MYCONST(XML_ERROR_XML_DECL);
1889 MYCONST(XML_ERROR_TEXT_DECL);
1890 MYCONST(XML_ERROR_PUBLICID);
1891 MYCONST(XML_ERROR_SUSPENDED);
1892 MYCONST(XML_ERROR_NOT_SUSPENDED);
1893 MYCONST(XML_ERROR_ABORTED);
1894 MYCONST(XML_ERROR_FINISHED);
1895 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001896
Fred Drake85d835f2001-02-08 15:39:08 +00001897 PyModule_AddStringConstant(errors_module, "__doc__",
1898 "Constants used to describe error conditions.");
1899
Fred Drake93adb692000-09-23 04:55:48 +00001900#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001901
Fred Drake85d835f2001-02-08 15:39:08 +00001902#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001903 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1904 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1905 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001906#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001907
Fred Drake85d835f2001-02-08 15:39:08 +00001908#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1909 PyModule_AddStringConstant(model_module, "__doc__",
1910 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001911
Fred Drake85d835f2001-02-08 15:39:08 +00001912 MYCONST(XML_CTYPE_EMPTY);
1913 MYCONST(XML_CTYPE_ANY);
1914 MYCONST(XML_CTYPE_MIXED);
1915 MYCONST(XML_CTYPE_NAME);
1916 MYCONST(XML_CTYPE_CHOICE);
1917 MYCONST(XML_CTYPE_SEQ);
1918
1919 MYCONST(XML_CQUANT_NONE);
1920 MYCONST(XML_CQUANT_OPT);
1921 MYCONST(XML_CQUANT_REP);
1922 MYCONST(XML_CQUANT_PLUS);
1923#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001924
1925 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001926 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001927 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001928 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1929 capi.MINOR_VERSION = XML_MINOR_VERSION;
1930 capi.MICRO_VERSION = XML_MICRO_VERSION;
1931 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001932 capi.GetErrorCode = XML_GetErrorCode;
1933 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1934 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001935 capi.Parse = XML_Parse;
1936 capi.ParserCreate_MM = XML_ParserCreate_MM;
1937 capi.ParserFree = XML_ParserFree;
1938 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1939 capi.SetCommentHandler = XML_SetCommentHandler;
1940 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1941 capi.SetElementHandler = XML_SetElementHandler;
1942 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1943 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1944 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1945 capi.SetUserData = XML_SetUserData;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001946
Benjamin Petersonb173f782009-05-05 22:31:58 +00001947 /* export using capsule */
1948 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001949 if (capi_object)
1950 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001951 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001952}
1953
Fred Drake6f987622000-08-25 18:03:30 +00001954static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001955clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001956{
Fred Drakecde79132001-04-25 16:01:30 +00001957 int i = 0;
1958 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001959
Fred Drake71b63ff2002-06-28 22:29:01 +00001960 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001961 if (initial)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001962 self->handlers[i] = NULL;
1963 else {
Fred Drakecde79132001-04-25 16:01:30 +00001964 temp = self->handlers[i];
1965 self->handlers[i] = NULL;
1966 Py_XDECREF(temp);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00001968 }
Fred Drakecde79132001-04-25 16:01:30 +00001969 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001970}
1971
Tim Peters0c322792002-07-17 16:49:03 +00001972static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00001973 {"StartElementHandler",
1974 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001975 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001976 {"EndElementHandler",
1977 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00001978 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001979 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001980 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
1981 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001982 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001983 (xmlhandlersetter)XML_SetCharacterDataHandler,
1984 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001985 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001986 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001987 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001988 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00001989 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001990 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001991 {"StartNamespaceDeclHandler",
1992 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001993 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00001994 {"EndNamespaceDeclHandler",
1995 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00001996 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00001997 {"CommentHandler",
1998 (xmlhandlersetter)XML_SetCommentHandler,
1999 (xmlhandler)my_CommentHandler},
2000 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002001 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002002 (xmlhandler)my_StartCdataSectionHandler},
2003 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002004 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002005 (xmlhandler)my_EndCdataSectionHandler},
2006 {"DefaultHandler",
2007 (xmlhandlersetter)XML_SetDefaultHandler,
2008 (xmlhandler)my_DefaultHandler},
2009 {"DefaultHandlerExpand",
2010 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2011 (xmlhandler)my_DefaultHandlerExpandHandler},
2012 {"NotStandaloneHandler",
2013 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2014 (xmlhandler)my_NotStandaloneHandler},
2015 {"ExternalEntityRefHandler",
2016 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002017 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002018 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002019 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002020 (xmlhandler)my_StartDoctypeDeclHandler},
2021 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002022 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002023 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002024 {"EntityDeclHandler",
2025 (xmlhandlersetter)XML_SetEntityDeclHandler,
2026 (xmlhandler)my_EntityDeclHandler},
2027 {"XmlDeclHandler",
2028 (xmlhandlersetter)XML_SetXmlDeclHandler,
2029 (xmlhandler)my_XmlDeclHandler},
2030 {"ElementDeclHandler",
2031 (xmlhandlersetter)XML_SetElementDeclHandler,
2032 (xmlhandler)my_ElementDeclHandler},
2033 {"AttlistDeclHandler",
2034 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2035 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002036#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002037 {"SkippedEntityHandler",
2038 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2039 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002040#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002041
Fred Drake0582df92000-07-12 04:49:00 +00002042 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002043};