blob: 389975d52bdbc9a272464c63147b8a9bec13cf2e [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Jeremy Hylton9263f572003-06-27 16:13:17 +000011#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* Handler slots.  The enumerators are used as indices into handler_info[]
 * (see getcode()) and into xmlparseobject.handlers[] (see have_handler()),
 * so their order must not change.
 */
enum HandlerTypes {
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only present when building against Expat 1.95.4 or newer. */
    SkippedEntity,
#endif
    _DummyDecl
};
40
41static PyObject *ErrorObject;
42
43/* ----------------------------------------------------- */
44
45/* Declarations for objects of type xmlparser */
46
47typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000048 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000049
Fred Drake0582df92000-07-12 04:49:00 +000050 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000051 int ordered_attributes; /* Return attributes as a list. */
52 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000053 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000054 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000055 XML_Char *buffer; /* Buffer used when accumulating characters */
56 /* NULL if not enabled */
57 int buffer_size; /* Size of buffer, in XML_Char units */
58 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000059 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000060 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000061} xmlparseobject;
62
Fred Drake2a3d7db2002-06-28 22:56:48 +000063#define CHARACTER_DATA_BUFFER_SIZE 8192
64
Jeremy Hylton938ace62002-07-17 16:30:39 +000065static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake117ac852002-09-24 16:24:54 +000067typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068typedef void* xmlhandler;
69
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000070struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000071 const char *name;
72 xmlhandlersetter setter;
73 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000074 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000075 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076};
77
Jeremy Hylton938ace62002-07-17 16:30:39 +000078static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079
Fred Drakebd6101c2001-02-14 18:29:45 +000080/* Set an integer attribute on the error object; return true on success,
81 * false on an exception.
82 */
83static int
84set_error_attr(PyObject *err, char *name, int value)
85{
Christian Heimes217cfd12007-12-02 14:31:20 +000086 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000087
Neal Norwitz2f5e9902006-03-08 06:36:45 +000088 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
89 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000090 return 0;
91 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000092 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 1;
94}
95
96/* Build and set an Expat exception, including positioning
97 * information. Always returns NULL.
98 */
Fred Drake85d835f2001-02-08 15:39:08 +000099static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000100set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000101{
102 PyObject *err;
103 char buffer[256];
104 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000105 int lineno = XML_GetErrorLineNumber(parser);
106 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000108 /* There is no risk of overflowing this buffer, since
109 even for 64-bit integers, there is sufficient space. */
110 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000111 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000112 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000113 if ( err != NULL
114 && set_error_attr(err, "code", code)
115 && set_error_attr(err, "offset", column)
116 && set_error_attr(err, "lineno", lineno)) {
117 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000118 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000119 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000120 return NULL;
121}
122
Fred Drake71b63ff2002-06-28 22:29:01 +0000123static int
124have_handler(xmlparseobject *self, int type)
125{
126 PyObject *handler = self->handlers[type];
127 return handler != NULL;
128}
129
130static PyObject *
131get_handler_name(struct HandlerInfo *hinfo)
132{
133 PyObject *name = hinfo->nameobj;
134 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000135 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000136 hinfo->nameobj = name;
137 }
138 Py_XINCREF(name);
139 return name;
140}
141
Fred Drake85d835f2001-02-08 15:39:08 +0000142
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000143/* Convert a string of XML_Chars into a Unicode string.
144 Returns None if str is a null pointer. */
145
Fred Drake0582df92000-07-12 04:49:00 +0000146static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000147conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000148{
Fred Drake71b63ff2002-06-28 22:29:01 +0000149 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000150 and hence in UTF-8. */
151 /* UTF-8 from Expat, Unicode desired */
152 if (str == NULL) {
153 Py_INCREF(Py_None);
154 return Py_None;
155 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000156 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000157}
158
Fred Drake0582df92000-07-12 04:49:00 +0000159static PyObject *
160conv_string_len_to_unicode(const XML_Char *str, int len)
161{
Fred Drake71b63ff2002-06-28 22:29:01 +0000162 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000163 and hence in UTF-8. */
164 /* UTF-8 from Expat, Unicode desired */
165 if (str == NULL) {
166 Py_INCREF(Py_None);
167 return Py_None;
168 }
Fred Drake6f987622000-08-25 18:03:30 +0000169 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000170}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000172/* Callback routines */
173
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000174static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000175
Martin v. Löwis069dde22003-01-21 10:58:18 +0000176/* This handler is used when an error has been detected, in the hope
177 that actual parsing can be terminated early. This will only help
178 if an external entity reference is encountered. */
179static int
180error_external_entity_ref_handler(XML_Parser parser,
181 const XML_Char *context,
182 const XML_Char *base,
183 const XML_Char *systemId,
184 const XML_Char *publicId)
185{
186 return 0;
187}
188
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000189/* Dummy character data handler used when an error (exception) has
190 been detected, and the actual parsing can be terminated early.
191 This is needed since character data handler can't be safely removed
192 from within the character data handler, but can be replaced. It is
193 used only from the character data handler trampoline, and must be
194 used right after `flag_error()` is called. */
195static void
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000196noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000197{
198 /* Do nothing. */
199}
200
Fred Drake6f987622000-08-25 18:03:30 +0000201static void
202flag_error(xmlparseobject *self)
203{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000204 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000205 XML_SetExternalEntityRefHandler(self->itself,
206 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000207}
208
209static PyCodeObject*
210getcode(enum HandlerTypes slot, char* func_name, int lineno)
211{
Fred Drakebd6101c2001-02-14 18:29:45 +0000212 PyObject *code = NULL;
213 PyObject *name = NULL;
214 PyObject *nulltuple = NULL;
215 PyObject *filename = NULL;
216
217 if (handler_info[slot].tb_code == NULL) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000218 code = PyBytes_FromString("");
Fred Drakebd6101c2001-02-14 18:29:45 +0000219 if (code == NULL)
220 goto failed;
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000221 name = PyUnicode_FromString(func_name);
Fred Drakebd6101c2001-02-14 18:29:45 +0000222 if (name == NULL)
223 goto failed;
224 nulltuple = PyTuple_New(0);
225 if (nulltuple == NULL)
226 goto failed;
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000227 filename = PyUnicode_DecodeFSDefault(__FILE__);
Fred Drakebd6101c2001-02-14 18:29:45 +0000228 handler_info[slot].tb_code =
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000229 PyCode_New(0, /* argcount */
Guido van Rossum4f72a782006-10-27 23:31:49 +0000230 0, /* kwonlyargcount */
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000231 0, /* nlocals */
232 0, /* stacksize */
233 0, /* flags */
234 code, /* code */
235 nulltuple, /* consts */
236 nulltuple, /* names */
237 nulltuple, /* varnames */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000238#if PYTHON_API_VERSION >= 1010
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000239 nulltuple, /* freevars */
240 nulltuple, /* cellvars */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000241#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000242 filename, /* filename */
243 name, /* name */
244 lineno, /* firstlineno */
245 code /* lnotab */
Fred Drakebd6101c2001-02-14 18:29:45 +0000246 );
247 if (handler_info[slot].tb_code == NULL)
248 goto failed;
249 Py_DECREF(code);
250 Py_DECREF(nulltuple);
251 Py_DECREF(filename);
252 Py_DECREF(name);
253 }
254 return handler_info[slot].tb_code;
255 failed:
256 Py_XDECREF(code);
257 Py_XDECREF(name);
258 return NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000259}
260
Jeremy Hylton9263f572003-06-27 16:13:17 +0000261#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000262static int
263trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
264{
265 int result = 0;
266 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000267 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000268 if (tstate->c_profilefunc != NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000269 tstate->tracing++;
270 result = tstate->c_profilefunc(tstate->c_profileobj,
271 f, code , val);
272 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
273 || (tstate->c_profilefunc != NULL));
274 tstate->tracing--;
275 if (result)
276 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000277 }
278 if (tstate->c_tracefunc != NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000279 tstate->tracing++;
280 result = tstate->c_tracefunc(tstate->c_traceobj,
281 f, code , val);
282 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
283 || (tstate->c_profilefunc != NULL));
284 tstate->tracing--;
285 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000286 return result;
287}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000288
289static int
290trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
291{
292 PyObject *type, *value, *traceback, *arg;
293 int err;
294
295 if (tstate->c_tracefunc == NULL)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000296 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000297
298 PyErr_Fetch(&type, &value, &traceback);
299 if (value == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000300 value = Py_None;
301 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000302 }
Martin v. Löwis9171f022004-10-13 19:50:11 +0000303#if PY_VERSION_HEX < 0x02040000
304 arg = Py_BuildValue("(OOO)", type, value, traceback);
305#else
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000306 arg = PyTuple_Pack(3, type, value, traceback);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000307#endif
Jeremy Hylton9263f572003-06-27 16:13:17 +0000308 if (arg == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000309 PyErr_Restore(type, value, traceback);
310 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000311 }
312 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
313 Py_DECREF(arg);
314 if (err == 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000315 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000316 else {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000317 Py_XDECREF(type);
318 Py_XDECREF(value);
319 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000320 }
321 return err;
322}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000323#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000324
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000325static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000326call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
327 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000328{
Fred Drakebd6101c2001-02-14 18:29:45 +0000329 PyThreadState *tstate = PyThreadState_GET();
330 PyFrameObject *f;
331 PyObject *res;
332
333 if (c == NULL)
334 return NULL;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000335
Jeremy Hylton9263f572003-06-27 16:13:17 +0000336 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000337 if (f == NULL)
338 return NULL;
339 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000340#ifdef FIX_TRACE
341 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000342 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000343 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000344#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000345 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000346 if (res == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000347 if (tstate->curexc_traceback == NULL)
348 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000349 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000350#ifdef FIX_TRACE
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000351 if (trace_frame_exc(tstate, f) < 0) {
352 return NULL;
353 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000354 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000355 else {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000356 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
357 Py_XDECREF(res);
358 res = NULL;
359 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000360 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000361#else
362 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000363#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000364 tstate->frame = f->f_back;
365 Py_DECREF(f);
366 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000367}
368
Fred Drakeb91a36b2002-06-27 19:40:48 +0000369static PyObject*
370string_intern(xmlparseobject *self, const char* str)
371{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000372 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000373 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000374 /* result can be NULL if the unicode conversion failed. */
375 if (!result)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000376 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000377 if (!self->intern)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000378 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000379 value = PyDict_GetItem(self->intern, result);
380 if (!value) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000381 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000382 return result;
383 else
384 return NULL;
385 }
386 Py_INCREF(value);
387 Py_DECREF(result);
388 return value;
389}
390
Fred Drake2a3d7db2002-06-28 22:56:48 +0000391/* Return 0 on success, -1 on exception.
392 * flag_error() will be called before return if needed.
393 */
394static int
395call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
396{
397 PyObject *args;
398 PyObject *temp;
399
400 args = PyTuple_New(1);
401 if (args == NULL)
402 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000403 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000404 if (temp == NULL) {
405 Py_DECREF(args);
406 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000407 XML_SetCharacterDataHandler(self->itself,
408 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000409 return -1;
410 }
411 PyTuple_SET_ITEM(args, 0, temp);
412 /* temp is now a borrowed reference; consider it unused. */
413 self->in_callback = 1;
414 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000415 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000416 /* temp is an owned reference again, or NULL */
417 self->in_callback = 0;
418 Py_DECREF(args);
419 if (temp == NULL) {
420 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000421 XML_SetCharacterDataHandler(self->itself,
422 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000423 return -1;
424 }
425 Py_DECREF(temp);
426 return 0;
427}
428
429static int
430flush_character_buffer(xmlparseobject *self)
431{
432 int rc;
433 if (self->buffer == NULL || self->buffer_used == 0)
434 return 0;
435 rc = call_character_handler(self, self->buffer, self->buffer_used);
436 self->buffer_used = 0;
437 return rc;
438}
439
440static void
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000441my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000442{
443 xmlparseobject *self = (xmlparseobject *) userData;
444 if (self->buffer == NULL)
445 call_character_handler(self, data, len);
446 else {
447 if ((self->buffer_used + len) > self->buffer_size) {
448 if (flush_character_buffer(self) < 0)
449 return;
450 /* handler might have changed; drop the rest on the floor
451 * if there isn't a handler anymore
452 */
453 if (!have_handler(self, CharacterData))
454 return;
455 }
456 if (len > self->buffer_size) {
457 call_character_handler(self, data, len);
458 self->buffer_used = 0;
459 }
460 else {
461 memcpy(self->buffer + self->buffer_used,
462 data, len * sizeof(XML_Char));
463 self->buffer_used += len;
464 }
465 }
466}
467
Fred Drake85d835f2001-02-08 15:39:08 +0000468static void
469my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000470 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000471{
472 xmlparseobject *self = (xmlparseobject *)userData;
473
Fred Drake71b63ff2002-06-28 22:29:01 +0000474 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000475 PyObject *container, *rv, *args;
476 int i, max;
477
Fred Drake2a3d7db2002-06-28 22:56:48 +0000478 if (flush_character_buffer(self) < 0)
479 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000480 /* Set max to the number of slots filled in atts[]; max/2 is
481 * the number of attributes we need to process.
482 */
483 if (self->specified_attributes) {
484 max = XML_GetSpecifiedAttributeCount(self->itself);
485 }
486 else {
487 max = 0;
488 while (atts[max] != NULL)
489 max += 2;
490 }
491 /* Build the container. */
492 if (self->ordered_attributes)
493 container = PyList_New(max);
494 else
495 container = PyDict_New();
496 if (container == NULL) {
497 flag_error(self);
498 return;
499 }
500 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000501 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000502 PyObject *v;
503 if (n == NULL) {
504 flag_error(self);
505 Py_DECREF(container);
506 return;
507 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000508 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000509 if (v == NULL) {
510 flag_error(self);
511 Py_DECREF(container);
512 Py_DECREF(n);
513 return;
514 }
515 if (self->ordered_attributes) {
516 PyList_SET_ITEM(container, i, n);
517 PyList_SET_ITEM(container, i+1, v);
518 }
519 else if (PyDict_SetItem(container, n, v)) {
520 flag_error(self);
521 Py_DECREF(n);
522 Py_DECREF(v);
523 return;
524 }
525 else {
526 Py_DECREF(n);
527 Py_DECREF(v);
528 }
529 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000530 args = string_intern(self, name);
531 if (args != NULL)
532 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000533 if (args == NULL) {
534 Py_DECREF(container);
535 return;
536 }
537 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000538 self->in_callback = 1;
539 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000540 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000541 self->in_callback = 0;
542 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000543 if (rv == NULL) {
544 flag_error(self);
545 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000546 }
Fred Drake85d835f2001-02-08 15:39:08 +0000547 Py_DECREF(rv);
548 }
549}
550
/* Generate the C trampoline my_<NAME>Handler for handler slot NAME.
 *
 *   RC            return type of the trampoline
 *   PARAMS        parenthesised parameter list of the trampoline
 *   INIT          declarations/statements run before anything else
 *   PARAM_FORMAT  parenthesised argument list for Py_BuildValue()
 *   CONVERSION    statements run on the handler result `rv`
 *   RETURN        expression returned on every exit path
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * When a handler is set, the trampoline flushes buffered character data,
 * builds the argument tuple, calls the handler through call_with_frame()
 * and calls flag_error() if building the arguments or the call fails.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS {\
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
\
    if (have_handler(self, NAME)) { \
        if (flush_character_buffer(self) < 0) \
            return RETURN; \
        args = Py_BuildValue PARAM_FORMAT ;\
        if (!args) { \
            flag_error(self); \
            return RETURN; \
        } \
        self->in_callback = 1; \
        rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                             self->handlers[NAME], args, self); \
        self->in_callback = 0; \
        Py_DECREF(args); \
        if (rv == NULL) { \
            flag_error(self); \
            return RETURN; \
        } \
        CONVERSION \
        Py_DECREF(rv); \
    } \
    return RETURN; \
}
579
/* Trampoline returning void whose first parameter is `void *userData`
   (the xmlparseobject); the handler's result is discarded. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000583
/* Trampoline returning int: the result is 0 unless the handler call
   succeeds, in which case it is PyLong_AsLong() of the handler's result.
   NOTE(review): a failed PyLong_AsLong() conversion is not checked. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000588
Fred Drake71b63ff2002-06-28 22:29:01 +0000589VOID_HANDLER(EndElement,
590 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000591 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000592
Fred Drake6f987622000-08-25 18:03:30 +0000593VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000594 (void *userData,
595 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000596 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000597 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000598
Fred Drake6f987622000-08-25 18:03:30 +0000599VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000600 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000601 const XML_Char *entityName,
602 const XML_Char *base,
603 const XML_Char *systemId,
604 const XML_Char *publicId,
605 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000606 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000607 string_intern(self, entityName), string_intern(self, base),
608 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000609 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000610
Fred Drake85d835f2001-02-08 15:39:08 +0000611VOID_HANDLER(EntityDecl,
612 (void *userData,
613 const XML_Char *entityName,
614 int is_parameter_entity,
615 const XML_Char *value,
616 int value_length,
617 const XML_Char *base,
618 const XML_Char *systemId,
619 const XML_Char *publicId,
620 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000621 ("NiNNNNN",
622 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000623 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000624 string_intern(self, base), string_intern(self, systemId),
625 string_intern(self, publicId),
626 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000627
628VOID_HANDLER(XmlDecl,
629 (void *userData,
630 const XML_Char *version,
631 const XML_Char *encoding,
632 int standalone),
633 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000634 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000635 standalone))
636
637static PyObject *
638conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000639 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000640{
641 PyObject *result = NULL;
642 PyObject *children = PyTuple_New(model->numchildren);
643 int i;
644
645 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000646 assert(model->numchildren < INT_MAX);
647 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000648 PyObject *child = conv_content_model(&model->children[i],
649 conv_string);
650 if (child == NULL) {
651 Py_XDECREF(children);
652 return NULL;
653 }
654 PyTuple_SET_ITEM(children, i, child);
655 }
656 result = Py_BuildValue("(iiO&N)",
657 model->type, model->quant,
658 conv_string,model->name, children);
659 }
660 return result;
661}
662
Fred Drake06dd8cf2003-02-02 03:54:17 +0000663static void
664my_ElementDeclHandler(void *userData,
665 const XML_Char *name,
666 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000667{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000668 xmlparseobject *self = (xmlparseobject *)userData;
669 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000670
Fred Drake06dd8cf2003-02-02 03:54:17 +0000671 if (have_handler(self, ElementDecl)) {
672 PyObject *rv = NULL;
673 PyObject *modelobj, *nameobj;
674
675 if (flush_character_buffer(self) < 0)
676 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000677 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000678 if (modelobj == NULL) {
679 flag_error(self);
680 goto finally;
681 }
682 nameobj = string_intern(self, name);
683 if (nameobj == NULL) {
684 Py_DECREF(modelobj);
685 flag_error(self);
686 goto finally;
687 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000688 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000689 if (args == NULL) {
690 Py_DECREF(modelobj);
691 flag_error(self);
692 goto finally;
693 }
694 self->in_callback = 1;
695 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000696 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000697 self->in_callback = 0;
698 if (rv == NULL) {
699 flag_error(self);
700 goto finally;
701 }
702 Py_DECREF(rv);
703 }
704 finally:
705 Py_XDECREF(args);
706 XML_FreeContentModel(self->itself, model);
707 return;
708}
Fred Drake85d835f2001-02-08 15:39:08 +0000709
710VOID_HANDLER(AttlistDecl,
711 (void *userData,
712 const XML_Char *elname,
713 const XML_Char *attname,
714 const XML_Char *att_type,
715 const XML_Char *dflt,
716 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000717 ("(NNO&O&i)",
718 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000719 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000720 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000721
#if XML_COMBINED_VERSION >= 19504
/* my_SkippedEntityHandler: handler(entityName, is_parameter_entity).
   Only built when the SkippedEntity slot exists (Expat >= 1.95.4). */
VOID_HANDLER(SkippedEntity,
             (void *userData,
              const XML_Char *entityName,
              int is_parameter_entity),
             ("Ni",
              string_intern(self, entityName),
              is_parameter_entity))
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000730
Fred Drake71b63ff2002-06-28 22:29:01 +0000731VOID_HANDLER(NotationDecl,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000732 (void *userData,
733 const XML_Char *notationName,
734 const XML_Char *base,
735 const XML_Char *systemId,
736 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000737 ("(NNNN)",
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000738 string_intern(self, notationName), string_intern(self, base),
739 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000740
Fred Drake6f987622000-08-25 18:03:30 +0000741VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000742 (void *userData,
743 const XML_Char *prefix,
744 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000745 ("(NN)",
746 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000747
Fred Drake6f987622000-08-25 18:03:30 +0000748VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000749 (void *userData,
750 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000751 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000752
Fred Drake6f987622000-08-25 18:03:30 +0000753VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000754 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000755 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000756
Fred Drake6f987622000-08-25 18:03:30 +0000757VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000758 (void *userData),
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000759 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000760
Fred Drake6f987622000-08-25 18:03:30 +0000761VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000762 (void *userData),
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000763 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000764
Fred Drake6f987622000-08-25 18:03:30 +0000765VOID_HANDLER(Default,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000766 (void *userData, const XML_Char *s, int len),
767 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000768
Fred Drake6f987622000-08-25 18:03:30 +0000769VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000770 (void *userData, const XML_Char *s, int len),
771 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000772
Fred Drake71b63ff2002-06-28 22:29:01 +0000773INT_HANDLER(NotStandalone,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000774 (void *userData),
775 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000776
Fred Drake6f987622000-08-25 18:03:30 +0000777RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000778 (XML_Parser parser,
779 const XML_Char *context,
780 const XML_Char *base,
781 const XML_Char *systemId,
782 const XML_Char *publicId),
783 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000784 ("(O&NNN)",
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000785 conv_string_to_unicode ,context, string_intern(self, base),
786 string_intern(self, systemId), string_intern(self, publicId)),
787 rc = PyLong_AsLong(rv);, rc,
788 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000789
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000790/* XXX UnknownEncodingHandler */
791
Fred Drake85d835f2001-02-08 15:39:08 +0000792VOID_HANDLER(StartDoctypeDecl,
793 (void *userData, const XML_Char *doctypeName,
794 const XML_Char *sysid, const XML_Char *pubid,
795 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000796 ("(NNNi)", string_intern(self, doctypeName),
797 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000798 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000799
800VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000801
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000802/* ---------------------------------------------------------------- */
803
Fred Drake71b63ff2002-06-28 22:29:01 +0000804static PyObject *
805get_parse_result(xmlparseobject *self, int rv)
806{
807 if (PyErr_Occurred()) {
808 return NULL;
809 }
810 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000811 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000812 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000813 if (flush_character_buffer(self) < 0) {
814 return NULL;
815 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000816 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000817}
818
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000819PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000820"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000821Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000822
823static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000824xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000825{
Fred Drake0582df92000-07-12 04:49:00 +0000826 char *s;
827 int slen;
828 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000829
Fred Drake0582df92000-07-12 04:49:00 +0000830 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
831 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000832
833 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000834}
835
Fred Drakeca1f4262000-09-21 20:10:23 +0000836/* File reading copied from cPickle */
837
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000838#define BUF_SIZE 2048
839
Fred Drake0582df92000-07-12 04:49:00 +0000840static int
841readinst(char *buf, int buf_size, PyObject *meth)
842{
843 PyObject *arg = NULL;
844 PyObject *bytes = NULL;
845 PyObject *str = NULL;
846 int len = -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000847 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000848
Christian Heimes217cfd12007-12-02 14:31:20 +0000849 if ((bytes = PyLong_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000850 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000851
Fred Drake7b6caff2003-07-21 17:05:56 +0000852 if ((arg = PyTuple_New(1)) == NULL) {
853 Py_DECREF(bytes);
Fred Drake0582df92000-07-12 04:49:00 +0000854 goto finally;
Fred Drake7b6caff2003-07-21 17:05:56 +0000855 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000856
Tim Peters954eef72000-09-22 06:01:11 +0000857 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000858
Martin v. Löwis9171f022004-10-13 19:50:11 +0000859#if PY_VERSION_HEX < 0x02020000
860 str = PyObject_CallObject(meth, arg);
861#else
862 str = PyObject_Call(meth, arg, NULL);
863#endif
864 if (str == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000865 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000866
Christian Heimes72b710a2008-05-26 13:28:38 +0000867 if (PyBytes_Check(str))
868 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000869 else if (PyByteArray_Check(str))
870 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000871 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000872 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000873 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000874 Py_TYPE(str)->tp_name);
Fred Drake0582df92000-07-12 04:49:00 +0000875 goto finally;
876 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000877 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000878 if (len > buf_size) {
879 PyErr_Format(PyExc_ValueError,
880 "read() returned too much data: "
881 "%i bytes requested, %i returned",
882 buf_size, len);
Fred Drake0582df92000-07-12 04:49:00 +0000883 goto finally;
884 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000885 memcpy(buf, ptr, len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000886finally:
Fred Drake0582df92000-07-12 04:49:00 +0000887 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000888 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000889 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000890}
891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000892PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000893"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000894Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000895
896static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000897xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000898{
Fred Drake0582df92000-07-12 04:49:00 +0000899 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000900 FILE *fp;
901 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000902
Guido van Rossumda5b8f22007-06-12 23:30:11 +0000903 {
Fred Drake0582df92000-07-12 04:49:00 +0000904 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000905 readmethod = PyObject_GetAttrString(f, "read");
906 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000907 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000908 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000909 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000910 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000911 }
912 }
913 for (;;) {
914 int bytes_read;
915 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000916 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000917 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000918 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000919 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000920
Fred Drake0582df92000-07-12 04:49:00 +0000921 if (fp) {
922 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
923 if (bytes_read < 0) {
924 PyErr_SetFromErrno(PyExc_IOError);
925 return NULL;
926 }
927 }
928 else {
929 bytes_read = readinst(buf, BUF_SIZE, readmethod);
Fred Drake7b6caff2003-07-21 17:05:56 +0000930 if (bytes_read < 0) {
931 Py_DECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000932 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000933 }
Fred Drake0582df92000-07-12 04:49:00 +0000934 }
935 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000936 if (PyErr_Occurred()) {
937 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000938 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000939 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000940
Fred Drake0582df92000-07-12 04:49:00 +0000941 if (!rv || bytes_read == 0)
942 break;
943 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000944 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000945 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000946}
947
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000948PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000949"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000950Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000951
952static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000953xmlparse_SetBase(xmlparseobject *self, PyObject *args)
954{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000955 char *base;
956
Fred Drake0582df92000-07-12 04:49:00 +0000957 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000958 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000959 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000960 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000961 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000962 Py_INCREF(Py_None);
963 return Py_None;
964}
965
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000966PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000967"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000968Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000969
970static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000971xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000972{
Fred Drake0582df92000-07-12 04:49:00 +0000973 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000974}
975
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000976PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000977"GetInputContext() -> string\n\
978Return the untranslated text of the input that caused the current event.\n\
979If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000980for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000981
982static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000983xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000984{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000985 if (self->in_callback) {
986 int offset, size;
987 const char *buffer
988 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000989
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000990 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000991 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000992 size - offset);
993 else
994 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000995 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000996 else
997 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000998}
Fred Drakebd6101c2001-02-14 18:29:45 +0000999
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001000PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001001"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001002Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001003information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001004
1005static PyObject *
1006xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1007{
1008 char *context;
1009 char *encoding = NULL;
1010 xmlparseobject *new_parser;
1011 int i;
1012
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001013 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001014 &context, &encoding)) {
1015 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001016 }
1017
Martin v. Löwis894258c2001-09-23 10:20:10 +00001018#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001019 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001020 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001021#else
1022 /* Python versions 2.2 and later */
1023 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1024#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001025
1026 if (new_parser == NULL)
1027 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001028 new_parser->buffer_size = self->buffer_size;
1029 new_parser->buffer_used = 0;
1030 if (self->buffer != NULL) {
1031 new_parser->buffer = malloc(new_parser->buffer_size);
1032 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001033#ifndef Py_TPFLAGS_HAVE_GC
1034 /* Code for versions 2.0 and 2.1 */
1035 PyObject_Del(new_parser);
1036#else
1037 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001038 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001039#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001040 return PyErr_NoMemory();
1041 }
1042 }
1043 else
1044 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001045 new_parser->ordered_attributes = self->ordered_attributes;
1046 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001047 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001048 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001049 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001050 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001051 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001052 new_parser->intern = self->intern;
1053 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001054#ifdef Py_TPFLAGS_HAVE_GC
1055 PyObject_GC_Track(new_parser);
1056#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001057 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001058#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001059
1060 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001061 Py_DECREF(new_parser);
1062 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001063 }
1064
1065 XML_SetUserData(new_parser->itself, (void *)new_parser);
1066
1067 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001068 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001069 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001070
Fred Drake2a3d7db2002-06-28 22:56:48 +00001071 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001072 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001073 Py_DECREF(new_parser);
1074 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001075 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001076 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001077
1078 /* then copy handlers from self */
1079 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001080 PyObject *handler = self->handlers[i];
1081 if (handler != NULL) {
1082 Py_INCREF(handler);
1083 new_parser->handlers[i] = handler;
1084 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001085 handler_info[i].handler);
1086 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001087 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001088 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001089}
1090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001091PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001092"SetParamEntityParsing(flag) -> success\n\
1093Controls parsing of parameter entities (including the external DTD\n\
1094subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1095XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1096XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001097was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001098
1099static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001100xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001101{
Fred Drake85d835f2001-02-08 15:39:08 +00001102 int flag;
1103 if (!PyArg_ParseTuple(args, "i", &flag))
1104 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001105 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001106 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001107}
1108
Martin v. Löwisc847f402003-01-21 11:09:21 +00001109
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): forward the truth value of `flag` (default true)
 * to XML_UseForeignDTD().
 *
 * Returns None on success.  Returns NULL with the pending exception when
 * evaluating the truth of `flag` fails, and the result of set_error() when
 * Expat rejects the request.
 */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 when the object's truth test raises;
           that must propagate rather than be read as "true". */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001137
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001138static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1139
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001140static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001141 {"Parse", (PyCFunction)xmlparse_Parse,
1142 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001143 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001144 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001145 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001146 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001147 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001148 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001149 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001150 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001151 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001152 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001153 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001154 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001155#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001156 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001157 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001158#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001159 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001160 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001161};
1162
1163/* ---------- */
1164
1165
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001166
Fred Drake71b63ff2002-06-28 22:29:01 +00001167/* pyexpat international encoding support.
1168 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001169*/
1170
/* Holds the byte values 0..255 in order, followed by a terminating 0. */
static char template_buffer[257];

/* Fill template_buffer so that entry i holds byte value i for i in 0..255
   and entry 256 is 0. */
static void
init_template_buffer(void)
{
    int code = 256;

    template_buffer[code] = 0;
    while (code-- > 0) {
        template_buffer[code] = code;
    }
}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001182
Fred Drake71b63ff2002-06-28 22:29:01 +00001183static int
1184PyUnknownEncodingHandler(void *encodingHandlerData,
1185 const XML_Char *name,
1186 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001187{
Fred Drakebb66a202001-03-01 20:48:17 +00001188 PyUnicodeObject *_u_string = NULL;
1189 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001190 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001191
Fred Drakebb66a202001-03-01 20:48:17 +00001192 /* Yes, supports only 8bit encodings */
1193 _u_string = (PyUnicodeObject *)
1194 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001195
Fred Drakebb66a202001-03-01 20:48:17 +00001196 if (_u_string == NULL)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001197 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001198
Fred Drakebb66a202001-03-01 20:48:17 +00001199 for (i = 0; i < 256; i++) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001200 /* Stupid to access directly, but fast */
1201 Py_UNICODE c = _u_string->str[i];
1202 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1203 info->map[i] = -1;
1204 else
1205 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001206 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001207 info->data = NULL;
1208 info->convert = NULL;
1209 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001210 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001211 Py_DECREF(_u_string);
1212 return result;
1213}
1214
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001215
1216static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001217newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001218{
1219 int i;
1220 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001221
Martin v. Löwis894258c2001-09-23 10:20:10 +00001222#ifdef Py_TPFLAGS_HAVE_GC
1223 /* Code for versions 2.2 and later */
1224 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1225#else
Fred Drake0582df92000-07-12 04:49:00 +00001226 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001227#endif
Fred Drake0582df92000-07-12 04:49:00 +00001228 if (self == NULL)
1229 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001230
Fred Drake2a3d7db2002-06-28 22:56:48 +00001231 self->buffer = NULL;
1232 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1233 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001234 self->ordered_attributes = 0;
1235 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001236 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001237 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001238 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001239 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001240 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1241 }
Fred Drake85d835f2001-02-08 15:39:08 +00001242 else {
Fred Drake0582df92000-07-12 04:49:00 +00001243 self->itself = XML_ParserCreate(encoding);
1244 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001245 self->intern = intern;
1246 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001247#ifdef Py_TPFLAGS_HAVE_GC
1248 PyObject_GC_Track(self);
1249#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001250 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001251#endif
Fred Drake0582df92000-07-12 04:49:00 +00001252 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001253 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001254 "XML_ParserCreate failed");
1255 Py_DECREF(self);
1256 return NULL;
1257 }
1258 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001259 XML_SetUnknownEncodingHandler(self->itself,
1260 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001261
Fred Drake2a3d7db2002-06-28 22:56:48 +00001262 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001263 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001264
Fred Drake7c75bf22002-07-01 14:02:31 +00001265 self->handlers = malloc(sizeof(PyObject *) * i);
1266 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001267 Py_DECREF(self);
1268 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001269 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001270 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001271
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001272 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001273}
1274
1275
1276static void
Fred Drake0582df92000-07-12 04:49:00 +00001277xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001278{
Fred Drake0582df92000-07-12 04:49:00 +00001279 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001280#ifdef Py_TPFLAGS_HAVE_GC
1281 PyObject_GC_UnTrack(self);
1282#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001283 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001284#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001285 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001286 XML_ParserFree(self->itself);
1287 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001288
Fred Drake85d835f2001-02-08 15:39:08 +00001289 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001290 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001291 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001292 temp = self->handlers[i];
1293 self->handlers[i] = NULL;
1294 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001295 }
1296 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001297 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001298 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001299 if (self->buffer != NULL) {
1300 free(self->buffer);
1301 self->buffer = NULL;
1302 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001303 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001304#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001305 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001306 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001307#else
1308 /* Code for versions 2.2 and later. */
1309 PyObject_GC_Del(self);
1310#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001311}
1312
Fred Drake0582df92000-07-12 04:49:00 +00001313static int
1314handlername2int(const char *name)
1315{
1316 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001317 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001318 if (strcmp(name, handler_info[i].name) == 0) {
1319 return i;
1320 }
1321 }
1322 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001323}
1324
1325static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001326get_pybool(int istrue)
1327{
1328 PyObject *result = istrue ? Py_True : Py_False;
1329 Py_INCREF(result);
1330 return result;
1331}
1332
1333static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001334xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001335{
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001336 char *name = "";
1337 int handlernum = -1;
1338
1339 if (PyUnicode_Check(nameobj))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001340 name = _PyUnicode_AsString(nameobj);
1341
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001342 handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001343
1344 if (handlernum != -1) {
1345 PyObject *result = self->handlers[handlernum];
1346 if (result == NULL)
1347 result = Py_None;
1348 Py_INCREF(result);
1349 return result;
1350 }
1351 if (name[0] == 'E') {
1352 if (strcmp(name, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001353 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001354 XML_GetErrorCode(self->itself));
1355 if (strcmp(name, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001356 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001357 XML_GetErrorLineNumber(self->itself));
1358 if (strcmp(name, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001359 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001360 XML_GetErrorColumnNumber(self->itself));
1361 if (strcmp(name, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001362 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001363 XML_GetErrorByteIndex(self->itself));
1364 }
Dave Cole3203efb2004-08-26 00:37:31 +00001365 if (name[0] == 'C') {
1366 if (strcmp(name, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001367 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001368 XML_GetCurrentLineNumber(self->itself));
1369 if (strcmp(name, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001370 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001371 XML_GetCurrentColumnNumber(self->itself));
1372 if (strcmp(name, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001373 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001374 XML_GetCurrentByteIndex(self->itself));
1375 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001376 if (name[0] == 'b') {
1377 if (strcmp(name, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001378 return PyLong_FromLong((long) self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001379 if (strcmp(name, "buffer_text") == 0)
1380 return get_pybool(self->buffer != NULL);
1381 if (strcmp(name, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001382 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001383 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001384 if (strcmp(name, "namespace_prefixes") == 0)
1385 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001386 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001387 return get_pybool(self->ordered_attributes);
Fred Drake85d835f2001-02-08 15:39:08 +00001388 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001389 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001390 if (strcmp(name, "intern") == 0) {
1391 if (self->intern == NULL) {
1392 Py_INCREF(Py_None);
1393 return Py_None;
1394 }
1395 else {
1396 Py_INCREF(self->intern);
1397 return self->intern;
1398 }
1399 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001400
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001401 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001402}
1403
1404static PyObject *
1405xmlparse_dir(PyObject *self, PyObject* noargs)
1406{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001407#define APPEND(list, str) \
1408 do { \
1409 PyObject *o = PyUnicode_FromString(str); \
1410 if (o != NULL) \
1411 PyList_Append(list, o); \
1412 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001413 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001414
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001415 int i;
1416 PyObject *rc = PyList_New(0);
1417 if (!rc)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001418 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001419 for (i = 0; handler_info[i].name != NULL; i++) {
1420 PyObject *o = get_handler_name(&handler_info[i]);
1421 if (o != NULL)
1422 PyList_Append(rc, o);
1423 Py_XDECREF(o);
1424 }
1425 APPEND(rc, "ErrorCode");
1426 APPEND(rc, "ErrorLineNumber");
1427 APPEND(rc, "ErrorColumnNumber");
1428 APPEND(rc, "ErrorByteIndex");
1429 APPEND(rc, "CurrentLineNumber");
1430 APPEND(rc, "CurrentColumnNumber");
1431 APPEND(rc, "CurrentByteIndex");
1432 APPEND(rc, "buffer_size");
1433 APPEND(rc, "buffer_text");
1434 APPEND(rc, "buffer_used");
1435 APPEND(rc, "namespace_prefixes");
1436 APPEND(rc, "ordered_attributes");
1437 APPEND(rc, "specified_attributes");
1438 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001439
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001440#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001441
1442 if (PyErr_Occurred()) {
1443 Py_DECREF(rc);
1444 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001445 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001446
1447 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001448}
1449
Fred Drake6f987622000-08-25 18:03:30 +00001450static int
1451sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001452{
1453 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001454 if (handlernum >= 0) {
1455 xmlhandler c_handler = NULL;
1456 PyObject *temp = self->handlers[handlernum];
1457
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001458 if (v == Py_None) {
1459 /* If this is the character data handler, and a character
1460 data handler is already active, we need to be more
1461 careful. What we can safely do is replace the existing
1462 character data handler callback function with a no-op
1463 function that will refuse to call Python. The downside
1464 is that this doesn't completely remove the character
1465 data handler from the C layer if there's any callback
1466 active, so Expat does a little more work than it
1467 otherwise would, but that's really an odd case. A more
1468 elaborate system of handlers and state could remove the
1469 C handler more effectively. */
1470 if (handlernum == CharacterData && self->in_callback)
1471 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001472 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001473 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001474 else if (v != NULL) {
1475 Py_INCREF(v);
1476 c_handler = handler_info[handlernum].handler;
1477 }
Fred Drake0582df92000-07-12 04:49:00 +00001478 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001479 Py_XDECREF(temp);
1480 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001481 return 1;
1482 }
1483 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001484}
1485
1486static int
Fred Drake6f987622000-08-25 18:03:30 +00001487xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001488{
Fred Drake6f987622000-08-25 18:03:30 +00001489 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001490 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001491 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1492 return -1;
1493 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001494 if (strcmp(name, "buffer_text") == 0) {
1495 if (PyObject_IsTrue(v)) {
1496 if (self->buffer == NULL) {
1497 self->buffer = malloc(self->buffer_size);
1498 if (self->buffer == NULL) {
1499 PyErr_NoMemory();
1500 return -1;
1501 }
1502 self->buffer_used = 0;
1503 }
1504 }
1505 else if (self->buffer != NULL) {
1506 if (flush_character_buffer(self) < 0)
1507 return -1;
1508 free(self->buffer);
1509 self->buffer = NULL;
1510 }
1511 return 0;
1512 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001513 if (strcmp(name, "namespace_prefixes") == 0) {
1514 if (PyObject_IsTrue(v))
1515 self->ns_prefixes = 1;
1516 else
1517 self->ns_prefixes = 0;
1518 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1519 return 0;
1520 }
Fred Drake85d835f2001-02-08 15:39:08 +00001521 if (strcmp(name, "ordered_attributes") == 0) {
1522 if (PyObject_IsTrue(v))
1523 self->ordered_attributes = 1;
1524 else
1525 self->ordered_attributes = 0;
1526 return 0;
1527 }
Fred Drake85d835f2001-02-08 15:39:08 +00001528 if (strcmp(name, "specified_attributes") == 0) {
1529 if (PyObject_IsTrue(v))
1530 self->specified_attributes = 1;
1531 else
1532 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001533 return 0;
1534 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001535
1536 if (strcmp(name, "buffer_size") == 0) {
1537 long new_buffer_size;
1538 if (!PyLong_Check(v)) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001539 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1540 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001541 }
1542
1543 new_buffer_size=PyLong_AS_LONG(v);
1544 /* trivial case -- no change */
1545 if (new_buffer_size == self->buffer_size) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001546 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001547 }
1548
1549 if (new_buffer_size <= 0) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001550 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1551 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001552 }
1553
1554 /* check maximum */
1555 if (new_buffer_size > INT_MAX) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001556 char errmsg[100];
1557 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1558 PyErr_SetString(PyExc_ValueError, errmsg);
1559 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001560 }
1561
1562 if (self->buffer != NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001563 /* there is already a buffer */
1564 if (self->buffer_used != 0) {
1565 flush_character_buffer(self);
1566 }
1567 /* free existing buffer */
1568 free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001569 }
1570 self->buffer = malloc(new_buffer_size);
1571 if (self->buffer == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001572 PyErr_NoMemory();
1573 return -1;
1574 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001575 self->buffer_size = new_buffer_size;
1576 return 0;
1577 }
1578
Fred Drake2a3d7db2002-06-28 22:56:48 +00001579 if (strcmp(name, "CharacterDataHandler") == 0) {
1580 /* If we're changing the character data handler, flush all
1581 * cached data with the old handler. Not sure there's a
1582 * "right" thing to do, though, but this probably won't
1583 * happen.
1584 */
1585 if (flush_character_buffer(self) < 0)
1586 return -1;
1587 }
Fred Drake6f987622000-08-25 18:03:30 +00001588 if (sethandler(self, name, v)) {
1589 return 0;
1590 }
1591 PyErr_SetString(PyExc_AttributeError, name);
1592 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001593}
1594
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001595static int
1596xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1597{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001598 int i;
1599 for (i = 0; handler_info[i].name != NULL; i++)
1600 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001601 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001602}
1603
1604static int
1605xmlparse_clear(xmlparseobject *op)
1606{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001607 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001608 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001609 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001610}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001611
PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");

/* Type object for xmlparser instances.
 *
 * The initializer is positional, so entries must stay in PyTypeObject
 * slot order.  Attribute reads are served by xmlparse_getattro (which
 * receives the name as an object), attribute writes by xmlparse_setattr
 * (which receives the name as a C string).  The type takes part in cyclic
 * garbage collection through xmlparse_traverse / xmlparse_clear.
 */
static PyTypeObject Xmlparsetype = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "pyexpat.xmlparser",                /*tp_name*/
    sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    /* methods */
    (destructor)xmlparse_dealloc,       /*tp_dealloc*/
    (printfunc)0,                       /*tp_print*/
    0,                                  /*tp_getattr*/
    (setattrfunc)xmlparse_setattr,      /*tp_setattr*/
    0,                                  /*tp_reserved*/
    (reprfunc)0,                        /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    (hashfunc)0,                        /*tp_hash*/
    (ternaryfunc)0,                     /*tp_call*/
    (reprfunc)0,                        /*tp_str*/
    (getattrofunc)xmlparse_getattro,    /* tp_getattro */
    0,                                  /* tp_setattro */
    0,                                  /* tp_as_buffer */
    /* Use whichever spelling of the GC flag the Python headers provide. */
#ifdef Py_TPFLAGS_HAVE_GC
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
#else
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
#endif
    Xmlparsetype__doc__,                /* tp_doc - Documentation string */
    (traverseproc)xmlparse_traverse,    /* tp_traverse */
    (inquiry)xmlparse_clear,            /* tp_clear */
    0,                                  /* tp_richcompare */
    0,                                  /* tp_weaklistoffset */
    0,                                  /* tp_iter */
    0,                                  /* tp_iternext */
    xmlparse_methods,                   /* tp_methods */
};
1649
1650/* End of code for xmlparser objects */
1651/* -------------------------------------------------------- */
1652
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001653PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001654"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001655Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001656
1657static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001658pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1659{
Fred Drakecde79132001-04-25 16:01:30 +00001660 char *encoding = NULL;
1661 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001662 PyObject *intern = NULL;
1663 PyObject *result;
1664 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001665 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001666 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001667
Fred Drakeb91a36b2002-06-27 19:40:48 +00001668 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1669 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001670 return NULL;
1671 if (namespace_separator != NULL
1672 && strlen(namespace_separator) > 1) {
1673 PyErr_SetString(PyExc_ValueError,
1674 "namespace_separator must be at most one"
1675 " character, omitted, or None");
1676 return NULL;
1677 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001678 /* Explicitly passing None means no interning is desired.
1679 Not passing anything means that a new dictionary is used. */
1680 if (intern == Py_None)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001681 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001682 else if (intern == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001683 intern = PyDict_New();
1684 if (!intern)
1685 return NULL;
1686 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001687 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001688 else if (!PyDict_Check(intern)) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001689 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1690 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001691 }
1692
1693 result = newxmlparseobject(encoding, namespace_separator, intern);
1694 if (intern_decref) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001695 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001696 }
1697 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001698}
1699
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001700PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001701"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001702Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001703
1704static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001705pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001706{
Fred Drake0582df92000-07-12 04:49:00 +00001707 long code = 0;
1708
1709 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1710 return NULL;
1711 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001712}
1713
/* List of methods defined in the module */

/* ParserCreate accepts positional and keyword arguments; ErrorString is
   positional-only.  The all-NULL entry terminates the table. */
static struct PyMethodDef pyexpat_methods[] = {
    {"ParserCreate",    (PyCFunction)pyexpat_ParserCreate,
     METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
    {"ErrorString",     (PyCFunction)pyexpat_ErrorString,
     METH_VARARGS,      pyexpat_ErrorString__doc__},

    {NULL,       (PyCFunction)NULL, 0, NULL}            /* sentinel */
};

/* Module docstring */

PyDoc_STRVAR(pyexpat_module_documentation,
"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001729
Fred Drake4113b132001-03-24 19:58:26 +00001730/* Return a Python string that represents the version number without the
1731 * extra cruft added by revision control, even if the right options were
1732 * given to the "cvs export" command to make it not include the extra
1733 * cruft.
1734 */
1735static PyObject *
1736get_version_string(void)
1737{
1738 static char *rcsid = "$Revision$";
1739 char *rev = rcsid;
1740 int i = 0;
1741
Neal Norwitz30b5c5d2005-12-19 06:05:18 +00001742 while (!isdigit(Py_CHARMASK(*rev)))
Fred Drake4113b132001-03-24 19:58:26 +00001743 ++rev;
1744 while (rev[i] != ' ' && rev[i] != '\0')
1745 ++i;
1746
Neal Norwitz392c5be2007-08-25 17:20:32 +00001747 return PyUnicode_FromStringAndSize(rev, i);
Fred Drake4113b132001-03-24 19:58:26 +00001748}
1749
/* Initialization function for the module */

/* Both the module name and the name of the init function may be
   overridden from the build command line; these are the defaults. */
#ifndef MODULE_NAME
#define MODULE_NAME "pyexpat"
#endif

#ifndef MODULE_INITFUNC
#define MODULE_INITFUNC PyInit_pyexpat
#endif

/* Fallback used only when the Python headers do not define
   PyMODINIT_FUNC.
   NOTE(review): this fallback expands to `void`, whereas the init
   function defined below returns the module object -- confirm whether
   the fallback is still reachable. */
#ifndef PyMODINIT_FUNC
#   ifdef MS_WINDOWS
#       define PyMODINIT_FUNC __declspec(dllexport) void
#   else
#       define PyMODINIT_FUNC void
#   endif
#endif

PyMODINIT_FUNC MODULE_INITFUNC(void);  /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001769
/* Module definition handed to PyModule_Create() in the init function.
   The initializer is positional; the trailing NULL entries leave the
   remaining optional slots unset. */
static struct PyModuleDef pyexpatmodule = {
        PyModuleDef_HEAD_INIT,
        MODULE_NAME,                    /* module name */
        pyexpat_module_documentation,   /* module docstring */
        -1,                             /* per-module state size */
        pyexpat_methods,                /* module-level functions */
        NULL,
        NULL,
        NULL,
        NULL
};
1781
Martin v. Löwis069dde22003-01-21 10:58:18 +00001782PyMODINIT_FUNC
1783MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001784{
1785 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001786 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001787 PyObject *errors_module;
1788 PyObject *modelmod_name;
1789 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001790 PyObject *sys_modules;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001791 static struct PyExpat_CAPI capi;
1792 PyObject* capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001793
Fred Drake6f987622000-08-25 18:03:30 +00001794 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001795 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001796 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001797 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001798 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001799
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001800 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001801 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001802
Fred Drake0582df92000-07-12 04:49:00 +00001803 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001804 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001805 if (m == NULL)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001806 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001807
Fred Drake0582df92000-07-12 04:49:00 +00001808 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001809 if (ErrorObject == NULL) {
1810 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001811 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001812 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001813 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001814 }
1815 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001816 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001817 Py_INCREF(ErrorObject);
1818 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001819 Py_INCREF(&Xmlparsetype);
1820 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001821
Fred Drake4113b132001-03-24 19:58:26 +00001822 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001823 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1824 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001825 {
1826 XML_Expat_Version info = XML_ExpatVersionInfo();
1827 PyModule_AddObject(m, "version_info",
1828 Py_BuildValue("(iii)", info.major,
1829 info.minor, info.micro));
1830 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001831 init_template_buffer();
Fred Drake0582df92000-07-12 04:49:00 +00001832 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001833 compiled, this should check and set native_encoding
1834 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001835 */
Fred Drake93adb692000-09-23 04:55:48 +00001836 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001837
Fred Drake85d835f2001-02-08 15:39:08 +00001838 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001839 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001840 errors_module = PyDict_GetItem(d, errmod_name);
1841 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001842 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001843 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001844 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001845 /* gives away the reference to errors_module */
1846 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001847 }
1848 }
Fred Drake6f987622000-08-25 18:03:30 +00001849 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001850 model_module = PyDict_GetItem(d, modelmod_name);
1851 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001852 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001853 if (model_module != NULL) {
1854 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1855 /* gives away the reference to model_module */
1856 PyModule_AddObject(m, "model", model_module);
1857 }
1858 }
1859 Py_DECREF(modelmod_name);
1860 if (errors_module == NULL || model_module == NULL)
1861 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001862 return NULL;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001863
Martin v. Löwisc847f402003-01-21 11:09:21 +00001864#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001865 {
1866 const XML_Feature *features = XML_GetFeatureList();
1867 PyObject *list = PyList_New(0);
1868 if (list == NULL)
1869 /* just ignore it */
1870 PyErr_Clear();
1871 else {
1872 int i = 0;
1873 for (; features[i].feature != XML_FEATURE_END; ++i) {
1874 int ok;
1875 PyObject *item = Py_BuildValue("si", features[i].name,
1876 features[i].value);
1877 if (item == NULL) {
1878 Py_DECREF(list);
1879 list = NULL;
1880 break;
1881 }
1882 ok = PyList_Append(list, item);
1883 Py_DECREF(item);
1884 if (ok < 0) {
1885 PyErr_Clear();
1886 break;
1887 }
1888 }
1889 if (list != NULL)
1890 PyModule_AddObject(m, "features", list);
1891 }
1892 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001893#endif
Fred Drake6f987622000-08-25 18:03:30 +00001894
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001895#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001896 PyModule_AddStringConstant(errors_module, #name, \
1897 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001898
Fred Drake0582df92000-07-12 04:49:00 +00001899 MYCONST(XML_ERROR_NO_MEMORY);
1900 MYCONST(XML_ERROR_SYNTAX);
1901 MYCONST(XML_ERROR_NO_ELEMENTS);
1902 MYCONST(XML_ERROR_INVALID_TOKEN);
1903 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1904 MYCONST(XML_ERROR_PARTIAL_CHAR);
1905 MYCONST(XML_ERROR_TAG_MISMATCH);
1906 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1907 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1908 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1909 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1910 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1911 MYCONST(XML_ERROR_ASYNC_ENTITY);
1912 MYCONST(XML_ERROR_BAD_CHAR_REF);
1913 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1914 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1915 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1916 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1917 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001918 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1919 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1920 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001921 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1922 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1923 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1924 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1925 /* Added in Expat 1.95.7. */
1926 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1927 /* Added in Expat 1.95.8. */
1928 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1929 MYCONST(XML_ERROR_INCOMPLETE_PE);
1930 MYCONST(XML_ERROR_XML_DECL);
1931 MYCONST(XML_ERROR_TEXT_DECL);
1932 MYCONST(XML_ERROR_PUBLICID);
1933 MYCONST(XML_ERROR_SUSPENDED);
1934 MYCONST(XML_ERROR_NOT_SUSPENDED);
1935 MYCONST(XML_ERROR_ABORTED);
1936 MYCONST(XML_ERROR_FINISHED);
1937 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001938
Fred Drake85d835f2001-02-08 15:39:08 +00001939 PyModule_AddStringConstant(errors_module, "__doc__",
1940 "Constants used to describe error conditions.");
1941
Fred Drake93adb692000-09-23 04:55:48 +00001942#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001943
Fred Drake85d835f2001-02-08 15:39:08 +00001944#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001945 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1946 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1947 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001948#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001949
Fred Drake85d835f2001-02-08 15:39:08 +00001950#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1951 PyModule_AddStringConstant(model_module, "__doc__",
1952 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001953
Fred Drake85d835f2001-02-08 15:39:08 +00001954 MYCONST(XML_CTYPE_EMPTY);
1955 MYCONST(XML_CTYPE_ANY);
1956 MYCONST(XML_CTYPE_MIXED);
1957 MYCONST(XML_CTYPE_NAME);
1958 MYCONST(XML_CTYPE_CHOICE);
1959 MYCONST(XML_CTYPE_SEQ);
1960
1961 MYCONST(XML_CQUANT_NONE);
1962 MYCONST(XML_CQUANT_OPT);
1963 MYCONST(XML_CQUANT_REP);
1964 MYCONST(XML_CQUANT_PLUS);
1965#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001966
1967 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001968 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001969 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001970 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1971 capi.MINOR_VERSION = XML_MINOR_VERSION;
1972 capi.MICRO_VERSION = XML_MICRO_VERSION;
1973 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001974 capi.GetErrorCode = XML_GetErrorCode;
1975 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1976 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001977 capi.Parse = XML_Parse;
1978 capi.ParserCreate_MM = XML_ParserCreate_MM;
1979 capi.ParserFree = XML_ParserFree;
1980 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1981 capi.SetCommentHandler = XML_SetCommentHandler;
1982 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1983 capi.SetElementHandler = XML_SetElementHandler;
1984 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1985 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1986 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1987 capi.SetUserData = XML_SetUserData;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001988
Benjamin Petersonb173f782009-05-05 22:31:58 +00001989 /* export using capsule */
1990 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001991 if (capi_object)
1992 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001993 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001994}
1995
Fred Drake6f987622000-08-25 18:03:30 +00001996static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001997clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00001998{
Fred Drakecde79132001-04-25 16:01:30 +00001999 int i = 0;
2000 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002001
Fred Drake71b63ff2002-06-28 22:29:01 +00002002 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002003 if (initial)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002004 self->handlers[i] = NULL;
2005 else {
Fred Drakecde79132001-04-25 16:01:30 +00002006 temp = self->handlers[i];
2007 self->handlers[i] = NULL;
2008 Py_XDECREF(temp);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002009 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002010 }
Fred Drakecde79132001-04-25 16:01:30 +00002011 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002012}
2013
Tim Peters0c322792002-07-17 16:49:03 +00002014static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00002015 {"StartElementHandler",
2016 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002017 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002018 {"EndElementHandler",
2019 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002020 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002021 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002022 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2023 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002024 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002025 (xmlhandlersetter)XML_SetCharacterDataHandler,
2026 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002027 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002028 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002029 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002030 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002031 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002032 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002033 {"StartNamespaceDeclHandler",
2034 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002035 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002036 {"EndNamespaceDeclHandler",
2037 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002038 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002039 {"CommentHandler",
2040 (xmlhandlersetter)XML_SetCommentHandler,
2041 (xmlhandler)my_CommentHandler},
2042 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002043 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002044 (xmlhandler)my_StartCdataSectionHandler},
2045 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002046 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002047 (xmlhandler)my_EndCdataSectionHandler},
2048 {"DefaultHandler",
2049 (xmlhandlersetter)XML_SetDefaultHandler,
2050 (xmlhandler)my_DefaultHandler},
2051 {"DefaultHandlerExpand",
2052 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2053 (xmlhandler)my_DefaultHandlerExpandHandler},
2054 {"NotStandaloneHandler",
2055 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2056 (xmlhandler)my_NotStandaloneHandler},
2057 {"ExternalEntityRefHandler",
2058 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002059 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002060 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002061 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002062 (xmlhandler)my_StartDoctypeDeclHandler},
2063 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002064 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002065 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002066 {"EntityDeclHandler",
2067 (xmlhandlersetter)XML_SetEntityDeclHandler,
2068 (xmlhandler)my_EntityDeclHandler},
2069 {"XmlDeclHandler",
2070 (xmlhandlersetter)XML_SetXmlDeclHandler,
2071 (xmlhandler)my_XmlDeclHandler},
2072 {"ElementDeclHandler",
2073 (xmlhandlersetter)XML_SetElementDeclHandler,
2074 (xmlhandler)my_ElementDeclHandler},
2075 {"AttlistDeclHandler",
2076 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2077 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002078#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002079 {"SkippedEntityHandler",
2080 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2081 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002082#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002083
Fred Drake0582df92000-07-12 04:49:00 +00002084 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002085};