blob: cc62274d1484c3489f6bb58e5a828e389f8aec39 [file] [log] [blame]
Martin v. Löwis7090ed12001-09-19 10:37:50 +00001#include "Python.h"
Fred Drake4113b132001-03-24 19:58:26 +00002#include <ctype.h>
3
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00004#include "frameobject.h"
Fred Drakea77254a2000-09-29 19:23:29 +00005#include "expat.h"
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00006
Fredrik Lundhc3345042005-12-13 19:49:55 +00007#include "pyexpat.h"
8
Martin v. Löwisc847f402003-01-21 11:09:21 +00009#define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION)
10
Jeremy Hylton9263f572003-06-27 16:13:17 +000011#define FIX_TRACE
Martin v. Löwis339d0f72001-08-17 18:39:25 +000012
/* Identifiers for the individual callback slots.
 *
 * The values are used as array indices: getcode() indexes handler_info[]
 * with them and have_handler() indexes a parser's `handlers` array.  The
 * enumerators therefore must keep their relative order.
 */
enum HandlerTypes {
    /* Element and character-level callbacks. */
    StartElement,
    EndElement,
    ProcessingInstruction,
    CharacterData,

    /* Declarations, namespaces, comments and CDATA sections. */
    UnparsedEntityDecl,
    NotationDecl,
    StartNamespaceDecl,
    EndNamespaceDecl,
    Comment,
    StartCdataSection,
    EndCdataSection,

    /* Default handlers and entity handling. */
    Default,
    DefaultHandlerExpand,
    NotStandalone,
    ExternalEntityRef,

    /* DTD-related callbacks. */
    StartDoctypeDecl,
    EndDoctypeDecl,
    EntityDecl,
    XmlDecl,
    ElementDecl,
    AttlistDecl,
#if XML_COMBINED_VERSION >= 19504
    /* Only compiled in when XML_COMBINED_VERSION is at least 19504. */
    SkippedEntity,
#endif
    _DummyDecl
};
40
41static PyObject *ErrorObject;
42
43/* ----------------------------------------------------- */
44
45/* Declarations for objects of type xmlparser */
46
47typedef struct {
Fred Drake0582df92000-07-12 04:49:00 +000048 PyObject_HEAD
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000049
Fred Drake0582df92000-07-12 04:49:00 +000050 XML_Parser itself;
Fred Drake85d835f2001-02-08 15:39:08 +000051 int ordered_attributes; /* Return attributes as a list. */
52 int specified_attributes; /* Report only specified attributes. */
Fred Drakebd6101c2001-02-14 18:29:45 +000053 int in_callback; /* Is a callback active? */
Martin v. Löwis069dde22003-01-21 10:58:18 +000054 int ns_prefixes; /* Namespace-triplets mode? */
Fred Drake2a3d7db2002-06-28 22:56:48 +000055 XML_Char *buffer; /* Buffer used when accumulating characters */
56 /* NULL if not enabled */
57 int buffer_size; /* Size of buffer, in XML_Char units */
58 int buffer_used; /* Buffer units in use */
Fred Drakeb91a36b2002-06-27 19:40:48 +000059 PyObject *intern; /* Dictionary to intern strings */
Fred Drake0582df92000-07-12 04:49:00 +000060 PyObject **handlers;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000061} xmlparseobject;
62
Fred Drake2a3d7db2002-06-28 22:56:48 +000063#define CHARACTER_DATA_BUFFER_SIZE 8192
64
Jeremy Hylton938ace62002-07-17 16:30:39 +000065static PyTypeObject Xmlparsetype;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000066
Fred Drake117ac852002-09-24 16:24:54 +000067typedef void (*xmlhandlersetter)(XML_Parser self, void *meth);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000068typedef void* xmlhandler;
69
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +000070struct HandlerInfo {
Fred Drake0582df92000-07-12 04:49:00 +000071 const char *name;
72 xmlhandlersetter setter;
73 xmlhandler handler;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +000074 PyCodeObject *tb_code;
Fred Drake71b63ff2002-06-28 22:29:01 +000075 PyObject *nameobj;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000076};
77
Jeremy Hylton938ace62002-07-17 16:30:39 +000078static struct HandlerInfo handler_info[64];
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +000079
Fred Drakebd6101c2001-02-14 18:29:45 +000080/* Set an integer attribute on the error object; return true on success,
81 * false on an exception.
82 */
83static int
84set_error_attr(PyObject *err, char *name, int value)
85{
Christian Heimes217cfd12007-12-02 14:31:20 +000086 PyObject *v = PyLong_FromLong(value);
Fred Drake85d835f2001-02-08 15:39:08 +000087
Neal Norwitz2f5e9902006-03-08 06:36:45 +000088 if (v == NULL || PyObject_SetAttrString(err, name, v) == -1) {
89 Py_XDECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000090 return 0;
91 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +000092 Py_DECREF(v);
Fred Drakebd6101c2001-02-14 18:29:45 +000093 return 1;
94}
95
96/* Build and set an Expat exception, including positioning
97 * information. Always returns NULL.
98 */
Fred Drake85d835f2001-02-08 15:39:08 +000099static PyObject *
Martin v. Löwis069dde22003-01-21 10:58:18 +0000100set_error(xmlparseobject *self, enum XML_Error code)
Fred Drake85d835f2001-02-08 15:39:08 +0000101{
102 PyObject *err;
103 char buffer[256];
104 XML_Parser parser = self->itself;
Fred Drakebd6101c2001-02-14 18:29:45 +0000105 int lineno = XML_GetErrorLineNumber(parser);
106 int column = XML_GetErrorColumnNumber(parser);
Fred Drake85d835f2001-02-08 15:39:08 +0000107
Martin v. Löwis6b2cf0e2002-06-30 06:03:35 +0000108 /* There is no risk of overflowing this buffer, since
109 even for 64-bit integers, there is sufficient space. */
110 sprintf(buffer, "%.200s: line %i, column %i",
Fred Drakebd6101c2001-02-14 18:29:45 +0000111 XML_ErrorString(code), lineno, column);
Fred Drake85d835f2001-02-08 15:39:08 +0000112 err = PyObject_CallFunction(ErrorObject, "s", buffer);
Fred Drakebd6101c2001-02-14 18:29:45 +0000113 if ( err != NULL
114 && set_error_attr(err, "code", code)
115 && set_error_attr(err, "offset", column)
116 && set_error_attr(err, "lineno", lineno)) {
117 PyErr_SetObject(ErrorObject, err);
Fred Drake85d835f2001-02-08 15:39:08 +0000118 }
Neal Norwitz2f5e9902006-03-08 06:36:45 +0000119 Py_XDECREF(err);
Fred Drake85d835f2001-02-08 15:39:08 +0000120 return NULL;
121}
122
Fred Drake71b63ff2002-06-28 22:29:01 +0000123static int
124have_handler(xmlparseobject *self, int type)
125{
126 PyObject *handler = self->handlers[type];
127 return handler != NULL;
128}
129
130static PyObject *
131get_handler_name(struct HandlerInfo *hinfo)
132{
133 PyObject *name = hinfo->nameobj;
134 if (name == NULL) {
Neal Norwitz392c5be2007-08-25 17:20:32 +0000135 name = PyUnicode_FromString(hinfo->name);
Fred Drake71b63ff2002-06-28 22:29:01 +0000136 hinfo->nameobj = name;
137 }
138 Py_XINCREF(name);
139 return name;
140}
141
Fred Drake85d835f2001-02-08 15:39:08 +0000142
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000143/* Convert a string of XML_Chars into a Unicode string.
144 Returns None if str is a null pointer. */
145
Fred Drake0582df92000-07-12 04:49:00 +0000146static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +0000147conv_string_to_unicode(const XML_Char *str)
Fred Drake0582df92000-07-12 04:49:00 +0000148{
Fred Drake71b63ff2002-06-28 22:29:01 +0000149 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000150 and hence in UTF-8. */
151 /* UTF-8 from Expat, Unicode desired */
152 if (str == NULL) {
153 Py_INCREF(Py_None);
154 return Py_None;
155 }
Fred Drake71b63ff2002-06-28 22:29:01 +0000156 return PyUnicode_DecodeUTF8(str, strlen(str), "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000157}
158
Fred Drake0582df92000-07-12 04:49:00 +0000159static PyObject *
160conv_string_len_to_unicode(const XML_Char *str, int len)
161{
Fred Drake71b63ff2002-06-28 22:29:01 +0000162 /* XXX currently this code assumes that XML_Char is 8-bit,
Fred Drake0582df92000-07-12 04:49:00 +0000163 and hence in UTF-8. */
164 /* UTF-8 from Expat, Unicode desired */
165 if (str == NULL) {
166 Py_INCREF(Py_None);
167 return Py_None;
168 }
Fred Drake6f987622000-08-25 18:03:30 +0000169 return PyUnicode_DecodeUTF8((const char *)str, len, "strict");
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000170}
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000171
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000172/* Callback routines */
173
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000174static void clear_handlers(xmlparseobject *self, int initial);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000175
Martin v. Löwis069dde22003-01-21 10:58:18 +0000176/* This handler is used when an error has been detected, in the hope
177 that actual parsing can be terminated early. This will only help
178 if an external entity reference is encountered. */
179static int
180error_external_entity_ref_handler(XML_Parser parser,
181 const XML_Char *context,
182 const XML_Char *base,
183 const XML_Char *systemId,
184 const XML_Char *publicId)
185{
186 return 0;
187}
188
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000189/* Dummy character data handler used when an error (exception) has
190 been detected, and the actual parsing can be terminated early.
191 This is needed since character data handler can't be safely removed
192 from within the character data handler, but can be replaced. It is
193 used only from the character data handler trampoline, and must be
194 used right after `flag_error()` is called. */
195static void
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000196noop_character_data_handler(void *userData, const XML_Char *data, int len)
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000197{
198 /* Do nothing. */
199}
200
Fred Drake6f987622000-08-25 18:03:30 +0000201static void
202flag_error(xmlparseobject *self)
203{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +0000204 clear_handlers(self, 0);
Martin v. Löwis069dde22003-01-21 10:58:18 +0000205 XML_SetExternalEntityRefHandler(self->itself,
206 error_external_entity_ref_handler);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000207}
208
209static PyCodeObject*
210getcode(enum HandlerTypes slot, char* func_name, int lineno)
211{
Fred Drakebd6101c2001-02-14 18:29:45 +0000212 PyObject *code = NULL;
213 PyObject *name = NULL;
214 PyObject *nulltuple = NULL;
215 PyObject *filename = NULL;
216
217 if (handler_info[slot].tb_code == NULL) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000218 code = PyBytes_FromString("");
Fred Drakebd6101c2001-02-14 18:29:45 +0000219 if (code == NULL)
220 goto failed;
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000221 name = PyUnicode_FromString(func_name);
Fred Drakebd6101c2001-02-14 18:29:45 +0000222 if (name == NULL)
223 goto failed;
224 nulltuple = PyTuple_New(0);
225 if (nulltuple == NULL)
226 goto failed;
Guido van Rossum00bc0e02007-10-15 02:52:41 +0000227 filename = PyUnicode_DecodeFSDefault(__FILE__);
Fred Drakebd6101c2001-02-14 18:29:45 +0000228 handler_info[slot].tb_code =
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000229 PyCode_New(0, /* argcount */
Guido van Rossum4f72a782006-10-27 23:31:49 +0000230 0, /* kwonlyargcount */
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000231 0, /* nlocals */
232 0, /* stacksize */
233 0, /* flags */
234 code, /* code */
235 nulltuple, /* consts */
236 nulltuple, /* names */
237 nulltuple, /* varnames */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000238#if PYTHON_API_VERSION >= 1010
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000239 nulltuple, /* freevars */
240 nulltuple, /* cellvars */
Martin v. Löwis76192ee2001-02-06 09:34:40 +0000241#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000242 filename, /* filename */
243 name, /* name */
244 lineno, /* firstlineno */
245 code /* lnotab */
Fred Drakebd6101c2001-02-14 18:29:45 +0000246 );
247 if (handler_info[slot].tb_code == NULL)
248 goto failed;
249 Py_DECREF(code);
250 Py_DECREF(nulltuple);
251 Py_DECREF(filename);
252 Py_DECREF(name);
253 }
254 return handler_info[slot].tb_code;
255 failed:
256 Py_XDECREF(code);
257 Py_XDECREF(name);
258 return NULL;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000259}
260
Jeremy Hylton9263f572003-06-27 16:13:17 +0000261#ifdef FIX_TRACE
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000262static int
263trace_frame(PyThreadState *tstate, PyFrameObject *f, int code, PyObject *val)
264{
265 int result = 0;
266 if (!tstate->use_tracing || tstate->tracing)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000267 return 0;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000268 if (tstate->c_profilefunc != NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000269 tstate->tracing++;
270 result = tstate->c_profilefunc(tstate->c_profileobj,
271 f, code , val);
272 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
273 || (tstate->c_profilefunc != NULL));
274 tstate->tracing--;
275 if (result)
276 return result;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000277 }
278 if (tstate->c_tracefunc != NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000279 tstate->tracing++;
280 result = tstate->c_tracefunc(tstate->c_traceobj,
281 f, code , val);
282 tstate->use_tracing = ((tstate->c_tracefunc != NULL)
283 || (tstate->c_profilefunc != NULL));
284 tstate->tracing--;
285 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000286 return result;
287}
Jeremy Hylton9263f572003-06-27 16:13:17 +0000288
289static int
290trace_frame_exc(PyThreadState *tstate, PyFrameObject *f)
291{
292 PyObject *type, *value, *traceback, *arg;
293 int err;
294
295 if (tstate->c_tracefunc == NULL)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000296 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000297
298 PyErr_Fetch(&type, &value, &traceback);
299 if (value == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000300 value = Py_None;
301 Py_INCREF(value);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000302 }
Martin v. Löwis9171f022004-10-13 19:50:11 +0000303#if PY_VERSION_HEX < 0x02040000
304 arg = Py_BuildValue("(OOO)", type, value, traceback);
305#else
Raymond Hettinger8ae46892003-10-12 19:09:37 +0000306 arg = PyTuple_Pack(3, type, value, traceback);
Martin v. Löwis9171f022004-10-13 19:50:11 +0000307#endif
Jeremy Hylton9263f572003-06-27 16:13:17 +0000308 if (arg == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000309 PyErr_Restore(type, value, traceback);
310 return 0;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000311 }
312 err = trace_frame(tstate, f, PyTrace_EXCEPTION, arg);
313 Py_DECREF(arg);
314 if (err == 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000315 PyErr_Restore(type, value, traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000316 else {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000317 Py_XDECREF(type);
318 Py_XDECREF(value);
319 Py_XDECREF(traceback);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000320 }
321 return err;
322}
Martin v. Löwis069dde22003-01-21 10:58:18 +0000323#endif
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000324
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000325static PyObject*
Fred Drake39689c52004-08-13 03:12:57 +0000326call_with_frame(PyCodeObject *c, PyObject* func, PyObject* args,
327 xmlparseobject *self)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000328{
Fred Drakebd6101c2001-02-14 18:29:45 +0000329 PyThreadState *tstate = PyThreadState_GET();
330 PyFrameObject *f;
331 PyObject *res;
332
333 if (c == NULL)
334 return NULL;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000335
Jeremy Hylton9263f572003-06-27 16:13:17 +0000336 f = PyFrame_New(tstate, c, PyEval_GetGlobals(), NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +0000337 if (f == NULL)
338 return NULL;
339 tstate->frame = f;
Jeremy Hylton9263f572003-06-27 16:13:17 +0000340#ifdef FIX_TRACE
341 if (trace_frame(tstate, f, PyTrace_CALL, Py_None) < 0) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000342 return NULL;
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000343 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000344#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000345 res = PyEval_CallObject(func, args);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000346 if (res == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000347 if (tstate->curexc_traceback == NULL)
348 PyTraceBack_Here(f);
Fred Drake39689c52004-08-13 03:12:57 +0000349 XML_StopParser(self->itself, XML_FALSE);
Jeremy Hylton9263f572003-06-27 16:13:17 +0000350#ifdef FIX_TRACE
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000351 if (trace_frame_exc(tstate, f) < 0) {
352 return NULL;
353 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000354 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000355 else {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000356 if (trace_frame(tstate, f, PyTrace_RETURN, res) < 0) {
357 Py_XDECREF(res);
358 res = NULL;
359 }
Martin v. Löwis7d6e19d2002-08-04 08:24:49 +0000360 }
Jeremy Hylton9263f572003-06-27 16:13:17 +0000361#else
362 }
Martin v. Löwis069dde22003-01-21 10:58:18 +0000363#endif
Fred Drakebd6101c2001-02-14 18:29:45 +0000364 tstate->frame = f->f_back;
365 Py_DECREF(f);
366 return res;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000367}
368
Fred Drakeb91a36b2002-06-27 19:40:48 +0000369static PyObject*
370string_intern(xmlparseobject *self, const char* str)
371{
Guido van Rossum4ca94712007-07-23 17:42:32 +0000372 PyObject *result = conv_string_to_unicode(str);
Fred Drakeb91a36b2002-06-27 19:40:48 +0000373 PyObject *value;
Neal Norwitz484d9a42005-09-30 04:46:49 +0000374 /* result can be NULL if the unicode conversion failed. */
375 if (!result)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000376 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000377 if (!self->intern)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000378 return result;
Fred Drakeb91a36b2002-06-27 19:40:48 +0000379 value = PyDict_GetItem(self->intern, result);
380 if (!value) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000381 if (PyDict_SetItem(self->intern, result, result) == 0)
Fred Drakeb91a36b2002-06-27 19:40:48 +0000382 return result;
383 else
384 return NULL;
385 }
386 Py_INCREF(value);
387 Py_DECREF(result);
388 return value;
389}
390
Fred Drake2a3d7db2002-06-28 22:56:48 +0000391/* Return 0 on success, -1 on exception.
392 * flag_error() will be called before return if needed.
393 */
394static int
395call_character_handler(xmlparseobject *self, const XML_Char *buffer, int len)
396{
397 PyObject *args;
398 PyObject *temp;
399
Georg Brandl5bbbf1d2010-10-24 14:20:36 +0000400 if (!have_handler(self, CharacterData))
401 return -1;
402
Fred Drake2a3d7db2002-06-28 22:56:48 +0000403 args = PyTuple_New(1);
404 if (args == NULL)
405 return -1;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000406 temp = (conv_string_len_to_unicode(buffer, len));
Fred Drake2a3d7db2002-06-28 22:56:48 +0000407 if (temp == NULL) {
408 Py_DECREF(args);
409 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000410 XML_SetCharacterDataHandler(self->itself,
411 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000412 return -1;
413 }
414 PyTuple_SET_ITEM(args, 0, temp);
415 /* temp is now a borrowed reference; consider it unused. */
416 self->in_callback = 1;
417 temp = call_with_frame(getcode(CharacterData, "CharacterData", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000418 self->handlers[CharacterData], args, self);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000419 /* temp is an owned reference again, or NULL */
420 self->in_callback = 0;
421 Py_DECREF(args);
422 if (temp == NULL) {
423 flag_error(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +0000424 XML_SetCharacterDataHandler(self->itself,
425 noop_character_data_handler);
Fred Drake2a3d7db2002-06-28 22:56:48 +0000426 return -1;
427 }
428 Py_DECREF(temp);
429 return 0;
430}
431
432static int
433flush_character_buffer(xmlparseobject *self)
434{
435 int rc;
436 if (self->buffer == NULL || self->buffer_used == 0)
437 return 0;
438 rc = call_character_handler(self, self->buffer, self->buffer_used);
439 self->buffer_used = 0;
440 return rc;
441}
442
443static void
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000444my_CharacterDataHandler(void *userData, const XML_Char *data, int len)
Fred Drake2a3d7db2002-06-28 22:56:48 +0000445{
446 xmlparseobject *self = (xmlparseobject *) userData;
447 if (self->buffer == NULL)
448 call_character_handler(self, data, len);
449 else {
450 if ((self->buffer_used + len) > self->buffer_size) {
451 if (flush_character_buffer(self) < 0)
452 return;
453 /* handler might have changed; drop the rest on the floor
454 * if there isn't a handler anymore
455 */
456 if (!have_handler(self, CharacterData))
457 return;
458 }
459 if (len > self->buffer_size) {
460 call_character_handler(self, data, len);
461 self->buffer_used = 0;
462 }
463 else {
464 memcpy(self->buffer + self->buffer_used,
465 data, len * sizeof(XML_Char));
466 self->buffer_used += len;
467 }
468 }
469}
470
Fred Drake85d835f2001-02-08 15:39:08 +0000471static void
472my_StartElementHandler(void *userData,
Fred Drake71b63ff2002-06-28 22:29:01 +0000473 const XML_Char *name, const XML_Char *atts[])
Fred Drake85d835f2001-02-08 15:39:08 +0000474{
475 xmlparseobject *self = (xmlparseobject *)userData;
476
Fred Drake71b63ff2002-06-28 22:29:01 +0000477 if (have_handler(self, StartElement)) {
Fred Drake85d835f2001-02-08 15:39:08 +0000478 PyObject *container, *rv, *args;
479 int i, max;
480
Fred Drake2a3d7db2002-06-28 22:56:48 +0000481 if (flush_character_buffer(self) < 0)
482 return;
Fred Drake85d835f2001-02-08 15:39:08 +0000483 /* Set max to the number of slots filled in atts[]; max/2 is
484 * the number of attributes we need to process.
485 */
486 if (self->specified_attributes) {
487 max = XML_GetSpecifiedAttributeCount(self->itself);
488 }
489 else {
490 max = 0;
491 while (atts[max] != NULL)
492 max += 2;
493 }
494 /* Build the container. */
495 if (self->ordered_attributes)
496 container = PyList_New(max);
497 else
498 container = PyDict_New();
499 if (container == NULL) {
500 flag_error(self);
501 return;
502 }
503 for (i = 0; i < max; i += 2) {
Fred Drakeb91a36b2002-06-27 19:40:48 +0000504 PyObject *n = string_intern(self, (XML_Char *) atts[i]);
Fred Drake85d835f2001-02-08 15:39:08 +0000505 PyObject *v;
506 if (n == NULL) {
507 flag_error(self);
508 Py_DECREF(container);
509 return;
510 }
Guido van Rossum4ca94712007-07-23 17:42:32 +0000511 v = conv_string_to_unicode((XML_Char *) atts[i+1]);
Fred Drake85d835f2001-02-08 15:39:08 +0000512 if (v == NULL) {
513 flag_error(self);
514 Py_DECREF(container);
515 Py_DECREF(n);
516 return;
517 }
518 if (self->ordered_attributes) {
519 PyList_SET_ITEM(container, i, n);
520 PyList_SET_ITEM(container, i+1, v);
521 }
522 else if (PyDict_SetItem(container, n, v)) {
523 flag_error(self);
524 Py_DECREF(n);
525 Py_DECREF(v);
526 return;
527 }
528 else {
529 Py_DECREF(n);
530 Py_DECREF(v);
531 }
532 }
Neal Norwitz484d9a42005-09-30 04:46:49 +0000533 args = string_intern(self, name);
534 if (args != NULL)
535 args = Py_BuildValue("(NN)", args, container);
Fred Drake85d835f2001-02-08 15:39:08 +0000536 if (args == NULL) {
537 Py_DECREF(container);
538 return;
539 }
540 /* Container is now a borrowed reference; ignore it. */
Fred Drakebd6101c2001-02-14 18:29:45 +0000541 self->in_callback = 1;
542 rv = call_with_frame(getcode(StartElement, "StartElement", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000543 self->handlers[StartElement], args, self);
Fred Drakebd6101c2001-02-14 18:29:45 +0000544 self->in_callback = 0;
545 Py_DECREF(args);
Fred Drake85d835f2001-02-08 15:39:08 +0000546 if (rv == NULL) {
547 flag_error(self);
548 return;
Fred Drakebd6101c2001-02-14 18:29:45 +0000549 }
Fred Drake85d835f2001-02-08 15:39:08 +0000550 Py_DECREF(rv);
551 }
552}
553
/* RC_HANDLER defines a C callback named my_<NAME>Handler.
 *
 *   RC            C return type of the callback
 *   NAME          enum HandlerTypes member selecting the Python handler
 *   PARAMS        parenthesized C parameter list
 *   INIT          extra declarations placed after the standard locals
 *   PARAM_FORMAT  parenthesized argument list for Py_BuildValue
 *   CONVERSION    statements run on a successful result `rv`
 *   RETURN        expression used in every `return` statement
 *   GETUSERDATA   expression yielding the xmlparseobject pointer
 *
 * The generated function does nothing unless a handler is set.  It then
 * flushes buffered character data, builds the argument tuple, calls the
 * Python handler through call_with_frame() and runs CONVERSION.
 * flag_error() is called when building the arguments or calling the
 * handler fails.  The expansion provides the locals `self`, `args` and
 * `rv` for use by the macro arguments.
 */
#define RC_HANDLER(RC, NAME, PARAMS, INIT, PARAM_FORMAT, CONVERSION, \
                   RETURN, GETUSERDATA) \
static RC \
my_##NAME##Handler PARAMS \
{ \
    xmlparseobject *self = GETUSERDATA ; \
    PyObject *args = NULL; \
    PyObject *rv = NULL; \
    INIT \
    \
    if (!have_handler(self, NAME)) { \
        return RETURN; \
    } \
    if (flush_character_buffer(self) < 0) { \
        return RETURN; \
    } \
    args = Py_BuildValue PARAM_FORMAT ; \
    if (!args) { \
        flag_error(self); \
        return RETURN; \
    } \
    self->in_callback = 1; \
    rv = call_with_frame(getcode(NAME,#NAME,__LINE__), \
                         self->handlers[NAME], args, self); \
    self->in_callback = 0; \
    Py_DECREF(args); \
    if (rv == NULL) { \
        flag_error(self); \
        return RETURN; \
    } \
    CONVERSION \
    Py_DECREF(rv); \
    return RETURN; \
}

/* Callback returning void; the parser object is taken from `userData`. */
#define VOID_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(void, NAME, PARAMS, ;, PARAM_FORMAT, ;, ;, \
               (xmlparseobject *)userData)

/* Callback returning int: the Python result converted with
   PyLong_AsLong (0 if the handler is not called or fails).  The parser
   object is taken from `userData`. */
#define INT_HANDLER(NAME, PARAMS, PARAM_FORMAT) \
    RC_HANDLER(int, NAME, PARAMS, int rc=0;, PARAM_FORMAT, \
               rc = PyLong_AsLong(rv);, rc, \
               (xmlparseobject *)userData)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000591
Fred Drake71b63ff2002-06-28 22:29:01 +0000592VOID_HANDLER(EndElement,
593 (void *userData, const XML_Char *name),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000594 ("(N)", string_intern(self, name)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000595
Fred Drake6f987622000-08-25 18:03:30 +0000596VOID_HANDLER(ProcessingInstruction,
Fred Drake71b63ff2002-06-28 22:29:01 +0000597 (void *userData,
598 const XML_Char *target,
Fred Drake85d835f2001-02-08 15:39:08 +0000599 const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000600 ("(NO&)", string_intern(self, target), conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000601
Fred Drake6f987622000-08-25 18:03:30 +0000602VOID_HANDLER(UnparsedEntityDecl,
Fred Drake71b63ff2002-06-28 22:29:01 +0000603 (void *userData,
Fred Drake85d835f2001-02-08 15:39:08 +0000604 const XML_Char *entityName,
605 const XML_Char *base,
606 const XML_Char *systemId,
607 const XML_Char *publicId,
608 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000609 ("(NNNNN)",
Fred Drake71b63ff2002-06-28 22:29:01 +0000610 string_intern(self, entityName), string_intern(self, base),
611 string_intern(self, systemId), string_intern(self, publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000612 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000613
Fred Drake85d835f2001-02-08 15:39:08 +0000614VOID_HANDLER(EntityDecl,
615 (void *userData,
616 const XML_Char *entityName,
617 int is_parameter_entity,
618 const XML_Char *value,
619 int value_length,
620 const XML_Char *base,
621 const XML_Char *systemId,
622 const XML_Char *publicId,
623 const XML_Char *notationName),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000624 ("NiNNNNN",
625 string_intern(self, entityName), is_parameter_entity,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000626 (conv_string_len_to_unicode(value, value_length)),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000627 string_intern(self, base), string_intern(self, systemId),
628 string_intern(self, publicId),
629 string_intern(self, notationName)))
Fred Drake85d835f2001-02-08 15:39:08 +0000630
631VOID_HANDLER(XmlDecl,
632 (void *userData,
633 const XML_Char *version,
634 const XML_Char *encoding,
635 int standalone),
636 ("(O&O&i)",
Guido van Rossum4ca94712007-07-23 17:42:32 +0000637 conv_string_to_unicode ,version, conv_string_to_unicode ,encoding,
Fred Drake85d835f2001-02-08 15:39:08 +0000638 standalone))
639
640static PyObject *
641conv_content_model(XML_Content * const model,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000642 PyObject *(*conv_string)(const XML_Char *))
Fred Drake85d835f2001-02-08 15:39:08 +0000643{
644 PyObject *result = NULL;
645 PyObject *children = PyTuple_New(model->numchildren);
646 int i;
647
648 if (children != NULL) {
Tim Peters9544fc52001-07-28 09:36:36 +0000649 assert(model->numchildren < INT_MAX);
650 for (i = 0; i < (int)model->numchildren; ++i) {
Fred Drake85d835f2001-02-08 15:39:08 +0000651 PyObject *child = conv_content_model(&model->children[i],
652 conv_string);
653 if (child == NULL) {
654 Py_XDECREF(children);
655 return NULL;
656 }
657 PyTuple_SET_ITEM(children, i, child);
658 }
659 result = Py_BuildValue("(iiO&N)",
660 model->type, model->quant,
661 conv_string,model->name, children);
662 }
663 return result;
664}
665
Fred Drake06dd8cf2003-02-02 03:54:17 +0000666static void
667my_ElementDeclHandler(void *userData,
668 const XML_Char *name,
669 XML_Content *model)
Fred Drake85d835f2001-02-08 15:39:08 +0000670{
Fred Drake06dd8cf2003-02-02 03:54:17 +0000671 xmlparseobject *self = (xmlparseobject *)userData;
672 PyObject *args = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +0000673
Fred Drake06dd8cf2003-02-02 03:54:17 +0000674 if (have_handler(self, ElementDecl)) {
675 PyObject *rv = NULL;
676 PyObject *modelobj, *nameobj;
677
678 if (flush_character_buffer(self) < 0)
679 goto finally;
Guido van Rossum4ca94712007-07-23 17:42:32 +0000680 modelobj = conv_content_model(model, (conv_string_to_unicode));
Fred Drake06dd8cf2003-02-02 03:54:17 +0000681 if (modelobj == NULL) {
682 flag_error(self);
683 goto finally;
684 }
685 nameobj = string_intern(self, name);
686 if (nameobj == NULL) {
687 Py_DECREF(modelobj);
688 flag_error(self);
689 goto finally;
690 }
Michael W. Hudson0bb84542004-08-03 11:31:31 +0000691 args = Py_BuildValue("NN", nameobj, modelobj);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000692 if (args == NULL) {
693 Py_DECREF(modelobj);
694 flag_error(self);
695 goto finally;
696 }
697 self->in_callback = 1;
698 rv = call_with_frame(getcode(ElementDecl, "ElementDecl", __LINE__),
Fred Drake39689c52004-08-13 03:12:57 +0000699 self->handlers[ElementDecl], args, self);
Fred Drake06dd8cf2003-02-02 03:54:17 +0000700 self->in_callback = 0;
701 if (rv == NULL) {
702 flag_error(self);
703 goto finally;
704 }
705 Py_DECREF(rv);
706 }
707 finally:
708 Py_XDECREF(args);
709 XML_FreeContentModel(self->itself, model);
710 return;
711}
Fred Drake85d835f2001-02-08 15:39:08 +0000712
713VOID_HANDLER(AttlistDecl,
714 (void *userData,
715 const XML_Char *elname,
716 const XML_Char *attname,
717 const XML_Char *att_type,
718 const XML_Char *dflt,
719 int isrequired),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000720 ("(NNO&O&i)",
721 string_intern(self, elname), string_intern(self, attname),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000722 conv_string_to_unicode ,att_type, conv_string_to_unicode ,dflt,
Fred Drake85d835f2001-02-08 15:39:08 +0000723 isrequired))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000724
Martin v. Löwisc847f402003-01-21 11:09:21 +0000725#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +0000726VOID_HANDLER(SkippedEntity,
727 (void *userData,
728 const XML_Char *entityName,
729 int is_parameter_entity),
730 ("Ni",
731 string_intern(self, entityName), is_parameter_entity))
Martin v. Löwisc847f402003-01-21 11:09:21 +0000732#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +0000733
Fred Drake71b63ff2002-06-28 22:29:01 +0000734VOID_HANDLER(NotationDecl,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000735 (void *userData,
736 const XML_Char *notationName,
737 const XML_Char *base,
738 const XML_Char *systemId,
739 const XML_Char *publicId),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000740 ("(NNNN)",
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000741 string_intern(self, notationName), string_intern(self, base),
742 string_intern(self, systemId), string_intern(self, publicId)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000743
Fred Drake6f987622000-08-25 18:03:30 +0000744VOID_HANDLER(StartNamespaceDecl,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000745 (void *userData,
746 const XML_Char *prefix,
747 const XML_Char *uri),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000748 ("(NN)",
749 string_intern(self, prefix), string_intern(self, uri)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000750
Fred Drake6f987622000-08-25 18:03:30 +0000751VOID_HANDLER(EndNamespaceDecl,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000752 (void *userData,
753 const XML_Char *prefix),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000754 ("(N)", string_intern(self, prefix)))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000755
Fred Drake6f987622000-08-25 18:03:30 +0000756VOID_HANDLER(Comment,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000757 (void *userData, const XML_Char *data),
Guido van Rossum4ca94712007-07-23 17:42:32 +0000758 ("(O&)", conv_string_to_unicode ,data))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000759
Fred Drake6f987622000-08-25 18:03:30 +0000760VOID_HANDLER(StartCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000761 (void *userData),
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000762 ("()"))
Fred Drake71b63ff2002-06-28 22:29:01 +0000763
Fred Drake6f987622000-08-25 18:03:30 +0000764VOID_HANDLER(EndCdataSection,
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000765 (void *userData),
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000766 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000767
Fred Drake6f987622000-08-25 18:03:30 +0000768VOID_HANDLER(Default,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000769 (void *userData, const XML_Char *s, int len),
770 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +0000771
Fred Drake6f987622000-08-25 18:03:30 +0000772VOID_HANDLER(DefaultHandlerExpand,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000773 (void *userData, const XML_Char *s, int len),
774 ("(N)", (conv_string_len_to_unicode(s,len))))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000775
Fred Drake71b63ff2002-06-28 22:29:01 +0000776INT_HANDLER(NotStandalone,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000777 (void *userData),
778 ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000779
Fred Drake6f987622000-08-25 18:03:30 +0000780RC_HANDLER(int, ExternalEntityRef,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000781 (XML_Parser parser,
782 const XML_Char *context,
783 const XML_Char *base,
784 const XML_Char *systemId,
785 const XML_Char *publicId),
786 int rc=0;,
Fred Drakeb91a36b2002-06-27 19:40:48 +0000787 ("(O&NNN)",
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000788 conv_string_to_unicode ,context, string_intern(self, base),
789 string_intern(self, systemId), string_intern(self, publicId)),
790 rc = PyLong_AsLong(rv);, rc,
791 XML_GetUserData(parser))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000792
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000793/* XXX UnknownEncodingHandler */
794
Fred Drake85d835f2001-02-08 15:39:08 +0000795VOID_HANDLER(StartDoctypeDecl,
796 (void *userData, const XML_Char *doctypeName,
797 const XML_Char *sysid, const XML_Char *pubid,
798 int has_internal_subset),
Fred Drakeb91a36b2002-06-27 19:40:48 +0000799 ("(NNNi)", string_intern(self, doctypeName),
800 string_intern(self, sysid), string_intern(self, pubid),
Fred Drake85d835f2001-02-08 15:39:08 +0000801 has_internal_subset))
Martin v. Löwis0078f6c2001-01-21 10:18:10 +0000802
803VOID_HANDLER(EndDoctypeDecl, (void *userData), ("()"))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000804
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000805/* ---------------------------------------------------------------- */
806
Fred Drake71b63ff2002-06-28 22:29:01 +0000807static PyObject *
808get_parse_result(xmlparseobject *self, int rv)
809{
810 if (PyErr_Occurred()) {
811 return NULL;
812 }
813 if (rv == 0) {
Martin v. Löwis069dde22003-01-21 10:58:18 +0000814 return set_error(self, XML_GetErrorCode(self->itself));
Fred Drake71b63ff2002-06-28 22:29:01 +0000815 }
Fred Drake2a3d7db2002-06-28 22:56:48 +0000816 if (flush_character_buffer(self) < 0) {
817 return NULL;
818 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000819 return PyLong_FromLong(rv);
Fred Drake71b63ff2002-06-28 22:29:01 +0000820}
821
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000822PyDoc_STRVAR(xmlparse_Parse__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000823"Parse(data[, isfinal])\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000824Parse XML data. `isfinal' should be true at end of input.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000825
826static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000827xmlparse_Parse(xmlparseobject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000828{
Fred Drake0582df92000-07-12 04:49:00 +0000829 char *s;
830 int slen;
831 int isFinal = 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000832
Fred Drake0582df92000-07-12 04:49:00 +0000833 if (!PyArg_ParseTuple(args, "s#|i:Parse", &s, &slen, &isFinal))
834 return NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +0000835
836 return get_parse_result(self, XML_Parse(self->itself, s, slen, isFinal));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000837}
838
Fred Drakeca1f4262000-09-21 20:10:23 +0000839/* File reading copied from cPickle */
840
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000841#define BUF_SIZE 2048
842
Fred Drake0582df92000-07-12 04:49:00 +0000843static int
844readinst(char *buf, int buf_size, PyObject *meth)
845{
846 PyObject *arg = NULL;
847 PyObject *bytes = NULL;
848 PyObject *str = NULL;
849 int len = -1;
Guido van Rossum98297ee2007-11-06 21:34:58 +0000850 char *ptr;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000851
Christian Heimes217cfd12007-12-02 14:31:20 +0000852 if ((bytes = PyLong_FromLong(buf_size)) == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000853 goto finally;
Fred Drake676940b2000-09-22 15:21:31 +0000854
Fred Drake7b6caff2003-07-21 17:05:56 +0000855 if ((arg = PyTuple_New(1)) == NULL) {
856 Py_DECREF(bytes);
Fred Drake0582df92000-07-12 04:49:00 +0000857 goto finally;
Fred Drake7b6caff2003-07-21 17:05:56 +0000858 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000859
Tim Peters954eef72000-09-22 06:01:11 +0000860 PyTuple_SET_ITEM(arg, 0, bytes);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000861
Martin v. Löwis9171f022004-10-13 19:50:11 +0000862#if PY_VERSION_HEX < 0x02020000
863 str = PyObject_CallObject(meth, arg);
864#else
865 str = PyObject_Call(meth, arg, NULL);
866#endif
867 if (str == NULL)
Fred Drake0582df92000-07-12 04:49:00 +0000868 goto finally;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000869
Christian Heimes72b710a2008-05-26 13:28:38 +0000870 if (PyBytes_Check(str))
871 ptr = PyBytes_AS_STRING(str);
Christian Heimes9c4756e2008-05-26 13:22:05 +0000872 else if (PyByteArray_Check(str))
873 ptr = PyByteArray_AS_STRING(str);
Guido van Rossum98297ee2007-11-06 21:34:58 +0000874 else {
Fred Drake71b63ff2002-06-28 22:29:01 +0000875 PyErr_Format(PyExc_TypeError,
Guido van Rossum4ca94712007-07-23 17:42:32 +0000876 "read() did not return a bytes object (type=%.400s)",
Christian Heimes90aa7642007-12-19 02:45:37 +0000877 Py_TYPE(str)->tp_name);
Fred Drake0582df92000-07-12 04:49:00 +0000878 goto finally;
879 }
Christian Heimes90aa7642007-12-19 02:45:37 +0000880 len = Py_SIZE(str);
Fred Drake0582df92000-07-12 04:49:00 +0000881 if (len > buf_size) {
882 PyErr_Format(PyExc_ValueError,
883 "read() returned too much data: "
884 "%i bytes requested, %i returned",
885 buf_size, len);
Fred Drake0582df92000-07-12 04:49:00 +0000886 goto finally;
887 }
Guido van Rossum98297ee2007-11-06 21:34:58 +0000888 memcpy(buf, ptr, len);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000889finally:
Fred Drake0582df92000-07-12 04:49:00 +0000890 Py_XDECREF(arg);
Fred Drakeca1f4262000-09-21 20:10:23 +0000891 Py_XDECREF(str);
Fred Drake0582df92000-07-12 04:49:00 +0000892 return len;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000893}
894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000895PyDoc_STRVAR(xmlparse_ParseFile__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000896"ParseFile(file)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000897Parse XML data from file-like object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000898
899static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000900xmlparse_ParseFile(xmlparseobject *self, PyObject *f)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000901{
Fred Drake0582df92000-07-12 04:49:00 +0000902 int rv = 1;
Fred Drake0582df92000-07-12 04:49:00 +0000903 FILE *fp;
904 PyObject *readmethod = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000905
Guido van Rossumda5b8f22007-06-12 23:30:11 +0000906 {
Fred Drake0582df92000-07-12 04:49:00 +0000907 fp = NULL;
Fred Drakeca1f4262000-09-21 20:10:23 +0000908 readmethod = PyObject_GetAttrString(f, "read");
909 if (readmethod == NULL) {
Fred Drake0582df92000-07-12 04:49:00 +0000910 PyErr_Clear();
Fred Drake71b63ff2002-06-28 22:29:01 +0000911 PyErr_SetString(PyExc_TypeError,
Fred Drake0582df92000-07-12 04:49:00 +0000912 "argument must have 'read' attribute");
Fred Drake814f9fe2002-07-19 22:03:03 +0000913 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000914 }
915 }
916 for (;;) {
917 int bytes_read;
918 void *buf = XML_GetBuffer(self->itself, BUF_SIZE);
Fred Drake7b6caff2003-07-21 17:05:56 +0000919 if (buf == NULL) {
Fred Drakef239c6d2003-07-21 17:22:43 +0000920 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000921 return PyErr_NoMemory();
Fred Drake7b6caff2003-07-21 17:05:56 +0000922 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000923
Fred Drake0582df92000-07-12 04:49:00 +0000924 if (fp) {
925 bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp);
926 if (bytes_read < 0) {
927 PyErr_SetFromErrno(PyExc_IOError);
928 return NULL;
929 }
930 }
931 else {
932 bytes_read = readinst(buf, BUF_SIZE, readmethod);
Fred Drake7b6caff2003-07-21 17:05:56 +0000933 if (bytes_read < 0) {
934 Py_DECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000935 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000936 }
Fred Drake0582df92000-07-12 04:49:00 +0000937 }
938 rv = XML_ParseBuffer(self->itself, bytes_read, bytes_read == 0);
Fred Drake7b6caff2003-07-21 17:05:56 +0000939 if (PyErr_Occurred()) {
940 Py_XDECREF(readmethod);
Fred Drake0582df92000-07-12 04:49:00 +0000941 return NULL;
Fred Drake7b6caff2003-07-21 17:05:56 +0000942 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000943
Fred Drake0582df92000-07-12 04:49:00 +0000944 if (!rv || bytes_read == 0)
945 break;
946 }
Fred Drake7b6caff2003-07-21 17:05:56 +0000947 Py_XDECREF(readmethod);
Fred Drake71b63ff2002-06-28 22:29:01 +0000948 return get_parse_result(self, rv);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000949}
950
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000951PyDoc_STRVAR(xmlparse_SetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000952"SetBase(base_url)\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000953Set the base URL for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000954
955static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +0000956xmlparse_SetBase(xmlparseobject *self, PyObject *args)
957{
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000958 char *base;
959
Fred Drake0582df92000-07-12 04:49:00 +0000960 if (!PyArg_ParseTuple(args, "s:SetBase", &base))
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000961 return NULL;
Fred Drake0582df92000-07-12 04:49:00 +0000962 if (!XML_SetBase(self->itself, base)) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000963 return PyErr_NoMemory();
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000964 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000965 Py_INCREF(Py_None);
966 return Py_None;
967}
968
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000969PyDoc_STRVAR(xmlparse_GetBase__doc__,
Thomas Wouters35317302000-07-22 16:34:15 +0000970"GetBase() -> url\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000971Return base URL string for the parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000972
973static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000974xmlparse_GetBase(xmlparseobject *self, PyObject *unused)
Fred Drake0582df92000-07-12 04:49:00 +0000975{
Fred Drake0582df92000-07-12 04:49:00 +0000976 return Py_BuildValue("z", XML_GetBase(self->itself));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +0000977}
978
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000979PyDoc_STRVAR(xmlparse_GetInputContext__doc__,
Fred Drakebd6101c2001-02-14 18:29:45 +0000980"GetInputContext() -> string\n\
981Return the untranslated text of the input that caused the current event.\n\
982If the event was generated by a large amount of text (such as a start tag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +0000983for an element with many attributes), not all of the text may be available.");
Fred Drakebd6101c2001-02-14 18:29:45 +0000984
985static PyObject *
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000986xmlparse_GetInputContext(xmlparseobject *self, PyObject *unused)
Fred Drakebd6101c2001-02-14 18:29:45 +0000987{
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000988 if (self->in_callback) {
989 int offset, size;
990 const char *buffer
991 = XML_GetInputContext(self->itself, &offset, &size);
Fred Drakebd6101c2001-02-14 18:29:45 +0000992
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000993 if (buffer != NULL)
Christian Heimes72b710a2008-05-26 13:28:38 +0000994 return PyBytes_FromStringAndSize(buffer + offset,
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000995 size - offset);
996 else
997 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +0000998 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000999 else
1000 Py_RETURN_NONE;
Fred Drakebd6101c2001-02-14 18:29:45 +00001001}
Fred Drakebd6101c2001-02-14 18:29:45 +00001002
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001003PyDoc_STRVAR(xmlparse_ExternalEntityParserCreate__doc__,
Fred Drake2d4ac202001-01-03 15:36:25 +00001004"ExternalEntityParserCreate(context[, encoding])\n\
Tim Peters51dc9682000-09-24 22:12:45 +00001005Create a parser for parsing an external entity based on the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001006information passed to the ExternalEntityRefHandler.");
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001007
1008static PyObject *
1009xmlparse_ExternalEntityParserCreate(xmlparseobject *self, PyObject *args)
1010{
1011 char *context;
1012 char *encoding = NULL;
1013 xmlparseobject *new_parser;
1014 int i;
1015
Martin v. Löwisc57428d2001-09-19 09:55:09 +00001016 if (!PyArg_ParseTuple(args, "z|s:ExternalEntityParserCreate",
Fred Drakecde79132001-04-25 16:01:30 +00001017 &context, &encoding)) {
1018 return NULL;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001019 }
1020
Martin v. Löwis894258c2001-09-23 10:20:10 +00001021#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001022 /* Python versions 2.0 and 2.1 */
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001023 new_parser = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001024#else
1025 /* Python versions 2.2 and later */
1026 new_parser = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1027#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001028
1029 if (new_parser == NULL)
1030 return NULL;
Fred Drake2a3d7db2002-06-28 22:56:48 +00001031 new_parser->buffer_size = self->buffer_size;
1032 new_parser->buffer_used = 0;
1033 if (self->buffer != NULL) {
1034 new_parser->buffer = malloc(new_parser->buffer_size);
1035 if (new_parser->buffer == NULL) {
Fred Drakeb28467b2002-07-02 15:44:36 +00001036#ifndef Py_TPFLAGS_HAVE_GC
1037 /* Code for versions 2.0 and 2.1 */
1038 PyObject_Del(new_parser);
1039#else
1040 /* Code for versions 2.2 and later. */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001041 PyObject_GC_Del(new_parser);
Fred Drakeb28467b2002-07-02 15:44:36 +00001042#endif
Fred Drake2a3d7db2002-06-28 22:56:48 +00001043 return PyErr_NoMemory();
1044 }
1045 }
1046 else
1047 new_parser->buffer = NULL;
Fred Drake85d835f2001-02-08 15:39:08 +00001048 new_parser->ordered_attributes = self->ordered_attributes;
1049 new_parser->specified_attributes = self->specified_attributes;
Fred Drakebd6101c2001-02-14 18:29:45 +00001050 new_parser->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001051 new_parser->ns_prefixes = self->ns_prefixes;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001052 new_parser->itself = XML_ExternalEntityParserCreate(self->itself, context,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001053 encoding);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001054 new_parser->handlers = 0;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001055 new_parser->intern = self->intern;
1056 Py_XINCREF(new_parser->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001057#ifdef Py_TPFLAGS_HAVE_GC
1058 PyObject_GC_Track(new_parser);
1059#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001060 PyObject_GC_Init(new_parser);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001061#endif
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001062
1063 if (!new_parser->itself) {
Fred Drake85d835f2001-02-08 15:39:08 +00001064 Py_DECREF(new_parser);
1065 return PyErr_NoMemory();
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001066 }
1067
1068 XML_SetUserData(new_parser->itself, (void *)new_parser);
1069
1070 /* allocate and clear handlers first */
Fred Drake2a3d7db2002-06-28 22:56:48 +00001071 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake85d835f2001-02-08 15:39:08 +00001072 /* do nothing */;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001073
Fred Drake2a3d7db2002-06-28 22:56:48 +00001074 new_parser->handlers = malloc(sizeof(PyObject *) * i);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001075 if (!new_parser->handlers) {
Fred Drake85d835f2001-02-08 15:39:08 +00001076 Py_DECREF(new_parser);
1077 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001078 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001079 clear_handlers(new_parser, 1);
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001080
1081 /* then copy handlers from self */
1082 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001083 PyObject *handler = self->handlers[i];
1084 if (handler != NULL) {
1085 Py_INCREF(handler);
1086 new_parser->handlers[i] = handler;
1087 handler_info[i].setter(new_parser->itself,
Fred Drake85d835f2001-02-08 15:39:08 +00001088 handler_info[i].handler);
1089 }
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001090 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001091 return (PyObject *)new_parser;
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001092}
1093
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001094PyDoc_STRVAR(xmlparse_SetParamEntityParsing__doc__,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001095"SetParamEntityParsing(flag) -> success\n\
1096Controls parsing of parameter entities (including the external DTD\n\
1097subset). Possible flag values are XML_PARAM_ENTITY_PARSING_NEVER,\n\
1098XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE and\n\
1099XML_PARAM_ENTITY_PARSING_ALWAYS. Returns true if setting the flag\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001100was successful.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001101
1102static PyObject*
Fred Drakebd6101c2001-02-14 18:29:45 +00001103xmlparse_SetParamEntityParsing(xmlparseobject *p, PyObject* args)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001104{
Fred Drake85d835f2001-02-08 15:39:08 +00001105 int flag;
1106 if (!PyArg_ParseTuple(args, "i", &flag))
1107 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001108 flag = XML_SetParamEntityParsing(p->itself, flag);
Christian Heimes217cfd12007-12-02 14:31:20 +00001109 return PyLong_FromLong(flag);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001110}
1111
Martin v. Löwisc847f402003-01-21 11:09:21 +00001112
#if XML_COMBINED_VERSION >= 19505
PyDoc_STRVAR(xmlparse_UseForeignDTD__doc__,
"UseForeignDTD([flag])\n\
Allows the application to provide an artificial external subset if one is\n\
not specified as part of the document instance. This readily allows the\n\
use of a 'default' document type controlled by the application, while still\n\
getting the advantage of providing document type information to the parser.\n\
'flag' defaults to True if not provided.");

/* UseForeignDTD([flag]): evaluate the optional flag for truth (defaulting
   to true) and pass it to XML_UseForeignDTD().

   Returns None on success.  Returns NULL with an exception set when the
   arguments are invalid, when evaluating the flag's truth value raises,
   or (through set_error()) when expat reports an error. */
static PyObject *
xmlparse_UseForeignDTD(xmlparseobject *self, PyObject *args)
{
    PyObject *flagobj = NULL;
    XML_Bool flag = XML_TRUE;
    enum XML_Error rc;

    if (!PyArg_UnpackTuple(args, "UseForeignDTD", 0, 1, &flagobj))
        return NULL;
    if (flagobj != NULL) {
        /* PyObject_IsTrue() returns -1 when the truth test itself raises;
           propagate that instead of silently treating it as true. */
        int truth = PyObject_IsTrue(flagobj);
        if (truth < 0)
            return NULL;
        flag = truth ? XML_TRUE : XML_FALSE;
    }
    rc = XML_UseForeignDTD(self->itself, flag);
    if (rc != XML_ERROR_NONE) {
        return set_error(self, rc);
    }
    Py_INCREF(Py_None);
    return Py_None;
}
#endif
Martin v. Löwis069dde22003-01-21 10:58:18 +00001140
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001141static PyObject *xmlparse_dir(PyObject *self, PyObject* noargs);
1142
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001143static struct PyMethodDef xmlparse_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001144 {"Parse", (PyCFunction)xmlparse_Parse,
1145 METH_VARARGS, xmlparse_Parse__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001146 {"ParseFile", (PyCFunction)xmlparse_ParseFile,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001147 METH_O, xmlparse_ParseFile__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001148 {"SetBase", (PyCFunction)xmlparse_SetBase,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001149 METH_VARARGS, xmlparse_SetBase__doc__},
Fred Drake0582df92000-07-12 04:49:00 +00001150 {"GetBase", (PyCFunction)xmlparse_GetBase,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001151 METH_NOARGS, xmlparse_GetBase__doc__},
Lars Gustäbel4a30a072000-09-24 20:50:52 +00001152 {"ExternalEntityParserCreate", (PyCFunction)xmlparse_ExternalEntityParserCreate,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001153 METH_VARARGS, xmlparse_ExternalEntityParserCreate__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001154 {"SetParamEntityParsing", (PyCFunction)xmlparse_SetParamEntityParsing,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001155 METH_VARARGS, xmlparse_SetParamEntityParsing__doc__},
Fred Drakebd6101c2001-02-14 18:29:45 +00001156 {"GetInputContext", (PyCFunction)xmlparse_GetInputContext,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001157 METH_NOARGS, xmlparse_GetInputContext__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001158#if XML_COMBINED_VERSION >= 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001159 {"UseForeignDTD", (PyCFunction)xmlparse_UseForeignDTD,
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001160 METH_VARARGS, xmlparse_UseForeignDTD__doc__},
Martin v. Löwisc847f402003-01-21 11:09:21 +00001161#endif
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001162 {"__dir__", xmlparse_dir, METH_NOARGS},
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001163 {NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001164};
1165
1166/* ---------- */
1167
1168
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001169
Fred Drake71b63ff2002-06-28 22:29:01 +00001170/* pyexpat international encoding support.
1171 Make it as simple as possible.
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001172*/
1173
/* Holds the byte values 0..255 in order, followed by a terminating 0. */
static char template_buffer[257];

/* Fill template_buffer so that entry i holds byte value i for every i in
   0..255, and store 0 in the final slot. */
static void
init_template_buffer(void)
{
    int byte = 0;

    while (byte < 256) {
        template_buffer[byte] = byte;
        byte++;
    }
    template_buffer[256] = 0;
}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001185
Fred Drake71b63ff2002-06-28 22:29:01 +00001186static int
1187PyUnknownEncodingHandler(void *encodingHandlerData,
1188 const XML_Char *name,
1189 XML_Encoding *info)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001190{
Fred Drakebb66a202001-03-01 20:48:17 +00001191 PyUnicodeObject *_u_string = NULL;
1192 int result = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001193 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001194
Fred Drakebb66a202001-03-01 20:48:17 +00001195 /* Yes, supports only 8bit encodings */
1196 _u_string = (PyUnicodeObject *)
1197 PyUnicode_Decode(template_buffer, 256, name, "replace");
Fred Drake71b63ff2002-06-28 22:29:01 +00001198
Fred Drakebb66a202001-03-01 20:48:17 +00001199 if (_u_string == NULL)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001200 return result;
Fred Drake71b63ff2002-06-28 22:29:01 +00001201
Fred Drakebb66a202001-03-01 20:48:17 +00001202 for (i = 0; i < 256; i++) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001203 /* Stupid to access directly, but fast */
1204 Py_UNICODE c = _u_string->str[i];
1205 if (c == Py_UNICODE_REPLACEMENT_CHARACTER)
1206 info->map[i] = -1;
1207 else
1208 info->map[i] = c;
Tim Peters63cb99e2001-02-17 18:12:50 +00001209 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001210 info->data = NULL;
1211 info->convert = NULL;
1212 info->release = NULL;
Fred Drake71b63ff2002-06-28 22:29:01 +00001213 result = 1;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001214 Py_DECREF(_u_string);
1215 return result;
1216}
1217
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001218
1219static PyObject *
Fred Drakeb91a36b2002-06-27 19:40:48 +00001220newxmlparseobject(char *encoding, char *namespace_separator, PyObject *intern)
Fred Drake0582df92000-07-12 04:49:00 +00001221{
1222 int i;
1223 xmlparseobject *self;
Fred Drake71b63ff2002-06-28 22:29:01 +00001224
Martin v. Löwis894258c2001-09-23 10:20:10 +00001225#ifdef Py_TPFLAGS_HAVE_GC
1226 /* Code for versions 2.2 and later */
1227 self = PyObject_GC_New(xmlparseobject, &Xmlparsetype);
1228#else
Fred Drake0582df92000-07-12 04:49:00 +00001229 self = PyObject_New(xmlparseobject, &Xmlparsetype);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001230#endif
Fred Drake0582df92000-07-12 04:49:00 +00001231 if (self == NULL)
1232 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001233
Fred Drake2a3d7db2002-06-28 22:56:48 +00001234 self->buffer = NULL;
1235 self->buffer_size = CHARACTER_DATA_BUFFER_SIZE;
1236 self->buffer_used = 0;
Fred Drake85d835f2001-02-08 15:39:08 +00001237 self->ordered_attributes = 0;
1238 self->specified_attributes = 0;
Fred Drakebd6101c2001-02-14 18:29:45 +00001239 self->in_callback = 0;
Martin v. Löwis069dde22003-01-21 10:58:18 +00001240 self->ns_prefixes = 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001241 self->handlers = NULL;
Fred Drakecde79132001-04-25 16:01:30 +00001242 if (namespace_separator != NULL) {
Fred Drake0582df92000-07-12 04:49:00 +00001243 self->itself = XML_ParserCreateNS(encoding, *namespace_separator);
1244 }
Fred Drake85d835f2001-02-08 15:39:08 +00001245 else {
Fred Drake0582df92000-07-12 04:49:00 +00001246 self->itself = XML_ParserCreate(encoding);
1247 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001248 self->intern = intern;
1249 Py_XINCREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001250#ifdef Py_TPFLAGS_HAVE_GC
1251 PyObject_GC_Track(self);
1252#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001253 PyObject_GC_Init(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001254#endif
Fred Drake0582df92000-07-12 04:49:00 +00001255 if (self->itself == NULL) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001256 PyErr_SetString(PyExc_RuntimeError,
Fred Drake0582df92000-07-12 04:49:00 +00001257 "XML_ParserCreate failed");
1258 Py_DECREF(self);
1259 return NULL;
1260 }
1261 XML_SetUserData(self->itself, (void *)self);
Fred Drake7c75bf22002-07-01 14:02:31 +00001262 XML_SetUnknownEncodingHandler(self->itself,
1263 (XML_UnknownEncodingHandler) PyUnknownEncodingHandler, NULL);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001264
Fred Drake2a3d7db2002-06-28 22:56:48 +00001265 for (i = 0; handler_info[i].name != NULL; i++)
Fred Drake0582df92000-07-12 04:49:00 +00001266 /* do nothing */;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001267
Fred Drake7c75bf22002-07-01 14:02:31 +00001268 self->handlers = malloc(sizeof(PyObject *) * i);
1269 if (!self->handlers) {
Fred Drake71b63ff2002-06-28 22:29:01 +00001270 Py_DECREF(self);
1271 return PyErr_NoMemory();
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001272 }
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001273 clear_handlers(self, 1);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001274
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001275 return (PyObject*)self;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001276}
1277
1278
1279static void
Fred Drake0582df92000-07-12 04:49:00 +00001280xmlparse_dealloc(xmlparseobject *self)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001281{
Fred Drake0582df92000-07-12 04:49:00 +00001282 int i;
Martin v. Löwis894258c2001-09-23 10:20:10 +00001283#ifdef Py_TPFLAGS_HAVE_GC
1284 PyObject_GC_UnTrack(self);
1285#else
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001286 PyObject_GC_Fini(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001287#endif
Fred Drake85d835f2001-02-08 15:39:08 +00001288 if (self->itself != NULL)
Fred Drake0582df92000-07-12 04:49:00 +00001289 XML_ParserFree(self->itself);
1290 self->itself = NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001291
Fred Drake85d835f2001-02-08 15:39:08 +00001292 if (self->handlers != NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001293 PyObject *temp;
Fred Drake85d835f2001-02-08 15:39:08 +00001294 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drakecde79132001-04-25 16:01:30 +00001295 temp = self->handlers[i];
1296 self->handlers[i] = NULL;
1297 Py_XDECREF(temp);
Fred Drake85d835f2001-02-08 15:39:08 +00001298 }
1299 free(self->handlers);
Fred Drake71b63ff2002-06-28 22:29:01 +00001300 self->handlers = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001301 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001302 if (self->buffer != NULL) {
1303 free(self->buffer);
1304 self->buffer = NULL;
1305 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001306 Py_XDECREF(self->intern);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001307#ifndef Py_TPFLAGS_HAVE_GC
Martin v. Löwisb4fcf4d2002-06-30 06:40:55 +00001308 /* Code for versions 2.0 and 2.1 */
Fred Drake0582df92000-07-12 04:49:00 +00001309 PyObject_Del(self);
Martin v. Löwis894258c2001-09-23 10:20:10 +00001310#else
1311 /* Code for versions 2.2 and later. */
1312 PyObject_GC_Del(self);
1313#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001314}
1315
Fred Drake0582df92000-07-12 04:49:00 +00001316static int
1317handlername2int(const char *name)
1318{
1319 int i;
Fred Drake71b63ff2002-06-28 22:29:01 +00001320 for (i = 0; handler_info[i].name != NULL; i++) {
Fred Drake0582df92000-07-12 04:49:00 +00001321 if (strcmp(name, handler_info[i].name) == 0) {
1322 return i;
1323 }
1324 }
1325 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001326}
1327
1328static PyObject *
Fred Drake71b63ff2002-06-28 22:29:01 +00001329get_pybool(int istrue)
1330{
1331 PyObject *result = istrue ? Py_True : Py_False;
1332 Py_INCREF(result);
1333 return result;
1334}
1335
1336static PyObject *
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001337xmlparse_getattro(xmlparseobject *self, PyObject *nameobj)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001338{
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001339 char *name = "";
1340 int handlernum = -1;
1341
1342 if (PyUnicode_Check(nameobj))
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001343 name = _PyUnicode_AsString(nameobj);
1344
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001345 handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001346
1347 if (handlernum != -1) {
1348 PyObject *result = self->handlers[handlernum];
1349 if (result == NULL)
1350 result = Py_None;
1351 Py_INCREF(result);
1352 return result;
1353 }
1354 if (name[0] == 'E') {
1355 if (strcmp(name, "ErrorCode") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001356 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001357 XML_GetErrorCode(self->itself));
1358 if (strcmp(name, "ErrorLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001359 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001360 XML_GetErrorLineNumber(self->itself));
1361 if (strcmp(name, "ErrorColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001362 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001363 XML_GetErrorColumnNumber(self->itself));
1364 if (strcmp(name, "ErrorByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001365 return PyLong_FromLong((long)
Fred Drake71b63ff2002-06-28 22:29:01 +00001366 XML_GetErrorByteIndex(self->itself));
1367 }
Dave Cole3203efb2004-08-26 00:37:31 +00001368 if (name[0] == 'C') {
1369 if (strcmp(name, "CurrentLineNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001370 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001371 XML_GetCurrentLineNumber(self->itself));
1372 if (strcmp(name, "CurrentColumnNumber") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001373 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001374 XML_GetCurrentColumnNumber(self->itself));
1375 if (strcmp(name, "CurrentByteIndex") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001376 return PyLong_FromLong((long)
Dave Cole3203efb2004-08-26 00:37:31 +00001377 XML_GetCurrentByteIndex(self->itself));
1378 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001379 if (name[0] == 'b') {
1380 if (strcmp(name, "buffer_size") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001381 return PyLong_FromLong((long) self->buffer_size);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001382 if (strcmp(name, "buffer_text") == 0)
1383 return get_pybool(self->buffer != NULL);
1384 if (strcmp(name, "buffer_used") == 0)
Christian Heimes217cfd12007-12-02 14:31:20 +00001385 return PyLong_FromLong((long) self->buffer_used);
Fred Drake2a3d7db2002-06-28 22:56:48 +00001386 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001387 if (strcmp(name, "namespace_prefixes") == 0)
1388 return get_pybool(self->ns_prefixes);
Fred Drake85d835f2001-02-08 15:39:08 +00001389 if (strcmp(name, "ordered_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001390 return get_pybool(self->ordered_attributes);
Fred Drake85d835f2001-02-08 15:39:08 +00001391 if (strcmp(name, "specified_attributes") == 0)
Fred Drake71b63ff2002-06-28 22:29:01 +00001392 return get_pybool((long) self->specified_attributes);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001393 if (strcmp(name, "intern") == 0) {
1394 if (self->intern == NULL) {
1395 Py_INCREF(Py_None);
1396 return Py_None;
1397 }
1398 else {
1399 Py_INCREF(self->intern);
1400 return self->intern;
1401 }
1402 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001403
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001404 return PyObject_GenericGetAttr((PyObject*)self, nameobj);
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001405}
1406
1407static PyObject *
1408xmlparse_dir(PyObject *self, PyObject* noargs)
1409{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001410#define APPEND(list, str) \
1411 do { \
1412 PyObject *o = PyUnicode_FromString(str); \
1413 if (o != NULL) \
1414 PyList_Append(list, o); \
1415 Py_XDECREF(o); \
Martin v. Löwis069dde22003-01-21 10:58:18 +00001416 } while (0)
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001417
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001418 int i;
1419 PyObject *rc = PyList_New(0);
1420 if (!rc)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001421 return NULL;
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001422 for (i = 0; handler_info[i].name != NULL; i++) {
1423 PyObject *o = get_handler_name(&handler_info[i]);
1424 if (o != NULL)
1425 PyList_Append(rc, o);
1426 Py_XDECREF(o);
1427 }
1428 APPEND(rc, "ErrorCode");
1429 APPEND(rc, "ErrorLineNumber");
1430 APPEND(rc, "ErrorColumnNumber");
1431 APPEND(rc, "ErrorByteIndex");
1432 APPEND(rc, "CurrentLineNumber");
1433 APPEND(rc, "CurrentColumnNumber");
1434 APPEND(rc, "CurrentByteIndex");
1435 APPEND(rc, "buffer_size");
1436 APPEND(rc, "buffer_text");
1437 APPEND(rc, "buffer_used");
1438 APPEND(rc, "namespace_prefixes");
1439 APPEND(rc, "ordered_attributes");
1440 APPEND(rc, "specified_attributes");
1441 APPEND(rc, "intern");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001442
Neal Norwitzfa56e2d2003-01-19 15:40:09 +00001443#undef APPEND
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001444
1445 if (PyErr_Occurred()) {
1446 Py_DECREF(rc);
1447 rc = NULL;
Fred Drake0582df92000-07-12 04:49:00 +00001448 }
Neal Norwitz8dfc4a92007-08-11 06:39:53 +00001449
1450 return rc;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001451}
1452
Fred Drake6f987622000-08-25 18:03:30 +00001453static int
1454sethandler(xmlparseobject *self, const char *name, PyObject* v)
Fred Drake0582df92000-07-12 04:49:00 +00001455{
1456 int handlernum = handlername2int(name);
Fred Drake71b63ff2002-06-28 22:29:01 +00001457 if (handlernum >= 0) {
1458 xmlhandler c_handler = NULL;
1459 PyObject *temp = self->handlers[handlernum];
1460
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001461 if (v == Py_None) {
1462 /* If this is the character data handler, and a character
1463 data handler is already active, we need to be more
1464 careful. What we can safely do is replace the existing
1465 character data handler callback function with a no-op
1466 function that will refuse to call Python. The downside
1467 is that this doesn't completely remove the character
1468 data handler from the C layer if there's any callback
1469 active, so Expat does a little more work than it
1470 otherwise would, but that's really an odd case. A more
1471 elaborate system of handlers and state could remove the
1472 C handler more effectively. */
1473 if (handlernum == CharacterData && self->in_callback)
1474 c_handler = noop_character_data_handler;
Fred Drake71b63ff2002-06-28 22:29:01 +00001475 v = NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001476 }
Fred Drake71b63ff2002-06-28 22:29:01 +00001477 else if (v != NULL) {
1478 Py_INCREF(v);
1479 c_handler = handler_info[handlernum].handler;
1480 }
Fred Drake0582df92000-07-12 04:49:00 +00001481 self->handlers[handlernum] = v;
Fred Drake71b63ff2002-06-28 22:29:01 +00001482 Py_XDECREF(temp);
1483 handler_info[handlernum].setter(self->itself, c_handler);
Fred Drake0582df92000-07-12 04:49:00 +00001484 return 1;
1485 }
1486 return 0;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001487}
1488
1489static int
Fred Drake6f987622000-08-25 18:03:30 +00001490xmlparse_setattr(xmlparseobject *self, char *name, PyObject *v)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001491{
Fred Drake6f987622000-08-25 18:03:30 +00001492 /* Set attribute 'name' to value 'v'. v==NULL means delete */
Fred Drake85d835f2001-02-08 15:39:08 +00001493 if (v == NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001494 PyErr_SetString(PyExc_RuntimeError, "Cannot delete attribute");
1495 return -1;
1496 }
Fred Drake2a3d7db2002-06-28 22:56:48 +00001497 if (strcmp(name, "buffer_text") == 0) {
1498 if (PyObject_IsTrue(v)) {
1499 if (self->buffer == NULL) {
1500 self->buffer = malloc(self->buffer_size);
1501 if (self->buffer == NULL) {
1502 PyErr_NoMemory();
1503 return -1;
1504 }
1505 self->buffer_used = 0;
1506 }
1507 }
1508 else if (self->buffer != NULL) {
1509 if (flush_character_buffer(self) < 0)
1510 return -1;
1511 free(self->buffer);
1512 self->buffer = NULL;
1513 }
1514 return 0;
1515 }
Martin v. Löwis069dde22003-01-21 10:58:18 +00001516 if (strcmp(name, "namespace_prefixes") == 0) {
1517 if (PyObject_IsTrue(v))
1518 self->ns_prefixes = 1;
1519 else
1520 self->ns_prefixes = 0;
1521 XML_SetReturnNSTriplet(self->itself, self->ns_prefixes);
1522 return 0;
1523 }
Fred Drake85d835f2001-02-08 15:39:08 +00001524 if (strcmp(name, "ordered_attributes") == 0) {
1525 if (PyObject_IsTrue(v))
1526 self->ordered_attributes = 1;
1527 else
1528 self->ordered_attributes = 0;
1529 return 0;
1530 }
Fred Drake85d835f2001-02-08 15:39:08 +00001531 if (strcmp(name, "specified_attributes") == 0) {
1532 if (PyObject_IsTrue(v))
1533 self->specified_attributes = 1;
1534 else
1535 self->specified_attributes = 0;
Fred Drake6f987622000-08-25 18:03:30 +00001536 return 0;
1537 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001538
1539 if (strcmp(name, "buffer_size") == 0) {
1540 long new_buffer_size;
1541 if (!PyLong_Check(v)) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001542 PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
1543 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001544 }
1545
1546 new_buffer_size=PyLong_AS_LONG(v);
1547 /* trivial case -- no change */
1548 if (new_buffer_size == self->buffer_size) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001549 return 0;
Christian Heimes2380ac72008-01-09 00:17:24 +00001550 }
1551
1552 if (new_buffer_size <= 0) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001553 PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
1554 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001555 }
1556
1557 /* check maximum */
1558 if (new_buffer_size > INT_MAX) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001559 char errmsg[100];
1560 sprintf(errmsg, "buffer_size must not be greater than %i", INT_MAX);
1561 PyErr_SetString(PyExc_ValueError, errmsg);
1562 return -1;
Christian Heimes2380ac72008-01-09 00:17:24 +00001563 }
1564
1565 if (self->buffer != NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001566 /* there is already a buffer */
1567 if (self->buffer_used != 0) {
1568 flush_character_buffer(self);
1569 }
1570 /* free existing buffer */
1571 free(self->buffer);
Christian Heimes2380ac72008-01-09 00:17:24 +00001572 }
1573 self->buffer = malloc(new_buffer_size);
1574 if (self->buffer == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001575 PyErr_NoMemory();
1576 return -1;
1577 }
Christian Heimes2380ac72008-01-09 00:17:24 +00001578 self->buffer_size = new_buffer_size;
1579 return 0;
1580 }
1581
Fred Drake2a3d7db2002-06-28 22:56:48 +00001582 if (strcmp(name, "CharacterDataHandler") == 0) {
1583 /* If we're changing the character data handler, flush all
1584 * cached data with the old handler. Not sure there's a
1585 * "right" thing to do, though, but this probably won't
1586 * happen.
1587 */
1588 if (flush_character_buffer(self) < 0)
1589 return -1;
1590 }
Fred Drake6f987622000-08-25 18:03:30 +00001591 if (sethandler(self, name, v)) {
1592 return 0;
1593 }
1594 PyErr_SetString(PyExc_AttributeError, name);
1595 return -1;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001596}
1597
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001598static int
1599xmlparse_traverse(xmlparseobject *op, visitproc visit, void *arg)
1600{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001601 int i;
1602 for (i = 0; handler_info[i].name != NULL; i++)
1603 Py_VISIT(op->handlers[i]);
Fred Drakecde79132001-04-25 16:01:30 +00001604 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001605}
1606
1607static int
1608xmlparse_clear(xmlparseobject *op)
1609{
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00001610 clear_handlers(op, 0);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001611 Py_CLEAR(op->intern);
Fred Drakecde79132001-04-25 16:01:30 +00001612 return 0;
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001613}
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001614
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001615PyDoc_STRVAR(Xmlparsetype__doc__, "XML parser");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001616
1617static PyTypeObject Xmlparsetype = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001618 PyVarObject_HEAD_INIT(NULL, 0)
1619 "pyexpat.xmlparser", /*tp_name*/
1620 sizeof(xmlparseobject) + PyGC_HEAD_SIZE,/*tp_basicsize*/
1621 0, /*tp_itemsize*/
1622 /* methods */
1623 (destructor)xmlparse_dealloc, /*tp_dealloc*/
1624 (printfunc)0, /*tp_print*/
1625 0, /*tp_getattr*/
1626 (setattrfunc)xmlparse_setattr, /*tp_setattr*/
1627 0, /*tp_reserved*/
1628 (reprfunc)0, /*tp_repr*/
1629 0, /*tp_as_number*/
1630 0, /*tp_as_sequence*/
1631 0, /*tp_as_mapping*/
1632 (hashfunc)0, /*tp_hash*/
1633 (ternaryfunc)0, /*tp_call*/
1634 (reprfunc)0, /*tp_str*/
1635 (getattrofunc)xmlparse_getattro, /* tp_getattro */
1636 0, /* tp_setattro */
1637 0, /* tp_as_buffer */
Martin v. Löwis894258c2001-09-23 10:20:10 +00001638#ifdef Py_TPFLAGS_HAVE_GC
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001639 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001640#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001641 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_GC, /*tp_flags*/
Martin v. Löwis894258c2001-09-23 10:20:10 +00001642#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001643 Xmlparsetype__doc__, /* tp_doc - Documentation string */
1644 (traverseproc)xmlparse_traverse, /* tp_traverse */
1645 (inquiry)xmlparse_clear, /* tp_clear */
1646 0, /* tp_richcompare */
1647 0, /* tp_weaklistoffset */
1648 0, /* tp_iter */
1649 0, /* tp_iternext */
1650 xmlparse_methods, /* tp_methods */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001651};
1652
1653/* End of code for xmlparser objects */
1654/* -------------------------------------------------------- */
1655
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001656PyDoc_STRVAR(pyexpat_ParserCreate__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001657"ParserCreate([encoding[, namespace_separator]]) -> parser\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001658Return a new XML parser object.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001659
1660static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001661pyexpat_ParserCreate(PyObject *notused, PyObject *args, PyObject *kw)
1662{
Fred Drakecde79132001-04-25 16:01:30 +00001663 char *encoding = NULL;
1664 char *namespace_separator = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001665 PyObject *intern = NULL;
1666 PyObject *result;
1667 int intern_decref = 0;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001668 static char *kwlist[] = {"encoding", "namespace_separator",
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001669 "intern", NULL};
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001670
Fred Drakeb91a36b2002-06-27 19:40:48 +00001671 if (!PyArg_ParseTupleAndKeywords(args, kw, "|zzO:ParserCreate", kwlist,
1672 &encoding, &namespace_separator, &intern))
Fred Drakecde79132001-04-25 16:01:30 +00001673 return NULL;
1674 if (namespace_separator != NULL
1675 && strlen(namespace_separator) > 1) {
1676 PyErr_SetString(PyExc_ValueError,
1677 "namespace_separator must be at most one"
1678 " character, omitted, or None");
1679 return NULL;
1680 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001681 /* Explicitly passing None means no interning is desired.
1682 Not passing anything means that a new dictionary is used. */
1683 if (intern == Py_None)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001684 intern = NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001685 else if (intern == NULL) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001686 intern = PyDict_New();
1687 if (!intern)
1688 return NULL;
1689 intern_decref = 1;
Fred Drake71b63ff2002-06-28 22:29:01 +00001690 }
Fred Drakeb91a36b2002-06-27 19:40:48 +00001691 else if (!PyDict_Check(intern)) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001692 PyErr_SetString(PyExc_TypeError, "intern must be a dictionary");
1693 return NULL;
Fred Drakeb91a36b2002-06-27 19:40:48 +00001694 }
1695
1696 result = newxmlparseobject(encoding, namespace_separator, intern);
1697 if (intern_decref) {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001698 Py_DECREF(intern);
Fred Drakeb91a36b2002-06-27 19:40:48 +00001699 }
1700 return result;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001701}
1702
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001703PyDoc_STRVAR(pyexpat_ErrorString__doc__,
Fred Drake0582df92000-07-12 04:49:00 +00001704"ErrorString(errno) -> string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001705Returns string error for given number.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001706
1707static PyObject *
Fred Drake0582df92000-07-12 04:49:00 +00001708pyexpat_ErrorString(PyObject *self, PyObject *args)
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001709{
Fred Drake0582df92000-07-12 04:49:00 +00001710 long code = 0;
1711
1712 if (!PyArg_ParseTuple(args, "l:ErrorString", &code))
1713 return NULL;
1714 return Py_BuildValue("z", XML_ErrorString((int)code));
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001715}
1716
1717/* List of methods defined in the module */
1718
1719static struct PyMethodDef pyexpat_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001720 {"ParserCreate", (PyCFunction)pyexpat_ParserCreate,
Fred Drake0582df92000-07-12 04:49:00 +00001721 METH_VARARGS|METH_KEYWORDS, pyexpat_ParserCreate__doc__},
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001722 {"ErrorString", (PyCFunction)pyexpat_ErrorString,
1723 METH_VARARGS, pyexpat_ErrorString__doc__},
Fred Drake71b63ff2002-06-28 22:29:01 +00001724
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001725 {NULL, (PyCFunction)NULL, 0, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001726};
1727
Andrew M. Kuchlingbeba0562000-06-27 00:33:30 +00001728/* Module docstring */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001730PyDoc_STRVAR(pyexpat_module_documentation,
1731"Python wrapper for Expat parser.");
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001732
Fred Drake4113b132001-03-24 19:58:26 +00001733/* Return a Python string that represents the version number without the
1734 * extra cruft added by revision control, even if the right options were
1735 * given to the "cvs export" command to make it not include the extra
1736 * cruft.
1737 */
1738static PyObject *
1739get_version_string(void)
1740{
1741 static char *rcsid = "$Revision$";
1742 char *rev = rcsid;
1743 int i = 0;
1744
Neal Norwitz30b5c5d2005-12-19 06:05:18 +00001745 while (!isdigit(Py_CHARMASK(*rev)))
Fred Drake4113b132001-03-24 19:58:26 +00001746 ++rev;
1747 while (rev[i] != ' ' && rev[i] != '\0')
1748 ++i;
1749
Neal Norwitz392c5be2007-08-25 17:20:32 +00001750 return PyUnicode_FromStringAndSize(rev, i);
Fred Drake4113b132001-03-24 19:58:26 +00001751}
1752
Fred Drakecde79132001-04-25 16:01:30 +00001753/* Initialization function for the module */
1754
1755#ifndef MODULE_NAME
1756#define MODULE_NAME "pyexpat"
1757#endif
1758
1759#ifndef MODULE_INITFUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001760#define MODULE_INITFUNC PyInit_pyexpat
Fred Drakecde79132001-04-25 16:01:30 +00001761#endif
1762
Martin v. Löwis069dde22003-01-21 10:58:18 +00001763#ifndef PyMODINIT_FUNC
1764# ifdef MS_WINDOWS
1765# define PyMODINIT_FUNC __declspec(dllexport) void
1766# else
1767# define PyMODINIT_FUNC void
1768# endif
1769#endif
1770
Mark Hammond8235ea12002-07-19 06:55:41 +00001771PyMODINIT_FUNC MODULE_INITFUNC(void); /* avoid compiler warnings */
Fred Drakecde79132001-04-25 16:01:30 +00001772
Martin v. Löwis1a214512008-06-11 05:26:20 +00001773static struct PyModuleDef pyexpatmodule = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001774 PyModuleDef_HEAD_INIT,
1775 MODULE_NAME,
1776 pyexpat_module_documentation,
1777 -1,
1778 pyexpat_methods,
1779 NULL,
1780 NULL,
1781 NULL,
1782 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001783};
1784
Martin v. Löwis069dde22003-01-21 10:58:18 +00001785PyMODINIT_FUNC
1786MODULE_INITFUNC(void)
Fred Drake0582df92000-07-12 04:49:00 +00001787{
1788 PyObject *m, *d;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001789 PyObject *errmod_name = PyUnicode_FromString(MODULE_NAME ".errors");
Fred Drake85d835f2001-02-08 15:39:08 +00001790 PyObject *errors_module;
1791 PyObject *modelmod_name;
1792 PyObject *model_module;
Fred Drake0582df92000-07-12 04:49:00 +00001793 PyObject *sys_modules;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001794 static struct PyExpat_CAPI capi;
1795 PyObject* capi_object;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001796
Fred Drake6f987622000-08-25 18:03:30 +00001797 if (errmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001798 return NULL;
Neal Norwitz392c5be2007-08-25 17:20:32 +00001799 modelmod_name = PyUnicode_FromString(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001800 if (modelmod_name == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001801 return NULL;
Fred Drake6f987622000-08-25 18:03:30 +00001802
Amaury Forgeot d'Arcba4105c2008-07-02 21:41:01 +00001803 if (PyType_Ready(&Xmlparsetype) < 0)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001804 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001805
Fred Drake0582df92000-07-12 04:49:00 +00001806 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001807 m = PyModule_Create(&pyexpatmodule);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00001808 if (m == NULL)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001809 return NULL;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001810
Fred Drake0582df92000-07-12 04:49:00 +00001811 /* Add some symbolic constants to the module */
Fred Drakebd6101c2001-02-14 18:29:45 +00001812 if (ErrorObject == NULL) {
1813 ErrorObject = PyErr_NewException("xml.parsers.expat.ExpatError",
Fred Drake93adb692000-09-23 04:55:48 +00001814 NULL, NULL);
Fred Drakebd6101c2001-02-14 18:29:45 +00001815 if (ErrorObject == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001816 return NULL;
Fred Drakebd6101c2001-02-14 18:29:45 +00001817 }
1818 Py_INCREF(ErrorObject);
Fred Drake93adb692000-09-23 04:55:48 +00001819 PyModule_AddObject(m, "error", ErrorObject);
Fred Drakebd6101c2001-02-14 18:29:45 +00001820 Py_INCREF(ErrorObject);
1821 PyModule_AddObject(m, "ExpatError", ErrorObject);
Fred Drake4ba298c2000-10-29 04:57:53 +00001822 Py_INCREF(&Xmlparsetype);
1823 PyModule_AddObject(m, "XMLParserType", (PyObject *) &Xmlparsetype);
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001824
Fred Drake4113b132001-03-24 19:58:26 +00001825 PyModule_AddObject(m, "__version__", get_version_string());
Fred Drake738293d2000-12-21 17:25:07 +00001826 PyModule_AddStringConstant(m, "EXPAT_VERSION",
1827 (char *) XML_ExpatVersion());
Fred Drake85d835f2001-02-08 15:39:08 +00001828 {
1829 XML_Expat_Version info = XML_ExpatVersionInfo();
1830 PyModule_AddObject(m, "version_info",
1831 Py_BuildValue("(iii)", info.major,
1832 info.minor, info.micro));
1833 }
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001834 init_template_buffer();
Fred Drake0582df92000-07-12 04:49:00 +00001835 /* XXX When Expat supports some way of figuring out how it was
Fred Drake71b63ff2002-06-28 22:29:01 +00001836 compiled, this should check and set native_encoding
1837 appropriately.
Fred Drake0582df92000-07-12 04:49:00 +00001838 */
Fred Drake93adb692000-09-23 04:55:48 +00001839 PyModule_AddStringConstant(m, "native_encoding", "UTF-8");
Fred Drakec23b5232000-08-24 21:57:43 +00001840
Fred Drake85d835f2001-02-08 15:39:08 +00001841 sys_modules = PySys_GetObject("modules");
Fred Drake93adb692000-09-23 04:55:48 +00001842 d = PyModule_GetDict(m);
Fred Drake6f987622000-08-25 18:03:30 +00001843 errors_module = PyDict_GetItem(d, errmod_name);
1844 if (errors_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001845 errors_module = PyModule_New(MODULE_NAME ".errors");
Fred Drake6f987622000-08-25 18:03:30 +00001846 if (errors_module != NULL) {
Fred Drake6f987622000-08-25 18:03:30 +00001847 PyDict_SetItem(sys_modules, errmod_name, errors_module);
Fred Drake93adb692000-09-23 04:55:48 +00001848 /* gives away the reference to errors_module */
1849 PyModule_AddObject(m, "errors", errors_module);
Fred Drakec23b5232000-08-24 21:57:43 +00001850 }
1851 }
Fred Drake6f987622000-08-25 18:03:30 +00001852 Py_DECREF(errmod_name);
Fred Drake85d835f2001-02-08 15:39:08 +00001853 model_module = PyDict_GetItem(d, modelmod_name);
1854 if (model_module == NULL) {
Fred Drakecde79132001-04-25 16:01:30 +00001855 model_module = PyModule_New(MODULE_NAME ".model");
Fred Drake85d835f2001-02-08 15:39:08 +00001856 if (model_module != NULL) {
1857 PyDict_SetItem(sys_modules, modelmod_name, model_module);
1858 /* gives away the reference to model_module */
1859 PyModule_AddObject(m, "model", model_module);
1860 }
1861 }
1862 Py_DECREF(modelmod_name);
1863 if (errors_module == NULL || model_module == NULL)
1864 /* Don't core dump later! */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001865 return NULL;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001866
Martin v. Löwisc847f402003-01-21 11:09:21 +00001867#if XML_COMBINED_VERSION > 19505
Martin v. Löwis069dde22003-01-21 10:58:18 +00001868 {
1869 const XML_Feature *features = XML_GetFeatureList();
1870 PyObject *list = PyList_New(0);
1871 if (list == NULL)
1872 /* just ignore it */
1873 PyErr_Clear();
1874 else {
1875 int i = 0;
1876 for (; features[i].feature != XML_FEATURE_END; ++i) {
1877 int ok;
1878 PyObject *item = Py_BuildValue("si", features[i].name,
1879 features[i].value);
1880 if (item == NULL) {
1881 Py_DECREF(list);
1882 list = NULL;
1883 break;
1884 }
1885 ok = PyList_Append(list, item);
1886 Py_DECREF(item);
1887 if (ok < 0) {
1888 PyErr_Clear();
1889 break;
1890 }
1891 }
1892 if (list != NULL)
1893 PyModule_AddObject(m, "features", list);
1894 }
1895 }
Martin v. Löwisc847f402003-01-21 11:09:21 +00001896#endif
Fred Drake6f987622000-08-25 18:03:30 +00001897
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001898#define MYCONST(name) \
Fred Drake93adb692000-09-23 04:55:48 +00001899 PyModule_AddStringConstant(errors_module, #name, \
1900 (char*)XML_ErrorString(name))
Fred Drake7bd9f412000-07-04 23:51:31 +00001901
Fred Drake0582df92000-07-12 04:49:00 +00001902 MYCONST(XML_ERROR_NO_MEMORY);
1903 MYCONST(XML_ERROR_SYNTAX);
1904 MYCONST(XML_ERROR_NO_ELEMENTS);
1905 MYCONST(XML_ERROR_INVALID_TOKEN);
1906 MYCONST(XML_ERROR_UNCLOSED_TOKEN);
1907 MYCONST(XML_ERROR_PARTIAL_CHAR);
1908 MYCONST(XML_ERROR_TAG_MISMATCH);
1909 MYCONST(XML_ERROR_DUPLICATE_ATTRIBUTE);
1910 MYCONST(XML_ERROR_JUNK_AFTER_DOC_ELEMENT);
1911 MYCONST(XML_ERROR_PARAM_ENTITY_REF);
1912 MYCONST(XML_ERROR_UNDEFINED_ENTITY);
1913 MYCONST(XML_ERROR_RECURSIVE_ENTITY_REF);
1914 MYCONST(XML_ERROR_ASYNC_ENTITY);
1915 MYCONST(XML_ERROR_BAD_CHAR_REF);
1916 MYCONST(XML_ERROR_BINARY_ENTITY_REF);
1917 MYCONST(XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF);
1918 MYCONST(XML_ERROR_MISPLACED_XML_PI);
1919 MYCONST(XML_ERROR_UNKNOWN_ENCODING);
1920 MYCONST(XML_ERROR_INCORRECT_ENCODING);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001921 MYCONST(XML_ERROR_UNCLOSED_CDATA_SECTION);
1922 MYCONST(XML_ERROR_EXTERNAL_ENTITY_HANDLING);
1923 MYCONST(XML_ERROR_NOT_STANDALONE);
Fred Drake283b6702004-08-04 22:28:16 +00001924 MYCONST(XML_ERROR_UNEXPECTED_STATE);
1925 MYCONST(XML_ERROR_ENTITY_DECLARED_IN_PE);
1926 MYCONST(XML_ERROR_FEATURE_REQUIRES_XML_DTD);
1927 MYCONST(XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING);
1928 /* Added in Expat 1.95.7. */
1929 MYCONST(XML_ERROR_UNBOUND_PREFIX);
1930 /* Added in Expat 1.95.8. */
1931 MYCONST(XML_ERROR_UNDECLARING_PREFIX);
1932 MYCONST(XML_ERROR_INCOMPLETE_PE);
1933 MYCONST(XML_ERROR_XML_DECL);
1934 MYCONST(XML_ERROR_TEXT_DECL);
1935 MYCONST(XML_ERROR_PUBLICID);
1936 MYCONST(XML_ERROR_SUSPENDED);
1937 MYCONST(XML_ERROR_NOT_SUSPENDED);
1938 MYCONST(XML_ERROR_ABORTED);
1939 MYCONST(XML_ERROR_FINISHED);
1940 MYCONST(XML_ERROR_SUSPEND_PE);
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001941
Fred Drake85d835f2001-02-08 15:39:08 +00001942 PyModule_AddStringConstant(errors_module, "__doc__",
1943 "Constants used to describe error conditions.");
1944
Fred Drake93adb692000-09-23 04:55:48 +00001945#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001946
Fred Drake85d835f2001-02-08 15:39:08 +00001947#define MYCONST(c) PyModule_AddIntConstant(m, #c, c)
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001948 MYCONST(XML_PARAM_ENTITY_PARSING_NEVER);
1949 MYCONST(XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE);
1950 MYCONST(XML_PARAM_ENTITY_PARSING_ALWAYS);
Fred Drake85d835f2001-02-08 15:39:08 +00001951#undef MYCONST
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001952
Fred Drake85d835f2001-02-08 15:39:08 +00001953#define MYCONST(c) PyModule_AddIntConstant(model_module, #c, c)
1954 PyModule_AddStringConstant(model_module, "__doc__",
1955 "Constants used to interpret content model information.");
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00001956
Fred Drake85d835f2001-02-08 15:39:08 +00001957 MYCONST(XML_CTYPE_EMPTY);
1958 MYCONST(XML_CTYPE_ANY);
1959 MYCONST(XML_CTYPE_MIXED);
1960 MYCONST(XML_CTYPE_NAME);
1961 MYCONST(XML_CTYPE_CHOICE);
1962 MYCONST(XML_CTYPE_SEQ);
1963
1964 MYCONST(XML_CQUANT_NONE);
1965 MYCONST(XML_CQUANT_OPT);
1966 MYCONST(XML_CQUANT_REP);
1967 MYCONST(XML_CQUANT_PLUS);
1968#undef MYCONST
Fredrik Lundhc3345042005-12-13 19:49:55 +00001969
1970 /* initialize pyexpat dispatch table */
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001971 capi.size = sizeof(capi);
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001972 capi.magic = PyExpat_CAPI_MAGIC;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001973 capi.MAJOR_VERSION = XML_MAJOR_VERSION;
1974 capi.MINOR_VERSION = XML_MINOR_VERSION;
1975 capi.MICRO_VERSION = XML_MICRO_VERSION;
1976 capi.ErrorString = XML_ErrorString;
Fredrik Lundhcc117db2005-12-13 21:55:36 +00001977 capi.GetErrorCode = XML_GetErrorCode;
1978 capi.GetErrorColumnNumber = XML_GetErrorColumnNumber;
1979 capi.GetErrorLineNumber = XML_GetErrorLineNumber;
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001980 capi.Parse = XML_Parse;
1981 capi.ParserCreate_MM = XML_ParserCreate_MM;
1982 capi.ParserFree = XML_ParserFree;
1983 capi.SetCharacterDataHandler = XML_SetCharacterDataHandler;
1984 capi.SetCommentHandler = XML_SetCommentHandler;
1985 capi.SetDefaultHandlerExpand = XML_SetDefaultHandlerExpand;
1986 capi.SetElementHandler = XML_SetElementHandler;
1987 capi.SetNamespaceDeclHandler = XML_SetNamespaceDeclHandler;
1988 capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
1989 capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
1990 capi.SetUserData = XML_SetUserData;
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001991
Benjamin Petersonb173f782009-05-05 22:31:58 +00001992 /* export using capsule */
1993 capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);
Fredrik Lundhd7a42882005-12-13 20:43:04 +00001994 if (capi_object)
1995 PyModule_AddObject(m, "expat_CAPI", capi_object);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001996 return m;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00001997}
1998
Fred Drake6f987622000-08-25 18:03:30 +00001999static void
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002000clear_handlers(xmlparseobject *self, int initial)
Fred Drake0582df92000-07-12 04:49:00 +00002001{
Fred Drakecde79132001-04-25 16:01:30 +00002002 int i = 0;
2003 PyObject *temp;
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002004
Fred Drake71b63ff2002-06-28 22:29:01 +00002005 for (; handler_info[i].name != NULL; i++) {
Martin v. Löwis5b68ce32001-10-21 08:53:52 +00002006 if (initial)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002007 self->handlers[i] = NULL;
2008 else {
Fred Drakecde79132001-04-25 16:01:30 +00002009 temp = self->handlers[i];
2010 self->handlers[i] = NULL;
2011 Py_XDECREF(temp);
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002012 handler_info[i].setter(self->itself, NULL);
Fred Drakecde79132001-04-25 16:01:30 +00002013 }
Fred Drakecde79132001-04-25 16:01:30 +00002014 }
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002015}
2016
Tim Peters0c322792002-07-17 16:49:03 +00002017static struct HandlerInfo handler_info[] = {
Fred Drake71b63ff2002-06-28 22:29:01 +00002018 {"StartElementHandler",
2019 (xmlhandlersetter)XML_SetStartElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002020 (xmlhandler)my_StartElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002021 {"EndElementHandler",
2022 (xmlhandlersetter)XML_SetEndElementHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002023 (xmlhandler)my_EndElementHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002024 {"ProcessingInstructionHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002025 (xmlhandlersetter)XML_SetProcessingInstructionHandler,
2026 (xmlhandler)my_ProcessingInstructionHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002027 {"CharacterDataHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002028 (xmlhandlersetter)XML_SetCharacterDataHandler,
2029 (xmlhandler)my_CharacterDataHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002030 {"UnparsedEntityDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002031 (xmlhandlersetter)XML_SetUnparsedEntityDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002032 (xmlhandler)my_UnparsedEntityDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002033 {"NotationDeclHandler",
Fred Drake0582df92000-07-12 04:49:00 +00002034 (xmlhandlersetter)XML_SetNotationDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002035 (xmlhandler)my_NotationDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002036 {"StartNamespaceDeclHandler",
2037 (xmlhandlersetter)XML_SetStartNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002038 (xmlhandler)my_StartNamespaceDeclHandler},
Fred Drake71b63ff2002-06-28 22:29:01 +00002039 {"EndNamespaceDeclHandler",
2040 (xmlhandlersetter)XML_SetEndNamespaceDeclHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002041 (xmlhandler)my_EndNamespaceDeclHandler},
Fred Drake0582df92000-07-12 04:49:00 +00002042 {"CommentHandler",
2043 (xmlhandlersetter)XML_SetCommentHandler,
2044 (xmlhandler)my_CommentHandler},
2045 {"StartCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002046 (xmlhandlersetter)XML_SetStartCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002047 (xmlhandler)my_StartCdataSectionHandler},
2048 {"EndCdataSectionHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002049 (xmlhandlersetter)XML_SetEndCdataSectionHandler,
Fred Drake0582df92000-07-12 04:49:00 +00002050 (xmlhandler)my_EndCdataSectionHandler},
2051 {"DefaultHandler",
2052 (xmlhandlersetter)XML_SetDefaultHandler,
2053 (xmlhandler)my_DefaultHandler},
2054 {"DefaultHandlerExpand",
2055 (xmlhandlersetter)XML_SetDefaultHandlerExpand,
2056 (xmlhandler)my_DefaultHandlerExpandHandler},
2057 {"NotStandaloneHandler",
2058 (xmlhandlersetter)XML_SetNotStandaloneHandler,
2059 (xmlhandler)my_NotStandaloneHandler},
2060 {"ExternalEntityRefHandler",
2061 (xmlhandlersetter)XML_SetExternalEntityRefHandler,
Fred Drake2a3d7db2002-06-28 22:56:48 +00002062 (xmlhandler)my_ExternalEntityRefHandler},
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002063 {"StartDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002064 (xmlhandlersetter)XML_SetStartDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002065 (xmlhandler)my_StartDoctypeDeclHandler},
2066 {"EndDoctypeDeclHandler",
Fred Drake71b63ff2002-06-28 22:29:01 +00002067 (xmlhandlersetter)XML_SetEndDoctypeDeclHandler,
Martin v. Löwis0078f6c2001-01-21 10:18:10 +00002068 (xmlhandler)my_EndDoctypeDeclHandler},
Fred Drake85d835f2001-02-08 15:39:08 +00002069 {"EntityDeclHandler",
2070 (xmlhandlersetter)XML_SetEntityDeclHandler,
2071 (xmlhandler)my_EntityDeclHandler},
2072 {"XmlDeclHandler",
2073 (xmlhandlersetter)XML_SetXmlDeclHandler,
2074 (xmlhandler)my_XmlDeclHandler},
2075 {"ElementDeclHandler",
2076 (xmlhandlersetter)XML_SetElementDeclHandler,
2077 (xmlhandler)my_ElementDeclHandler},
2078 {"AttlistDeclHandler",
2079 (xmlhandlersetter)XML_SetAttlistDeclHandler,
2080 (xmlhandler)my_AttlistDeclHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002081#if XML_COMBINED_VERSION >= 19504
Martin v. Löwis069dde22003-01-21 10:58:18 +00002082 {"SkippedEntityHandler",
2083 (xmlhandlersetter)XML_SetSkippedEntityHandler,
2084 (xmlhandler)my_SkippedEntityHandler},
Martin v. Löwisc847f402003-01-21 11:09:21 +00002085#endif
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002086
Fred Drake0582df92000-07-12 04:49:00 +00002087 {NULL, NULL, NULL} /* sentinel */
Andrew M. Kuchlingb7f10532000-03-31 15:43:31 +00002088};