blob: ab2a5ed8d24a5ec1dca5749615a5358ef4fc5887 [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */

/*
 * Each helper below is defined only when the Python headers in use have
 * not already supplied it.
 */

/* Inline documentation: docstrings collapse to "" without WITH_DOC_STRINGS. */
#if !defined(PyDoc_STRVAR)
#  if defined(WITH_DOC_STRINGS)
#    define PyDoc_STR(str) str
#  else
#    define PyDoc_STR(str) ""
#  endif
#  define PyDoc_VAR(name) static char name[]
#  define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* !defined(PyDoc_STRVAR) */

/* Declaration prefix for the module init function (C linkage under C++). */
#if !defined(PyMODINIT_FUNC)
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif /* !defined(PyMODINIT_FUNC) */

/* Release the reference held in `op`; `op` is set to NULL before the DECREF. */
#if !defined(Py_CLEAR)
#define Py_CLEAR(op)                                \
    do {                                            \
        if (op) {                                   \
            PyObject *tmp = (PyObject *)(op);       \
            (op) = NULL;                            \
            Py_DECREF(tmp);                         \
        }                                           \
    } while (0)
#endif /* !defined(Py_CLEAR) */

/*
 * Visit `op` if it is non-NULL and return from the enclosing function when
 * the visitor reports non-zero.  Expects `visit` and `arg` to be in scope.
 */
#if !defined(Py_VISIT)
#define Py_VISIT(op)                                        \
    do {                                                    \
        if (op) {                                           \
            int vret = visit((PyObject *)(op), arg);        \
            if (vret)                                       \
                return vret;                                \
        }                                                   \
    } while (0)
#endif /* !defined(Py_VISIT) */

/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +000065 PyObject_TypeCheck(o, &PyBaseString_Type)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000068static PyObject *dialects; /* Dialect registry */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser (parse_process_char). */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting a field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,      /* quote character seen inside quotes */
    EAT_CRNL                    /* consuming end-of-line characters */
} ParserState;
76
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of the known quoting styles; the entry with a NULL name ends it. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { QUOTE_MINIMAL,    NULL }              /* sentinel */
};
93
94typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000095 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
103 int quoting; /* style of quoting to write */
104
105 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +0000106} DialectObj;
107
108staticforward PyTypeObject Dialect_Type;
109
110typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000113 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
122 int numeric_field; /* treat field as numeric */
123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
126staticforward PyTypeObject Reader_Type;
127
Christian Heimese93237d2007-12-19 02:37:44 +0000128#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000136
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
143staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000167 Py_XINCREF(str);
168 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 return PyInt_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 if (src == NULL)
210 *target = dflt;
Antoine Pitrouc5bef752012-08-15 23:16:51 +0200211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000218}
219
Andrew McNamara1196cf12005-01-07 04:42:45 +0000220static int
221_set_int(const char *name, int *target, PyObject *src, int dflt)
222{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000223 if (src == NULL)
224 *target = dflt;
225 else {
226 if (!PyInt_Check(src)) {
227 PyErr_Format(PyExc_TypeError,
228 "\"%s\" must be an integer", name);
229 return -1;
230 }
231 *target = PyInt_AsLong(src);
232 }
233 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000234}
235
236static int
237_set_char(const char *name, char *target, PyObject *src, char dflt)
238{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000239 if (src == NULL)
240 *target = dflt;
241 else {
242 if (src == Py_None || PyString_Size(src) == 0)
243 *target = '\0';
244 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
245 PyErr_Format(PyExc_TypeError,
246 "\"%s\" must be an 1-character string",
247 name);
248 return -1;
249 }
250 else {
251 char *s = PyString_AsString(src);
252 if (s == NULL)
253 return -1;
254 *target = s[0];
255 }
256 }
257 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000258}
259
260static int
261_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
262{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000263 if (src == NULL)
264 *target = PyString_FromString(dflt);
265 else {
266 if (src == Py_None)
267 *target = NULL;
268 else if (!IS_BASESTRING(src)) {
269 PyErr_Format(PyExc_TypeError,
270 "\"%s\" must be an string", name);
271 return -1;
272 }
273 else {
274 Py_XDECREF(*target);
275 Py_INCREF(src);
276 *target = src;
277 }
278 }
279 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000280}
281
282static int
283dialect_check_quoting(int quoting)
284{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000285 StyleDesc *qs = quote_styles;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000286
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000287 for (qs = quote_styles; qs->name; qs++) {
288 if (qs->style == quoting)
289 return 0;
290 }
291 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
292 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000293}
Skip Montanarob4a04172003-03-20 23:29:12 +0000294
295#define D_OFF(x) offsetof(DialectObj, x)
296
297static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000298 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
299 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
300 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
301 { "strict", T_INT, D_OFF(strict), READONLY },
302 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000303};
304
305static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000306 { "escapechar", (getter)Dialect_get_escapechar},
307 { "lineterminator", (getter)Dialect_get_lineterminator},
308 { "quotechar", (getter)Dialect_get_quotechar},
309 { "quoting", (getter)Dialect_get_quoting},
310 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000311};
312
313static void
314Dialect_dealloc(DialectObj *self)
315{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000316 Py_XDECREF(self->lineterminator);
317 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000318}
319
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000320static char *dialect_kws[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000321 "dialect",
322 "delimiter",
323 "doublequote",
324 "escapechar",
325 "lineterminator",
326 "quotechar",
327 "quoting",
328 "skipinitialspace",
329 "strict",
330 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000331};
332
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000333static PyObject *
334dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000335{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000336 DialectObj *self;
337 PyObject *ret = NULL;
338 PyObject *dialect = NULL;
339 PyObject *delimiter = NULL;
340 PyObject *doublequote = NULL;
341 PyObject *escapechar = NULL;
342 PyObject *lineterminator = NULL;
343 PyObject *quotechar = NULL;
344 PyObject *quoting = NULL;
345 PyObject *skipinitialspace = NULL;
346 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000347
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000348 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
349 "|OOOOOOOOO", dialect_kws,
350 &dialect,
351 &delimiter,
352 &doublequote,
353 &escapechar,
354 &lineterminator,
355 &quotechar,
356 &quoting,
357 &skipinitialspace,
358 &strict))
359 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000360
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000361 if (dialect != NULL) {
362 if (IS_BASESTRING(dialect)) {
363 dialect = get_dialect_from_registry(dialect);
364 if (dialect == NULL)
365 return NULL;
366 }
367 else
368 Py_INCREF(dialect);
369 /* Can we reuse this instance? */
370 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
371 delimiter == 0 &&
372 doublequote == 0 &&
373 escapechar == 0 &&
374 lineterminator == 0 &&
375 quotechar == 0 &&
376 quoting == 0 &&
377 skipinitialspace == 0 &&
378 strict == 0)
379 return dialect;
380 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000381
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000382 self = (DialectObj *)type->tp_alloc(type, 0);
383 if (self == NULL) {
384 Py_XDECREF(dialect);
385 return NULL;
386 }
387 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000388
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000389 Py_XINCREF(delimiter);
390 Py_XINCREF(doublequote);
391 Py_XINCREF(escapechar);
392 Py_XINCREF(lineterminator);
393 Py_XINCREF(quotechar);
394 Py_XINCREF(quoting);
395 Py_XINCREF(skipinitialspace);
396 Py_XINCREF(strict);
397 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000398#define DIALECT_GETATTR(v, n) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000399 if (v == NULL) \
400 v = PyObject_GetAttrString(dialect, n)
401 DIALECT_GETATTR(delimiter, "delimiter");
402 DIALECT_GETATTR(doublequote, "doublequote");
403 DIALECT_GETATTR(escapechar, "escapechar");
404 DIALECT_GETATTR(lineterminator, "lineterminator");
405 DIALECT_GETATTR(quotechar, "quotechar");
406 DIALECT_GETATTR(quoting, "quoting");
407 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
408 DIALECT_GETATTR(strict, "strict");
409 PyErr_Clear();
410 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000411
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000412 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000414 if (meth(name, target, src, dflt)) \
415 goto err
416 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
417 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
418 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
419 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
420 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
421 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
422 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
423 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000424
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000425 /* validate options */
426 if (dialect_check_quoting(self->quoting))
427 goto err;
428 if (self->delimiter == 0) {
429 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
430 goto err;
431 }
432 if (quotechar == Py_None && quoting == NULL)
433 self->quoting = QUOTE_NONE;
434 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
435 PyErr_SetString(PyExc_TypeError,
436 "quotechar must be set if quoting enabled");
437 goto err;
438 }
439 if (self->lineterminator == 0) {
440 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
441 goto err;
442 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000443
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000444 ret = (PyObject *)self;
445 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000446err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000447 Py_XDECREF(self);
448 Py_XDECREF(dialect);
449 Py_XDECREF(delimiter);
450 Py_XDECREF(doublequote);
451 Py_XDECREF(escapechar);
452 Py_XDECREF(lineterminator);
453 Py_XDECREF(quotechar);
454 Py_XDECREF(quoting);
455 Py_XDECREF(skipinitialspace);
456 Py_XDECREF(strict);
457 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000458}
459
460
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000461PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000462"CSV dialect\n"
463"\n"
464"The Dialect type records CSV parsing and generation options.\n");
465
466static PyTypeObject Dialect_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000467 PyVarObject_HEAD_INIT(NULL, 0)
468 "_csv.Dialect", /* tp_name */
469 sizeof(DialectObj), /* tp_basicsize */
470 0, /* tp_itemsize */
471 /* methods */
472 (destructor)Dialect_dealloc, /* tp_dealloc */
473 (printfunc)0, /* tp_print */
474 (getattrfunc)0, /* tp_getattr */
475 (setattrfunc)0, /* tp_setattr */
476 (cmpfunc)0, /* tp_compare */
477 (reprfunc)0, /* tp_repr */
478 0, /* tp_as_number */
479 0, /* tp_as_sequence */
480 0, /* tp_as_mapping */
481 (hashfunc)0, /* tp_hash */
482 (ternaryfunc)0, /* tp_call */
483 (reprfunc)0, /* tp_str */
484 0, /* tp_getattro */
485 0, /* tp_setattro */
486 0, /* tp_as_buffer */
487 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
488 Dialect_Type_doc, /* tp_doc */
489 0, /* tp_traverse */
490 0, /* tp_clear */
491 0, /* tp_richcompare */
492 0, /* tp_weaklistoffset */
493 0, /* tp_iter */
494 0, /* tp_iternext */
495 0, /* tp_methods */
496 Dialect_memberlist, /* tp_members */
497 Dialect_getsetlist, /* tp_getset */
498 0, /* tp_base */
499 0, /* tp_dict */
500 0, /* tp_descr_get */
501 0, /* tp_descr_set */
502 0, /* tp_dictoffset */
503 0, /* tp_init */
504 0, /* tp_alloc */
505 dialect_new, /* tp_new */
506 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000507};
508
Andrew McNamara91b97462005-01-11 01:07:23 +0000509/*
510 * Return an instance of the dialect type, given a Python instance or kwarg
511 * description of the dialect
512 */
513static PyObject *
514_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
515{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000516 PyObject *ctor_args;
517 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000518
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000519 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
520 if (ctor_args == NULL)
521 return NULL;
522 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
523 Py_DECREF(ctor_args);
524 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000525}
526
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000527/*
528 * READER
529 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000530static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000531parse_save_field(ReaderObj *self)
532{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000533 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000534
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000535 field = PyString_FromStringAndSize(self->field, self->field_len);
536 if (field == NULL)
537 return -1;
538 self->field_len = 0;
539 if (self->numeric_field) {
540 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000541
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000542 self->numeric_field = 0;
543 tmp = PyNumber_Float(field);
544 if (tmp == NULL) {
545 Py_DECREF(field);
546 return -1;
547 }
548 Py_DECREF(field);
549 field = tmp;
550 }
551 PyList_Append(self->fields, field);
552 Py_DECREF(field);
553 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000554}
555
556static int
557parse_grow_buff(ReaderObj *self)
558{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000559 if (self->field_size == 0) {
560 self->field_size = 4096;
561 if (self->field != NULL)
562 PyMem_Free(self->field);
563 self->field = PyMem_Malloc(self->field_size);
564 }
565 else {
566 if (self->field_size > INT_MAX / 2) {
567 PyErr_NoMemory();
568 return 0;
569 }
570 self->field_size *= 2;
571 self->field = PyMem_Realloc(self->field, self->field_size);
572 }
573 if (self->field == NULL) {
574 PyErr_NoMemory();
575 return 0;
576 }
577 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000578}
579
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000580static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000581parse_add_char(ReaderObj *self, char c)
582{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 if (self->field_len >= field_limit) {
584 PyErr_Format(error_obj, "field larger than field limit (%ld)",
585 field_limit);
586 return -1;
587 }
588 if (self->field_len == self->field_size && !parse_grow_buff(self))
589 return -1;
590 self->field[self->field_len++] = c;
591 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592}
593
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000594static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000595parse_process_char(ReaderObj *self, char c)
596{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000597 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000598
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000599 switch (self->state) {
600 case START_RECORD:
601 /* start of record */
602 if (c == '\0')
603 /* empty line - return [] */
604 break;
605 else if (c == '\n' || c == '\r') {
606 self->state = EAT_CRNL;
607 break;
608 }
609 /* normal character - handle as START_FIELD */
610 self->state = START_FIELD;
611 /* fallthru */
612 case START_FIELD:
613 /* expecting field */
614 if (c == '\n' || c == '\r' || c == '\0') {
615 /* save empty field - return [fields] */
616 if (parse_save_field(self) < 0)
617 return -1;
618 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
619 }
620 else if (c == dialect->quotechar &&
621 dialect->quoting != QUOTE_NONE) {
622 /* start quoted field */
623 self->state = IN_QUOTED_FIELD;
624 }
625 else if (c == dialect->escapechar) {
626 /* possible escaped character */
627 self->state = ESCAPED_CHAR;
628 }
629 else if (c == ' ' && dialect->skipinitialspace)
630 /* ignore space at start of field */
631 ;
632 else if (c == dialect->delimiter) {
633 /* save empty field */
634 if (parse_save_field(self) < 0)
635 return -1;
636 }
637 else {
638 /* begin new unquoted field */
639 if (dialect->quoting == QUOTE_NONNUMERIC)
640 self->numeric_field = 1;
641 if (parse_add_char(self, c) < 0)
642 return -1;
643 self->state = IN_FIELD;
644 }
645 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000646
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000647 case ESCAPED_CHAR:
648 if (c == '\0')
649 c = '\n';
650 if (parse_add_char(self, c) < 0)
651 return -1;
652 self->state = IN_FIELD;
653 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000654
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000655 case IN_FIELD:
656 /* in unquoted field */
657 if (c == '\n' || c == '\r' || c == '\0') {
658 /* end of line - return [fields] */
659 if (parse_save_field(self) < 0)
660 return -1;
661 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
662 }
663 else if (c == dialect->escapechar) {
664 /* possible escaped character */
665 self->state = ESCAPED_CHAR;
666 }
667 else if (c == dialect->delimiter) {
668 /* save field - wait for new field */
669 if (parse_save_field(self) < 0)
670 return -1;
671 self->state = START_FIELD;
672 }
673 else {
674 /* normal character - save in field */
675 if (parse_add_char(self, c) < 0)
676 return -1;
677 }
678 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000679
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000680 case IN_QUOTED_FIELD:
681 /* in quoted field */
682 if (c == '\0')
683 ;
684 else if (c == dialect->escapechar) {
685 /* Possible escape character */
686 self->state = ESCAPE_IN_QUOTED_FIELD;
687 }
688 else if (c == dialect->quotechar &&
689 dialect->quoting != QUOTE_NONE) {
690 if (dialect->doublequote) {
691 /* doublequote; " represented by "" */
692 self->state = QUOTE_IN_QUOTED_FIELD;
693 }
694 else {
695 /* end of quote part of field */
696 self->state = IN_FIELD;
697 }
698 }
699 else {
700 /* normal character - save in field */
701 if (parse_add_char(self, c) < 0)
702 return -1;
703 }
704 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000705
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000706 case ESCAPE_IN_QUOTED_FIELD:
707 if (c == '\0')
708 c = '\n';
709 if (parse_add_char(self, c) < 0)
710 return -1;
711 self->state = IN_QUOTED_FIELD;
712 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000713
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000714 case QUOTE_IN_QUOTED_FIELD:
715 /* doublequote - seen a quote in an quoted field */
716 if (dialect->quoting != QUOTE_NONE &&
717 c == dialect->quotechar) {
718 /* save "" as " */
719 if (parse_add_char(self, c) < 0)
720 return -1;
721 self->state = IN_QUOTED_FIELD;
722 }
723 else if (c == dialect->delimiter) {
724 /* save field - wait for new field */
725 if (parse_save_field(self) < 0)
726 return -1;
727 self->state = START_FIELD;
728 }
729 else if (c == '\n' || c == '\r' || c == '\0') {
730 /* end of line - return [fields] */
731 if (parse_save_field(self) < 0)
732 return -1;
733 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
734 }
735 else if (!dialect->strict) {
736 if (parse_add_char(self, c) < 0)
737 return -1;
738 self->state = IN_FIELD;
739 }
740 else {
741 /* illegal */
742 PyErr_Format(error_obj, "'%c' expected after '%c'",
743 dialect->delimiter,
744 dialect->quotechar);
745 return -1;
746 }
747 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000748
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000749 case EAT_CRNL:
750 if (c == '\n' || c == '\r')
751 ;
752 else if (c == '\0')
753 self->state = START_RECORD;
754 else {
755 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
756 return -1;
757 }
758 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000759
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000760 }
761 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000762}
763
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000764static int
765parse_reset(ReaderObj *self)
766{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000767 Py_XDECREF(self->fields);
768 self->fields = PyList_New(0);
769 if (self->fields == NULL)
770 return -1;
771 self->field_len = 0;
772 self->state = START_RECORD;
773 self->numeric_field = 0;
774 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000775}
Skip Montanarob4a04172003-03-20 23:29:12 +0000776
777static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000778Reader_iternext(ReaderObj *self)
779{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000780 PyObject *lineobj;
781 PyObject *fields = NULL;
782 char *line, c;
783 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000785 if (parse_reset(self) < 0)
786 return NULL;
787 do {
788 lineobj = PyIter_Next(self->input_iter);
789 if (lineobj == NULL) {
790 /* End of input OR exception */
791 if (!PyErr_Occurred() && self->field_len != 0)
792 PyErr_Format(error_obj,
793 "newline inside string");
794 return NULL;
795 }
796 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000797
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000798 line = PyString_AsString(lineobj);
799 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000800
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000801 if (line == NULL || linelen < 0) {
802 Py_DECREF(lineobj);
803 return NULL;
804 }
805 while (linelen--) {
806 c = *line++;
807 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000808 Py_DECREF(lineobj);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000809 PyErr_Format(error_obj,
810 "line contains NULL byte");
811 goto err;
812 }
813 if (parse_process_char(self, c) < 0) {
814 Py_DECREF(lineobj);
815 goto err;
816 }
817 }
818 Py_DECREF(lineobj);
819 if (parse_process_char(self, 0) < 0)
820 goto err;
821 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000822
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000823 fields = self->fields;
824 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000825err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000826 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000827}
828
829static void
830Reader_dealloc(ReaderObj *self)
831{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000832 PyObject_GC_UnTrack(self);
833 Py_XDECREF(self->dialect);
834 Py_XDECREF(self->input_iter);
835 Py_XDECREF(self->fields);
836 if (self->field != NULL)
837 PyMem_Free(self->field);
838 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000839}
840
841static int
842Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
843{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000844 Py_VISIT(self->dialect);
845 Py_VISIT(self->input_iter);
846 Py_VISIT(self->fields);
847 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000848}
849
850static int
851Reader_clear(ReaderObj *self)
852{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000853 Py_CLEAR(self->dialect);
854 Py_CLEAR(self->input_iter);
855 Py_CLEAR(self->fields);
856 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000857}
858
859PyDoc_STRVAR(Reader_Type_doc,
860"CSV reader\n"
861"\n"
862"Reader objects are responsible for reading and parsing tabular data\n"
863"in CSV format.\n"
864);
865
866static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000867 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000868};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000869#define R_OFF(x) offsetof(ReaderObj, x)
870
871static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000872 { "dialect", T_OBJECT, R_OFF(dialect), RO },
873 { "line_num", T_ULONG, R_OFF(line_num), RO },
874 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000875};
876
Skip Montanarob4a04172003-03-20 23:29:12 +0000877
878static PyTypeObject Reader_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000879 PyVarObject_HEAD_INIT(NULL, 0)
880 "_csv.reader", /*tp_name*/
881 sizeof(ReaderObj), /*tp_basicsize*/
882 0, /*tp_itemsize*/
883 /* methods */
884 (destructor)Reader_dealloc, /*tp_dealloc*/
885 (printfunc)0, /*tp_print*/
886 (getattrfunc)0, /*tp_getattr*/
887 (setattrfunc)0, /*tp_setattr*/
888 (cmpfunc)0, /*tp_compare*/
889 (reprfunc)0, /*tp_repr*/
890 0, /*tp_as_number*/
891 0, /*tp_as_sequence*/
892 0, /*tp_as_mapping*/
893 (hashfunc)0, /*tp_hash*/
894 (ternaryfunc)0, /*tp_call*/
895 (reprfunc)0, /*tp_str*/
896 0, /*tp_getattro*/
897 0, /*tp_setattro*/
898 0, /*tp_as_buffer*/
899 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
900 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
901 Reader_Type_doc, /*tp_doc*/
902 (traverseproc)Reader_traverse, /*tp_traverse*/
903 (inquiry)Reader_clear, /*tp_clear*/
904 0, /*tp_richcompare*/
905 0, /*tp_weaklistoffset*/
906 PyObject_SelfIter, /*tp_iter*/
907 (getiterfunc)Reader_iternext, /*tp_iternext*/
908 Reader_methods, /*tp_methods*/
909 Reader_memberlist, /*tp_members*/
910 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000911
912};
913
914static PyObject *
915csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
916{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000917 PyObject * iterator, * dialect = NULL;
918 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000919
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000920 if (!self)
921 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000922
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000923 self->dialect = NULL;
924 self->fields = NULL;
925 self->input_iter = NULL;
926 self->field = NULL;
927 self->field_size = 0;
928 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000929
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000930 if (parse_reset(self) < 0) {
931 Py_DECREF(self);
932 return NULL;
933 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000934
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000935 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
936 Py_DECREF(self);
937 return NULL;
938 }
939 self->input_iter = PyObject_GetIter(iterator);
940 if (self->input_iter == NULL) {
941 PyErr_SetString(PyExc_TypeError,
942 "argument 1 must be an iterator");
943 Py_DECREF(self);
944 return NULL;
945 }
946 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
947 if (self->dialect == NULL) {
948 Py_DECREF(self);
949 return NULL;
950 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000951
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000952 PyObject_GC_Track(self);
953 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000954}
955
956/*
957 * WRITER
958 */
959/* ---------------------------------------------------------------- */
960static void
961join_reset(WriterObj *self)
962{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000963 self->rec_len = 0;
964 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000965}
966
967#define MEM_INCR 32768
968
969/* Calculate new record length or append field to record. Return new
970 * record length.
971 */
972static int
973join_append_data(WriterObj *self, char *field, int quote_empty,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000974 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000975{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000976 DialectObj *dialect = self->dialect;
977 int i, rec_len;
978 char *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000979
980#define ADDCH(c) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000981 do {\
982 if (copy_phase) \
983 self->rec[rec_len] = c;\
984 rec_len++;\
985 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000986
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000987 lineterm = PyString_AsString(dialect->lineterminator);
988 if (lineterm == NULL)
989 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000990
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000991 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +0000992
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000993 /* If this is not the first field we need a field separator */
994 if (self->num_fields > 0)
995 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000996
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000997 /* Handle preceding quote */
998 if (copy_phase && *quoted)
999 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001001 /* Copy/count field data */
1002 for (i = 0;; i++) {
1003 char c = field[i];
1004 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001005
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001006 if (c == '\0')
1007 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001008
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001009 if (c == dialect->delimiter ||
1010 c == dialect->escapechar ||
1011 c == dialect->quotechar ||
1012 strchr(lineterm, c)) {
1013 if (dialect->quoting == QUOTE_NONE)
1014 want_escape = 1;
1015 else {
1016 if (c == dialect->quotechar) {
1017 if (dialect->doublequote)
1018 ADDCH(dialect->quotechar);
1019 else
1020 want_escape = 1;
1021 }
1022 if (!want_escape)
1023 *quoted = 1;
1024 }
1025 if (want_escape) {
1026 if (!dialect->escapechar) {
1027 PyErr_Format(error_obj,
1028 "need to escape, but no escapechar set");
1029 return -1;
1030 }
1031 ADDCH(dialect->escapechar);
1032 }
1033 }
1034 /* Copy field character into record buffer.
1035 */
1036 ADDCH(c);
1037 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001038
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001039 /* If field is empty check if it needs to be quoted.
1040 */
1041 if (i == 0 && quote_empty) {
1042 if (dialect->quoting == QUOTE_NONE) {
1043 PyErr_Format(error_obj,
1044 "single empty field record must be quoted");
1045 return -1;
1046 }
1047 else
1048 *quoted = 1;
1049 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001050
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001051 if (*quoted) {
1052 if (copy_phase)
1053 ADDCH(dialect->quotechar);
1054 else
1055 rec_len += 2;
1056 }
1057 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001058#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001059}
1060
1061static int
1062join_check_rec_size(WriterObj *self, int rec_len)
1063{
Gregory P. Smith9d534572008-06-11 07:41:16 +00001064
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001065 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1066 PyErr_NoMemory();
1067 return 0;
1068 }
Gregory P. Smith9d534572008-06-11 07:41:16 +00001069
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001070 if (rec_len > self->rec_size) {
1071 if (self->rec_size == 0) {
1072 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1073 if (self->rec != NULL)
1074 PyMem_Free(self->rec);
1075 self->rec = PyMem_Malloc(self->rec_size);
1076 }
1077 else {
1078 char *old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001079
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001080 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1081 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1082 if (self->rec == NULL)
1083 PyMem_Free(old_rec);
1084 }
1085 if (self->rec == NULL) {
1086 PyErr_NoMemory();
1087 return 0;
1088 }
1089 }
1090 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001091}
1092
1093static int
1094join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1095{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001096 int rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001097
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001098 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1099 if (rec_len < 0)
1100 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001101
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001102 /* grow record buffer if necessary */
1103 if (!join_check_rec_size(self, rec_len))
1104 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001105
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001106 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1107 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001108
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001109 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001110}
1111
1112static int
1113join_append_lineterminator(WriterObj *self)
1114{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001115 int terminator_len;
1116 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001117
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001118 terminator_len = PyString_Size(self->dialect->lineterminator);
1119 if (terminator_len == -1)
1120 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001121
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001122 /* grow record buffer if necessary */
1123 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1124 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 terminator = PyString_AsString(self->dialect->lineterminator);
1127 if (terminator == NULL)
1128 return 0;
1129 memmove(self->rec + self->rec_len, terminator, terminator_len);
1130 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001131
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001132 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001133}
1134
1135PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001136"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001137"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001138"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001139"elements will be converted to string.");
1140
1141static PyObject *
1142csv_writerow(WriterObj *self, PyObject *seq)
1143{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001144 DialectObj *dialect = self->dialect;
1145 int len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001146
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001147 if (!PySequence_Check(seq))
1148 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001149
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 len = PySequence_Length(seq);
1151 if (len < 0)
1152 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001153
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 /* Join all fields in internal buffer.
1155 */
1156 join_reset(self);
1157 for (i = 0; i < len; i++) {
1158 PyObject *field;
1159 int append_ok;
1160 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001161
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001162 field = PySequence_GetItem(seq, i);
1163 if (field == NULL)
1164 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001166 switch (dialect->quoting) {
1167 case QUOTE_NONNUMERIC:
1168 quoted = !PyNumber_Check(field);
1169 break;
1170 case QUOTE_ALL:
1171 quoted = 1;
1172 break;
1173 default:
1174 quoted = 0;
1175 break;
1176 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001177
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001178 if (PyString_Check(field)) {
1179 append_ok = join_append(self,
1180 PyString_AS_STRING(field),
1181 &quoted, len == 1);
1182 Py_DECREF(field);
1183 }
1184 else if (field == Py_None) {
1185 append_ok = join_append(self, "", &quoted, len == 1);
1186 Py_DECREF(field);
1187 }
1188 else {
1189 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001190
Raymond Hettingerf5377022011-12-11 22:31:09 -08001191 if (PyFloat_Check(field)) {
1192 str = PyObject_Repr(field);
1193 } else {
1194 str = PyObject_Str(field);
1195 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001196 Py_DECREF(field);
1197 if (str == NULL)
1198 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001199
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001200 append_ok = join_append(self, PyString_AS_STRING(str),
1201 &quoted, len == 1);
1202 Py_DECREF(str);
1203 }
1204 if (!append_ok)
1205 return NULL;
1206 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001207
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001208 /* Add line terminator.
1209 */
1210 if (!join_append_lineterminator(self))
1211 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001212
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001213 return PyObject_CallFunction(self->writeline,
1214 "(s#)", self->rec, self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001215}
1216
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001217PyDoc_STRVAR(csv_writerows_doc,
1218"writerows(sequence of sequences)\n"
1219"\n"
1220"Construct and write a series of sequences to a csv file. Non-string\n"
1221"elements will be converted to string.");
1222
Skip Montanarob4a04172003-03-20 23:29:12 +00001223static PyObject *
1224csv_writerows(WriterObj *self, PyObject *seqseq)
1225{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001226 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001227
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001228 row_iter = PyObject_GetIter(seqseq);
1229 if (row_iter == NULL) {
1230 PyErr_SetString(PyExc_TypeError,
1231 "writerows() argument must be iterable");
1232 return NULL;
1233 }
1234 while ((row_obj = PyIter_Next(row_iter))) {
1235 result = csv_writerow(self, row_obj);
1236 Py_DECREF(row_obj);
1237 if (!result) {
1238 Py_DECREF(row_iter);
1239 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001240 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001241 else
1242 Py_DECREF(result);
1243 }
1244 Py_DECREF(row_iter);
1245 if (PyErr_Occurred())
1246 return NULL;
1247 Py_INCREF(Py_None);
1248 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001249}
1250
1251static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001252 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1253 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1254 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001255};
1256
1257#define W_OFF(x) offsetof(WriterObj, x)
1258
1259static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001260 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1261 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001262};
1263
1264static void
1265Writer_dealloc(WriterObj *self)
1266{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001267 PyObject_GC_UnTrack(self);
1268 Py_XDECREF(self->dialect);
1269 Py_XDECREF(self->writeline);
1270 if (self->rec != NULL)
1271 PyMem_Free(self->rec);
1272 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001273}
1274
1275static int
1276Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1277{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001278 Py_VISIT(self->dialect);
1279 Py_VISIT(self->writeline);
1280 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001281}
1282
1283static int
1284Writer_clear(WriterObj *self)
1285{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 Py_CLEAR(self->dialect);
1287 Py_CLEAR(self->writeline);
1288 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001289}
1290
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001291PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001292"CSV writer\n"
1293"\n"
1294"Writer objects are responsible for generating tabular data\n"
1295"in CSV format from sequence input.\n"
1296);
1297
1298static PyTypeObject Writer_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001299 PyVarObject_HEAD_INIT(NULL, 0)
1300 "_csv.writer", /*tp_name*/
1301 sizeof(WriterObj), /*tp_basicsize*/
1302 0, /*tp_itemsize*/
1303 /* methods */
1304 (destructor)Writer_dealloc, /*tp_dealloc*/
1305 (printfunc)0, /*tp_print*/
1306 (getattrfunc)0, /*tp_getattr*/
1307 (setattrfunc)0, /*tp_setattr*/
1308 (cmpfunc)0, /*tp_compare*/
1309 (reprfunc)0, /*tp_repr*/
1310 0, /*tp_as_number*/
1311 0, /*tp_as_sequence*/
1312 0, /*tp_as_mapping*/
1313 (hashfunc)0, /*tp_hash*/
1314 (ternaryfunc)0, /*tp_call*/
1315 (reprfunc)0, /*tp_str*/
1316 0, /*tp_getattro*/
1317 0, /*tp_setattro*/
1318 0, /*tp_as_buffer*/
1319 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1320 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1321 Writer_Type_doc,
1322 (traverseproc)Writer_traverse, /*tp_traverse*/
1323 (inquiry)Writer_clear, /*tp_clear*/
1324 0, /*tp_richcompare*/
1325 0, /*tp_weaklistoffset*/
1326 (getiterfunc)0, /*tp_iter*/
1327 (getiterfunc)0, /*tp_iternext*/
1328 Writer_methods, /*tp_methods*/
1329 Writer_memberlist, /*tp_members*/
1330 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001331};
1332
1333static PyObject *
1334csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1335{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001336 PyObject * output_file, * dialect = NULL;
1337 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001338
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001339 if (!self)
1340 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001341
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001342 self->dialect = NULL;
1343 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001344
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001345 self->rec = NULL;
1346 self->rec_size = 0;
1347 self->rec_len = 0;
1348 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001349
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001350 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1351 Py_DECREF(self);
1352 return NULL;
1353 }
1354 self->writeline = PyObject_GetAttrString(output_file, "write");
1355 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1356 PyErr_SetString(PyExc_TypeError,
1357 "argument 1 must have a \"write\" method");
1358 Py_DECREF(self);
1359 return NULL;
1360 }
1361 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1362 if (self->dialect == NULL) {
1363 Py_DECREF(self);
1364 return NULL;
1365 }
1366 PyObject_GC_Track(self);
1367 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001368}
1369
1370/*
1371 * DIALECT REGISTRY
1372 */
1373static PyObject *
1374csv_list_dialects(PyObject *module, PyObject *args)
1375{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001376 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001377}
1378
1379static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001380csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001381{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001382 PyObject *name_obj, *dialect_obj = NULL;
1383 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001384
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001385 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1386 return NULL;
1387 if (!IS_BASESTRING(name_obj)) {
1388 PyErr_SetString(PyExc_TypeError,
1389 "dialect name must be a string or unicode");
1390 return NULL;
1391 }
1392 dialect = _call_dialect(dialect_obj, kwargs);
1393 if (dialect == NULL)
1394 return NULL;
1395 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1396 Py_DECREF(dialect);
1397 return NULL;
1398 }
1399 Py_DECREF(dialect);
1400 Py_INCREF(Py_None);
1401 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001402}
1403
1404static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001405csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001406{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 if (PyDict_DelItem(dialects, name_obj) < 0)
1408 return PyErr_Format(error_obj, "unknown dialect");
1409 Py_INCREF(Py_None);
1410 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001411}
1412
1413static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001414csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001415{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001416 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001417}
1418
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001419static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001420csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001421{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001422 PyObject *new_limit = NULL;
1423 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001424
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001425 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1426 return NULL;
1427 if (new_limit != NULL) {
1428 if (!PyInt_Check(new_limit)) {
1429 PyErr_Format(PyExc_TypeError,
1430 "limit must be an integer");
1431 return NULL;
1432 }
1433 field_limit = PyInt_AsLong(new_limit);
1434 }
1435 return PyInt_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001436}
1437
Skip Montanarob4a04172003-03-20 23:29:12 +00001438/*
1439 * MODULE
1440 */
1441
1442PyDoc_STRVAR(csv_module_doc,
1443"CSV parsing and writing.\n"
1444"\n"
1445"This module provides classes that assist in the reading and writing\n"
1446"of Comma Separated Value (CSV) files, and implements the interface\n"
1447"described by PEP 305. Although many CSV files are simple to parse,\n"
1448"the format is not formally defined by a stable specification and\n"
1449"is subtle enough that parsing lines of a CSV file with something\n"
1450"like line.split(\",\") is bound to fail. The module supports three\n"
1451"basic APIs: reading, writing, and registration of dialects.\n"
1452"\n"
1453"\n"
1454"DIALECT REGISTRATION:\n"
1455"\n"
1456"Readers and writers support a dialect argument, which is a convenient\n"
1457"handle on a group of settings. When the dialect argument is a string,\n"
1458"it identifies one of the dialects previously registered with the module.\n"
1459"If it is a class or instance, the attributes of the argument are used as\n"
1460"the settings for the reader or writer:\n"
1461"\n"
1462" class excel:\n"
1463" delimiter = ','\n"
1464" quotechar = '\"'\n"
1465" escapechar = None\n"
1466" doublequote = True\n"
1467" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001468" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001469" quoting = QUOTE_MINIMAL\n"
1470"\n"
1471"SETTINGS:\n"
1472"\n"
1473" * quotechar - specifies a one-character string to use as the \n"
1474" quoting character. It defaults to '\"'.\n"
1475" * delimiter - specifies a one-character string to use as the \n"
1476" field separator. It defaults to ','.\n"
1477" * skipinitialspace - specifies how to interpret whitespace which\n"
1478" immediately follows a delimiter. It defaults to False, which\n"
1479" means that whitespace immediately following a delimiter is part\n"
1480" of the following field.\n"
1481" * lineterminator - specifies the character sequence which should \n"
1482" terminate rows.\n"
1483" * quoting - controls when quotes should be generated by the writer.\n"
1484" It can take on any of the following module constants:\n"
1485"\n"
1486" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1487" field contains either the quotechar or the delimiter\n"
1488" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1489" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001490" fields which do not parse as integers or floating point\n"
1491" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001492" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1493" * escapechar - specifies a one-character string used to escape \n"
1494" the delimiter when quoting is set to QUOTE_NONE.\n"
1495" * doublequote - controls the handling of quotes inside fields. When\n"
1496" True, two consecutive quotes are interpreted as one during read,\n"
1497" and when writing, each quote character embedded in the data is\n"
1498" written as two quotes\n");
1499
1500PyDoc_STRVAR(csv_reader_doc,
1501" csv_reader = reader(iterable [, dialect='excel']\n"
1502" [optional keyword args])\n"
1503" for row in csv_reader:\n"
1504" process(row)\n"
1505"\n"
1506"The \"iterable\" argument can be any object that returns a line\n"
1507"of input for each iteration, such as a file object or a list. The\n"
1508"optional \"dialect\" parameter is discussed below. The function\n"
1509"also accepts optional keyword arguments which override settings\n"
1510"provided by the dialect.\n"
1511"\n"
1512"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001513"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001514
1515PyDoc_STRVAR(csv_writer_doc,
1516" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1517" [optional keyword args])\n"
Fredrik Lundh4aaaa492006-04-04 16:51:13 +00001518" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001519" csv_writer.writerow(row)\n"
1520"\n"
1521" [or]\n"
1522"\n"
1523" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1524" [optional keyword args])\n"
1525" csv_writer.writerows(rows)\n"
1526"\n"
1527"The \"fileobj\" argument can be any object that supports the file API.\n");
1528
1529PyDoc_STRVAR(csv_list_dialects_doc,
1530"Return a list of all know dialect names.\n"
1531" names = csv.list_dialects()");
1532
1533PyDoc_STRVAR(csv_get_dialect_doc,
1534"Return the dialect instance associated with name.\n"
1535" dialect = csv.get_dialect(name)");
1536
1537PyDoc_STRVAR(csv_register_dialect_doc,
1538"Create a mapping from a string name to a dialect class.\n"
1539" dialect = csv.register_dialect(name, dialect)");
1540
1541PyDoc_STRVAR(csv_unregister_dialect_doc,
1542"Delete the name/dialect mapping associated with a string name.\n"
1543" csv.unregister_dialect(name)");
1544
Andrew McNamara31d88962005-01-12 03:45:10 +00001545PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001546"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001547" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001548"\n"
1549"Returns old limit. If limit is not given, no new limit is set and\n"
1550"the old limit is returned");
1551
Skip Montanarob4a04172003-03-20 23:29:12 +00001552static struct PyMethodDef csv_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001553 { "reader", (PyCFunction)csv_reader,
1554 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1555 { "writer", (PyCFunction)csv_writer,
1556 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1557 { "list_dialects", (PyCFunction)csv_list_dialects,
1558 METH_NOARGS, csv_list_dialects_doc},
1559 { "register_dialect", (PyCFunction)csv_register_dialect,
1560 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1561 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1562 METH_O, csv_unregister_dialect_doc},
1563 { "get_dialect", (PyCFunction)csv_get_dialect,
1564 METH_O, csv_get_dialect_doc},
1565 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1566 METH_VARARGS, csv_field_size_limit_doc},
1567 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001568};
1569
1570PyMODINIT_FUNC
1571init_csv(void)
1572{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001573 PyObject *module;
1574 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001575
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001576 if (PyType_Ready(&Dialect_Type) < 0)
1577 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001578
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001579 if (PyType_Ready(&Reader_Type) < 0)
1580 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001581
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001582 if (PyType_Ready(&Writer_Type) < 0)
1583 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001584
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001585 /* Create the module and add the functions */
1586 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1587 if (module == NULL)
1588 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001589
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001590 /* Add version to the module. */
1591 if (PyModule_AddStringConstant(module, "__version__",
1592 MODULE_VERSION) == -1)
1593 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001594
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001595 /* Add _dialects dictionary */
1596 dialects = PyDict_New();
1597 if (dialects == NULL)
1598 return;
1599 if (PyModule_AddObject(module, "_dialects", dialects))
1600 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001601
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001602 /* Add quote styles into dictionary */
1603 for (style = quote_styles; style->name; style++) {
1604 if (PyModule_AddIntConstant(module, style->name,
1605 style->style) == -1)
1606 return;
1607 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001608
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001609 /* Add the Dialect type */
1610 Py_INCREF(&Dialect_Type);
1611 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1612 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001613
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001614 /* Add the CSV exception object to the module. */
1615 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1616 if (error_obj == NULL)
1617 return;
1618 PyModule_AddObject(module, "Error", error_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001619}