blob: a5787d3ac63bcd4492eb45dac95558a46fc5079b [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
Skip Montanarob4a04172003-03-20 23:29:12 +000021/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/*
 * Fallback docstring macros, used only when the Python headers do not
 * already provide PyDoc_STRVAR.  PyDoc_STR() yields its argument when
 * WITH_DOC_STRINGS is defined and an empty string literal otherwise.
 */
#  ifdef WITH_DOC_STRINGS
#    define PyDoc_STR(str) str
#  else
#    define PyDoc_STR(str) ""
#  endif
#  define PyDoc_VAR(name) static char name[]
#  define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */
32
#ifndef PyMODINIT_FUNC
/*
 * Fallback declaration prefix for the module init function: plain `void`
 * in C, `extern "C" void` when compiled as C++.
 */
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif
Thomas Wouters2742c5e2006-04-15 17:33:14 +000040
#ifndef Py_CLEAR
/*
 * Fallback Py_CLEAR: set the reference `op` to NULL *before* dropping it,
 * so code triggered by the decref never sees a dangling pointer in `op`.
 *
 * The temporary uses a name callers are unlikely to use themselves; with a
 * generic name such as `tmp`, Py_CLEAR(tmp) would initialise the temporary
 * from itself and leave the caller's variable untouched.
 */
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *_py_tmp = (PyObject *)(op);       \
            (op) = NULL;                                \
            Py_DECREF(_py_tmp);                         \
        }                                               \
    } while (0)
#endif
#ifndef Py_VISIT
/*
 * Fallback Py_VISIT for use inside a traverse function.  It expects the
 * identifiers `visit` and `arg` to be in scope at the point of use and
 * returns from the enclosing function as soon as a visit reports nonzero.
 */
#define Py_VISIT(op)                                        \
    do {                                                    \
        if ((op) != NULL) {                                 \
            int vret = visit((PyObject *)(op), arg);        \
            if (vret != 0)                                  \
                return vret;                                \
        }                                                   \
    } while (0)
#endif
Thomas Wouters2742c5e2006-04-15 17:33:14 +000061
Skip Montanarob4a04172003-03-20 23:29:12 +000062/* end 2.2 compatibility macros */
63
/* Nonzero when `o` passes PyObject_TypeCheck against PyBaseString_Type. */
#define IS_BASESTRING(o) \
    (PyObject_TypeCheck((o), &PyBaseString_Type))
66
Skip Montanarob4a04172003-03-20 23:29:12 +000067static PyObject *error_obj; /* CSV exception */
68static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser (parse_process_char). */
typedef enum {
    START_RECORD = 0,           /* at the start of a record */
    START_FIELD,                /* expecting a field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,      /* quote character seen inside quotes */
    EAT_CRNL                    /* consuming the '\r' / '\n' ending a record */
} ParserState;
76
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of known quoting policies; the entry with a NULL name ends it. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { QUOTE_MINIMAL,    NULL }
};
93
94typedef struct {
95 PyObject_HEAD
96
97 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000103 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104
105 int strict; /* raise exception on bad CSV */
106} DialectObj;
107
108staticforward PyTypeObject Dialect_Type;
109
110typedef struct {
111 PyObject_HEAD
112
113 PyObject *input_iter; /* iterate over this for input lines */
114
115 DialectObj *dialect; /* parsing dialect */
116
117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000122 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
126staticforward PyTypeObject Reader_Type;
127
Christian Heimese93237d2007-12-19 02:37:44 +0000128#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
131 PyObject_HEAD
132
133 PyObject *writeline; /* write output lines to this file */
134
135 DialectObj *dialect; /* parsing dialect */
136
137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141} WriterObj;
142
143staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
152 PyObject *dialect_obj;
153
154 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 return dialect_obj;
162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
167 Py_XINCREF(str);
168 return str;
169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
185 return get_string(self->lineterminator);
186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
191 return get_nullchar_as_None(self->escapechar);
192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
203 return PyInt_FromLong(self->quoting);
204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000209 if (src == NULL)
210 *target = dflt;
211 else
212 *target = PyObject_IsTrue(src);
213 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000214}
215
Andrew McNamara1196cf12005-01-07 04:42:45 +0000216static int
217_set_int(const char *name, int *target, PyObject *src, int dflt)
218{
219 if (src == NULL)
220 *target = dflt;
221 else {
222 if (!PyInt_Check(src)) {
223 PyErr_Format(PyExc_TypeError,
224 "\"%s\" must be an integer", name);
225 return -1;
226 }
227 *target = PyInt_AsLong(src);
228 }
229 return 0;
230}
231
232static int
233_set_char(const char *name, char *target, PyObject *src, char dflt)
234{
235 if (src == NULL)
236 *target = dflt;
237 else {
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000238 if (src == Py_None || PyString_Size(src) == 0)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000239 *target = '\0';
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000240 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be an 1-character string",
243 name);
244 return -1;
245 }
246 else {
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000247 char *s = PyString_AsString(src);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000248 if (s == NULL)
249 return -1;
250 *target = s[0];
251 }
252 }
253 return 0;
254}
255
256static int
257_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258{
259 if (src == NULL)
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000260 *target = PyString_FromString(dflt);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261 else {
262 if (src == Py_None)
263 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000264 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000265 PyErr_Format(PyExc_TypeError,
266 "\"%s\" must be an string", name);
267 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000268 }
269 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000270 Py_XDECREF(*target);
271 Py_INCREF(src);
272 *target = src;
273 }
274 }
275 return 0;
276}
277
278static int
279dialect_check_quoting(int quoting)
280{
281 StyleDesc *qs = quote_styles;
282
283 for (qs = quote_styles; qs->name; qs++) {
284 if (qs->style == quoting)
285 return 0;
286 }
287 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
288 return -1;
289}
Skip Montanarob4a04172003-03-20 23:29:12 +0000290
291#define D_OFF(x) offsetof(DialectObj, x)
292
293static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000294 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
295 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
296 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
297 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000298 { NULL }
299};
300
301static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000302 { "escapechar", (getter)Dialect_get_escapechar},
303 { "lineterminator", (getter)Dialect_get_lineterminator},
304 { "quotechar", (getter)Dialect_get_quotechar},
305 { "quoting", (getter)Dialect_get_quoting},
306 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000307};
308
309static void
310Dialect_dealloc(DialectObj *self)
311{
312 Py_XDECREF(self->lineterminator);
Christian Heimese93237d2007-12-19 02:37:44 +0000313 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000314}
315
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000316static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000317 "dialect",
318 "delimiter",
319 "doublequote",
320 "escapechar",
321 "lineterminator",
322 "quotechar",
323 "quoting",
324 "skipinitialspace",
325 "strict",
326 NULL
327};
328
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000329static PyObject *
330dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000331{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000332 DialectObj *self;
333 PyObject *ret = NULL;
334 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000335 PyObject *delimiter = NULL;
336 PyObject *doublequote = NULL;
337 PyObject *escapechar = NULL;
338 PyObject *lineterminator = NULL;
339 PyObject *quotechar = NULL;
340 PyObject *quoting = NULL;
341 PyObject *skipinitialspace = NULL;
342 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000343
Andrew McNamara1196cf12005-01-07 04:42:45 +0000344 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
345 "|OOOOOOOOO", dialect_kws,
346 &dialect,
347 &delimiter,
348 &doublequote,
349 &escapechar,
350 &lineterminator,
351 &quotechar,
352 &quoting,
353 &skipinitialspace,
354 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000355 return NULL;
356
357 if (dialect != NULL) {
358 if (IS_BASESTRING(dialect)) {
359 dialect = get_dialect_from_registry(dialect);
360 if (dialect == NULL)
361 return NULL;
362 }
363 else
364 Py_INCREF(dialect);
365 /* Can we reuse this instance? */
366 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
367 delimiter == 0 &&
368 doublequote == 0 &&
369 escapechar == 0 &&
370 lineterminator == 0 &&
371 quotechar == 0 &&
372 quoting == 0 &&
373 skipinitialspace == 0 &&
374 strict == 0)
375 return dialect;
376 }
377
378 self = (DialectObj *)type->tp_alloc(type, 0);
379 if (self == NULL) {
380 Py_XDECREF(dialect);
381 return NULL;
382 }
383 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000384
Andrew McNamara1196cf12005-01-07 04:42:45 +0000385 Py_XINCREF(delimiter);
386 Py_XINCREF(doublequote);
387 Py_XINCREF(escapechar);
388 Py_XINCREF(lineterminator);
389 Py_XINCREF(quotechar);
390 Py_XINCREF(quoting);
391 Py_XINCREF(skipinitialspace);
392 Py_XINCREF(strict);
393 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000394#define DIALECT_GETATTR(v, n) \
395 if (v == NULL) \
396 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000397 DIALECT_GETATTR(delimiter, "delimiter");
398 DIALECT_GETATTR(doublequote, "doublequote");
399 DIALECT_GETATTR(escapechar, "escapechar");
400 DIALECT_GETATTR(lineterminator, "lineterminator");
401 DIALECT_GETATTR(quotechar, "quotechar");
402 DIALECT_GETATTR(quoting, "quoting");
403 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
404 DIALECT_GETATTR(strict, "strict");
405 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000406 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000407
Andrew McNamara1196cf12005-01-07 04:42:45 +0000408 /* check types and convert to C values */
409#define DIASET(meth, name, target, src, dflt) \
410 if (meth(name, target, src, dflt)) \
411 goto err
412 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
413 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
414 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
415 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
416 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
417 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
418 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
419 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000420
Andrew McNamara1196cf12005-01-07 04:42:45 +0000421 /* validate options */
422 if (dialect_check_quoting(self->quoting))
423 goto err;
424 if (self->delimiter == 0) {
425 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
426 goto err;
427 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000428 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000429 self->quoting = QUOTE_NONE;
430 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
431 PyErr_SetString(PyExc_TypeError,
432 "quotechar must be set if quoting enabled");
433 goto err;
434 }
435 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000436 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000437 goto err;
438 }
439
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000440 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000441 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000442err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000443 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000444 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000445 Py_XDECREF(delimiter);
446 Py_XDECREF(doublequote);
447 Py_XDECREF(escapechar);
448 Py_XDECREF(lineterminator);
449 Py_XDECREF(quotechar);
450 Py_XDECREF(quoting);
451 Py_XDECREF(skipinitialspace);
452 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000453 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000454}
455
456
457PyDoc_STRVAR(Dialect_Type_doc,
458"CSV dialect\n"
459"\n"
460"The Dialect type records CSV parsing and generation options.\n");
461
462static PyTypeObject Dialect_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +0000463 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000464 "_csv.Dialect", /* tp_name */
465 sizeof(DialectObj), /* tp_basicsize */
466 0, /* tp_itemsize */
467 /* methods */
468 (destructor)Dialect_dealloc, /* tp_dealloc */
469 (printfunc)0, /* tp_print */
470 (getattrfunc)0, /* tp_getattr */
471 (setattrfunc)0, /* tp_setattr */
472 (cmpfunc)0, /* tp_compare */
473 (reprfunc)0, /* tp_repr */
474 0, /* tp_as_number */
475 0, /* tp_as_sequence */
476 0, /* tp_as_mapping */
477 (hashfunc)0, /* tp_hash */
478 (ternaryfunc)0, /* tp_call */
479 (reprfunc)0, /* tp_str */
480 0, /* tp_getattro */
481 0, /* tp_setattro */
482 0, /* tp_as_buffer */
483 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
484 Dialect_Type_doc, /* tp_doc */
485 0, /* tp_traverse */
486 0, /* tp_clear */
487 0, /* tp_richcompare */
488 0, /* tp_weaklistoffset */
489 0, /* tp_iter */
490 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000491 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000492 Dialect_memberlist, /* tp_members */
493 Dialect_getsetlist, /* tp_getset */
494 0, /* tp_base */
495 0, /* tp_dict */
496 0, /* tp_descr_get */
497 0, /* tp_descr_set */
498 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000499 0, /* tp_init */
500 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000501 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000502 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000503};
504
Andrew McNamara91b97462005-01-11 01:07:23 +0000505/*
506 * Return an instance of the dialect type, given a Python instance or kwarg
507 * description of the dialect
508 */
509static PyObject *
510_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
511{
512 PyObject *ctor_args;
513 PyObject *dialect;
514
515 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
516 if (ctor_args == NULL)
517 return NULL;
518 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
519 Py_DECREF(ctor_args);
520 return dialect;
521}
522
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000523/*
524 * READER
525 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000526static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000527parse_save_field(ReaderObj *self)
528{
529 PyObject *field;
530
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000531 field = PyString_FromStringAndSize(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000532 if (field == NULL)
533 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000534 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000535 if (self->numeric_field) {
536 PyObject *tmp;
537
538 self->numeric_field = 0;
539 tmp = PyNumber_Float(field);
540 if (tmp == NULL) {
541 Py_DECREF(field);
542 return -1;
543 }
544 Py_DECREF(field);
545 field = tmp;
546 }
547 PyList_Append(self->fields, field);
548 Py_DECREF(field);
549 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000550}
551
552static int
553parse_grow_buff(ReaderObj *self)
554{
555 if (self->field_size == 0) {
556 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000557 if (self->field != NULL)
558 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000559 self->field = PyMem_Malloc(self->field_size);
560 }
561 else {
Gregory P. Smith9d534572008-06-11 07:41:16 +0000562 if (self->field_size > INT_MAX / 2) {
563 PyErr_NoMemory();
564 return 0;
565 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000566 self->field_size *= 2;
567 self->field = PyMem_Realloc(self->field, self->field_size);
568 }
569 if (self->field == NULL) {
570 PyErr_NoMemory();
571 return 0;
572 }
573 return 1;
574}
575
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000576static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000577parse_add_char(ReaderObj *self, char c)
578{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000579 if (self->field_len >= field_limit) {
580 PyErr_Format(error_obj, "field larger than field limit (%ld)",
581 field_limit);
582 return -1;
583 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000584 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000585 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000586 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000587 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000588}
589
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000590static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000591parse_process_char(ReaderObj *self, char c)
592{
593 DialectObj *dialect = self->dialect;
594
595 switch (self->state) {
596 case START_RECORD:
597 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000598 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000599 /* empty line - return [] */
600 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000601 else if (c == '\n' || c == '\r') {
602 self->state = EAT_CRNL;
603 break;
604 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000605 /* normal character - handle as START_FIELD */
606 self->state = START_FIELD;
607 /* fallthru */
608 case START_FIELD:
609 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000610 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000611 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000612 if (parse_save_field(self) < 0)
613 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000614 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000615 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000616 else if (c == dialect->quotechar &&
617 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000618 /* start quoted field */
619 self->state = IN_QUOTED_FIELD;
620 }
621 else if (c == dialect->escapechar) {
622 /* possible escaped character */
623 self->state = ESCAPED_CHAR;
624 }
625 else if (c == ' ' && dialect->skipinitialspace)
626 /* ignore space at start of field */
627 ;
628 else if (c == dialect->delimiter) {
629 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000630 if (parse_save_field(self) < 0)
631 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000632 }
633 else {
634 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000635 if (dialect->quoting == QUOTE_NONNUMERIC)
636 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000637 if (parse_add_char(self, c) < 0)
638 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000639 self->state = IN_FIELD;
640 }
641 break;
642
643 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000644 if (c == '\0')
645 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000646 if (parse_add_char(self, c) < 0)
647 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000648 self->state = IN_FIELD;
649 break;
650
651 case IN_FIELD:
652 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000653 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000654 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000655 if (parse_save_field(self) < 0)
656 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000657 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000658 }
659 else if (c == dialect->escapechar) {
660 /* possible escaped character */
661 self->state = ESCAPED_CHAR;
662 }
663 else if (c == dialect->delimiter) {
664 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000665 if (parse_save_field(self) < 0)
666 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000667 self->state = START_FIELD;
668 }
669 else {
670 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000671 if (parse_add_char(self, c) < 0)
672 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000673 }
674 break;
675
676 case IN_QUOTED_FIELD:
677 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000678 if (c == '\0')
679 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000680 else if (c == dialect->escapechar) {
681 /* Possible escape character */
682 self->state = ESCAPE_IN_QUOTED_FIELD;
683 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000684 else if (c == dialect->quotechar &&
685 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000686 if (dialect->doublequote) {
687 /* doublequote; " represented by "" */
688 self->state = QUOTE_IN_QUOTED_FIELD;
689 }
690 else {
691 /* end of quote part of field */
692 self->state = IN_FIELD;
693 }
694 }
695 else {
696 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000697 if (parse_add_char(self, c) < 0)
698 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000699 }
700 break;
701
702 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000703 if (c == '\0')
704 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000705 if (parse_add_char(self, c) < 0)
706 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000707 self->state = IN_QUOTED_FIELD;
708 break;
709
710 case QUOTE_IN_QUOTED_FIELD:
711 /* doublequote - seen a quote in an quoted field */
712 if (dialect->quoting != QUOTE_NONE &&
713 c == dialect->quotechar) {
714 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000715 if (parse_add_char(self, c) < 0)
716 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000717 self->state = IN_QUOTED_FIELD;
718 }
719 else if (c == dialect->delimiter) {
720 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000721 if (parse_save_field(self) < 0)
722 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000723 self->state = START_FIELD;
724 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000725 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000726 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000727 if (parse_save_field(self) < 0)
728 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000729 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000730 }
731 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000732 if (parse_add_char(self, c) < 0)
733 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000734 self->state = IN_FIELD;
735 }
736 else {
737 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000738 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000739 dialect->delimiter,
740 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000741 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000742 }
743 break;
744
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000745 case EAT_CRNL:
746 if (c == '\n' || c == '\r')
747 ;
748 else if (c == '\0')
749 self->state = START_RECORD;
750 else {
751 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
752 return -1;
753 }
754 break;
755
Skip Montanarob4a04172003-03-20 23:29:12 +0000756 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000757 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000758}
759
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000760static int
761parse_reset(ReaderObj *self)
762{
763 Py_XDECREF(self->fields);
764 self->fields = PyList_New(0);
765 if (self->fields == NULL)
766 return -1;
767 self->field_len = 0;
768 self->state = START_RECORD;
769 self->numeric_field = 0;
770 return 0;
771}
Skip Montanarob4a04172003-03-20 23:29:12 +0000772
773static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000774Reader_iternext(ReaderObj *self)
775{
776 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000777 PyObject *fields = NULL;
778 char *line, c;
779 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000780
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781 if (parse_reset(self) < 0)
782 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000783 do {
784 lineobj = PyIter_Next(self->input_iter);
785 if (lineobj == NULL) {
786 /* End of input OR exception */
787 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000788 PyErr_Format(error_obj,
789 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000790 return NULL;
791 }
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000792 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000793
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000794 line = PyString_AsString(lineobj);
795 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000796
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000797 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000798 Py_DECREF(lineobj);
799 return NULL;
800 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000801 while (linelen--) {
802 c = *line++;
803 if (c == '\0') {
804 Py_DECREF(lineobj);
805 PyErr_Format(error_obj,
806 "line contains NULL byte");
807 goto err;
808 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000809 if (parse_process_char(self, c) < 0) {
810 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000811 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000812 }
813 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000814 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000815 if (parse_process_char(self, 0) < 0)
816 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000817 } while (self->state != START_RECORD);
818
819 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000820 self->fields = NULL;
821err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000822 return fields;
823}
824
825static void
826Reader_dealloc(ReaderObj *self)
827{
Andrew McNamara77ead872005-01-10 02:09:41 +0000828 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000829 Py_XDECREF(self->dialect);
830 Py_XDECREF(self->input_iter);
831 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000832 if (self->field != NULL)
833 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000834 PyObject_GC_Del(self);
835}
836
837static int
838Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
839{
Thomas Woutersc6e55062006-04-15 21:47:09 +0000840 Py_VISIT(self->dialect);
841 Py_VISIT(self->input_iter);
842 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000843 return 0;
844}
845
846static int
847Reader_clear(ReaderObj *self)
848{
Thomas Woutersedf17d82006-04-15 17:28:34 +0000849 Py_CLEAR(self->dialect);
850 Py_CLEAR(self->input_iter);
851 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000852 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000853}
854
855PyDoc_STRVAR(Reader_Type_doc,
856"CSV reader\n"
857"\n"
858"Reader objects are responsible for reading and parsing tabular data\n"
859"in CSV format.\n"
860);
861
862static struct PyMethodDef Reader_methods[] = {
863 { NULL, NULL }
864};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000865#define R_OFF(x) offsetof(ReaderObj, x)
866
867static struct PyMemberDef Reader_memberlist[] = {
868 { "dialect", T_OBJECT, R_OFF(dialect), RO },
869 { "line_num", T_ULONG, R_OFF(line_num), RO },
870 { NULL }
871};
872
Skip Montanarob4a04172003-03-20 23:29:12 +0000873
874static PyTypeObject Reader_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +0000875 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000876 "_csv.reader", /*tp_name*/
877 sizeof(ReaderObj), /*tp_basicsize*/
878 0, /*tp_itemsize*/
879 /* methods */
880 (destructor)Reader_dealloc, /*tp_dealloc*/
881 (printfunc)0, /*tp_print*/
882 (getattrfunc)0, /*tp_getattr*/
883 (setattrfunc)0, /*tp_setattr*/
884 (cmpfunc)0, /*tp_compare*/
885 (reprfunc)0, /*tp_repr*/
886 0, /*tp_as_number*/
887 0, /*tp_as_sequence*/
888 0, /*tp_as_mapping*/
889 (hashfunc)0, /*tp_hash*/
890 (ternaryfunc)0, /*tp_call*/
891 (reprfunc)0, /*tp_str*/
892 0, /*tp_getattro*/
893 0, /*tp_setattro*/
894 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000895 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
896 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000897 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000898 (traverseproc)Reader_traverse, /*tp_traverse*/
899 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000900 0, /*tp_richcompare*/
901 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000902 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000903 (getiterfunc)Reader_iternext, /*tp_iternext*/
904 Reader_methods, /*tp_methods*/
905 Reader_memberlist, /*tp_members*/
906 0, /*tp_getset*/
907
908};
909
910static PyObject *
911csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
912{
Andrew McNamara91b97462005-01-11 01:07:23 +0000913 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000914 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000915
916 if (!self)
917 return NULL;
918
919 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000920 self->fields = NULL;
921 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000922 self->field = NULL;
923 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000924 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000925
926 if (parse_reset(self) < 0) {
927 Py_DECREF(self);
928 return NULL;
929 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000931 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000932 Py_DECREF(self);
933 return NULL;
934 }
935 self->input_iter = PyObject_GetIter(iterator);
936 if (self->input_iter == NULL) {
937 PyErr_SetString(PyExc_TypeError,
938 "argument 1 must be an iterator");
939 Py_DECREF(self);
940 return NULL;
941 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000942 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000943 if (self->dialect == NULL) {
944 Py_DECREF(self);
945 return NULL;
946 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
Andrew McNamara77ead872005-01-10 02:09:41 +0000948 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000949 return (PyObject *)self;
950}
951
952/*
953 * WRITER
954 */
955/* ---------------------------------------------------------------- */
956static void
957join_reset(WriterObj *self)
958{
959 self->rec_len = 0;
960 self->num_fields = 0;
961}
962
963#define MEM_INCR 32768
964
965/* Calculate new record length or append field to record. Return new
966 * record length.
967 */
968static int
969join_append_data(WriterObj *self, char *field, int quote_empty,
970 int *quoted, int copy_phase)
971{
972 DialectObj *dialect = self->dialect;
973 int i, rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000974 char *lineterm;
975
976#define ADDCH(c) \
977 do {\
978 if (copy_phase) \
979 self->rec[rec_len] = c;\
980 rec_len++;\
981 } while(0)
982
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000983 lineterm = PyString_AsString(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000984 if (lineterm == NULL)
985 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000986
987 rec_len = self->rec_len;
988
Andrew McNamarac89f2842005-01-12 07:44:42 +0000989 /* If this is not the first field we need a field separator */
990 if (self->num_fields > 0)
991 ADDCH(dialect->delimiter);
992
993 /* Handle preceding quote */
994 if (copy_phase && *quoted)
995 ADDCH(dialect->quotechar);
996
997 /* Copy/count field data */
Skip Montanarob4a04172003-03-20 23:29:12 +0000998 for (i = 0;; i++) {
999 char c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +00001000 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001001
1002 if (c == '\0')
1003 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001004
Andrew McNamarac89f2842005-01-12 07:44:42 +00001005 if (c == dialect->delimiter ||
1006 c == dialect->escapechar ||
1007 c == dialect->quotechar ||
1008 strchr(lineterm, c)) {
1009 if (dialect->quoting == QUOTE_NONE)
1010 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001011 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001012 if (c == dialect->quotechar) {
1013 if (dialect->doublequote)
1014 ADDCH(dialect->quotechar);
1015 else
1016 want_escape = 1;
1017 }
1018 if (!want_escape)
1019 *quoted = 1;
1020 }
1021 if (want_escape) {
1022 if (!dialect->escapechar) {
1023 PyErr_Format(error_obj,
1024 "need to escape, but no escapechar set");
1025 return -1;
1026 }
1027 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001028 }
1029 }
1030 /* Copy field character into record buffer.
1031 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001032 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001033 }
1034
1035 /* If field is empty check if it needs to be quoted.
1036 */
1037 if (i == 0 && quote_empty) {
1038 if (dialect->quoting == QUOTE_NONE) {
1039 PyErr_Format(error_obj,
1040 "single empty field record must be quoted");
1041 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001042 }
1043 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001044 *quoted = 1;
1045 }
1046
Skip Montanarob4a04172003-03-20 23:29:12 +00001047 if (*quoted) {
1048 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001049 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001050 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001051 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001052 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001053 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001054#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001055}
1056
1057static int
1058join_check_rec_size(WriterObj *self, int rec_len)
1059{
Gregory P. Smith9d534572008-06-11 07:41:16 +00001060
1061 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1062 PyErr_NoMemory();
1063 return 0;
1064 }
1065
Skip Montanarob4a04172003-03-20 23:29:12 +00001066 if (rec_len > self->rec_size) {
1067 if (self->rec_size == 0) {
1068 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001069 if (self->rec != NULL)
1070 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001071 self->rec = PyMem_Malloc(self->rec_size);
1072 }
1073 else {
1074 char *old_rec = self->rec;
1075
1076 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1078 if (self->rec == NULL)
1079 PyMem_Free(old_rec);
1080 }
1081 if (self->rec == NULL) {
1082 PyErr_NoMemory();
1083 return 0;
1084 }
1085 }
1086 return 1;
1087}
1088
1089static int
1090join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1091{
1092 int rec_len;
1093
1094 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1095 if (rec_len < 0)
1096 return 0;
1097
1098 /* grow record buffer if necessary */
1099 if (!join_check_rec_size(self, rec_len))
1100 return 0;
1101
1102 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1103 self->num_fields++;
1104
1105 return 1;
1106}
1107
1108static int
1109join_append_lineterminator(WriterObj *self)
1110{
1111 int terminator_len;
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001112 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001113
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001114 terminator_len = PyString_Size(self->dialect->lineterminator);
Neal Norwitzc6a989a2006-05-10 06:57:58 +00001115 if (terminator_len == -1)
1116 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001117
1118 /* grow record buffer if necessary */
1119 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1120 return 0;
1121
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001122 terminator = PyString_AsString(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001123 if (terminator == NULL)
1124 return 0;
1125 memmove(self->rec + self->rec_len, terminator, terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001126 self->rec_len += terminator_len;
1127
1128 return 1;
1129}
1130
1131PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001132"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001133"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001134"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001135"elements will be converted to string.");
1136
1137static PyObject *
1138csv_writerow(WriterObj *self, PyObject *seq)
1139{
1140 DialectObj *dialect = self->dialect;
1141 int len, i;
1142
1143 if (!PySequence_Check(seq))
1144 return PyErr_Format(error_obj, "sequence expected");
1145
1146 len = PySequence_Length(seq);
1147 if (len < 0)
1148 return NULL;
1149
1150 /* Join all fields in internal buffer.
1151 */
1152 join_reset(self);
1153 for (i = 0; i < len; i++) {
1154 PyObject *field;
1155 int append_ok;
1156 int quoted;
1157
1158 field = PySequence_GetItem(seq, i);
1159 if (field == NULL)
1160 return NULL;
1161
Andrew McNamarac89f2842005-01-12 07:44:42 +00001162 switch (dialect->quoting) {
1163 case QUOTE_NONNUMERIC:
1164 quoted = !PyNumber_Check(field);
1165 break;
1166 case QUOTE_ALL:
1167 quoted = 1;
1168 break;
1169 default:
1170 quoted = 0;
1171 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172 }
1173
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001174 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001175 append_ok = join_append(self,
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001176 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001177 &quoted, len == 1);
1178 Py_DECREF(field);
1179 }
1180 else if (field == Py_None) {
1181 append_ok = join_append(self, "", &quoted, len == 1);
1182 Py_DECREF(field);
1183 }
1184 else {
1185 PyObject *str;
1186
1187 str = PyObject_Str(field);
1188 Py_DECREF(field);
1189 if (str == NULL)
1190 return NULL;
1191
Gregory P. Smithdd96db62008-06-09 04:58:54 +00001192 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001193 &quoted, len == 1);
1194 Py_DECREF(str);
1195 }
1196 if (!append_ok)
1197 return NULL;
1198 }
1199
1200 /* Add line terminator.
1201 */
1202 if (!join_append_lineterminator(self))
1203 return 0;
1204
1205 return PyObject_CallFunction(self->writeline,
1206 "(s#)", self->rec, self->rec_len);
1207}
1208
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001209PyDoc_STRVAR(csv_writerows_doc,
1210"writerows(sequence of sequences)\n"
1211"\n"
1212"Construct and write a series of sequences to a csv file. Non-string\n"
1213"elements will be converted to string.");
1214
Skip Montanarob4a04172003-03-20 23:29:12 +00001215static PyObject *
1216csv_writerows(WriterObj *self, PyObject *seqseq)
1217{
1218 PyObject *row_iter, *row_obj, *result;
1219
1220 row_iter = PyObject_GetIter(seqseq);
1221 if (row_iter == NULL) {
1222 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001223 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001224 return NULL;
1225 }
1226 while ((row_obj = PyIter_Next(row_iter))) {
1227 result = csv_writerow(self, row_obj);
1228 Py_DECREF(row_obj);
1229 if (!result) {
1230 Py_DECREF(row_iter);
1231 return NULL;
1232 }
1233 else
1234 Py_DECREF(result);
1235 }
1236 Py_DECREF(row_iter);
1237 if (PyErr_Occurred())
1238 return NULL;
1239 Py_INCREF(Py_None);
1240 return Py_None;
1241}
1242
1243static struct PyMethodDef Writer_methods[] = {
1244 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001245 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001246 { NULL, NULL }
1247};
1248
1249#define W_OFF(x) offsetof(WriterObj, x)
1250
1251static struct PyMemberDef Writer_memberlist[] = {
1252 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1253 { NULL }
1254};
1255
1256static void
1257Writer_dealloc(WriterObj *self)
1258{
Andrew McNamara77ead872005-01-10 02:09:41 +00001259 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001260 Py_XDECREF(self->dialect);
1261 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001262 if (self->rec != NULL)
1263 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001264 PyObject_GC_Del(self);
1265}
1266
1267static int
1268Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1269{
Thomas Woutersc6e55062006-04-15 21:47:09 +00001270 Py_VISIT(self->dialect);
1271 Py_VISIT(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001272 return 0;
1273}
1274
1275static int
1276Writer_clear(WriterObj *self)
1277{
Thomas Woutersedf17d82006-04-15 17:28:34 +00001278 Py_CLEAR(self->dialect);
1279 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001280 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001281}
1282
1283PyDoc_STRVAR(Writer_Type_doc,
1284"CSV writer\n"
1285"\n"
1286"Writer objects are responsible for generating tabular data\n"
1287"in CSV format from sequence input.\n"
1288);
1289
1290static PyTypeObject Writer_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00001291 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001292 "_csv.writer", /*tp_name*/
1293 sizeof(WriterObj), /*tp_basicsize*/
1294 0, /*tp_itemsize*/
1295 /* methods */
1296 (destructor)Writer_dealloc, /*tp_dealloc*/
1297 (printfunc)0, /*tp_print*/
1298 (getattrfunc)0, /*tp_getattr*/
1299 (setattrfunc)0, /*tp_setattr*/
1300 (cmpfunc)0, /*tp_compare*/
1301 (reprfunc)0, /*tp_repr*/
1302 0, /*tp_as_number*/
1303 0, /*tp_as_sequence*/
1304 0, /*tp_as_mapping*/
1305 (hashfunc)0, /*tp_hash*/
1306 (ternaryfunc)0, /*tp_call*/
1307 (reprfunc)0, /*tp_str*/
1308 0, /*tp_getattro*/
1309 0, /*tp_setattro*/
1310 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001311 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1312 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001313 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001314 (traverseproc)Writer_traverse, /*tp_traverse*/
1315 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001316 0, /*tp_richcompare*/
1317 0, /*tp_weaklistoffset*/
1318 (getiterfunc)0, /*tp_iter*/
1319 (getiterfunc)0, /*tp_iternext*/
1320 Writer_methods, /*tp_methods*/
1321 Writer_memberlist, /*tp_members*/
1322 0, /*tp_getset*/
1323};
1324
1325static PyObject *
1326csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1327{
Andrew McNamara91b97462005-01-11 01:07:23 +00001328 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001329 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001330
1331 if (!self)
1332 return NULL;
1333
1334 self->dialect = NULL;
1335 self->writeline = NULL;
1336
1337 self->rec = NULL;
1338 self->rec_size = 0;
1339 self->rec_len = 0;
1340 self->num_fields = 0;
1341
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001342 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001343 Py_DECREF(self);
1344 return NULL;
1345 }
1346 self->writeline = PyObject_GetAttrString(output_file, "write");
1347 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1348 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001349 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001350 Py_DECREF(self);
1351 return NULL;
1352 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001353 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001354 if (self->dialect == NULL) {
1355 Py_DECREF(self);
1356 return NULL;
1357 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001358 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001359 return (PyObject *)self;
1360}
1361
1362/*
1363 * DIALECT REGISTRY
1364 */
1365static PyObject *
1366csv_list_dialects(PyObject *module, PyObject *args)
1367{
1368 return PyDict_Keys(dialects);
1369}
1370
1371static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001372csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001373{
Andrew McNamara86625972005-01-11 01:28:33 +00001374 PyObject *name_obj, *dialect_obj = NULL;
1375 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001376
Andrew McNamara86625972005-01-11 01:28:33 +00001377 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001378 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001379 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001380 PyErr_SetString(PyExc_TypeError,
1381 "dialect name must be a string or unicode");
1382 return NULL;
1383 }
Andrew McNamara86625972005-01-11 01:28:33 +00001384 dialect = _call_dialect(dialect_obj, kwargs);
1385 if (dialect == NULL)
1386 return NULL;
1387 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1388 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001389 return NULL;
1390 }
Andrew McNamara86625972005-01-11 01:28:33 +00001391 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001392 Py_INCREF(Py_None);
1393 return Py_None;
1394}
1395
1396static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001397csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001398{
Skip Montanarob4a04172003-03-20 23:29:12 +00001399 if (PyDict_DelItem(dialects, name_obj) < 0)
1400 return PyErr_Format(error_obj, "unknown dialect");
1401 Py_INCREF(Py_None);
1402 return Py_None;
1403}
1404
1405static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001406csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001407{
Skip Montanarob4a04172003-03-20 23:29:12 +00001408 return get_dialect_from_registry(name_obj);
1409}
1410
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001411static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001412csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001413{
1414 PyObject *new_limit = NULL;
1415 long old_limit = field_limit;
1416
Andrew McNamara31d88962005-01-12 03:45:10 +00001417 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001418 return NULL;
1419 if (new_limit != NULL) {
1420 if (!PyInt_Check(new_limit)) {
1421 PyErr_Format(PyExc_TypeError,
1422 "limit must be an integer");
1423 return NULL;
1424 }
1425 field_limit = PyInt_AsLong(new_limit);
1426 }
1427 return PyInt_FromLong(old_limit);
1428}
1429
Skip Montanarob4a04172003-03-20 23:29:12 +00001430/*
1431 * MODULE
1432 */
1433
1434PyDoc_STRVAR(csv_module_doc,
1435"CSV parsing and writing.\n"
1436"\n"
1437"This module provides classes that assist in the reading and writing\n"
1438"of Comma Separated Value (CSV) files, and implements the interface\n"
1439"described by PEP 305. Although many CSV files are simple to parse,\n"
1440"the format is not formally defined by a stable specification and\n"
1441"is subtle enough that parsing lines of a CSV file with something\n"
1442"like line.split(\",\") is bound to fail. The module supports three\n"
1443"basic APIs: reading, writing, and registration of dialects.\n"
1444"\n"
1445"\n"
1446"DIALECT REGISTRATION:\n"
1447"\n"
1448"Readers and writers support a dialect argument, which is a convenient\n"
1449"handle on a group of settings. When the dialect argument is a string,\n"
1450"it identifies one of the dialects previously registered with the module.\n"
1451"If it is a class or instance, the attributes of the argument are used as\n"
1452"the settings for the reader or writer:\n"
1453"\n"
1454" class excel:\n"
1455" delimiter = ','\n"
1456" quotechar = '\"'\n"
1457" escapechar = None\n"
1458" doublequote = True\n"
1459" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001460" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001461" quoting = QUOTE_MINIMAL\n"
1462"\n"
1463"SETTINGS:\n"
1464"\n"
1465" * quotechar - specifies a one-character string to use as the \n"
1466" quoting character. It defaults to '\"'.\n"
1467" * delimiter - specifies a one-character string to use as the \n"
1468" field separator. It defaults to ','.\n"
1469" * skipinitialspace - specifies how to interpret whitespace which\n"
1470" immediately follows a delimiter. It defaults to False, which\n"
1471" means that whitespace immediately following a delimiter is part\n"
1472" of the following field.\n"
1473" * lineterminator - specifies the character sequence which should \n"
1474" terminate rows.\n"
1475" * quoting - controls when quotes should be generated by the writer.\n"
1476" It can take on any of the following module constants:\n"
1477"\n"
1478" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1479" field contains either the quotechar or the delimiter\n"
1480" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1481" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001482" fields which do not parse as integers or floating point\n"
1483" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001484" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1485" * escapechar - specifies a one-character string used to escape \n"
1486" the delimiter when quoting is set to QUOTE_NONE.\n"
1487" * doublequote - controls the handling of quotes inside fields. When\n"
1488" True, two consecutive quotes are interpreted as one during read,\n"
1489" and when writing, each quote character embedded in the data is\n"
1490" written as two quotes\n");
1491
1492PyDoc_STRVAR(csv_reader_doc,
1493" csv_reader = reader(iterable [, dialect='excel']\n"
1494" [optional keyword args])\n"
1495" for row in csv_reader:\n"
1496" process(row)\n"
1497"\n"
1498"The \"iterable\" argument can be any object that returns a line\n"
1499"of input for each iteration, such as a file object or a list. The\n"
1500"optional \"dialect\" parameter is discussed below. The function\n"
1501"also accepts optional keyword arguments which override settings\n"
1502"provided by the dialect.\n"
1503"\n"
1504"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001505"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001506
1507PyDoc_STRVAR(csv_writer_doc,
1508" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509" [optional keyword args])\n"
Fredrik Lundh4aaaa492006-04-04 16:51:13 +00001510" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001511" csv_writer.writerow(row)\n"
1512"\n"
1513" [or]\n"
1514"\n"
1515" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1516" [optional keyword args])\n"
1517" csv_writer.writerows(rows)\n"
1518"\n"
1519"The \"fileobj\" argument can be any object that supports the file API.\n");
1520
1521PyDoc_STRVAR(csv_list_dialects_doc,
1522"Return a list of all know dialect names.\n"
1523" names = csv.list_dialects()");
1524
1525PyDoc_STRVAR(csv_get_dialect_doc,
1526"Return the dialect instance associated with name.\n"
1527" dialect = csv.get_dialect(name)");
1528
1529PyDoc_STRVAR(csv_register_dialect_doc,
1530"Create a mapping from a string name to a dialect class.\n"
1531" dialect = csv.register_dialect(name, dialect)");
1532
1533PyDoc_STRVAR(csv_unregister_dialect_doc,
1534"Delete the name/dialect mapping associated with a string name.\n"
1535" csv.unregister_dialect(name)");
1536
Andrew McNamara31d88962005-01-12 03:45:10 +00001537PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001538"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001539" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001540"\n"
1541"Returns old limit. If limit is not given, no new limit is set and\n"
1542"the old limit is returned");
1543
Skip Montanarob4a04172003-03-20 23:29:12 +00001544static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001545 { "reader", (PyCFunction)csv_reader,
1546 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1547 { "writer", (PyCFunction)csv_writer,
1548 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1549 { "list_dialects", (PyCFunction)csv_list_dialects,
1550 METH_NOARGS, csv_list_dialects_doc},
1551 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001552 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001553 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1554 METH_O, csv_unregister_dialect_doc},
1555 { "get_dialect", (PyCFunction)csv_get_dialect,
1556 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001557 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1558 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001559 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001560};
1561
1562PyMODINIT_FUNC
1563init_csv(void)
1564{
1565 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001566 StyleDesc *style;
1567
1568 if (PyType_Ready(&Dialect_Type) < 0)
1569 return;
1570
1571 if (PyType_Ready(&Reader_Type) < 0)
1572 return;
1573
1574 if (PyType_Ready(&Writer_Type) < 0)
1575 return;
1576
1577 /* Create the module and add the functions */
1578 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1579 if (module == NULL)
1580 return;
1581
1582 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001583 if (PyModule_AddStringConstant(module, "__version__",
1584 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001585 return;
1586
1587 /* Add _dialects dictionary */
1588 dialects = PyDict_New();
1589 if (dialects == NULL)
1590 return;
1591 if (PyModule_AddObject(module, "_dialects", dialects))
1592 return;
1593
1594 /* Add quote styles into dictionary */
1595 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001596 if (PyModule_AddIntConstant(module, style->name,
1597 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001598 return;
1599 }
1600
1601 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001602 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001603 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1604 return;
1605
1606 /* Add the CSV exception object to the module. */
1607 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1608 if (error_obj == NULL)
1609 return;
1610 PyModule_AddObject(module, "Error", error_obj);
1611}