blob: 638079293ca4060d7a8d31c0f665bc7375183d8e [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
/* Version string for this module (where it is used lies outside this excerpt). */
#define MODULE_VERSION "1.0"
16
#include "Python.h"
#include "structmember.h"

#include <limits.h>
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Inline-documentation helpers, supplied only when the headers lack them.
 * Without WITH_DOC_STRINGS every docstring collapses to "". */
# ifdef WITH_DOC_STRINGS
#  define PyDoc_STR(str) str
# else
#  define PyDoc_STR(str) ""
# endif
# define PyDoc_VAR(name) static char name[]
# define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Return-type/linkage prefix for the module init function. */
# ifdef __cplusplus
#  define PyMODINIT_FUNC extern "C" void
# else /* !__cplusplus */
#  define PyMODINIT_FUNC void
# endif /* __cplusplus */
#endif
/* end 2.2 compatibility macros */
41
/* Non-zero when `o` is an instance of PyBaseString_Type or of a subtype. */
#define IS_BASESTRING(o) (PyObject_TypeCheck((o), &PyBaseString_Type))
44
Skip Montanarob4a04172003-03-20 23:29:12 +000045static PyObject *error_obj; /* CSV exception */
46static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000047static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000048
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,               /* nothing of the current record seen yet */
    START_FIELD,                /* expecting the first character of a field */
    ESCAPED_CHAR,               /* previous character was the escape char */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape char seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,      /* quote char seen inside a quoted field */
    EAT_CRNL                    /* consuming line-ending characters */
} ParserState;
54
/* Quoting policies a dialect may select; validated by dialect_check_quoting(). */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
58
59typedef struct {
60 QuoteStyle style;
61 char *name;
62} StyleDesc;
63
64static StyleDesc quote_styles[] = {
65 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
66 { QUOTE_ALL, "QUOTE_ALL" },
67 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
68 { QUOTE_NONE, "QUOTE_NONE" },
69 { 0 }
70};
71
72typedef struct {
73 PyObject_HEAD
74
75 int doublequote; /* is " represented by ""? */
76 char delimiter; /* field separator */
77 char quotechar; /* quote character */
78 char escapechar; /* escape character */
79 int skipinitialspace; /* ignore spaces following delimiter? */
80 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000081 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000082
83 int strict; /* raise exception on bad CSV */
84} DialectObj;
85
86staticforward PyTypeObject Dialect_Type;
87
88typedef struct {
89 PyObject_HEAD
90
91 PyObject *input_iter; /* iterate over this for input lines */
92
93 DialectObj *dialect; /* parsing dialect */
94
95 PyObject *fields; /* field list for current record */
96 ParserState state; /* current CSV parse state */
97 char *field; /* build current field in here */
98 int field_size; /* size of allocated buffer */
99 int field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000100 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000101 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} ReaderObj;
103
104staticforward PyTypeObject Reader_Type;
105
106#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
107
108typedef struct {
109 PyObject_HEAD
110
111 PyObject *writeline; /* write output lines to this file */
112
113 DialectObj *dialect; /* parsing dialect */
114
115 char *rec; /* buffer for parser.join */
116 int rec_size; /* size of allocated record */
117 int rec_len; /* length of record */
118 int num_fields; /* number of fields in record */
119} WriterObj;
120
121staticforward PyTypeObject Writer_Type;
122
123/*
124 * DIALECT class
125 */
126
127static PyObject *
128get_dialect_from_registry(PyObject * name_obj)
129{
130 PyObject *dialect_obj;
131
132 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000133 if (dialect_obj == NULL) {
134 if (!PyErr_Occurred())
135 PyErr_Format(error_obj, "unknown dialect");
136 }
137 else
138 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000139 return dialect_obj;
140}
141
Skip Montanarob4a04172003-03-20 23:29:12 +0000142static PyObject *
143get_string(PyObject *str)
144{
145 Py_XINCREF(str);
146 return str;
147}
148
Skip Montanarob4a04172003-03-20 23:29:12 +0000149static PyObject *
150get_nullchar_as_None(char c)
151{
152 if (c == '\0') {
153 Py_INCREF(Py_None);
154 return Py_None;
155 }
156 else
157 return PyString_FromStringAndSize((char*)&c, 1);
158}
159
Skip Montanarob4a04172003-03-20 23:29:12 +0000160static PyObject *
161Dialect_get_lineterminator(DialectObj *self)
162{
163 return get_string(self->lineterminator);
164}
165
Skip Montanarob4a04172003-03-20 23:29:12 +0000166static PyObject *
167Dialect_get_escapechar(DialectObj *self)
168{
169 return get_nullchar_as_None(self->escapechar);
170}
171
Andrew McNamara1196cf12005-01-07 04:42:45 +0000172static PyObject *
173Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000174{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000175 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
178static PyObject *
179Dialect_get_quoting(DialectObj *self)
180{
181 return PyInt_FromLong(self->quoting);
182}
183
184static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000185_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000187 if (src == NULL)
188 *target = dflt;
189 else
190 *target = PyObject_IsTrue(src);
191 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static int
195_set_int(const char *name, int *target, PyObject *src, int dflt)
196{
197 if (src == NULL)
198 *target = dflt;
199 else {
200 if (!PyInt_Check(src)) {
201 PyErr_Format(PyExc_TypeError,
202 "\"%s\" must be an integer", name);
203 return -1;
204 }
205 *target = PyInt_AsLong(src);
206 }
207 return 0;
208}
209
210static int
211_set_char(const char *name, char *target, PyObject *src, char dflt)
212{
213 if (src == NULL)
214 *target = dflt;
215 else {
Andrew McNamaraa8292632005-01-10 12:25:11 +0000216 if (src == Py_None || PyString_Size(src) == 0)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000217 *target = '\0';
218 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
219 PyErr_Format(PyExc_TypeError,
220 "\"%s\" must be an 1-character string",
221 name);
222 return -1;
223 }
224 else {
225 char *s = PyString_AsString(src);
226 if (s == NULL)
227 return -1;
228 *target = s[0];
229 }
230 }
231 return 0;
232}
233
234static int
235_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
236{
237 if (src == NULL)
238 *target = PyString_FromString(dflt);
239 else {
240 if (src == Py_None)
241 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000242 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be an string", name);
245 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000246 }
247 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000248 Py_XDECREF(*target);
249 Py_INCREF(src);
250 *target = src;
251 }
252 }
253 return 0;
254}
255
256static int
257dialect_check_quoting(int quoting)
258{
259 StyleDesc *qs = quote_styles;
260
261 for (qs = quote_styles; qs->name; qs++) {
262 if (qs->style == quoting)
263 return 0;
264 }
265 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
266 return -1;
267}
Skip Montanarob4a04172003-03-20 23:29:12 +0000268
269#define D_OFF(x) offsetof(DialectObj, x)
270
271static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000272 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
273 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
274 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
275 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000276 { NULL }
277};
278
279static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000280 { "escapechar", (getter)Dialect_get_escapechar},
281 { "lineterminator", (getter)Dialect_get_lineterminator},
282 { "quotechar", (getter)Dialect_get_quotechar},
283 { "quoting", (getter)Dialect_get_quoting},
284 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000285};
286
287static void
288Dialect_dealloc(DialectObj *self)
289{
290 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000291 self->ob_type->tp_free((PyObject *)self);
292}
293
Andrew McNamara1196cf12005-01-07 04:42:45 +0000294static char *dialect_kws[] = {
295 "dialect",
296 "delimiter",
297 "doublequote",
298 "escapechar",
299 "lineterminator",
300 "quotechar",
301 "quoting",
302 "skipinitialspace",
303 "strict",
304 NULL
305};
306
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000307static PyObject *
308dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000309{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000310 DialectObj *self;
311 PyObject *ret = NULL;
312 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000313 PyObject *delimiter = NULL;
314 PyObject *doublequote = NULL;
315 PyObject *escapechar = NULL;
316 PyObject *lineterminator = NULL;
317 PyObject *quotechar = NULL;
318 PyObject *quoting = NULL;
319 PyObject *skipinitialspace = NULL;
320 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000321
Andrew McNamara1196cf12005-01-07 04:42:45 +0000322 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
323 "|OOOOOOOOO", dialect_kws,
324 &dialect,
325 &delimiter,
326 &doublequote,
327 &escapechar,
328 &lineterminator,
329 &quotechar,
330 &quoting,
331 &skipinitialspace,
332 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000333 return NULL;
334
335 if (dialect != NULL) {
336 if (IS_BASESTRING(dialect)) {
337 dialect = get_dialect_from_registry(dialect);
338 if (dialect == NULL)
339 return NULL;
340 }
341 else
342 Py_INCREF(dialect);
343 /* Can we reuse this instance? */
344 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
345 delimiter == 0 &&
346 doublequote == 0 &&
347 escapechar == 0 &&
348 lineterminator == 0 &&
349 quotechar == 0 &&
350 quoting == 0 &&
351 skipinitialspace == 0 &&
352 strict == 0)
353 return dialect;
354 }
355
356 self = (DialectObj *)type->tp_alloc(type, 0);
357 if (self == NULL) {
358 Py_XDECREF(dialect);
359 return NULL;
360 }
361 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000362
Andrew McNamara1196cf12005-01-07 04:42:45 +0000363 Py_XINCREF(delimiter);
364 Py_XINCREF(doublequote);
365 Py_XINCREF(escapechar);
366 Py_XINCREF(lineterminator);
367 Py_XINCREF(quotechar);
368 Py_XINCREF(quoting);
369 Py_XINCREF(skipinitialspace);
370 Py_XINCREF(strict);
371 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000372#define DIALECT_GETATTR(v, n) \
373 if (v == NULL) \
374 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000375 DIALECT_GETATTR(delimiter, "delimiter");
376 DIALECT_GETATTR(doublequote, "doublequote");
377 DIALECT_GETATTR(escapechar, "escapechar");
378 DIALECT_GETATTR(lineterminator, "lineterminator");
379 DIALECT_GETATTR(quotechar, "quotechar");
380 DIALECT_GETATTR(quoting, "quoting");
381 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
382 DIALECT_GETATTR(strict, "strict");
383 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000384 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000385
Andrew McNamara1196cf12005-01-07 04:42:45 +0000386 /* check types and convert to C values */
387#define DIASET(meth, name, target, src, dflt) \
388 if (meth(name, target, src, dflt)) \
389 goto err
390 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
391 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
392 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
393 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
394 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
395 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
396 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
397 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000398
Andrew McNamara1196cf12005-01-07 04:42:45 +0000399 /* validate options */
400 if (dialect_check_quoting(self->quoting))
401 goto err;
402 if (self->delimiter == 0) {
403 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
404 goto err;
405 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000406 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000407 self->quoting = QUOTE_NONE;
408 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
409 PyErr_SetString(PyExc_TypeError,
410 "quotechar must be set if quoting enabled");
411 goto err;
412 }
413 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000414 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000415 goto err;
416 }
417
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000418 ret = (PyObject *)self;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000419err:
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000420 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000421 Py_XDECREF(delimiter);
422 Py_XDECREF(doublequote);
423 Py_XDECREF(escapechar);
424 Py_XDECREF(lineterminator);
425 Py_XDECREF(quotechar);
426 Py_XDECREF(quoting);
427 Py_XDECREF(skipinitialspace);
428 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000429 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000430}
431
432
433PyDoc_STRVAR(Dialect_Type_doc,
434"CSV dialect\n"
435"\n"
436"The Dialect type records CSV parsing and generation options.\n");
437
438static PyTypeObject Dialect_Type = {
439 PyObject_HEAD_INIT(NULL)
440 0, /* ob_size */
441 "_csv.Dialect", /* tp_name */
442 sizeof(DialectObj), /* tp_basicsize */
443 0, /* tp_itemsize */
444 /* methods */
445 (destructor)Dialect_dealloc, /* tp_dealloc */
446 (printfunc)0, /* tp_print */
447 (getattrfunc)0, /* tp_getattr */
448 (setattrfunc)0, /* tp_setattr */
449 (cmpfunc)0, /* tp_compare */
450 (reprfunc)0, /* tp_repr */
451 0, /* tp_as_number */
452 0, /* tp_as_sequence */
453 0, /* tp_as_mapping */
454 (hashfunc)0, /* tp_hash */
455 (ternaryfunc)0, /* tp_call */
456 (reprfunc)0, /* tp_str */
457 0, /* tp_getattro */
458 0, /* tp_setattro */
459 0, /* tp_as_buffer */
460 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
461 Dialect_Type_doc, /* tp_doc */
462 0, /* tp_traverse */
463 0, /* tp_clear */
464 0, /* tp_richcompare */
465 0, /* tp_weaklistoffset */
466 0, /* tp_iter */
467 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000468 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000469 Dialect_memberlist, /* tp_members */
470 Dialect_getsetlist, /* tp_getset */
471 0, /* tp_base */
472 0, /* tp_dict */
473 0, /* tp_descr_get */
474 0, /* tp_descr_set */
475 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000476 0, /* tp_init */
477 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000478 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000479 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000480};
481
Andrew McNamara91b97462005-01-11 01:07:23 +0000482/*
483 * Return an instance of the dialect type, given a Python instance or kwarg
484 * description of the dialect
485 */
486static PyObject *
487_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
488{
489 PyObject *ctor_args;
490 PyObject *dialect;
491
492 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
493 if (ctor_args == NULL)
494 return NULL;
495 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
496 Py_DECREF(ctor_args);
497 return dialect;
498}
499
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000500/*
501 * READER
502 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000503static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000504parse_save_field(ReaderObj *self)
505{
506 PyObject *field;
507
508 field = PyString_FromStringAndSize(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000509 if (field == NULL)
510 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000511 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000512 if (self->numeric_field) {
513 PyObject *tmp;
514
515 self->numeric_field = 0;
516 tmp = PyNumber_Float(field);
517 if (tmp == NULL) {
518 Py_DECREF(field);
519 return -1;
520 }
521 Py_DECREF(field);
522 field = tmp;
523 }
524 PyList_Append(self->fields, field);
525 Py_DECREF(field);
526 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000527}
528
529static int
530parse_grow_buff(ReaderObj *self)
531{
532 if (self->field_size == 0) {
533 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000534 if (self->field != NULL)
535 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000536 self->field = PyMem_Malloc(self->field_size);
537 }
538 else {
539 self->field_size *= 2;
540 self->field = PyMem_Realloc(self->field, self->field_size);
541 }
542 if (self->field == NULL) {
543 PyErr_NoMemory();
544 return 0;
545 }
546 return 1;
547}
548
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000549static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000550parse_add_char(ReaderObj *self, char c)
551{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000552 if (self->field_len >= field_limit) {
553 PyErr_Format(error_obj, "field larger than field limit (%ld)",
554 field_limit);
555 return -1;
556 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000557 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000558 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000559 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000560 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000561}
562
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000563static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000564parse_process_char(ReaderObj *self, char c)
565{
566 DialectObj *dialect = self->dialect;
567
568 switch (self->state) {
569 case START_RECORD:
570 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000571 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000572 /* empty line - return [] */
573 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000574 else if (c == '\n' || c == '\r') {
575 self->state = EAT_CRNL;
576 break;
577 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000578 /* normal character - handle as START_FIELD */
579 self->state = START_FIELD;
580 /* fallthru */
581 case START_FIELD:
582 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000583 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000584 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000585 if (parse_save_field(self) < 0)
586 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000587 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000588 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000589 else if (c == dialect->quotechar &&
590 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000591 /* start quoted field */
592 self->state = IN_QUOTED_FIELD;
593 }
594 else if (c == dialect->escapechar) {
595 /* possible escaped character */
596 self->state = ESCAPED_CHAR;
597 }
598 else if (c == ' ' && dialect->skipinitialspace)
599 /* ignore space at start of field */
600 ;
601 else if (c == dialect->delimiter) {
602 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000603 if (parse_save_field(self) < 0)
604 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000605 }
606 else {
607 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000608 if (dialect->quoting == QUOTE_NONNUMERIC)
609 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000610 if (parse_add_char(self, c) < 0)
611 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000612 self->state = IN_FIELD;
613 }
614 break;
615
616 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000617 if (c == '\0')
618 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000619 if (parse_add_char(self, c) < 0)
620 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000621 self->state = IN_FIELD;
622 break;
623
624 case IN_FIELD:
625 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000626 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000627 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000628 if (parse_save_field(self) < 0)
629 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000630 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000631 }
632 else if (c == dialect->escapechar) {
633 /* possible escaped character */
634 self->state = ESCAPED_CHAR;
635 }
636 else if (c == dialect->delimiter) {
637 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000638 if (parse_save_field(self) < 0)
639 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000640 self->state = START_FIELD;
641 }
642 else {
643 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000644 if (parse_add_char(self, c) < 0)
645 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000646 }
647 break;
648
649 case IN_QUOTED_FIELD:
650 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000651 if (c == '\0')
652 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000653 else if (c == dialect->escapechar) {
654 /* Possible escape character */
655 self->state = ESCAPE_IN_QUOTED_FIELD;
656 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000657 else if (c == dialect->quotechar &&
658 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000659 if (dialect->doublequote) {
660 /* doublequote; " represented by "" */
661 self->state = QUOTE_IN_QUOTED_FIELD;
662 }
663 else {
664 /* end of quote part of field */
665 self->state = IN_FIELD;
666 }
667 }
668 else {
669 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000670 if (parse_add_char(self, c) < 0)
671 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000672 }
673 break;
674
675 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000676 if (c == '\0')
677 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000678 if (parse_add_char(self, c) < 0)
679 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000680 self->state = IN_QUOTED_FIELD;
681 break;
682
683 case QUOTE_IN_QUOTED_FIELD:
684 /* doublequote - seen a quote in an quoted field */
685 if (dialect->quoting != QUOTE_NONE &&
686 c == dialect->quotechar) {
687 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000688 if (parse_add_char(self, c) < 0)
689 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000690 self->state = IN_QUOTED_FIELD;
691 }
692 else if (c == dialect->delimiter) {
693 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000694 if (parse_save_field(self) < 0)
695 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000696 self->state = START_FIELD;
697 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000698 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000699 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000700 if (parse_save_field(self) < 0)
701 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000702 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000703 }
704 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000705 if (parse_add_char(self, c) < 0)
706 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000707 self->state = IN_FIELD;
708 }
709 else {
710 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000711 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000712 dialect->delimiter,
713 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000714 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000715 }
716 break;
717
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000718 case EAT_CRNL:
719 if (c == '\n' || c == '\r')
720 ;
721 else if (c == '\0')
722 self->state = START_RECORD;
723 else {
724 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
725 return -1;
726 }
727 break;
728
Skip Montanarob4a04172003-03-20 23:29:12 +0000729 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000730 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000731}
732
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000733static int
734parse_reset(ReaderObj *self)
735{
736 Py_XDECREF(self->fields);
737 self->fields = PyList_New(0);
738 if (self->fields == NULL)
739 return -1;
740 self->field_len = 0;
741 self->state = START_RECORD;
742 self->numeric_field = 0;
743 return 0;
744}
Skip Montanarob4a04172003-03-20 23:29:12 +0000745
746static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000747Reader_iternext(ReaderObj *self)
748{
749 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000750 PyObject *fields = NULL;
751 char *line, c;
752 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000753
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000754 if (parse_reset(self) < 0)
755 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000756 do {
757 lineobj = PyIter_Next(self->input_iter);
758 if (lineobj == NULL) {
759 /* End of input OR exception */
760 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000761 PyErr_Format(error_obj,
762 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000763 return NULL;
764 }
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000765 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000766
Skip Montanarob4a04172003-03-20 23:29:12 +0000767 line = PyString_AsString(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000768 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000769
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000770 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000771 Py_DECREF(lineobj);
772 return NULL;
773 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000774 while (linelen--) {
775 c = *line++;
776 if (c == '\0') {
777 Py_DECREF(lineobj);
778 PyErr_Format(error_obj,
779 "line contains NULL byte");
780 goto err;
781 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000782 if (parse_process_char(self, c) < 0) {
783 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000784 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000785 }
786 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000787 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000788 if (parse_process_char(self, 0) < 0)
789 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000790 } while (self->state != START_RECORD);
791
792 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000793 self->fields = NULL;
794err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000795 return fields;
796}
797
798static void
799Reader_dealloc(ReaderObj *self)
800{
Andrew McNamara77ead872005-01-10 02:09:41 +0000801 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000802 Py_XDECREF(self->dialect);
803 Py_XDECREF(self->input_iter);
804 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000805 if (self->field != NULL)
806 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000807 PyObject_GC_Del(self);
808}
809
810static int
811Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
812{
813 int err;
814#define VISIT(SLOT) \
815 if (SLOT) { \
816 err = visit((PyObject *)(SLOT), arg); \
817 if (err) \
818 return err; \
819 }
820 VISIT(self->dialect);
821 VISIT(self->input_iter);
822 VISIT(self->fields);
823 return 0;
824}
825
826static int
827Reader_clear(ReaderObj *self)
828{
829 Py_XDECREF(self->dialect);
830 Py_XDECREF(self->input_iter);
831 Py_XDECREF(self->fields);
832 self->dialect = NULL;
833 self->input_iter = NULL;
834 self->fields = NULL;
835 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000836}
837
838PyDoc_STRVAR(Reader_Type_doc,
839"CSV reader\n"
840"\n"
841"Reader objects are responsible for reading and parsing tabular data\n"
842"in CSV format.\n"
843);
844
845static struct PyMethodDef Reader_methods[] = {
846 { NULL, NULL }
847};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000848#define R_OFF(x) offsetof(ReaderObj, x)
849
850static struct PyMemberDef Reader_memberlist[] = {
851 { "dialect", T_OBJECT, R_OFF(dialect), RO },
852 { "line_num", T_ULONG, R_OFF(line_num), RO },
853 { NULL }
854};
855
Skip Montanarob4a04172003-03-20 23:29:12 +0000856
857static PyTypeObject Reader_Type = {
858 PyObject_HEAD_INIT(NULL)
859 0, /*ob_size*/
860 "_csv.reader", /*tp_name*/
861 sizeof(ReaderObj), /*tp_basicsize*/
862 0, /*tp_itemsize*/
863 /* methods */
864 (destructor)Reader_dealloc, /*tp_dealloc*/
865 (printfunc)0, /*tp_print*/
866 (getattrfunc)0, /*tp_getattr*/
867 (setattrfunc)0, /*tp_setattr*/
868 (cmpfunc)0, /*tp_compare*/
869 (reprfunc)0, /*tp_repr*/
870 0, /*tp_as_number*/
871 0, /*tp_as_sequence*/
872 0, /*tp_as_mapping*/
873 (hashfunc)0, /*tp_hash*/
874 (ternaryfunc)0, /*tp_call*/
875 (reprfunc)0, /*tp_str*/
876 0, /*tp_getattro*/
877 0, /*tp_setattro*/
878 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000879 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
880 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000881 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000882 (traverseproc)Reader_traverse, /*tp_traverse*/
883 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000884 0, /*tp_richcompare*/
885 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000886 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000887 (getiterfunc)Reader_iternext, /*tp_iternext*/
888 Reader_methods, /*tp_methods*/
889 Reader_memberlist, /*tp_members*/
890 0, /*tp_getset*/
891
892};
893
894static PyObject *
895csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
896{
Andrew McNamara91b97462005-01-11 01:07:23 +0000897 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000898 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000899
900 if (!self)
901 return NULL;
902
903 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000904 self->fields = NULL;
905 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000906 self->field = NULL;
907 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000908 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000909
910 if (parse_reset(self) < 0) {
911 Py_DECREF(self);
912 return NULL;
913 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000914
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000915 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000916 Py_DECREF(self);
917 return NULL;
918 }
919 self->input_iter = PyObject_GetIter(iterator);
920 if (self->input_iter == NULL) {
921 PyErr_SetString(PyExc_TypeError,
922 "argument 1 must be an iterator");
923 Py_DECREF(self);
924 return NULL;
925 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000926 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000927 if (self->dialect == NULL) {
928 Py_DECREF(self);
929 return NULL;
930 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000931
Andrew McNamara77ead872005-01-10 02:09:41 +0000932 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000933 return (PyObject *)self;
934}
935
936/*
937 * WRITER
938 */
939/* ---------------------------------------------------------------- */
940static void
941join_reset(WriterObj *self)
942{
943 self->rec_len = 0;
944 self->num_fields = 0;
945}
946
/* The writer's record buffer size is rounded up to a multiple of this. */
#define MEM_INCR (32 * 1024)
948
949/* Calculate new record length or append field to record. Return new
950 * record length.
951 */
952static int
953join_append_data(WriterObj *self, char *field, int quote_empty,
954 int *quoted, int copy_phase)
955{
956 DialectObj *dialect = self->dialect;
957 int i, rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000958 char *lineterm;
959
960#define ADDCH(c) \
961 do {\
962 if (copy_phase) \
963 self->rec[rec_len] = c;\
964 rec_len++;\
965 } while(0)
966
967 lineterm = PyString_AsString(dialect->lineterminator);
968 if (lineterm == NULL)
969 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000970
971 rec_len = self->rec_len;
972
Andrew McNamarac89f2842005-01-12 07:44:42 +0000973 /* If this is not the first field we need a field separator */
974 if (self->num_fields > 0)
975 ADDCH(dialect->delimiter);
976
977 /* Handle preceding quote */
978 if (copy_phase && *quoted)
979 ADDCH(dialect->quotechar);
980
981 /* Copy/count field data */
Skip Montanarob4a04172003-03-20 23:29:12 +0000982 for (i = 0;; i++) {
983 char c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +0000984 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000985
986 if (c == '\0')
987 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000988
Andrew McNamarac89f2842005-01-12 07:44:42 +0000989 if (c == dialect->delimiter ||
990 c == dialect->escapechar ||
991 c == dialect->quotechar ||
992 strchr(lineterm, c)) {
993 if (dialect->quoting == QUOTE_NONE)
994 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000995 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000996 if (c == dialect->quotechar) {
997 if (dialect->doublequote)
998 ADDCH(dialect->quotechar);
999 else
1000 want_escape = 1;
1001 }
1002 if (!want_escape)
1003 *quoted = 1;
1004 }
1005 if (want_escape) {
1006 if (!dialect->escapechar) {
1007 PyErr_Format(error_obj,
1008 "need to escape, but no escapechar set");
1009 return -1;
1010 }
1011 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001012 }
1013 }
1014 /* Copy field character into record buffer.
1015 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001016 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001017 }
1018
1019 /* If field is empty check if it needs to be quoted.
1020 */
1021 if (i == 0 && quote_empty) {
1022 if (dialect->quoting == QUOTE_NONE) {
1023 PyErr_Format(error_obj,
1024 "single empty field record must be quoted");
1025 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001026 }
1027 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001028 *quoted = 1;
1029 }
1030
Skip Montanarob4a04172003-03-20 23:29:12 +00001031 if (*quoted) {
1032 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001033 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001034 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001035 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001036 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001037 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001038#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001039}
1040
1041static int
1042join_check_rec_size(WriterObj *self, int rec_len)
1043{
1044 if (rec_len > self->rec_size) {
1045 if (self->rec_size == 0) {
1046 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001047 if (self->rec != NULL)
1048 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001049 self->rec = PyMem_Malloc(self->rec_size);
1050 }
1051 else {
1052 char *old_rec = self->rec;
1053
1054 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1055 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1056 if (self->rec == NULL)
1057 PyMem_Free(old_rec);
1058 }
1059 if (self->rec == NULL) {
1060 PyErr_NoMemory();
1061 return 0;
1062 }
1063 }
1064 return 1;
1065}
1066
1067static int
1068join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1069{
1070 int rec_len;
1071
1072 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1073 if (rec_len < 0)
1074 return 0;
1075
1076 /* grow record buffer if necessary */
1077 if (!join_check_rec_size(self, rec_len))
1078 return 0;
1079
1080 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1081 self->num_fields++;
1082
1083 return 1;
1084}
1085
1086static int
1087join_append_lineterminator(WriterObj *self)
1088{
1089 int terminator_len;
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001090 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001091
1092 terminator_len = PyString_Size(self->dialect->lineterminator);
1093
1094 /* grow record buffer if necessary */
1095 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1096 return 0;
1097
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001098 terminator = PyString_AsString(self->dialect->lineterminator);
1099 if (terminator == NULL)
1100 return 0;
1101 memmove(self->rec + self->rec_len, terminator, terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001102 self->rec_len += terminator_len;
1103
1104 return 1;
1105}
1106
1107PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001108"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001109"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001110"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001111"elements will be converted to string.");
1112
1113static PyObject *
1114csv_writerow(WriterObj *self, PyObject *seq)
1115{
1116 DialectObj *dialect = self->dialect;
1117 int len, i;
1118
1119 if (!PySequence_Check(seq))
1120 return PyErr_Format(error_obj, "sequence expected");
1121
1122 len = PySequence_Length(seq);
1123 if (len < 0)
1124 return NULL;
1125
1126 /* Join all fields in internal buffer.
1127 */
1128 join_reset(self);
1129 for (i = 0; i < len; i++) {
1130 PyObject *field;
1131 int append_ok;
1132 int quoted;
1133
1134 field = PySequence_GetItem(seq, i);
1135 if (field == NULL)
1136 return NULL;
1137
Andrew McNamarac89f2842005-01-12 07:44:42 +00001138 switch (dialect->quoting) {
1139 case QUOTE_NONNUMERIC:
1140 quoted = !PyNumber_Check(field);
1141 break;
1142 case QUOTE_ALL:
1143 quoted = 1;
1144 break;
1145 default:
1146 quoted = 0;
1147 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148 }
1149
1150 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001151 append_ok = join_append(self,
1152 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001153 &quoted, len == 1);
1154 Py_DECREF(field);
1155 }
1156 else if (field == Py_None) {
1157 append_ok = join_append(self, "", &quoted, len == 1);
1158 Py_DECREF(field);
1159 }
1160 else {
1161 PyObject *str;
1162
1163 str = PyObject_Str(field);
1164 Py_DECREF(field);
1165 if (str == NULL)
1166 return NULL;
1167
Skip Montanaro577c7a72003-04-12 19:17:14 +00001168 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001169 &quoted, len == 1);
1170 Py_DECREF(str);
1171 }
1172 if (!append_ok)
1173 return NULL;
1174 }
1175
1176 /* Add line terminator.
1177 */
1178 if (!join_append_lineterminator(self))
1179 return 0;
1180
1181 return PyObject_CallFunction(self->writeline,
1182 "(s#)", self->rec, self->rec_len);
1183}
1184
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001185PyDoc_STRVAR(csv_writerows_doc,
1186"writerows(sequence of sequences)\n"
1187"\n"
1188"Construct and write a series of sequences to a csv file. Non-string\n"
1189"elements will be converted to string.");
1190
Skip Montanarob4a04172003-03-20 23:29:12 +00001191static PyObject *
1192csv_writerows(WriterObj *self, PyObject *seqseq)
1193{
1194 PyObject *row_iter, *row_obj, *result;
1195
1196 row_iter = PyObject_GetIter(seqseq);
1197 if (row_iter == NULL) {
1198 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001199 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001200 return NULL;
1201 }
1202 while ((row_obj = PyIter_Next(row_iter))) {
1203 result = csv_writerow(self, row_obj);
1204 Py_DECREF(row_obj);
1205 if (!result) {
1206 Py_DECREF(row_iter);
1207 return NULL;
1208 }
1209 else
1210 Py_DECREF(result);
1211 }
1212 Py_DECREF(row_iter);
1213 if (PyErr_Occurred())
1214 return NULL;
1215 Py_INCREF(Py_None);
1216 return Py_None;
1217}
1218
1219static struct PyMethodDef Writer_methods[] = {
1220 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001221 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001222 { NULL, NULL }
1223};
1224
1225#define W_OFF(x) offsetof(WriterObj, x)
1226
1227static struct PyMemberDef Writer_memberlist[] = {
1228 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1229 { NULL }
1230};
1231
1232static void
1233Writer_dealloc(WriterObj *self)
1234{
Andrew McNamara77ead872005-01-10 02:09:41 +00001235 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001236 Py_XDECREF(self->dialect);
1237 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001238 if (self->rec != NULL)
1239 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001240 PyObject_GC_Del(self);
1241}
1242
1243static int
1244Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1245{
1246 int err;
1247#define VISIT(SLOT) \
1248 if (SLOT) { \
1249 err = visit((PyObject *)(SLOT), arg); \
1250 if (err) \
1251 return err; \
1252 }
1253 VISIT(self->dialect);
1254 VISIT(self->writeline);
1255 return 0;
1256}
1257
1258static int
1259Writer_clear(WriterObj *self)
1260{
1261 Py_XDECREF(self->dialect);
1262 Py_XDECREF(self->writeline);
1263 self->dialect = NULL;
1264 self->writeline = NULL;
1265 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001266}
1267
1268PyDoc_STRVAR(Writer_Type_doc,
1269"CSV writer\n"
1270"\n"
1271"Writer objects are responsible for generating tabular data\n"
1272"in CSV format from sequence input.\n"
1273);
1274
1275static PyTypeObject Writer_Type = {
1276 PyObject_HEAD_INIT(NULL)
1277 0, /*ob_size*/
1278 "_csv.writer", /*tp_name*/
1279 sizeof(WriterObj), /*tp_basicsize*/
1280 0, /*tp_itemsize*/
1281 /* methods */
1282 (destructor)Writer_dealloc, /*tp_dealloc*/
1283 (printfunc)0, /*tp_print*/
1284 (getattrfunc)0, /*tp_getattr*/
1285 (setattrfunc)0, /*tp_setattr*/
1286 (cmpfunc)0, /*tp_compare*/
1287 (reprfunc)0, /*tp_repr*/
1288 0, /*tp_as_number*/
1289 0, /*tp_as_sequence*/
1290 0, /*tp_as_mapping*/
1291 (hashfunc)0, /*tp_hash*/
1292 (ternaryfunc)0, /*tp_call*/
1293 (reprfunc)0, /*tp_str*/
1294 0, /*tp_getattro*/
1295 0, /*tp_setattro*/
1296 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001297 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1298 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001299 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001300 (traverseproc)Writer_traverse, /*tp_traverse*/
1301 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001302 0, /*tp_richcompare*/
1303 0, /*tp_weaklistoffset*/
1304 (getiterfunc)0, /*tp_iter*/
1305 (getiterfunc)0, /*tp_iternext*/
1306 Writer_methods, /*tp_methods*/
1307 Writer_memberlist, /*tp_members*/
1308 0, /*tp_getset*/
1309};
1310
1311static PyObject *
1312csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1313{
Andrew McNamara91b97462005-01-11 01:07:23 +00001314 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001315 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001316
1317 if (!self)
1318 return NULL;
1319
1320 self->dialect = NULL;
1321 self->writeline = NULL;
1322
1323 self->rec = NULL;
1324 self->rec_size = 0;
1325 self->rec_len = 0;
1326 self->num_fields = 0;
1327
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001328 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001329 Py_DECREF(self);
1330 return NULL;
1331 }
1332 self->writeline = PyObject_GetAttrString(output_file, "write");
1333 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1334 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001335 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001336 Py_DECREF(self);
1337 return NULL;
1338 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001339 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001340 if (self->dialect == NULL) {
1341 Py_DECREF(self);
1342 return NULL;
1343 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001344 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001345 return (PyObject *)self;
1346}
1347
1348/*
1349 * DIALECT REGISTRY
1350 */
1351static PyObject *
1352csv_list_dialects(PyObject *module, PyObject *args)
1353{
1354 return PyDict_Keys(dialects);
1355}
1356
1357static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001358csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001359{
Andrew McNamara86625972005-01-11 01:28:33 +00001360 PyObject *name_obj, *dialect_obj = NULL;
1361 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001362
Andrew McNamara86625972005-01-11 01:28:33 +00001363 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001364 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001365 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001366 PyErr_SetString(PyExc_TypeError,
1367 "dialect name must be a string or unicode");
1368 return NULL;
1369 }
Andrew McNamara86625972005-01-11 01:28:33 +00001370 dialect = _call_dialect(dialect_obj, kwargs);
1371 if (dialect == NULL)
1372 return NULL;
1373 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1374 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001375 return NULL;
1376 }
Andrew McNamara86625972005-01-11 01:28:33 +00001377 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001378 Py_INCREF(Py_None);
1379 return Py_None;
1380}
1381
1382static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001383csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001384{
Skip Montanarob4a04172003-03-20 23:29:12 +00001385 if (PyDict_DelItem(dialects, name_obj) < 0)
1386 return PyErr_Format(error_obj, "unknown dialect");
1387 Py_INCREF(Py_None);
1388 return Py_None;
1389}
1390
1391static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001392csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001393{
Skip Montanarob4a04172003-03-20 23:29:12 +00001394 return get_dialect_from_registry(name_obj);
1395}
1396
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001397static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001398csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001399{
1400 PyObject *new_limit = NULL;
1401 long old_limit = field_limit;
1402
Andrew McNamara31d88962005-01-12 03:45:10 +00001403 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001404 return NULL;
1405 if (new_limit != NULL) {
1406 if (!PyInt_Check(new_limit)) {
1407 PyErr_Format(PyExc_TypeError,
1408 "limit must be an integer");
1409 return NULL;
1410 }
1411 field_limit = PyInt_AsLong(new_limit);
1412 }
1413 return PyInt_FromLong(old_limit);
1414}
1415
Skip Montanarob4a04172003-03-20 23:29:12 +00001416/*
1417 * MODULE
1418 */
1419
1420PyDoc_STRVAR(csv_module_doc,
1421"CSV parsing and writing.\n"
1422"\n"
1423"This module provides classes that assist in the reading and writing\n"
1424"of Comma Separated Value (CSV) files, and implements the interface\n"
1425"described by PEP 305. Although many CSV files are simple to parse,\n"
1426"the format is not formally defined by a stable specification and\n"
1427"is subtle enough that parsing lines of a CSV file with something\n"
1428"like line.split(\",\") is bound to fail. The module supports three\n"
1429"basic APIs: reading, writing, and registration of dialects.\n"
1430"\n"
1431"\n"
1432"DIALECT REGISTRATION:\n"
1433"\n"
1434"Readers and writers support a dialect argument, which is a convenient\n"
1435"handle on a group of settings. When the dialect argument is a string,\n"
1436"it identifies one of the dialects previously registered with the module.\n"
1437"If it is a class or instance, the attributes of the argument are used as\n"
1438"the settings for the reader or writer:\n"
1439"\n"
1440" class excel:\n"
1441" delimiter = ','\n"
1442" quotechar = '\"'\n"
1443" escapechar = None\n"
1444" doublequote = True\n"
1445" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001446" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001447" quoting = QUOTE_MINIMAL\n"
1448"\n"
1449"SETTINGS:\n"
1450"\n"
1451" * quotechar - specifies a one-character string to use as the \n"
1452" quoting character. It defaults to '\"'.\n"
1453" * delimiter - specifies a one-character string to use as the \n"
1454" field separator. It defaults to ','.\n"
1455" * skipinitialspace - specifies how to interpret whitespace which\n"
1456" immediately follows a delimiter. It defaults to False, which\n"
1457" means that whitespace immediately following a delimiter is part\n"
1458" of the following field.\n"
1459" * lineterminator - specifies the character sequence which should \n"
1460" terminate rows.\n"
1461" * quoting - controls when quotes should be generated by the writer.\n"
1462" It can take on any of the following module constants:\n"
1463"\n"
1464" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1465" field contains either the quotechar or the delimiter\n"
1466" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1467" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001468" fields which do not parse as integers or floating point\n"
1469" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001470" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1471" * escapechar - specifies a one-character string used to escape \n"
1472" the delimiter when quoting is set to QUOTE_NONE.\n"
1473" * doublequote - controls the handling of quotes inside fields. When\n"
1474" True, two consecutive quotes are interpreted as one during read,\n"
1475" and when writing, each quote character embedded in the data is\n"
1476" written as two quotes\n");
1477
1478PyDoc_STRVAR(csv_reader_doc,
1479" csv_reader = reader(iterable [, dialect='excel']\n"
1480" [optional keyword args])\n"
1481" for row in csv_reader:\n"
1482" process(row)\n"
1483"\n"
1484"The \"iterable\" argument can be any object that returns a line\n"
1485"of input for each iteration, such as a file object or a list. The\n"
1486"optional \"dialect\" parameter is discussed below. The function\n"
1487"also accepts optional keyword arguments which override settings\n"
1488"provided by the dialect.\n"
1489"\n"
1490"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001491"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001492
1493PyDoc_STRVAR(csv_writer_doc,
1494" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1495" [optional keyword args])\n"
1496" for row in csv_writer:\n"
1497" csv_writer.writerow(row)\n"
1498"\n"
1499" [or]\n"
1500"\n"
1501" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1502" [optional keyword args])\n"
1503" csv_writer.writerows(rows)\n"
1504"\n"
1505"The \"fileobj\" argument can be any object that supports the file API.\n");
1506
1507PyDoc_STRVAR(csv_list_dialects_doc,
1508"Return a list of all know dialect names.\n"
1509" names = csv.list_dialects()");
1510
1511PyDoc_STRVAR(csv_get_dialect_doc,
1512"Return the dialect instance associated with name.\n"
1513" dialect = csv.get_dialect(name)");
1514
1515PyDoc_STRVAR(csv_register_dialect_doc,
1516"Create a mapping from a string name to a dialect class.\n"
1517" dialect = csv.register_dialect(name, dialect)");
1518
1519PyDoc_STRVAR(csv_unregister_dialect_doc,
1520"Delete the name/dialect mapping associated with a string name.\n"
1521" csv.unregister_dialect(name)");
1522
Andrew McNamara31d88962005-01-12 03:45:10 +00001523PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001524"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001525" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001526"\n"
1527"Returns old limit. If limit is not given, no new limit is set and\n"
1528"the old limit is returned");
1529
Skip Montanarob4a04172003-03-20 23:29:12 +00001530static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001531 { "reader", (PyCFunction)csv_reader,
1532 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1533 { "writer", (PyCFunction)csv_writer,
1534 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1535 { "list_dialects", (PyCFunction)csv_list_dialects,
1536 METH_NOARGS, csv_list_dialects_doc},
1537 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001538 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001539 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1540 METH_O, csv_unregister_dialect_doc},
1541 { "get_dialect", (PyCFunction)csv_get_dialect,
1542 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001543 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1544 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001545 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001546};
1547
1548PyMODINIT_FUNC
1549init_csv(void)
1550{
1551 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001552 StyleDesc *style;
1553
1554 if (PyType_Ready(&Dialect_Type) < 0)
1555 return;
1556
1557 if (PyType_Ready(&Reader_Type) < 0)
1558 return;
1559
1560 if (PyType_Ready(&Writer_Type) < 0)
1561 return;
1562
1563 /* Create the module and add the functions */
1564 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1565 if (module == NULL)
1566 return;
1567
1568 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001569 if (PyModule_AddStringConstant(module, "__version__",
1570 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001571 return;
1572
1573 /* Add _dialects dictionary */
1574 dialects = PyDict_New();
1575 if (dialects == NULL)
1576 return;
1577 if (PyModule_AddObject(module, "_dialects", dialects))
1578 return;
1579
1580 /* Add quote styles into dictionary */
1581 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001582 if (PyModule_AddIntConstant(module, style->name,
1583 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001584 return;
1585 }
1586
1587 /* Add the Dialect type */
1588 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1589 return;
1590
1591 /* Add the CSV exception object to the module. */
1592 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1593 if (error_obj == NULL)
1594 return;
1595 PyModule_AddObject(module, "Error", error_obj);
1596}