blob: 4c3a3adcd7283dd731f92de0aaf35a609fb7fe9f [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module. Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
Skip Montanarob4a04172003-03-20 23:29:12 +000021/* begin 2.2 compatibility macros */
22#ifndef PyDoc_STRVAR
23/* Define macros for inline documentation. */
24#define PyDoc_VAR(name) static char name[]
25#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
26#ifdef WITH_DOC_STRINGS
27#define PyDoc_STR(str) str
28#else
29#define PyDoc_STR(str) ""
30#endif
31#endif /* ifndef PyDoc_STRVAR */
32
33#ifndef PyMODINIT_FUNC
34# if defined(__cplusplus)
35# define PyMODINIT_FUNC extern "C" void
36# else /* __cplusplus */
37# define PyMODINIT_FUNC void
38# endif /* __cplusplus */
39#endif
40/* end 2.2 compatibility macros */
41
42static PyObject *error_obj; /* CSV exception */
43static PyObject *dialects; /* Dialect registry */
44
45typedef enum {
46 START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
47 IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD
48} ParserState;
49
50typedef enum {
51 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
52} QuoteStyle;
53
54typedef struct {
55 QuoteStyle style;
56 char *name;
57} StyleDesc;
58
59static StyleDesc quote_styles[] = {
60 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
61 { QUOTE_ALL, "QUOTE_ALL" },
62 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
63 { QUOTE_NONE, "QUOTE_NONE" },
64 { 0 }
65};
66
67typedef struct {
68 PyObject_HEAD
69
70 int doublequote; /* is " represented by ""? */
71 char delimiter; /* field separator */
72 char quotechar; /* quote character */
73 char escapechar; /* escape character */
74 int skipinitialspace; /* ignore spaces following delimiter? */
75 PyObject *lineterminator; /* string to write between records */
76 QuoteStyle quoting; /* style of quoting to write */
77
78 int strict; /* raise exception on bad CSV */
79} DialectObj;
80
81staticforward PyTypeObject Dialect_Type;
82
83typedef struct {
84 PyObject_HEAD
85
86 PyObject *input_iter; /* iterate over this for input lines */
87
88 DialectObj *dialect; /* parsing dialect */
89
90 PyObject *fields; /* field list for current record */
91 ParserState state; /* current CSV parse state */
92 char *field; /* build current field in here */
93 int field_size; /* size of allocated buffer */
94 int field_len; /* length of current field */
95 int had_parse_error; /* did we have a parse error? */
96} ReaderObj;
97
98staticforward PyTypeObject Reader_Type;
99
100#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
101
102typedef struct {
103 PyObject_HEAD
104
105 PyObject *writeline; /* write output lines to this file */
106
107 DialectObj *dialect; /* parsing dialect */
108
109 char *rec; /* buffer for parser.join */
110 int rec_size; /* size of allocated record */
111 int rec_len; /* length of record */
112 int num_fields; /* number of fields in record */
113} WriterObj;
114
115staticforward PyTypeObject Writer_Type;
116
117/*
118 * DIALECT class
119 */
120
121static PyObject *
122get_dialect_from_registry(PyObject * name_obj)
123{
124 PyObject *dialect_obj;
125
126 dialect_obj = PyDict_GetItem(dialects, name_obj);
127 if (dialect_obj == NULL)
128 return PyErr_Format(error_obj, "unknown dialect");
129 Py_INCREF(dialect_obj);
130 return dialect_obj;
131}
132
133static int
134check_delattr(PyObject *v)
135{
136 if (v == NULL) {
137 PyErr_SetString(PyExc_TypeError,
138 "Cannot delete attribute");
139 return -1;
140 }
141 return 0;
142}
143
144static PyObject *
145get_string(PyObject *str)
146{
147 Py_XINCREF(str);
148 return str;
149}
150
151static int
152set_string(PyObject **str, PyObject *v)
153{
154 if (check_delattr(v) < 0)
155 return -1;
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000156 if (!PyString_Check(v)
157#ifdef Py_USING_UNICODE
158&& !PyUnicode_Check(v)
159#endif
160) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 PyErr_BadArgument();
162 return -1;
163 }
164 Py_XDECREF(*str);
165 Py_INCREF(v);
166 *str = v;
167 return 0;
168}
169
170static PyObject *
171get_nullchar_as_None(char c)
172{
173 if (c == '\0') {
174 Py_INCREF(Py_None);
175 return Py_None;
176 }
177 else
178 return PyString_FromStringAndSize((char*)&c, 1);
179}
180
181static int
182set_None_as_nullchar(char * addr, PyObject *v)
183{
184 if (check_delattr(v) < 0)
185 return -1;
186 if (v == Py_None)
187 *addr = '\0';
188 else if (!PyString_Check(v) || PyString_Size(v) != 1) {
189 PyErr_BadArgument();
190 return -1;
191 }
Skip Montanaro577c7a72003-04-12 19:17:14 +0000192 else {
193 char *s = PyString_AsString(v);
194 if (s == NULL)
195 return -1;
196 *addr = s[0];
197 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000198 return 0;
199}
200
201static PyObject *
202Dialect_get_lineterminator(DialectObj *self)
203{
204 return get_string(self->lineterminator);
205}
206
207static int
208Dialect_set_lineterminator(DialectObj *self, PyObject *value)
209{
210 return set_string(&self->lineterminator, value);
211}
212
213static PyObject *
214Dialect_get_escapechar(DialectObj *self)
215{
216 return get_nullchar_as_None(self->escapechar);
217}
218
219static int
220Dialect_set_escapechar(DialectObj *self, PyObject *value)
221{
222 return set_None_as_nullchar(&self->escapechar, value);
223}
224
225static PyObject *
226Dialect_get_quoting(DialectObj *self)
227{
228 return PyInt_FromLong(self->quoting);
229}
230
231static int
232Dialect_set_quoting(DialectObj *self, PyObject *v)
233{
234 int quoting;
235 StyleDesc *qs = quote_styles;
236
237 if (check_delattr(v) < 0)
238 return -1;
239 if (!PyInt_Check(v)) {
240 PyErr_BadArgument();
241 return -1;
242 }
243 quoting = PyInt_AsLong(v);
244 for (qs = quote_styles; qs->name; qs++) {
245 if (qs->style == quoting) {
246 self->quoting = quoting;
247 return 0;
248 }
249 }
250 PyErr_BadArgument();
251 return -1;
252}
253
254static struct PyMethodDef Dialect_methods[] = {
255 { NULL, NULL }
256};
257
258#define D_OFF(x) offsetof(DialectObj, x)
259
260static struct PyMemberDef Dialect_memberlist[] = {
261 { "quotechar", T_CHAR, D_OFF(quotechar) },
262 { "delimiter", T_CHAR, D_OFF(delimiter) },
263 { "skipinitialspace", T_INT, D_OFF(skipinitialspace) },
264 { "doublequote", T_INT, D_OFF(doublequote) },
265 { "strict", T_INT, D_OFF(strict) },
266 { NULL }
267};
268
269static PyGetSetDef Dialect_getsetlist[] = {
270 { "escapechar", (getter)Dialect_get_escapechar,
271 (setter)Dialect_set_escapechar },
272 { "lineterminator", (getter)Dialect_get_lineterminator,
273 (setter)Dialect_set_lineterminator },
274 { "quoting", (getter)Dialect_get_quoting,
275 (setter)Dialect_set_quoting },
276 {NULL},
277};
278
279static void
280Dialect_dealloc(DialectObj *self)
281{
282 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000283 self->ob_type->tp_free((PyObject *)self);
284}
285
286static int
287dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
288{
289 PyObject *dialect = NULL, *name_obj, *value_obj;
290
291 self->quotechar = '"';
292 self->delimiter = ',';
293 self->escapechar = '\0';
294 self->skipinitialspace = 0;
295 Py_XDECREF(self->lineterminator);
296 self->lineterminator = PyString_FromString("\r\n");
297 if (self->lineterminator == NULL)
298 return -1;
299 self->quoting = QUOTE_MINIMAL;
300 self->doublequote = 1;
301 self->strict = 0;
302
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000303 if (!PyArg_UnpackTuple(args, "", 0, 1, &dialect))
Skip Montanarob4a04172003-03-20 23:29:12 +0000304 return -1;
305 Py_XINCREF(dialect);
306 if (kwargs != NULL) {
307 PyObject * key = PyString_FromString("dialect");
308 PyObject * d;
309
310 d = PyDict_GetItem(kwargs, key);
311 if (d) {
312 Py_INCREF(d);
313 Py_XDECREF(dialect);
314 PyDict_DelItem(kwargs, key);
315 dialect = d;
316 }
317 Py_DECREF(key);
318 }
319 if (dialect != NULL) {
320 int i;
321 PyObject * dir_list;
322
323 /* If dialect is a string, look it up in our registry */
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000324 if (PyString_Check(dialect)
325#ifdef Py_USING_UNICODE
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000326 || PyUnicode_Check(dialect)
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000327#endif
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000328 ) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000329 PyObject * new_dia;
330 new_dia = get_dialect_from_registry(dialect);
331 Py_DECREF(dialect);
332 if (new_dia == NULL)
333 return -1;
334 dialect = new_dia;
335 }
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000336 /* A class rather than an instance? Instantiate */
Skip Montanarob4a04172003-03-20 23:29:12 +0000337 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
338 PyObject * new_dia;
339 new_dia = PyObject_CallFunction(dialect, "");
340 Py_DECREF(dialect);
341 if (new_dia == NULL)
342 return -1;
343 dialect = new_dia;
344 }
345 /* Make sure we finally have an instance */
346 if (!PyInstance_Check(dialect) ||
347 (dir_list = PyObject_Dir(dialect)) == NULL) {
348 PyErr_SetString(PyExc_TypeError,
349 "dialect must be an instance");
350 Py_DECREF(dialect);
351 return -1;
352 }
353 /* And extract the attributes */
354 for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
Tim Peters38fc8372003-04-13 03:25:15 +0000355 char *s;
Skip Montanarob4a04172003-03-20 23:29:12 +0000356 name_obj = PyList_GET_ITEM(dir_list, i);
Tim Peters38fc8372003-04-13 03:25:15 +0000357 s = PyString_AsString(name_obj);
Skip Montanaro577c7a72003-04-12 19:17:14 +0000358 if (s == NULL)
359 return -1;
360 if (s[0] == '_')
Skip Montanarob4a04172003-03-20 23:29:12 +0000361 continue;
362 value_obj = PyObject_GetAttr(dialect, name_obj);
363 if (value_obj) {
364 if (PyObject_SetAttr((PyObject *)self,
365 name_obj, value_obj)) {
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000366 Py_DECREF(value_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000367 Py_DECREF(dir_list);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000368 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +0000369 return -1;
370 }
371 Py_DECREF(value_obj);
372 }
373 }
374 Py_DECREF(dir_list);
375 Py_DECREF(dialect);
376 }
377 if (kwargs != NULL) {
378 int pos = 0;
379
380 while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
381 if (PyObject_SetAttr((PyObject *)self,
382 name_obj, value_obj))
383 return -1;
384 }
385 }
386 return 0;
387}
388
389static PyObject *
390dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
391{
392 DialectObj *self;
393 self = (DialectObj *)type->tp_alloc(type, 0);
394 if (self != NULL) {
395 self->lineterminator = NULL;
396 }
397 return (PyObject *)self;
398}
399
400
401PyDoc_STRVAR(Dialect_Type_doc,
402"CSV dialect\n"
403"\n"
404"The Dialect type records CSV parsing and generation options.\n");
405
406static PyTypeObject Dialect_Type = {
407 PyObject_HEAD_INIT(NULL)
408 0, /* ob_size */
409 "_csv.Dialect", /* tp_name */
410 sizeof(DialectObj), /* tp_basicsize */
411 0, /* tp_itemsize */
412 /* methods */
413 (destructor)Dialect_dealloc, /* tp_dealloc */
414 (printfunc)0, /* tp_print */
415 (getattrfunc)0, /* tp_getattr */
416 (setattrfunc)0, /* tp_setattr */
417 (cmpfunc)0, /* tp_compare */
418 (reprfunc)0, /* tp_repr */
419 0, /* tp_as_number */
420 0, /* tp_as_sequence */
421 0, /* tp_as_mapping */
422 (hashfunc)0, /* tp_hash */
423 (ternaryfunc)0, /* tp_call */
424 (reprfunc)0, /* tp_str */
425 0, /* tp_getattro */
426 0, /* tp_setattro */
427 0, /* tp_as_buffer */
428 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
429 Dialect_Type_doc, /* tp_doc */
430 0, /* tp_traverse */
431 0, /* tp_clear */
432 0, /* tp_richcompare */
433 0, /* tp_weaklistoffset */
434 0, /* tp_iter */
435 0, /* tp_iternext */
436 Dialect_methods, /* tp_methods */
437 Dialect_memberlist, /* tp_members */
438 Dialect_getsetlist, /* tp_getset */
439 0, /* tp_base */
440 0, /* tp_dict */
441 0, /* tp_descr_get */
442 0, /* tp_descr_set */
443 0, /* tp_dictoffset */
444 (initproc)dialect_init, /* tp_init */
445 PyType_GenericAlloc, /* tp_alloc */
446 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000447 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000448};
449
450static void
451parse_save_field(ReaderObj *self)
452{
453 PyObject *field;
454
455 field = PyString_FromStringAndSize(self->field, self->field_len);
456 if (field != NULL) {
457 PyList_Append(self->fields, field);
458 Py_XDECREF(field);
459 }
460 self->field_len = 0;
461}
462
463static int
464parse_grow_buff(ReaderObj *self)
465{
466 if (self->field_size == 0) {
467 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000468 if (self->field != NULL)
469 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000470 self->field = PyMem_Malloc(self->field_size);
471 }
472 else {
473 self->field_size *= 2;
474 self->field = PyMem_Realloc(self->field, self->field_size);
475 }
476 if (self->field == NULL) {
477 PyErr_NoMemory();
478 return 0;
479 }
480 return 1;
481}
482
483static void
484parse_add_char(ReaderObj *self, char c)
485{
486 if (self->field_len == self->field_size && !parse_grow_buff(self))
487 return;
488 self->field[self->field_len++] = c;
489}
490
491static void
492parse_process_char(ReaderObj *self, char c)
493{
494 DialectObj *dialect = self->dialect;
495
496 switch (self->state) {
497 case START_RECORD:
498 /* start of record */
499 if (c == '\n')
500 /* empty line - return [] */
501 break;
502 /* normal character - handle as START_FIELD */
503 self->state = START_FIELD;
504 /* fallthru */
505 case START_FIELD:
506 /* expecting field */
507 if (c == '\n') {
508 /* save empty field - return [fields] */
509 parse_save_field(self);
510 self->state = START_RECORD;
511 }
512 else if (c == dialect->quotechar) {
513 /* start quoted field */
514 self->state = IN_QUOTED_FIELD;
515 }
516 else if (c == dialect->escapechar) {
517 /* possible escaped character */
518 self->state = ESCAPED_CHAR;
519 }
520 else if (c == ' ' && dialect->skipinitialspace)
521 /* ignore space at start of field */
522 ;
523 else if (c == dialect->delimiter) {
524 /* save empty field */
525 parse_save_field(self);
526 }
527 else {
528 /* begin new unquoted field */
529 parse_add_char(self, c);
530 self->state = IN_FIELD;
531 }
532 break;
533
534 case ESCAPED_CHAR:
535 if (c != dialect->escapechar &&
536 c != dialect->delimiter &&
537 c != dialect->quotechar)
538 parse_add_char(self, dialect->escapechar);
539 parse_add_char(self, c);
540 self->state = IN_FIELD;
541 break;
542
543 case IN_FIELD:
544 /* in unquoted field */
545 if (c == '\n') {
546 /* end of line - return [fields] */
547 parse_save_field(self);
548 self->state = START_RECORD;
549 }
550 else if (c == dialect->escapechar) {
551 /* possible escaped character */
552 self->state = ESCAPED_CHAR;
553 }
554 else if (c == dialect->delimiter) {
555 /* save field - wait for new field */
556 parse_save_field(self);
557 self->state = START_FIELD;
558 }
559 else {
560 /* normal character - save in field */
561 parse_add_char(self, c);
562 }
563 break;
564
565 case IN_QUOTED_FIELD:
566 /* in quoted field */
567 if (c == '\n') {
568 /* end of line - save '\n' in field */
569 parse_add_char(self, '\n');
570 }
571 else if (c == dialect->escapechar) {
572 /* Possible escape character */
573 self->state = ESCAPE_IN_QUOTED_FIELD;
574 }
575 else if (c == dialect->quotechar) {
576 if (dialect->doublequote) {
577 /* doublequote; " represented by "" */
578 self->state = QUOTE_IN_QUOTED_FIELD;
579 }
580 else {
581 /* end of quote part of field */
582 self->state = IN_FIELD;
583 }
584 }
585 else {
586 /* normal character - save in field */
587 parse_add_char(self, c);
588 }
589 break;
590
591 case ESCAPE_IN_QUOTED_FIELD:
592 if (c != dialect->escapechar &&
593 c != dialect->delimiter &&
594 c != dialect->quotechar)
595 parse_add_char(self, dialect->escapechar);
596 parse_add_char(self, c);
597 self->state = IN_QUOTED_FIELD;
598 break;
599
600 case QUOTE_IN_QUOTED_FIELD:
601 /* doublequote - seen a quote in an quoted field */
602 if (dialect->quoting != QUOTE_NONE &&
603 c == dialect->quotechar) {
604 /* save "" as " */
605 parse_add_char(self, c);
606 self->state = IN_QUOTED_FIELD;
607 }
608 else if (c == dialect->delimiter) {
609 /* save field - wait for new field */
610 parse_save_field(self);
611 self->state = START_FIELD;
612 }
613 else if (c == '\n') {
614 /* end of line - return [fields] */
615 parse_save_field(self);
616 self->state = START_RECORD;
617 }
618 else if (!dialect->strict) {
619 parse_add_char(self, c);
620 self->state = IN_FIELD;
621 }
622 else {
623 /* illegal */
624 self->had_parse_error = 1;
625 PyErr_Format(error_obj, "%c expected after %c",
626 dialect->delimiter,
627 dialect->quotechar);
628 }
629 break;
630
631 }
632}
633
634/*
635 * READER
636 */
637#define R_OFF(x) offsetof(ReaderObj, x)
638
639static struct PyMemberDef Reader_memberlist[] = {
640 { "dialect", T_OBJECT, R_OFF(dialect), RO },
641 { NULL }
642};
643
644static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000645Reader_iternext(ReaderObj *self)
646{
647 PyObject *lineobj;
648 PyObject *fields;
649 char *line;
650
651 do {
652 lineobj = PyIter_Next(self->input_iter);
653 if (lineobj == NULL) {
654 /* End of input OR exception */
655 if (!PyErr_Occurred() && self->field_len != 0)
656 return PyErr_Format(error_obj,
657 "newline inside string");
658 return NULL;
659 }
660
661 if (self->had_parse_error) {
662 if (self->fields) {
663 Py_XDECREF(self->fields);
664 }
665 self->fields = PyList_New(0);
666 self->field_len = 0;
667 self->state = START_RECORD;
668 self->had_parse_error = 0;
669 }
670 line = PyString_AsString(lineobj);
671
672 if (line == NULL) {
673 Py_DECREF(lineobj);
674 return NULL;
675 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000676 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000677 self->had_parse_error = 1;
678 Py_DECREF(lineobj);
679 return PyErr_Format(error_obj,
680 "string with NUL bytes");
681 }
682
683 /* Process line of text - send '\n' to processing code to
684 represent end of line. End of line which is not at end of
685 string is an error. */
686 while (*line) {
687 char c;
688
689 c = *line++;
690 if (c == '\r') {
691 c = *line++;
692 if (c == '\0')
693 /* macintosh end of line */
694 break;
695 if (c == '\n') {
696 c = *line++;
697 if (c == '\0')
698 /* DOS end of line */
699 break;
700 }
701 self->had_parse_error = 1;
702 Py_DECREF(lineobj);
703 return PyErr_Format(error_obj,
704 "newline inside string");
705 }
706 if (c == '\n') {
707 c = *line++;
708 if (c == '\0')
709 /* unix end of line */
710 break;
711 self->had_parse_error = 1;
712 Py_DECREF(lineobj);
713 return PyErr_Format(error_obj,
714 "newline inside string");
715 }
716 parse_process_char(self, c);
717 if (PyErr_Occurred()) {
718 Py_DECREF(lineobj);
719 return NULL;
720 }
721 }
722 parse_process_char(self, '\n');
723 Py_DECREF(lineobj);
724 } while (self->state != START_RECORD);
725
726 fields = self->fields;
727 self->fields = PyList_New(0);
728 return fields;
729}
730
731static void
732Reader_dealloc(ReaderObj *self)
733{
734 Py_XDECREF(self->dialect);
735 Py_XDECREF(self->input_iter);
736 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000737 if (self->field != NULL)
738 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000739 PyObject_GC_Del(self);
740}
741
742static int
743Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
744{
745 int err;
746#define VISIT(SLOT) \
747 if (SLOT) { \
748 err = visit((PyObject *)(SLOT), arg); \
749 if (err) \
750 return err; \
751 }
752 VISIT(self->dialect);
753 VISIT(self->input_iter);
754 VISIT(self->fields);
755 return 0;
756}
757
758static int
759Reader_clear(ReaderObj *self)
760{
761 Py_XDECREF(self->dialect);
762 Py_XDECREF(self->input_iter);
763 Py_XDECREF(self->fields);
764 self->dialect = NULL;
765 self->input_iter = NULL;
766 self->fields = NULL;
767 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000768}
769
770PyDoc_STRVAR(Reader_Type_doc,
771"CSV reader\n"
772"\n"
773"Reader objects are responsible for reading and parsing tabular data\n"
774"in CSV format.\n"
775);
776
777static struct PyMethodDef Reader_methods[] = {
778 { NULL, NULL }
779};
780
781static PyTypeObject Reader_Type = {
782 PyObject_HEAD_INIT(NULL)
783 0, /*ob_size*/
784 "_csv.reader", /*tp_name*/
785 sizeof(ReaderObj), /*tp_basicsize*/
786 0, /*tp_itemsize*/
787 /* methods */
788 (destructor)Reader_dealloc, /*tp_dealloc*/
789 (printfunc)0, /*tp_print*/
790 (getattrfunc)0, /*tp_getattr*/
791 (setattrfunc)0, /*tp_setattr*/
792 (cmpfunc)0, /*tp_compare*/
793 (reprfunc)0, /*tp_repr*/
794 0, /*tp_as_number*/
795 0, /*tp_as_sequence*/
796 0, /*tp_as_mapping*/
797 (hashfunc)0, /*tp_hash*/
798 (ternaryfunc)0, /*tp_call*/
799 (reprfunc)0, /*tp_str*/
800 0, /*tp_getattro*/
801 0, /*tp_setattro*/
802 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000803 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
804 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000805 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000806 (traverseproc)Reader_traverse, /*tp_traverse*/
807 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000808 0, /*tp_richcompare*/
809 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000810 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000811 (getiterfunc)Reader_iternext, /*tp_iternext*/
812 Reader_methods, /*tp_methods*/
813 Reader_memberlist, /*tp_members*/
814 0, /*tp_getset*/
815
816};
817
818static PyObject *
819csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
820{
821 PyObject * iterator, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000822 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000823
824 if (!self)
825 return NULL;
826
827 self->dialect = NULL;
828 self->input_iter = self->fields = NULL;
829
830 self->fields = NULL;
831 self->input_iter = NULL;
832 self->had_parse_error = 0;
833 self->field = NULL;
834 self->field_size = 0;
835 self->field_len = 0;
836 self->state = START_RECORD;
837
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000838 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000839 Py_DECREF(self);
840 return NULL;
841 }
842 self->input_iter = PyObject_GetIter(iterator);
843 if (self->input_iter == NULL) {
844 PyErr_SetString(PyExc_TypeError,
845 "argument 1 must be an iterator");
846 Py_DECREF(self);
847 return NULL;
848 }
849 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
850 if (ctor_args == NULL) {
851 Py_DECREF(self);
852 return NULL;
853 }
854 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
855 ctor_args, keyword_args);
856 Py_DECREF(ctor_args);
857 if (self->dialect == NULL) {
858 Py_DECREF(self);
859 return NULL;
860 }
861 self->fields = PyList_New(0);
862 if (self->fields == NULL) {
863 Py_DECREF(self);
864 return NULL;
865 }
866
867 return (PyObject *)self;
868}
869
870/*
871 * WRITER
872 */
873/* ---------------------------------------------------------------- */
874static void
875join_reset(WriterObj *self)
876{
877 self->rec_len = 0;
878 self->num_fields = 0;
879}
880
881#define MEM_INCR 32768
882
883/* Calculate new record length or append field to record. Return new
884 * record length.
885 */
886static int
887join_append_data(WriterObj *self, char *field, int quote_empty,
888 int *quoted, int copy_phase)
889{
890 DialectObj *dialect = self->dialect;
891 int i, rec_len;
892
893 rec_len = self->rec_len;
894
895 /* If this is not the first field we need a field separator.
896 */
897 if (self->num_fields > 0) {
898 if (copy_phase)
899 self->rec[rec_len] = dialect->delimiter;
900 rec_len++;
901 }
902 /* Handle preceding quote.
903 */
904 switch (dialect->quoting) {
905 case QUOTE_ALL:
906 *quoted = 1;
907 if (copy_phase)
908 self->rec[rec_len] = dialect->quotechar;
909 rec_len++;
910 break;
911 case QUOTE_MINIMAL:
912 case QUOTE_NONNUMERIC:
913 /* We only know about quoted in the copy phase.
914 */
915 if (copy_phase && *quoted) {
916 self->rec[rec_len] = dialect->quotechar;
917 rec_len++;
918 }
919 break;
920 case QUOTE_NONE:
921 break;
922 }
923 /* Copy/count field data.
924 */
925 for (i = 0;; i++) {
926 char c = field[i];
927
928 if (c == '\0')
929 break;
930 /* If in doublequote mode we escape quote chars with a
931 * quote.
932 */
933 if (dialect->quoting != QUOTE_NONE &&
934 c == dialect->quotechar && dialect->doublequote) {
935 if (copy_phase)
936 self->rec[rec_len] = dialect->quotechar;
937 *quoted = 1;
938 rec_len++;
939 }
940
941 /* Some special characters need to be escaped. If we have a
942 * quote character switch to quoted field instead of escaping
943 * individual characters.
944 */
945 if (!*quoted
946 && (c == dialect->delimiter ||
947 c == dialect->escapechar ||
948 c == '\n' || c == '\r')) {
949 if (dialect->quoting != QUOTE_NONE)
950 *quoted = 1;
951 else if (dialect->escapechar) {
952 if (copy_phase)
953 self->rec[rec_len] = dialect->escapechar;
954 rec_len++;
955 }
956 else {
957 PyErr_Format(error_obj,
958 "delimiter must be quoted or escaped");
959 return -1;
960 }
961 }
962 /* Copy field character into record buffer.
963 */
964 if (copy_phase)
965 self->rec[rec_len] = c;
966 rec_len++;
967 }
968
969 /* If field is empty check if it needs to be quoted.
970 */
971 if (i == 0 && quote_empty) {
972 if (dialect->quoting == QUOTE_NONE) {
973 PyErr_Format(error_obj,
974 "single empty field record must be quoted");
975 return -1;
976 } else
977 *quoted = 1;
978 }
979
980 /* Handle final quote character on field.
981 */
982 if (*quoted) {
983 if (copy_phase)
984 self->rec[rec_len] = dialect->quotechar;
985 else
986 /* Didn't know about leading quote until we found it
987 * necessary in field data - compensate for it now.
988 */
989 rec_len++;
990 rec_len++;
991 }
992
993 return rec_len;
994}
995
996static int
997join_check_rec_size(WriterObj *self, int rec_len)
998{
999 if (rec_len > self->rec_size) {
1000 if (self->rec_size == 0) {
1001 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001002 if (self->rec != NULL)
1003 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001004 self->rec = PyMem_Malloc(self->rec_size);
1005 }
1006 else {
1007 char *old_rec = self->rec;
1008
1009 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1010 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1011 if (self->rec == NULL)
1012 PyMem_Free(old_rec);
1013 }
1014 if (self->rec == NULL) {
1015 PyErr_NoMemory();
1016 return 0;
1017 }
1018 }
1019 return 1;
1020}
1021
1022static int
1023join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1024{
1025 int rec_len;
1026
1027 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1028 if (rec_len < 0)
1029 return 0;
1030
1031 /* grow record buffer if necessary */
1032 if (!join_check_rec_size(self, rec_len))
1033 return 0;
1034
1035 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1036 self->num_fields++;
1037
1038 return 1;
1039}
1040
1041static int
1042join_append_lineterminator(WriterObj *self)
1043{
1044 int terminator_len;
1045
1046 terminator_len = PyString_Size(self->dialect->lineterminator);
1047
1048 /* grow record buffer if necessary */
1049 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1050 return 0;
1051
1052 memmove(self->rec + self->rec_len,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001053 /* should not be NULL */
Skip Montanarob4a04172003-03-20 23:29:12 +00001054 PyString_AsString(self->dialect->lineterminator),
1055 terminator_len);
1056 self->rec_len += terminator_len;
1057
1058 return 1;
1059}
1060
1061PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001062"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001063"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001064"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001065"elements will be converted to string.");
1066
1067static PyObject *
1068csv_writerow(WriterObj *self, PyObject *seq)
1069{
1070 DialectObj *dialect = self->dialect;
1071 int len, i;
1072
1073 if (!PySequence_Check(seq))
1074 return PyErr_Format(error_obj, "sequence expected");
1075
1076 len = PySequence_Length(seq);
1077 if (len < 0)
1078 return NULL;
1079
1080 /* Join all fields in internal buffer.
1081 */
1082 join_reset(self);
1083 for (i = 0; i < len; i++) {
1084 PyObject *field;
1085 int append_ok;
1086 int quoted;
1087
1088 field = PySequence_GetItem(seq, i);
1089 if (field == NULL)
1090 return NULL;
1091
1092 quoted = 0;
1093 if (dialect->quoting == QUOTE_NONNUMERIC) {
1094 PyObject *num;
1095
1096 num = PyNumber_Float(field);
1097 if (num == NULL) {
1098 quoted = 1;
1099 PyErr_Clear();
1100 }
1101 else {
1102 Py_DECREF(num);
1103 }
1104 }
1105
1106 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001107 append_ok = join_append(self,
1108 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001109 &quoted, len == 1);
1110 Py_DECREF(field);
1111 }
1112 else if (field == Py_None) {
1113 append_ok = join_append(self, "", &quoted, len == 1);
1114 Py_DECREF(field);
1115 }
1116 else {
1117 PyObject *str;
1118
1119 str = PyObject_Str(field);
1120 Py_DECREF(field);
1121 if (str == NULL)
1122 return NULL;
1123
Skip Montanaro577c7a72003-04-12 19:17:14 +00001124 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001125 &quoted, len == 1);
1126 Py_DECREF(str);
1127 }
1128 if (!append_ok)
1129 return NULL;
1130 }
1131
1132 /* Add line terminator.
1133 */
1134 if (!join_append_lineterminator(self))
1135 return 0;
1136
1137 return PyObject_CallFunction(self->writeline,
1138 "(s#)", self->rec, self->rec_len);
1139}
1140
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001141PyDoc_STRVAR(csv_writerows_doc,
1142"writerows(sequence of sequences)\n"
1143"\n"
1144"Construct and write a series of sequences to a csv file. Non-string\n"
1145"elements will be converted to string.");
1146
Skip Montanarob4a04172003-03-20 23:29:12 +00001147static PyObject *
1148csv_writerows(WriterObj *self, PyObject *seqseq)
1149{
1150 PyObject *row_iter, *row_obj, *result;
1151
1152 row_iter = PyObject_GetIter(seqseq);
1153 if (row_iter == NULL) {
1154 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001155 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001156 return NULL;
1157 }
1158 while ((row_obj = PyIter_Next(row_iter))) {
1159 result = csv_writerow(self, row_obj);
1160 Py_DECREF(row_obj);
1161 if (!result) {
1162 Py_DECREF(row_iter);
1163 return NULL;
1164 }
1165 else
1166 Py_DECREF(result);
1167 }
1168 Py_DECREF(row_iter);
1169 if (PyErr_Occurred())
1170 return NULL;
1171 Py_INCREF(Py_None);
1172 return Py_None;
1173}
1174
1175static struct PyMethodDef Writer_methods[] = {
1176 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001177 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001178 { NULL, NULL }
1179};
1180
1181#define W_OFF(x) offsetof(WriterObj, x)
1182
1183static struct PyMemberDef Writer_memberlist[] = {
1184 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1185 { NULL }
1186};
1187
1188static void
1189Writer_dealloc(WriterObj *self)
1190{
1191 Py_XDECREF(self->dialect);
1192 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001193 if (self->rec != NULL)
1194 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001195 PyObject_GC_Del(self);
1196}
1197
1198static int
1199Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1200{
1201 int err;
1202#define VISIT(SLOT) \
1203 if (SLOT) { \
1204 err = visit((PyObject *)(SLOT), arg); \
1205 if (err) \
1206 return err; \
1207 }
1208 VISIT(self->dialect);
1209 VISIT(self->writeline);
1210 return 0;
1211}
1212
1213static int
1214Writer_clear(WriterObj *self)
1215{
1216 Py_XDECREF(self->dialect);
1217 Py_XDECREF(self->writeline);
1218 self->dialect = NULL;
1219 self->writeline = NULL;
1220 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001221}
1222
1223PyDoc_STRVAR(Writer_Type_doc,
1224"CSV writer\n"
1225"\n"
1226"Writer objects are responsible for generating tabular data\n"
1227"in CSV format from sequence input.\n"
1228);
1229
1230static PyTypeObject Writer_Type = {
1231 PyObject_HEAD_INIT(NULL)
1232 0, /*ob_size*/
1233 "_csv.writer", /*tp_name*/
1234 sizeof(WriterObj), /*tp_basicsize*/
1235 0, /*tp_itemsize*/
1236 /* methods */
1237 (destructor)Writer_dealloc, /*tp_dealloc*/
1238 (printfunc)0, /*tp_print*/
1239 (getattrfunc)0, /*tp_getattr*/
1240 (setattrfunc)0, /*tp_setattr*/
1241 (cmpfunc)0, /*tp_compare*/
1242 (reprfunc)0, /*tp_repr*/
1243 0, /*tp_as_number*/
1244 0, /*tp_as_sequence*/
1245 0, /*tp_as_mapping*/
1246 (hashfunc)0, /*tp_hash*/
1247 (ternaryfunc)0, /*tp_call*/
1248 (reprfunc)0, /*tp_str*/
1249 0, /*tp_getattro*/
1250 0, /*tp_setattro*/
1251 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001252 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1253 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001254 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001255 (traverseproc)Writer_traverse, /*tp_traverse*/
1256 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001257 0, /*tp_richcompare*/
1258 0, /*tp_weaklistoffset*/
1259 (getiterfunc)0, /*tp_iter*/
1260 (getiterfunc)0, /*tp_iternext*/
1261 Writer_methods, /*tp_methods*/
1262 Writer_memberlist, /*tp_members*/
1263 0, /*tp_getset*/
1264};
1265
1266static PyObject *
1267csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1268{
1269 PyObject * output_file, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001270 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001271
1272 if (!self)
1273 return NULL;
1274
1275 self->dialect = NULL;
1276 self->writeline = NULL;
1277
1278 self->rec = NULL;
1279 self->rec_size = 0;
1280 self->rec_len = 0;
1281 self->num_fields = 0;
1282
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001283 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001284 Py_DECREF(self);
1285 return NULL;
1286 }
1287 self->writeline = PyObject_GetAttrString(output_file, "write");
1288 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1289 PyErr_SetString(PyExc_TypeError,
1290 "argument 1 must be an instance with a write method");
1291 Py_DECREF(self);
1292 return NULL;
1293 }
1294 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1295 if (ctor_args == NULL) {
1296 Py_DECREF(self);
1297 return NULL;
1298 }
1299 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1300 ctor_args, keyword_args);
1301 Py_DECREF(ctor_args);
1302 if (self->dialect == NULL) {
1303 Py_DECREF(self);
1304 return NULL;
1305 }
1306 return (PyObject *)self;
1307}
1308
1309/*
1310 * DIALECT REGISTRY
1311 */
1312static PyObject *
1313csv_list_dialects(PyObject *module, PyObject *args)
1314{
1315 return PyDict_Keys(dialects);
1316}
1317
1318static PyObject *
1319csv_register_dialect(PyObject *module, PyObject *args)
1320{
1321 PyObject *name_obj, *dialect_obj;
1322
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001323 if (!PyArg_UnpackTuple(args, "", 2, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001324 return NULL;
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001325 if (!PyString_Check(name_obj)
1326#ifdef Py_USING_UNICODE
1327&& !PyUnicode_Check(name_obj)
1328#endif
1329) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001330 PyErr_SetString(PyExc_TypeError,
1331 "dialect name must be a string or unicode");
1332 return NULL;
1333 }
1334 Py_INCREF(dialect_obj);
1335 /* A class rather than an instance? Instanciate */
1336 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1337 PyObject * new_dia;
1338 new_dia = PyObject_CallFunction(dialect_obj, "");
1339 Py_DECREF(dialect_obj);
1340 if (new_dia == NULL)
1341 return NULL;
1342 dialect_obj = new_dia;
1343 }
1344 /* Make sure we finally have an instance */
1345 if (!PyInstance_Check(dialect_obj)) {
1346 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1347 Py_DECREF(dialect_obj);
1348 return NULL;
1349 }
1350 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1351 Py_DECREF(dialect_obj);
1352 return NULL;
1353 }
1354 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1355 Py_DECREF(dialect_obj);
1356 return NULL;
1357 }
1358 Py_DECREF(dialect_obj);
1359 Py_INCREF(Py_None);
1360 return Py_None;
1361}
1362
1363static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001364csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001365{
Skip Montanarob4a04172003-03-20 23:29:12 +00001366 if (PyDict_DelItem(dialects, name_obj) < 0)
1367 return PyErr_Format(error_obj, "unknown dialect");
1368 Py_INCREF(Py_None);
1369 return Py_None;
1370}
1371
1372static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001373csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001374{
Skip Montanarob4a04172003-03-20 23:29:12 +00001375 return get_dialect_from_registry(name_obj);
1376}
1377
1378/*
1379 * MODULE
1380 */
1381
/* Module docstring for _csv: overview of the module, an example dialect
   class and the list of dialect settings.  Passed to Py_InitModule3() by
   init_csv(). */
1382PyDoc_STRVAR(csv_module_doc,
1383"CSV parsing and writing.\n"
1384"\n"
1385"This module provides classes that assist in the reading and writing\n"
1386"of Comma Separated Value (CSV) files, and implements the interface\n"
1387"described by PEP 305. Although many CSV files are simple to parse,\n"
1388"the format is not formally defined by a stable specification and\n"
1389"is subtle enough that parsing lines of a CSV file with something\n"
1390"like line.split(\",\") is bound to fail. The module supports three\n"
1391"basic APIs: reading, writing, and registration of dialects.\n"
1392"\n"
1393"\n"
1394"DIALECT REGISTRATION:\n"
1395"\n"
1396"Readers and writers support a dialect argument, which is a convenient\n"
1397"handle on a group of settings. When the dialect argument is a string,\n"
1398"it identifies one of the dialects previously registered with the module.\n"
1399"If it is a class or instance, the attributes of the argument are used as\n"
1400"the settings for the reader or writer:\n"
1401"\n"
1402" class excel:\n"
1403" delimiter = ','\n"
1404" quotechar = '\"'\n"
1405" escapechar = None\n"
1406" doublequote = True\n"
1407" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001408" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001409" quoting = QUOTE_MINIMAL\n"
1410"\n"
1411"SETTINGS:\n"
1412"\n"
1413" * quotechar - specifies a one-character string to use as the \n"
1414" quoting character. It defaults to '\"'.\n"
1415" * delimiter - specifies a one-character string to use as the \n"
1416" field separator. It defaults to ','.\n"
1417" * skipinitialspace - specifies how to interpret whitespace which\n"
1418" immediately follows a delimiter. It defaults to False, which\n"
1419" means that whitespace immediately following a delimiter is part\n"
1420" of the following field.\n"
1421" * lineterminator - specifies the character sequence which should \n"
1422" terminate rows.\n"
1423" * quoting - controls when quotes should be generated by the writer.\n"
1424" It can take on any of the following module constants:\n"
1425"\n"
1426" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1427" field contains either the quotechar or the delimiter\n"
1428" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1429" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001430" fields which do not parse as integers or floating point\n"
1431" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001432" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1433" * escapechar - specifies a one-character string used to escape \n"
1434" the delimiter when quoting is set to QUOTE_NONE.\n"
1435" * doublequote - controls the handling of quotes inside fields. When\n"
1436" True, two consecutive quotes are interpreted as one during read,\n"
1437" and when writing, each quote character embedded in the data is\n"
1438" written as two quotes\n");
1439
1440PyDoc_STRVAR(csv_reader_doc,
1441" csv_reader = reader(iterable [, dialect='excel']\n"
1442" [optional keyword args])\n"
1443" for row in csv_reader:\n"
1444" process(row)\n"
1445"\n"
1446"The \"iterable\" argument can be any object that returns a line\n"
1447"of input for each iteration, such as a file object or a list. The\n"
1448"optional \"dialect\" parameter is discussed below. The function\n"
1449"also accepts optional keyword arguments which override settings\n"
1450"provided by the dialect.\n"
1451"\n"
1452"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001453"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001454
1455PyDoc_STRVAR(csv_writer_doc,
1456" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1457" [optional keyword args])\n"
1458" for row in csv_writer:\n"
1459" csv_writer.writerow(row)\n"
1460"\n"
1461" [or]\n"
1462"\n"
1463" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1464" [optional keyword args])\n"
1465" csv_writer.writerows(rows)\n"
1466"\n"
1467"The \"fileobj\" argument can be any object that supports the file API.\n");
1468
1469PyDoc_STRVAR(csv_list_dialects_doc,
1470"Return a list of all know dialect names.\n"
1471" names = csv.list_dialects()");
1472
/* Docstring for the module-level get_dialect() function
   (see csv_methods). */
1473PyDoc_STRVAR(csv_get_dialect_doc,
1474"Return the dialect instance associated with name.\n"
1475" dialect = csv.get_dialect(name)");
1476
1477PyDoc_STRVAR(csv_register_dialect_doc,
1478"Create a mapping from a string name to a dialect class.\n"
1479" dialect = csv.register_dialect(name, dialect)");
1480
/* Docstring for the module-level unregister_dialect() function
   (see csv_methods). */
1481PyDoc_STRVAR(csv_unregister_dialect_doc,
1482"Delete the name/dialect mapping associated with a string name.\n"
1483" csv.unregister_dialect(name)");
1484
1485static struct PyMethodDef csv_methods[] = {
1486 { "reader", (PyCFunction)csv_reader,
1487 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1488 { "writer", (PyCFunction)csv_writer,
1489 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1490 { "list_dialects", (PyCFunction)csv_list_dialects,
1491 METH_NOARGS, csv_list_dialects_doc},
1492 { "register_dialect", (PyCFunction)csv_register_dialect,
1493 METH_VARARGS, csv_register_dialect_doc},
1494 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001495 METH_O, csv_unregister_dialect_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001496 { "get_dialect", (PyCFunction)csv_get_dialect,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001497 METH_O, csv_get_dialect_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001498 { NULL, NULL }
1499};
1500
1501PyMODINIT_FUNC
1502init_csv(void)
1503{
1504 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001505 StyleDesc *style;
1506
1507 if (PyType_Ready(&Dialect_Type) < 0)
1508 return;
1509
1510 if (PyType_Ready(&Reader_Type) < 0)
1511 return;
1512
1513 if (PyType_Ready(&Writer_Type) < 0)
1514 return;
1515
1516 /* Create the module and add the functions */
1517 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1518 if (module == NULL)
1519 return;
1520
1521 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001522 if (PyModule_AddStringConstant(module, "__version__",
1523 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001524 return;
1525
1526 /* Add _dialects dictionary */
1527 dialects = PyDict_New();
1528 if (dialects == NULL)
1529 return;
1530 if (PyModule_AddObject(module, "_dialects", dialects))
1531 return;
1532
1533 /* Add quote styles into dictionary */
1534 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001535 if (PyModule_AddIntConstant(module, style->name,
1536 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001537 return;
1538 }
1539
1540 /* Add the Dialect type */
1541 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1542 return;
1543
1544 /* Add the CSV exception object to the module. */
1545 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1546 if (error_obj == NULL)
1547 return;
1548 PyModule_AddObject(module, "Error", error_obj);
1549}