blob: 89b0a0dce0220f7ec1c517d57175762ac1d294cb [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module. Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers for headers that do not provide them.  PyDoc_STR()
 * expands to the given text only when WITH_DOC_STRINGS is defined;
 * otherwise every docstring collapses to the empty string. */
#  ifdef WITH_DOC_STRINGS
#    define PyDoc_STR(str) str
#  else
#    define PyDoc_STR(str) ""
#  endif
#  define PyDoc_VAR(name) static char name[]
#  define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Return-type prefix for the module init function; adds C linkage when
 * the file is compiled as C++. */
#  if defined(__cplusplus)
#    define PyMODINIT_FUNC extern "C" void
#  else /* __cplusplus */
#    define PyMODINIT_FUNC void
#  endif /* __cplusplus */
#endif
/* end 2.2 compatibility macros */
41
42static PyObject *error_obj; /* CSV exception */
43static PyObject *dialects; /* Dialect registry */
44
/* States of the character-at-a-time reader state machine. */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting the start of a field */
    ESCAPED_CHAR,               /* escape char seen in an unquoted field */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape char seen in a quoted field */
    QUOTE_IN_QUOTED_FIELD       /* quote char seen in a quoted field */
} ParserState;
49
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
53
54typedef struct {
55 QuoteStyle style;
56 char *name;
57} StyleDesc;
58
59static StyleDesc quote_styles[] = {
60 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
61 { QUOTE_ALL, "QUOTE_ALL" },
62 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
63 { QUOTE_NONE, "QUOTE_NONE" },
64 { 0 }
65};
66
67typedef struct {
68 PyObject_HEAD
69
70 int doublequote; /* is " represented by ""? */
71 char delimiter; /* field separator */
72 char quotechar; /* quote character */
73 char escapechar; /* escape character */
74 int skipinitialspace; /* ignore spaces following delimiter? */
75 PyObject *lineterminator; /* string to write between records */
76 QuoteStyle quoting; /* style of quoting to write */
77
78 int strict; /* raise exception on bad CSV */
79} DialectObj;
80
81staticforward PyTypeObject Dialect_Type;
82
83typedef struct {
84 PyObject_HEAD
85
86 PyObject *input_iter; /* iterate over this for input lines */
87
88 DialectObj *dialect; /* parsing dialect */
89
90 PyObject *fields; /* field list for current record */
91 ParserState state; /* current CSV parse state */
92 char *field; /* build current field in here */
93 int field_size; /* size of allocated buffer */
94 int field_len; /* length of current field */
95 int had_parse_error; /* did we have a parse error? */
96} ReaderObj;
97
98staticforward PyTypeObject Reader_Type;
99
100#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
101
102typedef struct {
103 PyObject_HEAD
104
105 PyObject *writeline; /* write output lines to this file */
106
107 DialectObj *dialect; /* parsing dialect */
108
109 char *rec; /* buffer for parser.join */
110 int rec_size; /* size of allocated record */
111 int rec_len; /* length of record */
112 int num_fields; /* number of fields in record */
113} WriterObj;
114
115staticforward PyTypeObject Writer_Type;
116
/*
 * DIALECT class
 */
120
121static PyObject *
122get_dialect_from_registry(PyObject * name_obj)
123{
124 PyObject *dialect_obj;
125
126 dialect_obj = PyDict_GetItem(dialects, name_obj);
127 if (dialect_obj == NULL)
128 return PyErr_Format(error_obj, "unknown dialect");
129 Py_INCREF(dialect_obj);
130 return dialect_obj;
131}
132
133static int
134check_delattr(PyObject *v)
135{
136 if (v == NULL) {
137 PyErr_SetString(PyExc_TypeError,
138 "Cannot delete attribute");
139 return -1;
140 }
141 return 0;
142}
143
144static PyObject *
145get_string(PyObject *str)
146{
147 Py_XINCREF(str);
148 return str;
149}
150
151static int
152set_string(PyObject **str, PyObject *v)
153{
154 if (check_delattr(v) < 0)
155 return -1;
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000156 if (!PyString_Check(v)
157#ifdef Py_USING_UNICODE
158&& !PyUnicode_Check(v)
159#endif
160) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 PyErr_BadArgument();
162 return -1;
163 }
164 Py_XDECREF(*str);
165 Py_INCREF(v);
166 *str = v;
167 return 0;
168}
169
170static PyObject *
171get_nullchar_as_None(char c)
172{
173 if (c == '\0') {
174 Py_INCREF(Py_None);
175 return Py_None;
176 }
177 else
178 return PyString_FromStringAndSize((char*)&c, 1);
179}
180
181static int
182set_None_as_nullchar(char * addr, PyObject *v)
183{
184 if (check_delattr(v) < 0)
185 return -1;
186 if (v == Py_None)
187 *addr = '\0';
188 else if (!PyString_Check(v) || PyString_Size(v) != 1) {
189 PyErr_BadArgument();
190 return -1;
191 }
Skip Montanaro577c7a72003-04-12 19:17:14 +0000192 else {
193 char *s = PyString_AsString(v);
194 if (s == NULL)
195 return -1;
196 *addr = s[0];
197 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000198 return 0;
199}
200
201static PyObject *
202Dialect_get_lineterminator(DialectObj *self)
203{
204 return get_string(self->lineterminator);
205}
206
207static int
208Dialect_set_lineterminator(DialectObj *self, PyObject *value)
209{
210 return set_string(&self->lineterminator, value);
211}
212
213static PyObject *
214Dialect_get_escapechar(DialectObj *self)
215{
216 return get_nullchar_as_None(self->escapechar);
217}
218
219static int
220Dialect_set_escapechar(DialectObj *self, PyObject *value)
221{
222 return set_None_as_nullchar(&self->escapechar, value);
223}
224
225static PyObject *
226Dialect_get_quoting(DialectObj *self)
227{
228 return PyInt_FromLong(self->quoting);
229}
230
231static int
232Dialect_set_quoting(DialectObj *self, PyObject *v)
233{
234 int quoting;
235 StyleDesc *qs = quote_styles;
236
237 if (check_delattr(v) < 0)
238 return -1;
239 if (!PyInt_Check(v)) {
240 PyErr_BadArgument();
241 return -1;
242 }
243 quoting = PyInt_AsLong(v);
244 for (qs = quote_styles; qs->name; qs++) {
245 if (qs->style == quoting) {
246 self->quoting = quoting;
247 return 0;
248 }
249 }
250 PyErr_BadArgument();
251 return -1;
252}
253
254static struct PyMethodDef Dialect_methods[] = {
255 { NULL, NULL }
256};
257
258#define D_OFF(x) offsetof(DialectObj, x)
259
260static struct PyMemberDef Dialect_memberlist[] = {
261 { "quotechar", T_CHAR, D_OFF(quotechar) },
262 { "delimiter", T_CHAR, D_OFF(delimiter) },
263 { "skipinitialspace", T_INT, D_OFF(skipinitialspace) },
264 { "doublequote", T_INT, D_OFF(doublequote) },
265 { "strict", T_INT, D_OFF(strict) },
266 { NULL }
267};
268
269static PyGetSetDef Dialect_getsetlist[] = {
270 { "escapechar", (getter)Dialect_get_escapechar,
271 (setter)Dialect_set_escapechar },
272 { "lineterminator", (getter)Dialect_get_lineterminator,
273 (setter)Dialect_set_lineterminator },
274 { "quoting", (getter)Dialect_get_quoting,
275 (setter)Dialect_set_quoting },
276 {NULL},
277};
278
279static void
280Dialect_dealloc(DialectObj *self)
281{
282 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000283 self->ob_type->tp_free((PyObject *)self);
284}
285
286static int
287dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
288{
289 PyObject *dialect = NULL, *name_obj, *value_obj;
290
291 self->quotechar = '"';
292 self->delimiter = ',';
293 self->escapechar = '\0';
294 self->skipinitialspace = 0;
295 Py_XDECREF(self->lineterminator);
296 self->lineterminator = PyString_FromString("\r\n");
297 if (self->lineterminator == NULL)
298 return -1;
299 self->quoting = QUOTE_MINIMAL;
300 self->doublequote = 1;
301 self->strict = 0;
302
303 if (!PyArg_ParseTuple(args, "|O", &dialect))
304 return -1;
305 Py_XINCREF(dialect);
306 if (kwargs != NULL) {
307 PyObject * key = PyString_FromString("dialect");
308 PyObject * d;
309
310 d = PyDict_GetItem(kwargs, key);
311 if (d) {
312 Py_INCREF(d);
313 Py_XDECREF(dialect);
314 PyDict_DelItem(kwargs, key);
315 dialect = d;
316 }
317 Py_DECREF(key);
318 }
319 if (dialect != NULL) {
320 int i;
321 PyObject * dir_list;
322
323 /* If dialect is a string, look it up in our registry */
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000324 if (PyString_Check(dialect)
325#ifdef Py_USING_UNICODE
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000326 || PyUnicode_Check(dialect)
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000327#endif
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000328 ) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000329 PyObject * new_dia;
330 new_dia = get_dialect_from_registry(dialect);
331 Py_DECREF(dialect);
332 if (new_dia == NULL)
333 return -1;
334 dialect = new_dia;
335 }
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000336 /* A class rather than an instance? Instantiate */
Skip Montanarob4a04172003-03-20 23:29:12 +0000337 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
338 PyObject * new_dia;
339 new_dia = PyObject_CallFunction(dialect, "");
340 Py_DECREF(dialect);
341 if (new_dia == NULL)
342 return -1;
343 dialect = new_dia;
344 }
345 /* Make sure we finally have an instance */
346 if (!PyInstance_Check(dialect) ||
347 (dir_list = PyObject_Dir(dialect)) == NULL) {
348 PyErr_SetString(PyExc_TypeError,
349 "dialect must be an instance");
350 Py_DECREF(dialect);
351 return -1;
352 }
353 /* And extract the attributes */
354 for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
Tim Peters38fc8372003-04-13 03:25:15 +0000355 char *s;
Skip Montanarob4a04172003-03-20 23:29:12 +0000356 name_obj = PyList_GET_ITEM(dir_list, i);
Tim Peters38fc8372003-04-13 03:25:15 +0000357 s = PyString_AsString(name_obj);
Skip Montanaro577c7a72003-04-12 19:17:14 +0000358 if (s == NULL)
359 return -1;
360 if (s[0] == '_')
Skip Montanarob4a04172003-03-20 23:29:12 +0000361 continue;
362 value_obj = PyObject_GetAttr(dialect, name_obj);
363 if (value_obj) {
364 if (PyObject_SetAttr((PyObject *)self,
365 name_obj, value_obj)) {
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000366 Py_DECREF(value_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000367 Py_DECREF(dir_list);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000368 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +0000369 return -1;
370 }
371 Py_DECREF(value_obj);
372 }
373 }
374 Py_DECREF(dir_list);
375 Py_DECREF(dialect);
376 }
377 if (kwargs != NULL) {
378 int pos = 0;
379
380 while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
381 if (PyObject_SetAttr((PyObject *)self,
382 name_obj, value_obj))
383 return -1;
384 }
385 }
386 return 0;
387}
388
389static PyObject *
390dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
391{
392 DialectObj *self;
393 self = (DialectObj *)type->tp_alloc(type, 0);
394 if (self != NULL) {
395 self->lineterminator = NULL;
396 }
397 return (PyObject *)self;
398}
399
400
401PyDoc_STRVAR(Dialect_Type_doc,
402"CSV dialect\n"
403"\n"
404"The Dialect type records CSV parsing and generation options.\n");
405
406static PyTypeObject Dialect_Type = {
407 PyObject_HEAD_INIT(NULL)
408 0, /* ob_size */
409 "_csv.Dialect", /* tp_name */
410 sizeof(DialectObj), /* tp_basicsize */
411 0, /* tp_itemsize */
412 /* methods */
413 (destructor)Dialect_dealloc, /* tp_dealloc */
414 (printfunc)0, /* tp_print */
415 (getattrfunc)0, /* tp_getattr */
416 (setattrfunc)0, /* tp_setattr */
417 (cmpfunc)0, /* tp_compare */
418 (reprfunc)0, /* tp_repr */
419 0, /* tp_as_number */
420 0, /* tp_as_sequence */
421 0, /* tp_as_mapping */
422 (hashfunc)0, /* tp_hash */
423 (ternaryfunc)0, /* tp_call */
424 (reprfunc)0, /* tp_str */
425 0, /* tp_getattro */
426 0, /* tp_setattro */
427 0, /* tp_as_buffer */
428 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
429 Dialect_Type_doc, /* tp_doc */
430 0, /* tp_traverse */
431 0, /* tp_clear */
432 0, /* tp_richcompare */
433 0, /* tp_weaklistoffset */
434 0, /* tp_iter */
435 0, /* tp_iternext */
436 Dialect_methods, /* tp_methods */
437 Dialect_memberlist, /* tp_members */
438 Dialect_getsetlist, /* tp_getset */
439 0, /* tp_base */
440 0, /* tp_dict */
441 0, /* tp_descr_get */
442 0, /* tp_descr_set */
443 0, /* tp_dictoffset */
444 (initproc)dialect_init, /* tp_init */
445 PyType_GenericAlloc, /* tp_alloc */
446 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000447 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000448};
449
450static void
451parse_save_field(ReaderObj *self)
452{
453 PyObject *field;
454
455 field = PyString_FromStringAndSize(self->field, self->field_len);
456 if (field != NULL) {
457 PyList_Append(self->fields, field);
458 Py_XDECREF(field);
459 }
460 self->field_len = 0;
461}
462
463static int
464parse_grow_buff(ReaderObj *self)
465{
466 if (self->field_size == 0) {
467 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000468 if (self->field != NULL)
469 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000470 self->field = PyMem_Malloc(self->field_size);
471 }
472 else {
473 self->field_size *= 2;
474 self->field = PyMem_Realloc(self->field, self->field_size);
475 }
476 if (self->field == NULL) {
477 PyErr_NoMemory();
478 return 0;
479 }
480 return 1;
481}
482
483static void
484parse_add_char(ReaderObj *self, char c)
485{
486 if (self->field_len == self->field_size && !parse_grow_buff(self))
487 return;
488 self->field[self->field_len++] = c;
489}
490
491static void
492parse_process_char(ReaderObj *self, char c)
493{
494 DialectObj *dialect = self->dialect;
495
496 switch (self->state) {
497 case START_RECORD:
498 /* start of record */
499 if (c == '\n')
500 /* empty line - return [] */
501 break;
502 /* normal character - handle as START_FIELD */
503 self->state = START_FIELD;
504 /* fallthru */
505 case START_FIELD:
506 /* expecting field */
507 if (c == '\n') {
508 /* save empty field - return [fields] */
509 parse_save_field(self);
510 self->state = START_RECORD;
511 }
512 else if (c == dialect->quotechar) {
513 /* start quoted field */
514 self->state = IN_QUOTED_FIELD;
515 }
516 else if (c == dialect->escapechar) {
517 /* possible escaped character */
518 self->state = ESCAPED_CHAR;
519 }
520 else if (c == ' ' && dialect->skipinitialspace)
521 /* ignore space at start of field */
522 ;
523 else if (c == dialect->delimiter) {
524 /* save empty field */
525 parse_save_field(self);
526 }
527 else {
528 /* begin new unquoted field */
529 parse_add_char(self, c);
530 self->state = IN_FIELD;
531 }
532 break;
533
534 case ESCAPED_CHAR:
535 if (c != dialect->escapechar &&
536 c != dialect->delimiter &&
537 c != dialect->quotechar)
538 parse_add_char(self, dialect->escapechar);
539 parse_add_char(self, c);
540 self->state = IN_FIELD;
541 break;
542
543 case IN_FIELD:
544 /* in unquoted field */
545 if (c == '\n') {
546 /* end of line - return [fields] */
547 parse_save_field(self);
548 self->state = START_RECORD;
549 }
550 else if (c == dialect->escapechar) {
551 /* possible escaped character */
552 self->state = ESCAPED_CHAR;
553 }
554 else if (c == dialect->delimiter) {
555 /* save field - wait for new field */
556 parse_save_field(self);
557 self->state = START_FIELD;
558 }
559 else {
560 /* normal character - save in field */
561 parse_add_char(self, c);
562 }
563 break;
564
565 case IN_QUOTED_FIELD:
566 /* in quoted field */
567 if (c == '\n') {
568 /* end of line - save '\n' in field */
569 parse_add_char(self, '\n');
570 }
571 else if (c == dialect->escapechar) {
572 /* Possible escape character */
573 self->state = ESCAPE_IN_QUOTED_FIELD;
574 }
575 else if (c == dialect->quotechar) {
576 if (dialect->doublequote) {
577 /* doublequote; " represented by "" */
578 self->state = QUOTE_IN_QUOTED_FIELD;
579 }
580 else {
581 /* end of quote part of field */
582 self->state = IN_FIELD;
583 }
584 }
585 else {
586 /* normal character - save in field */
587 parse_add_char(self, c);
588 }
589 break;
590
591 case ESCAPE_IN_QUOTED_FIELD:
592 if (c != dialect->escapechar &&
593 c != dialect->delimiter &&
594 c != dialect->quotechar)
595 parse_add_char(self, dialect->escapechar);
596 parse_add_char(self, c);
597 self->state = IN_QUOTED_FIELD;
598 break;
599
600 case QUOTE_IN_QUOTED_FIELD:
601 /* doublequote - seen a quote in an quoted field */
602 if (dialect->quoting != QUOTE_NONE &&
603 c == dialect->quotechar) {
604 /* save "" as " */
605 parse_add_char(self, c);
606 self->state = IN_QUOTED_FIELD;
607 }
608 else if (c == dialect->delimiter) {
609 /* save field - wait for new field */
610 parse_save_field(self);
611 self->state = START_FIELD;
612 }
613 else if (c == '\n') {
614 /* end of line - return [fields] */
615 parse_save_field(self);
616 self->state = START_RECORD;
617 }
618 else if (!dialect->strict) {
619 parse_add_char(self, c);
620 self->state = IN_FIELD;
621 }
622 else {
623 /* illegal */
624 self->had_parse_error = 1;
625 PyErr_Format(error_obj, "%c expected after %c",
626 dialect->delimiter,
627 dialect->quotechar);
628 }
629 break;
630
631 }
632}
633
/*
 * READER
 */
637#define R_OFF(x) offsetof(ReaderObj, x)
638
639static struct PyMemberDef Reader_memberlist[] = {
640 { "dialect", T_OBJECT, R_OFF(dialect), RO },
641 { NULL }
642};
643
644static PyObject *
645Reader_getiter(ReaderObj *self)
646{
647 Py_INCREF(self);
648 return (PyObject *)self;
649}
650
651static PyObject *
652Reader_iternext(ReaderObj *self)
653{
654 PyObject *lineobj;
655 PyObject *fields;
656 char *line;
657
658 do {
659 lineobj = PyIter_Next(self->input_iter);
660 if (lineobj == NULL) {
661 /* End of input OR exception */
662 if (!PyErr_Occurred() && self->field_len != 0)
663 return PyErr_Format(error_obj,
664 "newline inside string");
665 return NULL;
666 }
667
668 if (self->had_parse_error) {
669 if (self->fields) {
670 Py_XDECREF(self->fields);
671 }
672 self->fields = PyList_New(0);
673 self->field_len = 0;
674 self->state = START_RECORD;
675 self->had_parse_error = 0;
676 }
677 line = PyString_AsString(lineobj);
678
679 if (line == NULL) {
680 Py_DECREF(lineobj);
681 return NULL;
682 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000683 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000684 self->had_parse_error = 1;
685 Py_DECREF(lineobj);
686 return PyErr_Format(error_obj,
687 "string with NUL bytes");
688 }
689
690 /* Process line of text - send '\n' to processing code to
691 represent end of line. End of line which is not at end of
692 string is an error. */
693 while (*line) {
694 char c;
695
696 c = *line++;
697 if (c == '\r') {
698 c = *line++;
699 if (c == '\0')
700 /* macintosh end of line */
701 break;
702 if (c == '\n') {
703 c = *line++;
704 if (c == '\0')
705 /* DOS end of line */
706 break;
707 }
708 self->had_parse_error = 1;
709 Py_DECREF(lineobj);
710 return PyErr_Format(error_obj,
711 "newline inside string");
712 }
713 if (c == '\n') {
714 c = *line++;
715 if (c == '\0')
716 /* unix end of line */
717 break;
718 self->had_parse_error = 1;
719 Py_DECREF(lineobj);
720 return PyErr_Format(error_obj,
721 "newline inside string");
722 }
723 parse_process_char(self, c);
724 if (PyErr_Occurred()) {
725 Py_DECREF(lineobj);
726 return NULL;
727 }
728 }
729 parse_process_char(self, '\n');
730 Py_DECREF(lineobj);
731 } while (self->state != START_RECORD);
732
733 fields = self->fields;
734 self->fields = PyList_New(0);
735 return fields;
736}
737
738static void
739Reader_dealloc(ReaderObj *self)
740{
741 Py_XDECREF(self->dialect);
742 Py_XDECREF(self->input_iter);
743 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000744 if (self->field != NULL)
745 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000746 PyObject_GC_Del(self);
747}
748
749static int
750Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
751{
752 int err;
753#define VISIT(SLOT) \
754 if (SLOT) { \
755 err = visit((PyObject *)(SLOT), arg); \
756 if (err) \
757 return err; \
758 }
759 VISIT(self->dialect);
760 VISIT(self->input_iter);
761 VISIT(self->fields);
762 return 0;
763}
764
765static int
766Reader_clear(ReaderObj *self)
767{
768 Py_XDECREF(self->dialect);
769 Py_XDECREF(self->input_iter);
770 Py_XDECREF(self->fields);
771 self->dialect = NULL;
772 self->input_iter = NULL;
773 self->fields = NULL;
774 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000775}
776
777PyDoc_STRVAR(Reader_Type_doc,
778"CSV reader\n"
779"\n"
780"Reader objects are responsible for reading and parsing tabular data\n"
781"in CSV format.\n"
782);
783
784static struct PyMethodDef Reader_methods[] = {
785 { NULL, NULL }
786};
787
788static PyTypeObject Reader_Type = {
789 PyObject_HEAD_INIT(NULL)
790 0, /*ob_size*/
791 "_csv.reader", /*tp_name*/
792 sizeof(ReaderObj), /*tp_basicsize*/
793 0, /*tp_itemsize*/
794 /* methods */
795 (destructor)Reader_dealloc, /*tp_dealloc*/
796 (printfunc)0, /*tp_print*/
797 (getattrfunc)0, /*tp_getattr*/
798 (setattrfunc)0, /*tp_setattr*/
799 (cmpfunc)0, /*tp_compare*/
800 (reprfunc)0, /*tp_repr*/
801 0, /*tp_as_number*/
802 0, /*tp_as_sequence*/
803 0, /*tp_as_mapping*/
804 (hashfunc)0, /*tp_hash*/
805 (ternaryfunc)0, /*tp_call*/
806 (reprfunc)0, /*tp_str*/
807 0, /*tp_getattro*/
808 0, /*tp_setattro*/
809 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000810 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
811 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000812 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000813 (traverseproc)Reader_traverse, /*tp_traverse*/
814 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000815 0, /*tp_richcompare*/
816 0, /*tp_weaklistoffset*/
817 (getiterfunc)Reader_getiter, /*tp_iter*/
818 (getiterfunc)Reader_iternext, /*tp_iternext*/
819 Reader_methods, /*tp_methods*/
820 Reader_memberlist, /*tp_members*/
821 0, /*tp_getset*/
822
823};
824
825static PyObject *
826csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
827{
828 PyObject * iterator, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000829 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000830
831 if (!self)
832 return NULL;
833
834 self->dialect = NULL;
835 self->input_iter = self->fields = NULL;
836
837 self->fields = NULL;
838 self->input_iter = NULL;
839 self->had_parse_error = 0;
840 self->field = NULL;
841 self->field_size = 0;
842 self->field_len = 0;
843 self->state = START_RECORD;
844
845 if (!PyArg_ParseTuple(args, "O|O", &iterator, &dialect)) {
846 Py_DECREF(self);
847 return NULL;
848 }
849 self->input_iter = PyObject_GetIter(iterator);
850 if (self->input_iter == NULL) {
851 PyErr_SetString(PyExc_TypeError,
852 "argument 1 must be an iterator");
853 Py_DECREF(self);
854 return NULL;
855 }
856 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
857 if (ctor_args == NULL) {
858 Py_DECREF(self);
859 return NULL;
860 }
861 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
862 ctor_args, keyword_args);
863 Py_DECREF(ctor_args);
864 if (self->dialect == NULL) {
865 Py_DECREF(self);
866 return NULL;
867 }
868 self->fields = PyList_New(0);
869 if (self->fields == NULL) {
870 Py_DECREF(self);
871 return NULL;
872 }
873
874 return (PyObject *)self;
875}
876
/*
 * WRITER
 */
/* ---------------------------------------------------------------- */
881static void
882join_reset(WriterObj *self)
883{
884 self->rec_len = 0;
885 self->num_fields = 0;
886}
887
888#define MEM_INCR 32768
889
890/* Calculate new record length or append field to record. Return new
891 * record length.
892 */
893static int
894join_append_data(WriterObj *self, char *field, int quote_empty,
895 int *quoted, int copy_phase)
896{
897 DialectObj *dialect = self->dialect;
898 int i, rec_len;
899
900 rec_len = self->rec_len;
901
902 /* If this is not the first field we need a field separator.
903 */
904 if (self->num_fields > 0) {
905 if (copy_phase)
906 self->rec[rec_len] = dialect->delimiter;
907 rec_len++;
908 }
909 /* Handle preceding quote.
910 */
911 switch (dialect->quoting) {
912 case QUOTE_ALL:
913 *quoted = 1;
914 if (copy_phase)
915 self->rec[rec_len] = dialect->quotechar;
916 rec_len++;
917 break;
918 case QUOTE_MINIMAL:
919 case QUOTE_NONNUMERIC:
920 /* We only know about quoted in the copy phase.
921 */
922 if (copy_phase && *quoted) {
923 self->rec[rec_len] = dialect->quotechar;
924 rec_len++;
925 }
926 break;
927 case QUOTE_NONE:
928 break;
929 }
930 /* Copy/count field data.
931 */
932 for (i = 0;; i++) {
933 char c = field[i];
934
935 if (c == '\0')
936 break;
937 /* If in doublequote mode we escape quote chars with a
938 * quote.
939 */
940 if (dialect->quoting != QUOTE_NONE &&
941 c == dialect->quotechar && dialect->doublequote) {
942 if (copy_phase)
943 self->rec[rec_len] = dialect->quotechar;
944 *quoted = 1;
945 rec_len++;
946 }
947
948 /* Some special characters need to be escaped. If we have a
949 * quote character switch to quoted field instead of escaping
950 * individual characters.
951 */
952 if (!*quoted
953 && (c == dialect->delimiter ||
954 c == dialect->escapechar ||
955 c == '\n' || c == '\r')) {
956 if (dialect->quoting != QUOTE_NONE)
957 *quoted = 1;
958 else if (dialect->escapechar) {
959 if (copy_phase)
960 self->rec[rec_len] = dialect->escapechar;
961 rec_len++;
962 }
963 else {
964 PyErr_Format(error_obj,
965 "delimiter must be quoted or escaped");
966 return -1;
967 }
968 }
969 /* Copy field character into record buffer.
970 */
971 if (copy_phase)
972 self->rec[rec_len] = c;
973 rec_len++;
974 }
975
976 /* If field is empty check if it needs to be quoted.
977 */
978 if (i == 0 && quote_empty) {
979 if (dialect->quoting == QUOTE_NONE) {
980 PyErr_Format(error_obj,
981 "single empty field record must be quoted");
982 return -1;
983 } else
984 *quoted = 1;
985 }
986
987 /* Handle final quote character on field.
988 */
989 if (*quoted) {
990 if (copy_phase)
991 self->rec[rec_len] = dialect->quotechar;
992 else
993 /* Didn't know about leading quote until we found it
994 * necessary in field data - compensate for it now.
995 */
996 rec_len++;
997 rec_len++;
998 }
999
1000 return rec_len;
1001}
1002
1003static int
1004join_check_rec_size(WriterObj *self, int rec_len)
1005{
1006 if (rec_len > self->rec_size) {
1007 if (self->rec_size == 0) {
1008 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001009 if (self->rec != NULL)
1010 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001011 self->rec = PyMem_Malloc(self->rec_size);
1012 }
1013 else {
1014 char *old_rec = self->rec;
1015
1016 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1017 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1018 if (self->rec == NULL)
1019 PyMem_Free(old_rec);
1020 }
1021 if (self->rec == NULL) {
1022 PyErr_NoMemory();
1023 return 0;
1024 }
1025 }
1026 return 1;
1027}
1028
1029static int
1030join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1031{
1032 int rec_len;
1033
1034 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1035 if (rec_len < 0)
1036 return 0;
1037
1038 /* grow record buffer if necessary */
1039 if (!join_check_rec_size(self, rec_len))
1040 return 0;
1041
1042 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1043 self->num_fields++;
1044
1045 return 1;
1046}
1047
1048static int
1049join_append_lineterminator(WriterObj *self)
1050{
1051 int terminator_len;
1052
1053 terminator_len = PyString_Size(self->dialect->lineterminator);
1054
1055 /* grow record buffer if necessary */
1056 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1057 return 0;
1058
1059 memmove(self->rec + self->rec_len,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001060 /* should not be NULL */
Skip Montanarob4a04172003-03-20 23:29:12 +00001061 PyString_AsString(self->dialect->lineterminator),
1062 terminator_len);
1063 self->rec_len += terminator_len;
1064
1065 return 1;
1066}
1067
1068PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001069"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001070"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001071"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001072"elements will be converted to string.");
1073
1074static PyObject *
1075csv_writerow(WriterObj *self, PyObject *seq)
1076{
1077 DialectObj *dialect = self->dialect;
1078 int len, i;
1079
1080 if (!PySequence_Check(seq))
1081 return PyErr_Format(error_obj, "sequence expected");
1082
1083 len = PySequence_Length(seq);
1084 if (len < 0)
1085 return NULL;
1086
1087 /* Join all fields in internal buffer.
1088 */
1089 join_reset(self);
1090 for (i = 0; i < len; i++) {
1091 PyObject *field;
1092 int append_ok;
1093 int quoted;
1094
1095 field = PySequence_GetItem(seq, i);
1096 if (field == NULL)
1097 return NULL;
1098
1099 quoted = 0;
1100 if (dialect->quoting == QUOTE_NONNUMERIC) {
1101 PyObject *num;
1102
1103 num = PyNumber_Float(field);
1104 if (num == NULL) {
1105 quoted = 1;
1106 PyErr_Clear();
1107 }
1108 else {
1109 Py_DECREF(num);
1110 }
1111 }
1112
1113 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001114 append_ok = join_append(self,
1115 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001116 &quoted, len == 1);
1117 Py_DECREF(field);
1118 }
1119 else if (field == Py_None) {
1120 append_ok = join_append(self, "", &quoted, len == 1);
1121 Py_DECREF(field);
1122 }
1123 else {
1124 PyObject *str;
1125
1126 str = PyObject_Str(field);
1127 Py_DECREF(field);
1128 if (str == NULL)
1129 return NULL;
1130
Skip Montanaro577c7a72003-04-12 19:17:14 +00001131 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001132 &quoted, len == 1);
1133 Py_DECREF(str);
1134 }
1135 if (!append_ok)
1136 return NULL;
1137 }
1138
1139 /* Add line terminator.
1140 */
1141 if (!join_append_lineterminator(self))
1142 return 0;
1143
1144 return PyObject_CallFunction(self->writeline,
1145 "(s#)", self->rec, self->rec_len);
1146}
1147
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001148PyDoc_STRVAR(csv_writerows_doc,
1149"writerows(sequence of sequences)\n"
1150"\n"
1151"Construct and write a series of sequences to a csv file. Non-string\n"
1152"elements will be converted to string.");
1153
Skip Montanarob4a04172003-03-20 23:29:12 +00001154static PyObject *
1155csv_writerows(WriterObj *self, PyObject *seqseq)
1156{
1157 PyObject *row_iter, *row_obj, *result;
1158
1159 row_iter = PyObject_GetIter(seqseq);
1160 if (row_iter == NULL) {
1161 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001162 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001163 return NULL;
1164 }
1165 while ((row_obj = PyIter_Next(row_iter))) {
1166 result = csv_writerow(self, row_obj);
1167 Py_DECREF(row_obj);
1168 if (!result) {
1169 Py_DECREF(row_iter);
1170 return NULL;
1171 }
1172 else
1173 Py_DECREF(result);
1174 }
1175 Py_DECREF(row_iter);
1176 if (PyErr_Occurred())
1177 return NULL;
1178 Py_INCREF(Py_None);
1179 return Py_None;
1180}
1181
1182static struct PyMethodDef Writer_methods[] = {
1183 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001184 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001185 { NULL, NULL }
1186};
1187
1188#define W_OFF(x) offsetof(WriterObj, x)
1189
1190static struct PyMemberDef Writer_memberlist[] = {
1191 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1192 { NULL }
1193};
1194
1195static void
1196Writer_dealloc(WriterObj *self)
1197{
1198 Py_XDECREF(self->dialect);
1199 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001200 if (self->rec != NULL)
1201 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001202 PyObject_GC_Del(self);
1203}
1204
1205static int
1206Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1207{
1208 int err;
1209#define VISIT(SLOT) \
1210 if (SLOT) { \
1211 err = visit((PyObject *)(SLOT), arg); \
1212 if (err) \
1213 return err; \
1214 }
1215 VISIT(self->dialect);
1216 VISIT(self->writeline);
1217 return 0;
1218}
1219
1220static int
1221Writer_clear(WriterObj *self)
1222{
1223 Py_XDECREF(self->dialect);
1224 Py_XDECREF(self->writeline);
1225 self->dialect = NULL;
1226 self->writeline = NULL;
1227 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001228}
1229
1230PyDoc_STRVAR(Writer_Type_doc,
1231"CSV writer\n"
1232"\n"
1233"Writer objects are responsible for generating tabular data\n"
1234"in CSV format from sequence input.\n"
1235);
1236
1237static PyTypeObject Writer_Type = {
1238 PyObject_HEAD_INIT(NULL)
1239 0, /*ob_size*/
1240 "_csv.writer", /*tp_name*/
1241 sizeof(WriterObj), /*tp_basicsize*/
1242 0, /*tp_itemsize*/
1243 /* methods */
1244 (destructor)Writer_dealloc, /*tp_dealloc*/
1245 (printfunc)0, /*tp_print*/
1246 (getattrfunc)0, /*tp_getattr*/
1247 (setattrfunc)0, /*tp_setattr*/
1248 (cmpfunc)0, /*tp_compare*/
1249 (reprfunc)0, /*tp_repr*/
1250 0, /*tp_as_number*/
1251 0, /*tp_as_sequence*/
1252 0, /*tp_as_mapping*/
1253 (hashfunc)0, /*tp_hash*/
1254 (ternaryfunc)0, /*tp_call*/
1255 (reprfunc)0, /*tp_str*/
1256 0, /*tp_getattro*/
1257 0, /*tp_setattro*/
1258 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001259 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1260 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001261 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001262 (traverseproc)Writer_traverse, /*tp_traverse*/
1263 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001264 0, /*tp_richcompare*/
1265 0, /*tp_weaklistoffset*/
1266 (getiterfunc)0, /*tp_iter*/
1267 (getiterfunc)0, /*tp_iternext*/
1268 Writer_methods, /*tp_methods*/
1269 Writer_memberlist, /*tp_members*/
1270 0, /*tp_getset*/
1271};
1272
1273static PyObject *
1274csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1275{
1276 PyObject * output_file, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001277 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001278
1279 if (!self)
1280 return NULL;
1281
1282 self->dialect = NULL;
1283 self->writeline = NULL;
1284
1285 self->rec = NULL;
1286 self->rec_size = 0;
1287 self->rec_len = 0;
1288 self->num_fields = 0;
1289
1290 if (!PyArg_ParseTuple(args, "O|O", &output_file, &dialect)) {
1291 Py_DECREF(self);
1292 return NULL;
1293 }
1294 self->writeline = PyObject_GetAttrString(output_file, "write");
1295 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1296 PyErr_SetString(PyExc_TypeError,
1297 "argument 1 must be an instance with a write method");
1298 Py_DECREF(self);
1299 return NULL;
1300 }
1301 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1302 if (ctor_args == NULL) {
1303 Py_DECREF(self);
1304 return NULL;
1305 }
1306 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1307 ctor_args, keyword_args);
1308 Py_DECREF(ctor_args);
1309 if (self->dialect == NULL) {
1310 Py_DECREF(self);
1311 return NULL;
1312 }
1313 return (PyObject *)self;
1314}
1315
1316/*
1317 * DIALECT REGISTRY
1318 */
1319static PyObject *
1320csv_list_dialects(PyObject *module, PyObject *args)
1321{
1322 return PyDict_Keys(dialects);
1323}
1324
1325static PyObject *
1326csv_register_dialect(PyObject *module, PyObject *args)
1327{
1328 PyObject *name_obj, *dialect_obj;
1329
1330 if (!PyArg_ParseTuple(args, "OO", &name_obj, &dialect_obj))
1331 return NULL;
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001332 if (!PyString_Check(name_obj)
1333#ifdef Py_USING_UNICODE
1334&& !PyUnicode_Check(name_obj)
1335#endif
1336) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001337 PyErr_SetString(PyExc_TypeError,
1338 "dialect name must be a string or unicode");
1339 return NULL;
1340 }
1341 Py_INCREF(dialect_obj);
1342 /* A class rather than an instance? Instanciate */
1343 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1344 PyObject * new_dia;
1345 new_dia = PyObject_CallFunction(dialect_obj, "");
1346 Py_DECREF(dialect_obj);
1347 if (new_dia == NULL)
1348 return NULL;
1349 dialect_obj = new_dia;
1350 }
1351 /* Make sure we finally have an instance */
1352 if (!PyInstance_Check(dialect_obj)) {
1353 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1354 Py_DECREF(dialect_obj);
1355 return NULL;
1356 }
1357 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1358 Py_DECREF(dialect_obj);
1359 return NULL;
1360 }
1361 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1362 Py_DECREF(dialect_obj);
1363 return NULL;
1364 }
1365 Py_DECREF(dialect_obj);
1366 Py_INCREF(Py_None);
1367 return Py_None;
1368}
1369
1370static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001371csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001372{
Skip Montanarob4a04172003-03-20 23:29:12 +00001373 if (PyDict_DelItem(dialects, name_obj) < 0)
1374 return PyErr_Format(error_obj, "unknown dialect");
1375 Py_INCREF(Py_None);
1376 return Py_None;
1377}
1378
1379static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001380csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001381{
Skip Montanarob4a04172003-03-20 23:29:12 +00001382 return get_dialect_from_registry(name_obj);
1383}
1384
1385/*
1386 * MODULE
1387 */
1388
1389PyDoc_STRVAR(csv_module_doc,
1390"CSV parsing and writing.\n"
1391"\n"
1392"This module provides classes that assist in the reading and writing\n"
1393"of Comma Separated Value (CSV) files, and implements the interface\n"
1394"described by PEP 305. Although many CSV files are simple to parse,\n"
1395"the format is not formally defined by a stable specification and\n"
1396"is subtle enough that parsing lines of a CSV file with something\n"
1397"like line.split(\",\") is bound to fail. The module supports three\n"
1398"basic APIs: reading, writing, and registration of dialects.\n"
1399"\n"
1400"\n"
1401"DIALECT REGISTRATION:\n"
1402"\n"
1403"Readers and writers support a dialect argument, which is a convenient\n"
1404"handle on a group of settings. When the dialect argument is a string,\n"
1405"it identifies one of the dialects previously registered with the module.\n"
1406"If it is a class or instance, the attributes of the argument are used as\n"
1407"the settings for the reader or writer:\n"
1408"\n"
1409" class excel:\n"
1410" delimiter = ','\n"
1411" quotechar = '\"'\n"
1412" escapechar = None\n"
1413" doublequote = True\n"
1414" skipinitialspace = False\n"
1415" lineterminator = '\r\n'\n"
1416" quoting = QUOTE_MINIMAL\n"
1417"\n"
1418"SETTINGS:\n"
1419"\n"
1420" * quotechar - specifies a one-character string to use as the \n"
1421" quoting character. It defaults to '\"'.\n"
1422" * delimiter - specifies a one-character string to use as the \n"
1423" field separator. It defaults to ','.\n"
1424" * skipinitialspace - specifies how to interpret whitespace which\n"
1425" immediately follows a delimiter. It defaults to False, which\n"
1426" means that whitespace immediately following a delimiter is part\n"
1427" of the following field.\n"
1428" * lineterminator - specifies the character sequence which should \n"
1429" terminate rows.\n"
1430" * quoting - controls when quotes should be generated by the writer.\n"
1431" It can take on any of the following module constants:\n"
1432"\n"
1433" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1434" field contains either the quotechar or the delimiter\n"
1435" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1436" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1437" fields which contain characters other than [+-0-9.].\n"
1438" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1439" * escapechar - specifies a one-character string used to escape \n"
1440" the delimiter when quoting is set to QUOTE_NONE.\n"
1441" * doublequote - controls the handling of quotes inside fields. When\n"
1442" True, two consecutive quotes are interpreted as one during read,\n"
1443" and when writing, each quote character embedded in the data is\n"
1444" written as two quotes\n");
1445
1446PyDoc_STRVAR(csv_reader_doc,
1447" csv_reader = reader(iterable [, dialect='excel']\n"
1448" [optional keyword args])\n"
1449" for row in csv_reader:\n"
1450" process(row)\n"
1451"\n"
1452"The \"iterable\" argument can be any object that returns a line\n"
1453"of input for each iteration, such as a file object or a list. The\n"
1454"optional \"dialect\" parameter is discussed below. The function\n"
1455"also accepts optional keyword arguments which override settings\n"
1456"provided by the dialect.\n"
1457"\n"
1458"The returned object is an iterator. Each iteration returns a row\n"
1459 "of the CSV file (which can span multiple input lines):\n");
1460
1461PyDoc_STRVAR(csv_writer_doc,
1462" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1463" [optional keyword args])\n"
1464" for row in csv_writer:\n"
1465" csv_writer.writerow(row)\n"
1466"\n"
1467" [or]\n"
1468"\n"
1469" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1470" [optional keyword args])\n"
1471" csv_writer.writerows(rows)\n"
1472"\n"
1473"The \"fileobj\" argument can be any object that supports the file API.\n");
1474
1475PyDoc_STRVAR(csv_list_dialects_doc,
1476"Return a list of all know dialect names.\n"
1477" names = csv.list_dialects()");
1478
1479PyDoc_STRVAR(csv_get_dialect_doc,
1480"Return the dialect instance associated with name.\n"
1481" dialect = csv.get_dialect(name)");
1482
1483PyDoc_STRVAR(csv_register_dialect_doc,
1484"Create a mapping from a string name to a dialect class.\n"
1485" dialect = csv.register_dialect(name, dialect)");
1486
1487PyDoc_STRVAR(csv_unregister_dialect_doc,
1488"Delete the name/dialect mapping associated with a string name.\n"
1489" csv.unregister_dialect(name)");
1490
1491static struct PyMethodDef csv_methods[] = {
1492 { "reader", (PyCFunction)csv_reader,
1493 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1494 { "writer", (PyCFunction)csv_writer,
1495 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1496 { "list_dialects", (PyCFunction)csv_list_dialects,
1497 METH_NOARGS, csv_list_dialects_doc},
1498 { "register_dialect", (PyCFunction)csv_register_dialect,
1499 METH_VARARGS, csv_register_dialect_doc},
1500 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001501 METH_O, csv_unregister_dialect_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001502 { "get_dialect", (PyCFunction)csv_get_dialect,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001503 METH_O, csv_get_dialect_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001504 { NULL, NULL }
1505};
1506
1507PyMODINIT_FUNC
1508init_csv(void)
1509{
1510 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001511 StyleDesc *style;
1512
1513 if (PyType_Ready(&Dialect_Type) < 0)
1514 return;
1515
1516 if (PyType_Ready(&Reader_Type) < 0)
1517 return;
1518
1519 if (PyType_Ready(&Writer_Type) < 0)
1520 return;
1521
1522 /* Create the module and add the functions */
1523 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1524 if (module == NULL)
1525 return;
1526
1527 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001528 if (PyModule_AddStringConstant(module, "__version__",
1529 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001530 return;
1531
1532 /* Add _dialects dictionary */
1533 dialects = PyDict_New();
1534 if (dialects == NULL)
1535 return;
1536 if (PyModule_AddObject(module, "_dialects", dialects))
1537 return;
1538
1539 /* Add quote styles into dictionary */
1540 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001541 if (PyModule_AddIntConstant(module, style->name,
1542 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001543 return;
1544 }
1545
1546 /* Add the Dialect type */
1547 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1548 return;
1549
1550 /* Add the CSV exception object to the module. */
1551 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1552 if (error_obj == NULL)
1553 return;
1554 PyModule_AddObject(module, "Error", error_obj);
1555}