blob: ab9add2b77174b06777f8178a1747615ad7396f7 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#if !defined(PyDoc_STRVAR)
/* Inline-documentation helpers, supplied here only when the Python headers
 * do not already provide them. */
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)
#if defined(WITH_DOC_STRINGS)
#define PyDoc_STR(str)          str
#else
/* Docstrings compiled out: every doc variable becomes an empty string. */
#define PyDoc_STR(str)          ""
#endif
#endif /* !defined(PyDoc_STRVAR) */

#if !defined(PyMODINIT_FUNC)
#if defined(__cplusplus)
#define PyMODINIT_FUNC extern "C" void
#else /* !__cplusplus */
#define PyMODINIT_FUNC void
#endif /* __cplusplus */
#endif /* !defined(PyMODINIT_FUNC) */
/* end 2.2 compatibility macros */
41
42static PyObject *error_obj; /* CSV exception */
43static PyObject *dialects; /* Dialect registry */
44
/* States of the reader's character-at-a-time parser (parse_process_char). */
typedef enum {
    START_RECORD,               /* nothing of the record seen yet */
    START_FIELD,                /* expecting the first char of a field */
    ESCAPED_CHAR,               /* escapechar seen in an unquoted field */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escapechar seen in a quoted field */
    QUOTE_IN_QUOTED_FIELD       /* quotechar seen in a quoted field */
} ParserState;
49
/* Quoting policies a dialect may select; the order fixes the integer values
 * exposed through the dialect's "quoting" attribute. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
53
54typedef struct {
55 QuoteStyle style;
56 char *name;
57} StyleDesc;
58
59static StyleDesc quote_styles[] = {
60 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
61 { QUOTE_ALL, "QUOTE_ALL" },
62 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
63 { QUOTE_NONE, "QUOTE_NONE" },
64 { 0 }
65};
66
67typedef struct {
68 PyObject_HEAD
69
70 int doublequote; /* is " represented by ""? */
71 char delimiter; /* field separator */
72 char quotechar; /* quote character */
73 char escapechar; /* escape character */
74 int skipinitialspace; /* ignore spaces following delimiter? */
75 PyObject *lineterminator; /* string to write between records */
76 QuoteStyle quoting; /* style of quoting to write */
77
78 int strict; /* raise exception on bad CSV */
79} DialectObj;
80
81staticforward PyTypeObject Dialect_Type;
82
83typedef struct {
84 PyObject_HEAD
85
86 PyObject *input_iter; /* iterate over this for input lines */
87
88 DialectObj *dialect; /* parsing dialect */
89
90 PyObject *fields; /* field list for current record */
91 ParserState state; /* current CSV parse state */
92 char *field; /* build current field in here */
93 int field_size; /* size of allocated buffer */
94 int field_len; /* length of current field */
95 int had_parse_error; /* did we have a parse error? */
96} ReaderObj;
97
98staticforward PyTypeObject Reader_Type;
99
100#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
101
102typedef struct {
103 PyObject_HEAD
104
105 PyObject *writeline; /* write output lines to this file */
106
107 DialectObj *dialect; /* parsing dialect */
108
109 char *rec; /* buffer for parser.join */
110 int rec_size; /* size of allocated record */
111 int rec_len; /* length of record */
112 int num_fields; /* number of fields in record */
113} WriterObj;
114
115staticforward PyTypeObject Writer_Type;
116
117/*
118 * DIALECT class
119 */
120
121static PyObject *
122get_dialect_from_registry(PyObject * name_obj)
123{
124 PyObject *dialect_obj;
125
126 dialect_obj = PyDict_GetItem(dialects, name_obj);
127 if (dialect_obj == NULL)
128 return PyErr_Format(error_obj, "unknown dialect");
129 Py_INCREF(dialect_obj);
130 return dialect_obj;
131}
132
133static int
134check_delattr(PyObject *v)
135{
136 if (v == NULL) {
137 PyErr_SetString(PyExc_TypeError,
138 "Cannot delete attribute");
139 return -1;
140 }
141 return 0;
142}
143
144static PyObject *
145get_string(PyObject *str)
146{
147 Py_XINCREF(str);
148 return str;
149}
150
151static int
152set_string(PyObject **str, PyObject *v)
153{
154 if (check_delattr(v) < 0)
155 return -1;
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000156 if (!PyString_Check(v)
157#ifdef Py_USING_UNICODE
158&& !PyUnicode_Check(v)
159#endif
160) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 PyErr_BadArgument();
162 return -1;
163 }
164 Py_XDECREF(*str);
165 Py_INCREF(v);
166 *str = v;
167 return 0;
168}
169
170static PyObject *
171get_nullchar_as_None(char c)
172{
173 if (c == '\0') {
174 Py_INCREF(Py_None);
175 return Py_None;
176 }
177 else
178 return PyString_FromStringAndSize((char*)&c, 1);
179}
180
181static int
182set_None_as_nullchar(char * addr, PyObject *v)
183{
184 if (check_delattr(v) < 0)
185 return -1;
186 if (v == Py_None)
187 *addr = '\0';
188 else if (!PyString_Check(v) || PyString_Size(v) != 1) {
189 PyErr_BadArgument();
190 return -1;
191 }
Skip Montanaro577c7a72003-04-12 19:17:14 +0000192 else {
193 char *s = PyString_AsString(v);
194 if (s == NULL)
195 return -1;
196 *addr = s[0];
197 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000198 return 0;
199}
200
201static PyObject *
202Dialect_get_lineterminator(DialectObj *self)
203{
204 return get_string(self->lineterminator);
205}
206
207static int
208Dialect_set_lineterminator(DialectObj *self, PyObject *value)
209{
210 return set_string(&self->lineterminator, value);
211}
212
213static PyObject *
214Dialect_get_escapechar(DialectObj *self)
215{
216 return get_nullchar_as_None(self->escapechar);
217}
218
219static int
220Dialect_set_escapechar(DialectObj *self, PyObject *value)
221{
222 return set_None_as_nullchar(&self->escapechar, value);
223}
224
225static PyObject *
226Dialect_get_quoting(DialectObj *self)
227{
228 return PyInt_FromLong(self->quoting);
229}
230
231static int
232Dialect_set_quoting(DialectObj *self, PyObject *v)
233{
234 int quoting;
235 StyleDesc *qs = quote_styles;
236
237 if (check_delattr(v) < 0)
238 return -1;
239 if (!PyInt_Check(v)) {
240 PyErr_BadArgument();
241 return -1;
242 }
243 quoting = PyInt_AsLong(v);
244 for (qs = quote_styles; qs->name; qs++) {
245 if (qs->style == quoting) {
246 self->quoting = quoting;
247 return 0;
248 }
249 }
250 PyErr_BadArgument();
251 return -1;
252}
253
254static struct PyMethodDef Dialect_methods[] = {
255 { NULL, NULL }
256};
257
258#define D_OFF(x) offsetof(DialectObj, x)
259
260static struct PyMemberDef Dialect_memberlist[] = {
261 { "quotechar", T_CHAR, D_OFF(quotechar) },
262 { "delimiter", T_CHAR, D_OFF(delimiter) },
263 { "skipinitialspace", T_INT, D_OFF(skipinitialspace) },
264 { "doublequote", T_INT, D_OFF(doublequote) },
265 { "strict", T_INT, D_OFF(strict) },
266 { NULL }
267};
268
269static PyGetSetDef Dialect_getsetlist[] = {
270 { "escapechar", (getter)Dialect_get_escapechar,
271 (setter)Dialect_set_escapechar },
272 { "lineterminator", (getter)Dialect_get_lineterminator,
273 (setter)Dialect_set_lineterminator },
274 { "quoting", (getter)Dialect_get_quoting,
275 (setter)Dialect_set_quoting },
276 {NULL},
277};
278
279static void
280Dialect_dealloc(DialectObj *self)
281{
282 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000283 self->ob_type->tp_free((PyObject *)self);
284}
285
286static int
287dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
288{
289 PyObject *dialect = NULL, *name_obj, *value_obj;
290
291 self->quotechar = '"';
292 self->delimiter = ',';
293 self->escapechar = '\0';
294 self->skipinitialspace = 0;
295 Py_XDECREF(self->lineterminator);
296 self->lineterminator = PyString_FromString("\r\n");
297 if (self->lineterminator == NULL)
298 return -1;
299 self->quoting = QUOTE_MINIMAL;
300 self->doublequote = 1;
301 self->strict = 0;
302
303 if (!PyArg_ParseTuple(args, "|O", &dialect))
304 return -1;
305 Py_XINCREF(dialect);
306 if (kwargs != NULL) {
307 PyObject * key = PyString_FromString("dialect");
308 PyObject * d;
309
310 d = PyDict_GetItem(kwargs, key);
311 if (d) {
312 Py_INCREF(d);
313 Py_XDECREF(dialect);
314 PyDict_DelItem(kwargs, key);
315 dialect = d;
316 }
317 Py_DECREF(key);
318 }
319 if (dialect != NULL) {
320 int i;
321 PyObject * dir_list;
322
323 /* If dialect is a string, look it up in our registry */
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000324 if (PyString_Check(dialect)
325#ifdef Py_USING_UNICODE
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000326 || PyUnicode_Check(dialect)
Skip Montanaro860fc0b2003-04-12 18:57:52 +0000327#endif
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000328 ) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000329 PyObject * new_dia;
330 new_dia = get_dialect_from_registry(dialect);
331 Py_DECREF(dialect);
332 if (new_dia == NULL)
333 return -1;
334 dialect = new_dia;
335 }
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000336 /* A class rather than an instance? Instantiate */
Skip Montanarob4a04172003-03-20 23:29:12 +0000337 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
338 PyObject * new_dia;
339 new_dia = PyObject_CallFunction(dialect, "");
340 Py_DECREF(dialect);
341 if (new_dia == NULL)
342 return -1;
343 dialect = new_dia;
344 }
345 /* Make sure we finally have an instance */
346 if (!PyInstance_Check(dialect) ||
347 (dir_list = PyObject_Dir(dialect)) == NULL) {
348 PyErr_SetString(PyExc_TypeError,
349 "dialect must be an instance");
350 Py_DECREF(dialect);
351 return -1;
352 }
353 /* And extract the attributes */
354 for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
Tim Peters38fc8372003-04-13 03:25:15 +0000355 char *s;
Skip Montanarob4a04172003-03-20 23:29:12 +0000356 name_obj = PyList_GET_ITEM(dir_list, i);
Tim Peters38fc8372003-04-13 03:25:15 +0000357 s = PyString_AsString(name_obj);
Skip Montanaro577c7a72003-04-12 19:17:14 +0000358 if (s == NULL)
359 return -1;
360 if (s[0] == '_')
Skip Montanarob4a04172003-03-20 23:29:12 +0000361 continue;
362 value_obj = PyObject_GetAttr(dialect, name_obj);
363 if (value_obj) {
364 if (PyObject_SetAttr((PyObject *)self,
365 name_obj, value_obj)) {
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000366 Py_DECREF(value_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000367 Py_DECREF(dir_list);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000368 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +0000369 return -1;
370 }
371 Py_DECREF(value_obj);
372 }
373 }
374 Py_DECREF(dir_list);
375 Py_DECREF(dialect);
376 }
377 if (kwargs != NULL) {
378 int pos = 0;
379
380 while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
381 if (PyObject_SetAttr((PyObject *)self,
382 name_obj, value_obj))
383 return -1;
384 }
385 }
386 return 0;
387}
388
389static PyObject *
390dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
391{
392 DialectObj *self;
393 self = (DialectObj *)type->tp_alloc(type, 0);
394 if (self != NULL) {
395 self->lineterminator = NULL;
396 }
397 return (PyObject *)self;
398}
399
400
401PyDoc_STRVAR(Dialect_Type_doc,
402"CSV dialect\n"
403"\n"
404"The Dialect type records CSV parsing and generation options.\n");
405
406static PyTypeObject Dialect_Type = {
407 PyObject_HEAD_INIT(NULL)
408 0, /* ob_size */
409 "_csv.Dialect", /* tp_name */
410 sizeof(DialectObj), /* tp_basicsize */
411 0, /* tp_itemsize */
412 /* methods */
413 (destructor)Dialect_dealloc, /* tp_dealloc */
414 (printfunc)0, /* tp_print */
415 (getattrfunc)0, /* tp_getattr */
416 (setattrfunc)0, /* tp_setattr */
417 (cmpfunc)0, /* tp_compare */
418 (reprfunc)0, /* tp_repr */
419 0, /* tp_as_number */
420 0, /* tp_as_sequence */
421 0, /* tp_as_mapping */
422 (hashfunc)0, /* tp_hash */
423 (ternaryfunc)0, /* tp_call */
424 (reprfunc)0, /* tp_str */
425 0, /* tp_getattro */
426 0, /* tp_setattro */
427 0, /* tp_as_buffer */
428 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
429 Dialect_Type_doc, /* tp_doc */
430 0, /* tp_traverse */
431 0, /* tp_clear */
432 0, /* tp_richcompare */
433 0, /* tp_weaklistoffset */
434 0, /* tp_iter */
435 0, /* tp_iternext */
436 Dialect_methods, /* tp_methods */
437 Dialect_memberlist, /* tp_members */
438 Dialect_getsetlist, /* tp_getset */
439 0, /* tp_base */
440 0, /* tp_dict */
441 0, /* tp_descr_get */
442 0, /* tp_descr_set */
443 0, /* tp_dictoffset */
444 (initproc)dialect_init, /* tp_init */
445 PyType_GenericAlloc, /* tp_alloc */
446 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000447 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000448};
449
450static void
451parse_save_field(ReaderObj *self)
452{
453 PyObject *field;
454
455 field = PyString_FromStringAndSize(self->field, self->field_len);
456 if (field != NULL) {
457 PyList_Append(self->fields, field);
458 Py_XDECREF(field);
459 }
460 self->field_len = 0;
461}
462
463static int
464parse_grow_buff(ReaderObj *self)
465{
466 if (self->field_size == 0) {
467 self->field_size = 4096;
468 self->field = PyMem_Malloc(self->field_size);
469 }
470 else {
471 self->field_size *= 2;
472 self->field = PyMem_Realloc(self->field, self->field_size);
473 }
474 if (self->field == NULL) {
475 PyErr_NoMemory();
476 return 0;
477 }
478 return 1;
479}
480
481static void
482parse_add_char(ReaderObj *self, char c)
483{
484 if (self->field_len == self->field_size && !parse_grow_buff(self))
485 return;
486 self->field[self->field_len++] = c;
487}
488
489static void
490parse_process_char(ReaderObj *self, char c)
491{
492 DialectObj *dialect = self->dialect;
493
494 switch (self->state) {
495 case START_RECORD:
496 /* start of record */
497 if (c == '\n')
498 /* empty line - return [] */
499 break;
500 /* normal character - handle as START_FIELD */
501 self->state = START_FIELD;
502 /* fallthru */
503 case START_FIELD:
504 /* expecting field */
505 if (c == '\n') {
506 /* save empty field - return [fields] */
507 parse_save_field(self);
508 self->state = START_RECORD;
509 }
510 else if (c == dialect->quotechar) {
511 /* start quoted field */
512 self->state = IN_QUOTED_FIELD;
513 }
514 else if (c == dialect->escapechar) {
515 /* possible escaped character */
516 self->state = ESCAPED_CHAR;
517 }
518 else if (c == ' ' && dialect->skipinitialspace)
519 /* ignore space at start of field */
520 ;
521 else if (c == dialect->delimiter) {
522 /* save empty field */
523 parse_save_field(self);
524 }
525 else {
526 /* begin new unquoted field */
527 parse_add_char(self, c);
528 self->state = IN_FIELD;
529 }
530 break;
531
532 case ESCAPED_CHAR:
533 if (c != dialect->escapechar &&
534 c != dialect->delimiter &&
535 c != dialect->quotechar)
536 parse_add_char(self, dialect->escapechar);
537 parse_add_char(self, c);
538 self->state = IN_FIELD;
539 break;
540
541 case IN_FIELD:
542 /* in unquoted field */
543 if (c == '\n') {
544 /* end of line - return [fields] */
545 parse_save_field(self);
546 self->state = START_RECORD;
547 }
548 else if (c == dialect->escapechar) {
549 /* possible escaped character */
550 self->state = ESCAPED_CHAR;
551 }
552 else if (c == dialect->delimiter) {
553 /* save field - wait for new field */
554 parse_save_field(self);
555 self->state = START_FIELD;
556 }
557 else {
558 /* normal character - save in field */
559 parse_add_char(self, c);
560 }
561 break;
562
563 case IN_QUOTED_FIELD:
564 /* in quoted field */
565 if (c == '\n') {
566 /* end of line - save '\n' in field */
567 parse_add_char(self, '\n');
568 }
569 else if (c == dialect->escapechar) {
570 /* Possible escape character */
571 self->state = ESCAPE_IN_QUOTED_FIELD;
572 }
573 else if (c == dialect->quotechar) {
574 if (dialect->doublequote) {
575 /* doublequote; " represented by "" */
576 self->state = QUOTE_IN_QUOTED_FIELD;
577 }
578 else {
579 /* end of quote part of field */
580 self->state = IN_FIELD;
581 }
582 }
583 else {
584 /* normal character - save in field */
585 parse_add_char(self, c);
586 }
587 break;
588
589 case ESCAPE_IN_QUOTED_FIELD:
590 if (c != dialect->escapechar &&
591 c != dialect->delimiter &&
592 c != dialect->quotechar)
593 parse_add_char(self, dialect->escapechar);
594 parse_add_char(self, c);
595 self->state = IN_QUOTED_FIELD;
596 break;
597
598 case QUOTE_IN_QUOTED_FIELD:
599 /* doublequote - seen a quote in an quoted field */
600 if (dialect->quoting != QUOTE_NONE &&
601 c == dialect->quotechar) {
602 /* save "" as " */
603 parse_add_char(self, c);
604 self->state = IN_QUOTED_FIELD;
605 }
606 else if (c == dialect->delimiter) {
607 /* save field - wait for new field */
608 parse_save_field(self);
609 self->state = START_FIELD;
610 }
611 else if (c == '\n') {
612 /* end of line - return [fields] */
613 parse_save_field(self);
614 self->state = START_RECORD;
615 }
616 else if (!dialect->strict) {
617 parse_add_char(self, c);
618 self->state = IN_FIELD;
619 }
620 else {
621 /* illegal */
622 self->had_parse_error = 1;
623 PyErr_Format(error_obj, "%c expected after %c",
624 dialect->delimiter,
625 dialect->quotechar);
626 }
627 break;
628
629 }
630}
631
632/*
633 * READER
634 */
635#define R_OFF(x) offsetof(ReaderObj, x)
636
637static struct PyMemberDef Reader_memberlist[] = {
638 { "dialect", T_OBJECT, R_OFF(dialect), RO },
639 { NULL }
640};
641
642static PyObject *
643Reader_getiter(ReaderObj *self)
644{
645 Py_INCREF(self);
646 return (PyObject *)self;
647}
648
649static PyObject *
650Reader_iternext(ReaderObj *self)
651{
652 PyObject *lineobj;
653 PyObject *fields;
654 char *line;
655
656 do {
657 lineobj = PyIter_Next(self->input_iter);
658 if (lineobj == NULL) {
659 /* End of input OR exception */
660 if (!PyErr_Occurred() && self->field_len != 0)
661 return PyErr_Format(error_obj,
662 "newline inside string");
663 return NULL;
664 }
665
666 if (self->had_parse_error) {
667 if (self->fields) {
668 Py_XDECREF(self->fields);
669 }
670 self->fields = PyList_New(0);
671 self->field_len = 0;
672 self->state = START_RECORD;
673 self->had_parse_error = 0;
674 }
675 line = PyString_AsString(lineobj);
676
677 if (line == NULL) {
678 Py_DECREF(lineobj);
679 return NULL;
680 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000681 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000682 self->had_parse_error = 1;
683 Py_DECREF(lineobj);
684 return PyErr_Format(error_obj,
685 "string with NUL bytes");
686 }
687
688 /* Process line of text - send '\n' to processing code to
689 represent end of line. End of line which is not at end of
690 string is an error. */
691 while (*line) {
692 char c;
693
694 c = *line++;
695 if (c == '\r') {
696 c = *line++;
697 if (c == '\0')
698 /* macintosh end of line */
699 break;
700 if (c == '\n') {
701 c = *line++;
702 if (c == '\0')
703 /* DOS end of line */
704 break;
705 }
706 self->had_parse_error = 1;
707 Py_DECREF(lineobj);
708 return PyErr_Format(error_obj,
709 "newline inside string");
710 }
711 if (c == '\n') {
712 c = *line++;
713 if (c == '\0')
714 /* unix end of line */
715 break;
716 self->had_parse_error = 1;
717 Py_DECREF(lineobj);
718 return PyErr_Format(error_obj,
719 "newline inside string");
720 }
721 parse_process_char(self, c);
722 if (PyErr_Occurred()) {
723 Py_DECREF(lineobj);
724 return NULL;
725 }
726 }
727 parse_process_char(self, '\n');
728 Py_DECREF(lineobj);
729 } while (self->state != START_RECORD);
730
731 fields = self->fields;
732 self->fields = PyList_New(0);
733 return fields;
734}
735
736static void
737Reader_dealloc(ReaderObj *self)
738{
739 Py_XDECREF(self->dialect);
740 Py_XDECREF(self->input_iter);
741 Py_XDECREF(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000742 PyObject_GC_Del(self);
743}
744
745static int
746Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
747{
748 int err;
749#define VISIT(SLOT) \
750 if (SLOT) { \
751 err = visit((PyObject *)(SLOT), arg); \
752 if (err) \
753 return err; \
754 }
755 VISIT(self->dialect);
756 VISIT(self->input_iter);
757 VISIT(self->fields);
758 return 0;
759}
760
761static int
762Reader_clear(ReaderObj *self)
763{
764 Py_XDECREF(self->dialect);
765 Py_XDECREF(self->input_iter);
766 Py_XDECREF(self->fields);
767 self->dialect = NULL;
768 self->input_iter = NULL;
769 self->fields = NULL;
770 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000771}
772
773PyDoc_STRVAR(Reader_Type_doc,
774"CSV reader\n"
775"\n"
776"Reader objects are responsible for reading and parsing tabular data\n"
777"in CSV format.\n"
778);
779
780static struct PyMethodDef Reader_methods[] = {
781 { NULL, NULL }
782};
783
784static PyTypeObject Reader_Type = {
785 PyObject_HEAD_INIT(NULL)
786 0, /*ob_size*/
787 "_csv.reader", /*tp_name*/
788 sizeof(ReaderObj), /*tp_basicsize*/
789 0, /*tp_itemsize*/
790 /* methods */
791 (destructor)Reader_dealloc, /*tp_dealloc*/
792 (printfunc)0, /*tp_print*/
793 (getattrfunc)0, /*tp_getattr*/
794 (setattrfunc)0, /*tp_setattr*/
795 (cmpfunc)0, /*tp_compare*/
796 (reprfunc)0, /*tp_repr*/
797 0, /*tp_as_number*/
798 0, /*tp_as_sequence*/
799 0, /*tp_as_mapping*/
800 (hashfunc)0, /*tp_hash*/
801 (ternaryfunc)0, /*tp_call*/
802 (reprfunc)0, /*tp_str*/
803 0, /*tp_getattro*/
804 0, /*tp_setattro*/
805 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000806 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
807 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000808 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000809 (traverseproc)Reader_traverse, /*tp_traverse*/
810 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000811 0, /*tp_richcompare*/
812 0, /*tp_weaklistoffset*/
813 (getiterfunc)Reader_getiter, /*tp_iter*/
814 (getiterfunc)Reader_iternext, /*tp_iternext*/
815 Reader_methods, /*tp_methods*/
816 Reader_memberlist, /*tp_members*/
817 0, /*tp_getset*/
818
819};
820
821static PyObject *
822csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
823{
824 PyObject * iterator, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000825 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000826
827 if (!self)
828 return NULL;
829
830 self->dialect = NULL;
831 self->input_iter = self->fields = NULL;
832
833 self->fields = NULL;
834 self->input_iter = NULL;
835 self->had_parse_error = 0;
836 self->field = NULL;
837 self->field_size = 0;
838 self->field_len = 0;
839 self->state = START_RECORD;
840
841 if (!PyArg_ParseTuple(args, "O|O", &iterator, &dialect)) {
842 Py_DECREF(self);
843 return NULL;
844 }
845 self->input_iter = PyObject_GetIter(iterator);
846 if (self->input_iter == NULL) {
847 PyErr_SetString(PyExc_TypeError,
848 "argument 1 must be an iterator");
849 Py_DECREF(self);
850 return NULL;
851 }
852 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
853 if (ctor_args == NULL) {
854 Py_DECREF(self);
855 return NULL;
856 }
857 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
858 ctor_args, keyword_args);
859 Py_DECREF(ctor_args);
860 if (self->dialect == NULL) {
861 Py_DECREF(self);
862 return NULL;
863 }
864 self->fields = PyList_New(0);
865 if (self->fields == NULL) {
866 Py_DECREF(self);
867 return NULL;
868 }
869
870 return (PyObject *)self;
871}
872
873/*
874 * WRITER
875 */
876/* ---------------------------------------------------------------- */
877static void
878join_reset(WriterObj *self)
879{
880 self->rec_len = 0;
881 self->num_fields = 0;
882}
883
884#define MEM_INCR 32768
885
886/* Calculate new record length or append field to record. Return new
887 * record length.
888 */
889static int
890join_append_data(WriterObj *self, char *field, int quote_empty,
891 int *quoted, int copy_phase)
892{
893 DialectObj *dialect = self->dialect;
894 int i, rec_len;
895
896 rec_len = self->rec_len;
897
898 /* If this is not the first field we need a field separator.
899 */
900 if (self->num_fields > 0) {
901 if (copy_phase)
902 self->rec[rec_len] = dialect->delimiter;
903 rec_len++;
904 }
905 /* Handle preceding quote.
906 */
907 switch (dialect->quoting) {
908 case QUOTE_ALL:
909 *quoted = 1;
910 if (copy_phase)
911 self->rec[rec_len] = dialect->quotechar;
912 rec_len++;
913 break;
914 case QUOTE_MINIMAL:
915 case QUOTE_NONNUMERIC:
916 /* We only know about quoted in the copy phase.
917 */
918 if (copy_phase && *quoted) {
919 self->rec[rec_len] = dialect->quotechar;
920 rec_len++;
921 }
922 break;
923 case QUOTE_NONE:
924 break;
925 }
926 /* Copy/count field data.
927 */
928 for (i = 0;; i++) {
929 char c = field[i];
930
931 if (c == '\0')
932 break;
933 /* If in doublequote mode we escape quote chars with a
934 * quote.
935 */
936 if (dialect->quoting != QUOTE_NONE &&
937 c == dialect->quotechar && dialect->doublequote) {
938 if (copy_phase)
939 self->rec[rec_len] = dialect->quotechar;
940 *quoted = 1;
941 rec_len++;
942 }
943
944 /* Some special characters need to be escaped. If we have a
945 * quote character switch to quoted field instead of escaping
946 * individual characters.
947 */
948 if (!*quoted
949 && (c == dialect->delimiter ||
950 c == dialect->escapechar ||
951 c == '\n' || c == '\r')) {
952 if (dialect->quoting != QUOTE_NONE)
953 *quoted = 1;
954 else if (dialect->escapechar) {
955 if (copy_phase)
956 self->rec[rec_len] = dialect->escapechar;
957 rec_len++;
958 }
959 else {
960 PyErr_Format(error_obj,
961 "delimiter must be quoted or escaped");
962 return -1;
963 }
964 }
965 /* Copy field character into record buffer.
966 */
967 if (copy_phase)
968 self->rec[rec_len] = c;
969 rec_len++;
970 }
971
972 /* If field is empty check if it needs to be quoted.
973 */
974 if (i == 0 && quote_empty) {
975 if (dialect->quoting == QUOTE_NONE) {
976 PyErr_Format(error_obj,
977 "single empty field record must be quoted");
978 return -1;
979 } else
980 *quoted = 1;
981 }
982
983 /* Handle final quote character on field.
984 */
985 if (*quoted) {
986 if (copy_phase)
987 self->rec[rec_len] = dialect->quotechar;
988 else
989 /* Didn't know about leading quote until we found it
990 * necessary in field data - compensate for it now.
991 */
992 rec_len++;
993 rec_len++;
994 }
995
996 return rec_len;
997}
998
999static int
1000join_check_rec_size(WriterObj *self, int rec_len)
1001{
1002 if (rec_len > self->rec_size) {
1003 if (self->rec_size == 0) {
1004 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1005 self->rec = PyMem_Malloc(self->rec_size);
1006 }
1007 else {
1008 char *old_rec = self->rec;
1009
1010 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1011 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1012 if (self->rec == NULL)
1013 PyMem_Free(old_rec);
1014 }
1015 if (self->rec == NULL) {
1016 PyErr_NoMemory();
1017 return 0;
1018 }
1019 }
1020 return 1;
1021}
1022
1023static int
1024join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1025{
1026 int rec_len;
1027
1028 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1029 if (rec_len < 0)
1030 return 0;
1031
1032 /* grow record buffer if necessary */
1033 if (!join_check_rec_size(self, rec_len))
1034 return 0;
1035
1036 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1037 self->num_fields++;
1038
1039 return 1;
1040}
1041
1042static int
1043join_append_lineterminator(WriterObj *self)
1044{
1045 int terminator_len;
1046
1047 terminator_len = PyString_Size(self->dialect->lineterminator);
1048
1049 /* grow record buffer if necessary */
1050 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1051 return 0;
1052
1053 memmove(self->rec + self->rec_len,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001054 /* should not be NULL */
Skip Montanarob4a04172003-03-20 23:29:12 +00001055 PyString_AsString(self->dialect->lineterminator),
1056 terminator_len);
1057 self->rec_len += terminator_len;
1058
1059 return 1;
1060}
1061
1062PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001063"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001064"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001065"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001066"elements will be converted to string.");
1067
1068static PyObject *
1069csv_writerow(WriterObj *self, PyObject *seq)
1070{
1071 DialectObj *dialect = self->dialect;
1072 int len, i;
1073
1074 if (!PySequence_Check(seq))
1075 return PyErr_Format(error_obj, "sequence expected");
1076
1077 len = PySequence_Length(seq);
1078 if (len < 0)
1079 return NULL;
1080
1081 /* Join all fields in internal buffer.
1082 */
1083 join_reset(self);
1084 for (i = 0; i < len; i++) {
1085 PyObject *field;
1086 int append_ok;
1087 int quoted;
1088
1089 field = PySequence_GetItem(seq, i);
1090 if (field == NULL)
1091 return NULL;
1092
1093 quoted = 0;
1094 if (dialect->quoting == QUOTE_NONNUMERIC) {
1095 PyObject *num;
1096
1097 num = PyNumber_Float(field);
1098 if (num == NULL) {
1099 quoted = 1;
1100 PyErr_Clear();
1101 }
1102 else {
1103 Py_DECREF(num);
1104 }
1105 }
1106
1107 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001108 append_ok = join_append(self,
1109 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001110 &quoted, len == 1);
1111 Py_DECREF(field);
1112 }
1113 else if (field == Py_None) {
1114 append_ok = join_append(self, "", &quoted, len == 1);
1115 Py_DECREF(field);
1116 }
1117 else {
1118 PyObject *str;
1119
1120 str = PyObject_Str(field);
1121 Py_DECREF(field);
1122 if (str == NULL)
1123 return NULL;
1124
Skip Montanaro577c7a72003-04-12 19:17:14 +00001125 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001126 &quoted, len == 1);
1127 Py_DECREF(str);
1128 }
1129 if (!append_ok)
1130 return NULL;
1131 }
1132
1133 /* Add line terminator.
1134 */
1135 if (!join_append_lineterminator(self))
1136 return 0;
1137
1138 return PyObject_CallFunction(self->writeline,
1139 "(s#)", self->rec, self->rec_len);
1140}
1141
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001142PyDoc_STRVAR(csv_writerows_doc,
1143"writerows(sequence of sequences)\n"
1144"\n"
1145"Construct and write a series of sequences to a csv file. Non-string\n"
1146"elements will be converted to string.");
1147
Skip Montanarob4a04172003-03-20 23:29:12 +00001148static PyObject *
1149csv_writerows(WriterObj *self, PyObject *seqseq)
1150{
1151 PyObject *row_iter, *row_obj, *result;
1152
1153 row_iter = PyObject_GetIter(seqseq);
1154 if (row_iter == NULL) {
1155 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001156 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001157 return NULL;
1158 }
1159 while ((row_obj = PyIter_Next(row_iter))) {
1160 result = csv_writerow(self, row_obj);
1161 Py_DECREF(row_obj);
1162 if (!result) {
1163 Py_DECREF(row_iter);
1164 return NULL;
1165 }
1166 else
1167 Py_DECREF(result);
1168 }
1169 Py_DECREF(row_iter);
1170 if (PyErr_Occurred())
1171 return NULL;
1172 Py_INCREF(Py_None);
1173 return Py_None;
1174}
1175
1176static struct PyMethodDef Writer_methods[] = {
1177 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001178 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001179 { NULL, NULL }
1180};
1181
1182#define W_OFF(x) offsetof(WriterObj, x)
1183
1184static struct PyMemberDef Writer_memberlist[] = {
1185 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1186 { NULL }
1187};
1188
1189static void
1190Writer_dealloc(WriterObj *self)
1191{
1192 Py_XDECREF(self->dialect);
1193 Py_XDECREF(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001194 PyObject_GC_Del(self);
1195}
1196
1197static int
1198Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1199{
1200 int err;
1201#define VISIT(SLOT) \
1202 if (SLOT) { \
1203 err = visit((PyObject *)(SLOT), arg); \
1204 if (err) \
1205 return err; \
1206 }
1207 VISIT(self->dialect);
1208 VISIT(self->writeline);
1209 return 0;
1210}
1211
1212static int
1213Writer_clear(WriterObj *self)
1214{
1215 Py_XDECREF(self->dialect);
1216 Py_XDECREF(self->writeline);
1217 self->dialect = NULL;
1218 self->writeline = NULL;
1219 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001220}
1221
1222PyDoc_STRVAR(Writer_Type_doc,
1223"CSV writer\n"
1224"\n"
1225"Writer objects are responsible for generating tabular data\n"
1226"in CSV format from sequence input.\n"
1227);
1228
1229static PyTypeObject Writer_Type = {
1230 PyObject_HEAD_INIT(NULL)
1231 0, /*ob_size*/
1232 "_csv.writer", /*tp_name*/
1233 sizeof(WriterObj), /*tp_basicsize*/
1234 0, /*tp_itemsize*/
1235 /* methods */
1236 (destructor)Writer_dealloc, /*tp_dealloc*/
1237 (printfunc)0, /*tp_print*/
1238 (getattrfunc)0, /*tp_getattr*/
1239 (setattrfunc)0, /*tp_setattr*/
1240 (cmpfunc)0, /*tp_compare*/
1241 (reprfunc)0, /*tp_repr*/
1242 0, /*tp_as_number*/
1243 0, /*tp_as_sequence*/
1244 0, /*tp_as_mapping*/
1245 (hashfunc)0, /*tp_hash*/
1246 (ternaryfunc)0, /*tp_call*/
1247 (reprfunc)0, /*tp_str*/
1248 0, /*tp_getattro*/
1249 0, /*tp_setattro*/
1250 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001251 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1252 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001253 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001254 (traverseproc)Writer_traverse, /*tp_traverse*/
1255 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001256 0, /*tp_richcompare*/
1257 0, /*tp_weaklistoffset*/
1258 (getiterfunc)0, /*tp_iter*/
1259 (getiterfunc)0, /*tp_iternext*/
1260 Writer_methods, /*tp_methods*/
1261 Writer_memberlist, /*tp_members*/
1262 0, /*tp_getset*/
1263};
1264
1265static PyObject *
1266csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1267{
1268 PyObject * output_file, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001269 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001270
1271 if (!self)
1272 return NULL;
1273
1274 self->dialect = NULL;
1275 self->writeline = NULL;
1276
1277 self->rec = NULL;
1278 self->rec_size = 0;
1279 self->rec_len = 0;
1280 self->num_fields = 0;
1281
1282 if (!PyArg_ParseTuple(args, "O|O", &output_file, &dialect)) {
1283 Py_DECREF(self);
1284 return NULL;
1285 }
1286 self->writeline = PyObject_GetAttrString(output_file, "write");
1287 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1288 PyErr_SetString(PyExc_TypeError,
1289 "argument 1 must be an instance with a write method");
1290 Py_DECREF(self);
1291 return NULL;
1292 }
1293 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1294 if (ctor_args == NULL) {
1295 Py_DECREF(self);
1296 return NULL;
1297 }
1298 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1299 ctor_args, keyword_args);
1300 Py_DECREF(ctor_args);
1301 if (self->dialect == NULL) {
1302 Py_DECREF(self);
1303 return NULL;
1304 }
1305 return (PyObject *)self;
1306}
1307
1308/*
1309 * DIALECT REGISTRY
1310 */
1311static PyObject *
1312csv_list_dialects(PyObject *module, PyObject *args)
1313{
1314 return PyDict_Keys(dialects);
1315}
1316
1317static PyObject *
1318csv_register_dialect(PyObject *module, PyObject *args)
1319{
1320 PyObject *name_obj, *dialect_obj;
1321
1322 if (!PyArg_ParseTuple(args, "OO", &name_obj, &dialect_obj))
1323 return NULL;
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001324 if (!PyString_Check(name_obj)
1325#ifdef Py_USING_UNICODE
1326&& !PyUnicode_Check(name_obj)
1327#endif
1328) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001329 PyErr_SetString(PyExc_TypeError,
1330 "dialect name must be a string or unicode");
1331 return NULL;
1332 }
1333 Py_INCREF(dialect_obj);
1334 /* A class rather than an instance? Instanciate */
1335 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1336 PyObject * new_dia;
1337 new_dia = PyObject_CallFunction(dialect_obj, "");
1338 Py_DECREF(dialect_obj);
1339 if (new_dia == NULL)
1340 return NULL;
1341 dialect_obj = new_dia;
1342 }
1343 /* Make sure we finally have an instance */
1344 if (!PyInstance_Check(dialect_obj)) {
1345 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1346 Py_DECREF(dialect_obj);
1347 return NULL;
1348 }
1349 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1350 Py_DECREF(dialect_obj);
1351 return NULL;
1352 }
1353 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1354 Py_DECREF(dialect_obj);
1355 return NULL;
1356 }
1357 Py_DECREF(dialect_obj);
1358 Py_INCREF(Py_None);
1359 return Py_None;
1360}
1361
1362static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001363csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001364{
Skip Montanarob4a04172003-03-20 23:29:12 +00001365 if (PyDict_DelItem(dialects, name_obj) < 0)
1366 return PyErr_Format(error_obj, "unknown dialect");
1367 Py_INCREF(Py_None);
1368 return Py_None;
1369}
1370
1371static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001372csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001373{
Skip Montanarob4a04172003-03-20 23:29:12 +00001374 return get_dialect_from_registry(name_obj);
1375}
1376
1377/*
1378 * MODULE
1379 */
1380
1381PyDoc_STRVAR(csv_module_doc,
1382"CSV parsing and writing.\n"
1383"\n"
1384"This module provides classes that assist in the reading and writing\n"
1385"of Comma Separated Value (CSV) files, and implements the interface\n"
1386"described by PEP 305. Although many CSV files are simple to parse,\n"
1387"the format is not formally defined by a stable specification and\n"
1388"is subtle enough that parsing lines of a CSV file with something\n"
1389"like line.split(\",\") is bound to fail. The module supports three\n"
1390"basic APIs: reading, writing, and registration of dialects.\n"
1391"\n"
1392"\n"
1393"DIALECT REGISTRATION:\n"
1394"\n"
1395"Readers and writers support a dialect argument, which is a convenient\n"
1396"handle on a group of settings. When the dialect argument is a string,\n"
1397"it identifies one of the dialects previously registered with the module.\n"
1398"If it is a class or instance, the attributes of the argument are used as\n"
1399"the settings for the reader or writer:\n"
1400"\n"
1401" class excel:\n"
1402" delimiter = ','\n"
1403" quotechar = '\"'\n"
1404" escapechar = None\n"
1405" doublequote = True\n"
1406" skipinitialspace = False\n"
1407" lineterminator = '\r\n'\n"
1408" quoting = QUOTE_MINIMAL\n"
1409"\n"
1410"SETTINGS:\n"
1411"\n"
1412" * quotechar - specifies a one-character string to use as the \n"
1413" quoting character. It defaults to '\"'.\n"
1414" * delimiter - specifies a one-character string to use as the \n"
1415" field separator. It defaults to ','.\n"
1416" * skipinitialspace - specifies how to interpret whitespace which\n"
1417" immediately follows a delimiter. It defaults to False, which\n"
1418" means that whitespace immediately following a delimiter is part\n"
1419" of the following field.\n"
1420" * lineterminator - specifies the character sequence which should \n"
1421" terminate rows.\n"
1422" * quoting - controls when quotes should be generated by the writer.\n"
1423" It can take on any of the following module constants:\n"
1424"\n"
1425" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1426" field contains either the quotechar or the delimiter\n"
1427" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1428" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1429" fields which contain characters other than [+-0-9.].\n"
1430" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1431" * escapechar - specifies a one-character string used to escape \n"
1432" the delimiter when quoting is set to QUOTE_NONE.\n"
1433" * doublequote - controls the handling of quotes inside fields. When\n"
1434" True, two consecutive quotes are interpreted as one during read,\n"
1435" and when writing, each quote character embedded in the data is\n"
1436" written as two quotes\n");
1437
/* Docstring for the module-level reader() function. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
1452
1453PyDoc_STRVAR(csv_writer_doc,
1454" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1455" [optional keyword args])\n"
1456" for row in csv_writer:\n"
1457" csv_writer.writerow(row)\n"
1458"\n"
1459" [or]\n"
1460"\n"
1461" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1462" [optional keyword args])\n"
1463" csv_writer.writerows(rows)\n"
1464"\n"
1465"The \"fileobj\" argument can be any object that supports the file API.\n");
1466
1467PyDoc_STRVAR(csv_list_dialects_doc,
1468"Return a list of all know dialect names.\n"
1469" names = csv.list_dialects()");
1470
/* Docstring for get_dialect(). */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1474
1475PyDoc_STRVAR(csv_register_dialect_doc,
1476"Create a mapping from a string name to a dialect class.\n"
1477" dialect = csv.register_dialect(name, dialect)");
1478
/* Docstring for unregister_dialect(). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1482
/* Module-level function table.  reader() and writer() accept positional
 * and keyword arguments; list_dialects() takes no arguments;
 * register_dialect() takes a positional argument tuple;
 * unregister_dialect() and get_dialect() take exactly one argument
 * (METH_O).  The all-NULL entry terminates the table. */
static struct PyMethodDef csv_methods[] = {
        { "reader", (PyCFunction)csv_reader,
                METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
        { "writer", (PyCFunction)csv_writer,
                METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
        { "list_dialects", (PyCFunction)csv_list_dialects,
                METH_NOARGS, csv_list_dialects_doc},
        { "register_dialect", (PyCFunction)csv_register_dialect,
                METH_VARARGS, csv_register_dialect_doc},
        { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
                METH_O, csv_unregister_dialect_doc},
        { "get_dialect", (PyCFunction)csv_get_dialect,
                METH_O, csv_get_dialect_doc},
        { NULL, NULL }
};
1498
1499PyMODINIT_FUNC
1500init_csv(void)
1501{
1502 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001503 StyleDesc *style;
1504
1505 if (PyType_Ready(&Dialect_Type) < 0)
1506 return;
1507
1508 if (PyType_Ready(&Reader_Type) < 0)
1509 return;
1510
1511 if (PyType_Ready(&Writer_Type) < 0)
1512 return;
1513
1514 /* Create the module and add the functions */
1515 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1516 if (module == NULL)
1517 return;
1518
1519 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001520 if (PyModule_AddStringConstant(module, "__version__",
1521 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001522 return;
1523
1524 /* Add _dialects dictionary */
1525 dialects = PyDict_New();
1526 if (dialects == NULL)
1527 return;
1528 if (PyModule_AddObject(module, "_dialects", dialects))
1529 return;
1530
1531 /* Add quote styles into dictionary */
1532 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001533 if (PyModule_AddIntConstant(module, style->name,
1534 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001535 return;
1536 }
1537
1538 /* Add the Dialect type */
1539 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1540 return;
1541
1542 /* Add the CSV exception object to the module. */
1543 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1544 if (error_obj == NULL)
1545 return;
1546 PyModule_AddObject(module, "Error", error_obj);
1547}