blob: 68c6a81e4df7c375ae39b2beb968405ac3f7509c [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
15#include "Python.h"
16#include "structmember.h"
17
Skip Montanaroa16b21f2003-03-23 14:32:54 +000018
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers for headers that do not supply them.  PyDoc_STR()
 * expands to the text itself when WITH_DOC_STRINGS is defined and to an
 * empty string otherwise; PyDoc_STRVAR() declares a static char array
 * initialised from it. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

/* Declaration prefix for the module init function when the headers do
 * not supply one. */
#ifndef PyMODINIT_FUNC
# ifdef __cplusplus
#  define PyMODINIT_FUNC extern "C" void
# else
#  define PyMODINIT_FUNC void
# endif
#endif
/* end 2.2 compatibility macros */
39
40static PyObject *error_obj; /* CSV exception */
41static PyObject *dialects; /* Dialect registry */
42
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD
} ParserState;
47
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Every known quoting policy.  The entry whose name is a null pointer
 * terminates the table. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, 0 }
};
64
65typedef struct {
66 PyObject_HEAD
67
68 int doublequote; /* is " represented by ""? */
69 char delimiter; /* field separator */
70 char quotechar; /* quote character */
71 char escapechar; /* escape character */
72 int skipinitialspace; /* ignore spaces following delimiter? */
73 PyObject *lineterminator; /* string to write between records */
74 QuoteStyle quoting; /* style of quoting to write */
75
76 int strict; /* raise exception on bad CSV */
77} DialectObj;
78
79staticforward PyTypeObject Dialect_Type;
80
81typedef struct {
82 PyObject_HEAD
83
84 PyObject *input_iter; /* iterate over this for input lines */
85
86 DialectObj *dialect; /* parsing dialect */
87
88 PyObject *fields; /* field list for current record */
89 ParserState state; /* current CSV parse state */
90 char *field; /* build current field in here */
91 int field_size; /* size of allocated buffer */
92 int field_len; /* length of current field */
93 int had_parse_error; /* did we have a parse error? */
94} ReaderObj;
95
96staticforward PyTypeObject Reader_Type;
97
98#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
99
100typedef struct {
101 PyObject_HEAD
102
103 PyObject *writeline; /* write output lines to this file */
104
105 DialectObj *dialect; /* parsing dialect */
106
107 char *rec; /* buffer for parser.join */
108 int rec_size; /* size of allocated record */
109 int rec_len; /* length of record */
110 int num_fields; /* number of fields in record */
111} WriterObj;
112
113staticforward PyTypeObject Writer_Type;
114
115/*
116 * DIALECT class
117 */
118
119static PyObject *
120get_dialect_from_registry(PyObject * name_obj)
121{
122 PyObject *dialect_obj;
123
124 dialect_obj = PyDict_GetItem(dialects, name_obj);
125 if (dialect_obj == NULL)
126 return PyErr_Format(error_obj, "unknown dialect");
127 Py_INCREF(dialect_obj);
128 return dialect_obj;
129}
130
131static int
132check_delattr(PyObject *v)
133{
134 if (v == NULL) {
135 PyErr_SetString(PyExc_TypeError,
136 "Cannot delete attribute");
137 return -1;
138 }
139 return 0;
140}
141
142static PyObject *
143get_string(PyObject *str)
144{
145 Py_XINCREF(str);
146 return str;
147}
148
149static int
150set_string(PyObject **str, PyObject *v)
151{
152 if (check_delattr(v) < 0)
153 return -1;
154 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
155 PyErr_BadArgument();
156 return -1;
157 }
158 Py_XDECREF(*str);
159 Py_INCREF(v);
160 *str = v;
161 return 0;
162}
163
164static PyObject *
165get_nullchar_as_None(char c)
166{
167 if (c == '\0') {
168 Py_INCREF(Py_None);
169 return Py_None;
170 }
171 else
172 return PyString_FromStringAndSize((char*)&c, 1);
173}
174
175static int
176set_None_as_nullchar(char * addr, PyObject *v)
177{
178 if (check_delattr(v) < 0)
179 return -1;
180 if (v == Py_None)
181 *addr = '\0';
182 else if (!PyString_Check(v) || PyString_Size(v) != 1) {
183 PyErr_BadArgument();
184 return -1;
185 }
186 else
187 *addr = PyString_AsString(v)[0];
188 return 0;
189}
190
191static PyObject *
192Dialect_get_lineterminator(DialectObj *self)
193{
194 return get_string(self->lineterminator);
195}
196
197static int
198Dialect_set_lineterminator(DialectObj *self, PyObject *value)
199{
200 return set_string(&self->lineterminator, value);
201}
202
203static PyObject *
204Dialect_get_escapechar(DialectObj *self)
205{
206 return get_nullchar_as_None(self->escapechar);
207}
208
209static int
210Dialect_set_escapechar(DialectObj *self, PyObject *value)
211{
212 return set_None_as_nullchar(&self->escapechar, value);
213}
214
215static PyObject *
216Dialect_get_quoting(DialectObj *self)
217{
218 return PyInt_FromLong(self->quoting);
219}
220
221static int
222Dialect_set_quoting(DialectObj *self, PyObject *v)
223{
224 int quoting;
225 StyleDesc *qs = quote_styles;
226
227 if (check_delattr(v) < 0)
228 return -1;
229 if (!PyInt_Check(v)) {
230 PyErr_BadArgument();
231 return -1;
232 }
233 quoting = PyInt_AsLong(v);
234 for (qs = quote_styles; qs->name; qs++) {
235 if (qs->style == quoting) {
236 self->quoting = quoting;
237 return 0;
238 }
239 }
240 PyErr_BadArgument();
241 return -1;
242}
243
244static struct PyMethodDef Dialect_methods[] = {
245 { NULL, NULL }
246};
247
248#define D_OFF(x) offsetof(DialectObj, x)
249
250static struct PyMemberDef Dialect_memberlist[] = {
251 { "quotechar", T_CHAR, D_OFF(quotechar) },
252 { "delimiter", T_CHAR, D_OFF(delimiter) },
253 { "skipinitialspace", T_INT, D_OFF(skipinitialspace) },
254 { "doublequote", T_INT, D_OFF(doublequote) },
255 { "strict", T_INT, D_OFF(strict) },
256 { NULL }
257};
258
259static PyGetSetDef Dialect_getsetlist[] = {
260 { "escapechar", (getter)Dialect_get_escapechar,
261 (setter)Dialect_set_escapechar },
262 { "lineterminator", (getter)Dialect_get_lineterminator,
263 (setter)Dialect_set_lineterminator },
264 { "quoting", (getter)Dialect_get_quoting,
265 (setter)Dialect_set_quoting },
266 {NULL},
267};
268
269static void
270Dialect_dealloc(DialectObj *self)
271{
272 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000273 self->ob_type->tp_free((PyObject *)self);
274}
275
276static int
277dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
278{
279 PyObject *dialect = NULL, *name_obj, *value_obj;
280
281 self->quotechar = '"';
282 self->delimiter = ',';
283 self->escapechar = '\0';
284 self->skipinitialspace = 0;
285 Py_XDECREF(self->lineterminator);
286 self->lineterminator = PyString_FromString("\r\n");
287 if (self->lineterminator == NULL)
288 return -1;
289 self->quoting = QUOTE_MINIMAL;
290 self->doublequote = 1;
291 self->strict = 0;
292
293 if (!PyArg_ParseTuple(args, "|O", &dialect))
294 return -1;
295 Py_XINCREF(dialect);
296 if (kwargs != NULL) {
297 PyObject * key = PyString_FromString("dialect");
298 PyObject * d;
299
300 d = PyDict_GetItem(kwargs, key);
301 if (d) {
302 Py_INCREF(d);
303 Py_XDECREF(dialect);
304 PyDict_DelItem(kwargs, key);
305 dialect = d;
306 }
307 Py_DECREF(key);
308 }
309 if (dialect != NULL) {
310 int i;
311 PyObject * dir_list;
312
313 /* If dialect is a string, look it up in our registry */
314 if (PyString_Check(dialect) || PyUnicode_Check(dialect)) {
315 PyObject * new_dia;
316 new_dia = get_dialect_from_registry(dialect);
317 Py_DECREF(dialect);
318 if (new_dia == NULL)
319 return -1;
320 dialect = new_dia;
321 }
322 /* A class rather than an instance? Instanciate */
323 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
324 PyObject * new_dia;
325 new_dia = PyObject_CallFunction(dialect, "");
326 Py_DECREF(dialect);
327 if (new_dia == NULL)
328 return -1;
329 dialect = new_dia;
330 }
331 /* Make sure we finally have an instance */
332 if (!PyInstance_Check(dialect) ||
333 (dir_list = PyObject_Dir(dialect)) == NULL) {
334 PyErr_SetString(PyExc_TypeError,
335 "dialect must be an instance");
336 Py_DECREF(dialect);
337 return -1;
338 }
339 /* And extract the attributes */
340 for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
341 name_obj = PyList_GET_ITEM(dir_list, i);
342 if (PyString_AsString(name_obj)[0] == '_')
343 continue;
344 value_obj = PyObject_GetAttr(dialect, name_obj);
345 if (value_obj) {
346 if (PyObject_SetAttr((PyObject *)self,
347 name_obj, value_obj)) {
348 Py_DECREF(dir_list);
349 return -1;
350 }
351 Py_DECREF(value_obj);
352 }
353 }
354 Py_DECREF(dir_list);
355 Py_DECREF(dialect);
356 }
357 if (kwargs != NULL) {
358 int pos = 0;
359
360 while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
361 if (PyObject_SetAttr((PyObject *)self,
362 name_obj, value_obj))
363 return -1;
364 }
365 }
366 return 0;
367}
368
369static PyObject *
370dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
371{
372 DialectObj *self;
373 self = (DialectObj *)type->tp_alloc(type, 0);
374 if (self != NULL) {
375 self->lineterminator = NULL;
376 }
377 return (PyObject *)self;
378}
379
380
381PyDoc_STRVAR(Dialect_Type_doc,
382"CSV dialect\n"
383"\n"
384"The Dialect type records CSV parsing and generation options.\n");
385
386static PyTypeObject Dialect_Type = {
387 PyObject_HEAD_INIT(NULL)
388 0, /* ob_size */
389 "_csv.Dialect", /* tp_name */
390 sizeof(DialectObj), /* tp_basicsize */
391 0, /* tp_itemsize */
392 /* methods */
393 (destructor)Dialect_dealloc, /* tp_dealloc */
394 (printfunc)0, /* tp_print */
395 (getattrfunc)0, /* tp_getattr */
396 (setattrfunc)0, /* tp_setattr */
397 (cmpfunc)0, /* tp_compare */
398 (reprfunc)0, /* tp_repr */
399 0, /* tp_as_number */
400 0, /* tp_as_sequence */
401 0, /* tp_as_mapping */
402 (hashfunc)0, /* tp_hash */
403 (ternaryfunc)0, /* tp_call */
404 (reprfunc)0, /* tp_str */
405 0, /* tp_getattro */
406 0, /* tp_setattro */
407 0, /* tp_as_buffer */
408 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
409 Dialect_Type_doc, /* tp_doc */
410 0, /* tp_traverse */
411 0, /* tp_clear */
412 0, /* tp_richcompare */
413 0, /* tp_weaklistoffset */
414 0, /* tp_iter */
415 0, /* tp_iternext */
416 Dialect_methods, /* tp_methods */
417 Dialect_memberlist, /* tp_members */
418 Dialect_getsetlist, /* tp_getset */
419 0, /* tp_base */
420 0, /* tp_dict */
421 0, /* tp_descr_get */
422 0, /* tp_descr_set */
423 0, /* tp_dictoffset */
424 (initproc)dialect_init, /* tp_init */
425 PyType_GenericAlloc, /* tp_alloc */
426 dialect_new, /* tp_new */
427 0, /* tp_free */
428};
429
430static void
431parse_save_field(ReaderObj *self)
432{
433 PyObject *field;
434
435 field = PyString_FromStringAndSize(self->field, self->field_len);
436 if (field != NULL) {
437 PyList_Append(self->fields, field);
438 Py_XDECREF(field);
439 }
440 self->field_len = 0;
441}
442
443static int
444parse_grow_buff(ReaderObj *self)
445{
446 if (self->field_size == 0) {
447 self->field_size = 4096;
448 self->field = PyMem_Malloc(self->field_size);
449 }
450 else {
451 self->field_size *= 2;
452 self->field = PyMem_Realloc(self->field, self->field_size);
453 }
454 if (self->field == NULL) {
455 PyErr_NoMemory();
456 return 0;
457 }
458 return 1;
459}
460
461static void
462parse_add_char(ReaderObj *self, char c)
463{
464 if (self->field_len == self->field_size && !parse_grow_buff(self))
465 return;
466 self->field[self->field_len++] = c;
467}
468
469static void
470parse_process_char(ReaderObj *self, char c)
471{
472 DialectObj *dialect = self->dialect;
473
474 switch (self->state) {
475 case START_RECORD:
476 /* start of record */
477 if (c == '\n')
478 /* empty line - return [] */
479 break;
480 /* normal character - handle as START_FIELD */
481 self->state = START_FIELD;
482 /* fallthru */
483 case START_FIELD:
484 /* expecting field */
485 if (c == '\n') {
486 /* save empty field - return [fields] */
487 parse_save_field(self);
488 self->state = START_RECORD;
489 }
490 else if (c == dialect->quotechar) {
491 /* start quoted field */
492 self->state = IN_QUOTED_FIELD;
493 }
494 else if (c == dialect->escapechar) {
495 /* possible escaped character */
496 self->state = ESCAPED_CHAR;
497 }
498 else if (c == ' ' && dialect->skipinitialspace)
499 /* ignore space at start of field */
500 ;
501 else if (c == dialect->delimiter) {
502 /* save empty field */
503 parse_save_field(self);
504 }
505 else {
506 /* begin new unquoted field */
507 parse_add_char(self, c);
508 self->state = IN_FIELD;
509 }
510 break;
511
512 case ESCAPED_CHAR:
513 if (c != dialect->escapechar &&
514 c != dialect->delimiter &&
515 c != dialect->quotechar)
516 parse_add_char(self, dialect->escapechar);
517 parse_add_char(self, c);
518 self->state = IN_FIELD;
519 break;
520
521 case IN_FIELD:
522 /* in unquoted field */
523 if (c == '\n') {
524 /* end of line - return [fields] */
525 parse_save_field(self);
526 self->state = START_RECORD;
527 }
528 else if (c == dialect->escapechar) {
529 /* possible escaped character */
530 self->state = ESCAPED_CHAR;
531 }
532 else if (c == dialect->delimiter) {
533 /* save field - wait for new field */
534 parse_save_field(self);
535 self->state = START_FIELD;
536 }
537 else {
538 /* normal character - save in field */
539 parse_add_char(self, c);
540 }
541 break;
542
543 case IN_QUOTED_FIELD:
544 /* in quoted field */
545 if (c == '\n') {
546 /* end of line - save '\n' in field */
547 parse_add_char(self, '\n');
548 }
549 else if (c == dialect->escapechar) {
550 /* Possible escape character */
551 self->state = ESCAPE_IN_QUOTED_FIELD;
552 }
553 else if (c == dialect->quotechar) {
554 if (dialect->doublequote) {
555 /* doublequote; " represented by "" */
556 self->state = QUOTE_IN_QUOTED_FIELD;
557 }
558 else {
559 /* end of quote part of field */
560 self->state = IN_FIELD;
561 }
562 }
563 else {
564 /* normal character - save in field */
565 parse_add_char(self, c);
566 }
567 break;
568
569 case ESCAPE_IN_QUOTED_FIELD:
570 if (c != dialect->escapechar &&
571 c != dialect->delimiter &&
572 c != dialect->quotechar)
573 parse_add_char(self, dialect->escapechar);
574 parse_add_char(self, c);
575 self->state = IN_QUOTED_FIELD;
576 break;
577
578 case QUOTE_IN_QUOTED_FIELD:
579 /* doublequote - seen a quote in an quoted field */
580 if (dialect->quoting != QUOTE_NONE &&
581 c == dialect->quotechar) {
582 /* save "" as " */
583 parse_add_char(self, c);
584 self->state = IN_QUOTED_FIELD;
585 }
586 else if (c == dialect->delimiter) {
587 /* save field - wait for new field */
588 parse_save_field(self);
589 self->state = START_FIELD;
590 }
591 else if (c == '\n') {
592 /* end of line - return [fields] */
593 parse_save_field(self);
594 self->state = START_RECORD;
595 }
596 else if (!dialect->strict) {
597 parse_add_char(self, c);
598 self->state = IN_FIELD;
599 }
600 else {
601 /* illegal */
602 self->had_parse_error = 1;
603 PyErr_Format(error_obj, "%c expected after %c",
604 dialect->delimiter,
605 dialect->quotechar);
606 }
607 break;
608
609 }
610}
611
612/*
613 * READER
614 */
615#define R_OFF(x) offsetof(ReaderObj, x)
616
617static struct PyMemberDef Reader_memberlist[] = {
618 { "dialect", T_OBJECT, R_OFF(dialect), RO },
619 { NULL }
620};
621
622static PyObject *
623Reader_getiter(ReaderObj *self)
624{
625 Py_INCREF(self);
626 return (PyObject *)self;
627}
628
629static PyObject *
630Reader_iternext(ReaderObj *self)
631{
632 PyObject *lineobj;
633 PyObject *fields;
634 char *line;
635
636 do {
637 lineobj = PyIter_Next(self->input_iter);
638 if (lineobj == NULL) {
639 /* End of input OR exception */
640 if (!PyErr_Occurred() && self->field_len != 0)
641 return PyErr_Format(error_obj,
642 "newline inside string");
643 return NULL;
644 }
645
646 if (self->had_parse_error) {
647 if (self->fields) {
648 Py_XDECREF(self->fields);
649 }
650 self->fields = PyList_New(0);
651 self->field_len = 0;
652 self->state = START_RECORD;
653 self->had_parse_error = 0;
654 }
655 line = PyString_AsString(lineobj);
656
657 if (line == NULL) {
658 Py_DECREF(lineobj);
659 return NULL;
660 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000661 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000662 self->had_parse_error = 1;
663 Py_DECREF(lineobj);
664 return PyErr_Format(error_obj,
665 "string with NUL bytes");
666 }
667
668 /* Process line of text - send '\n' to processing code to
669 represent end of line. End of line which is not at end of
670 string is an error. */
671 while (*line) {
672 char c;
673
674 c = *line++;
675 if (c == '\r') {
676 c = *line++;
677 if (c == '\0')
678 /* macintosh end of line */
679 break;
680 if (c == '\n') {
681 c = *line++;
682 if (c == '\0')
683 /* DOS end of line */
684 break;
685 }
686 self->had_parse_error = 1;
687 Py_DECREF(lineobj);
688 return PyErr_Format(error_obj,
689 "newline inside string");
690 }
691 if (c == '\n') {
692 c = *line++;
693 if (c == '\0')
694 /* unix end of line */
695 break;
696 self->had_parse_error = 1;
697 Py_DECREF(lineobj);
698 return PyErr_Format(error_obj,
699 "newline inside string");
700 }
701 parse_process_char(self, c);
702 if (PyErr_Occurred()) {
703 Py_DECREF(lineobj);
704 return NULL;
705 }
706 }
707 parse_process_char(self, '\n');
708 Py_DECREF(lineobj);
709 } while (self->state != START_RECORD);
710
711 fields = self->fields;
712 self->fields = PyList_New(0);
713 return fields;
714}
715
716static void
717Reader_dealloc(ReaderObj *self)
718{
719 Py_XDECREF(self->dialect);
720 Py_XDECREF(self->input_iter);
721 Py_XDECREF(self->fields);
722 PyMem_DEL(self);
723}
724
725PyDoc_STRVAR(Reader_Type_doc,
726"CSV reader\n"
727"\n"
728"Reader objects are responsible for reading and parsing tabular data\n"
729"in CSV format.\n"
730);
731
732static struct PyMethodDef Reader_methods[] = {
733 { NULL, NULL }
734};
735
736static PyTypeObject Reader_Type = {
737 PyObject_HEAD_INIT(NULL)
738 0, /*ob_size*/
739 "_csv.reader", /*tp_name*/
740 sizeof(ReaderObj), /*tp_basicsize*/
741 0, /*tp_itemsize*/
742 /* methods */
743 (destructor)Reader_dealloc, /*tp_dealloc*/
744 (printfunc)0, /*tp_print*/
745 (getattrfunc)0, /*tp_getattr*/
746 (setattrfunc)0, /*tp_setattr*/
747 (cmpfunc)0, /*tp_compare*/
748 (reprfunc)0, /*tp_repr*/
749 0, /*tp_as_number*/
750 0, /*tp_as_sequence*/
751 0, /*tp_as_mapping*/
752 (hashfunc)0, /*tp_hash*/
753 (ternaryfunc)0, /*tp_call*/
754 (reprfunc)0, /*tp_str*/
755 0, /*tp_getattro*/
756 0, /*tp_setattro*/
757 0, /*tp_as_buffer*/
758 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
759 Reader_Type_doc, /*tp_doc*/
760 0, /*tp_traverse*/
761 0, /*tp_clear*/
762 0, /*tp_richcompare*/
763 0, /*tp_weaklistoffset*/
764 (getiterfunc)Reader_getiter, /*tp_iter*/
765 (getiterfunc)Reader_iternext, /*tp_iternext*/
766 Reader_methods, /*tp_methods*/
767 Reader_memberlist, /*tp_members*/
768 0, /*tp_getset*/
769
770};
771
772static PyObject *
773csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
774{
775 PyObject * iterator, * dialect = NULL, *ctor_args;
776 ReaderObj * self = PyObject_NEW(ReaderObj, &Reader_Type);
777
778 if (!self)
779 return NULL;
780
781 self->dialect = NULL;
782 self->input_iter = self->fields = NULL;
783
784 self->fields = NULL;
785 self->input_iter = NULL;
786 self->had_parse_error = 0;
787 self->field = NULL;
788 self->field_size = 0;
789 self->field_len = 0;
790 self->state = START_RECORD;
791
792 if (!PyArg_ParseTuple(args, "O|O", &iterator, &dialect)) {
793 Py_DECREF(self);
794 return NULL;
795 }
796 self->input_iter = PyObject_GetIter(iterator);
797 if (self->input_iter == NULL) {
798 PyErr_SetString(PyExc_TypeError,
799 "argument 1 must be an iterator");
800 Py_DECREF(self);
801 return NULL;
802 }
803 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
804 if (ctor_args == NULL) {
805 Py_DECREF(self);
806 return NULL;
807 }
808 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
809 ctor_args, keyword_args);
810 Py_DECREF(ctor_args);
811 if (self->dialect == NULL) {
812 Py_DECREF(self);
813 return NULL;
814 }
815 self->fields = PyList_New(0);
816 if (self->fields == NULL) {
817 Py_DECREF(self);
818 return NULL;
819 }
820
821 return (PyObject *)self;
822}
823
824/*
825 * WRITER
826 */
827/* ---------------------------------------------------------------- */
828static void
829join_reset(WriterObj *self)
830{
831 self->rec_len = 0;
832 self->num_fields = 0;
833}
834
/* Granularity, in bytes, to which record buffer sizes are rounded up. */
#define MEM_INCR        (32 * 1024)
836
837/* Calculate new record length or append field to record. Return new
838 * record length.
839 */
840static int
841join_append_data(WriterObj *self, char *field, int quote_empty,
842 int *quoted, int copy_phase)
843{
844 DialectObj *dialect = self->dialect;
845 int i, rec_len;
846
847 rec_len = self->rec_len;
848
849 /* If this is not the first field we need a field separator.
850 */
851 if (self->num_fields > 0) {
852 if (copy_phase)
853 self->rec[rec_len] = dialect->delimiter;
854 rec_len++;
855 }
856 /* Handle preceding quote.
857 */
858 switch (dialect->quoting) {
859 case QUOTE_ALL:
860 *quoted = 1;
861 if (copy_phase)
862 self->rec[rec_len] = dialect->quotechar;
863 rec_len++;
864 break;
865 case QUOTE_MINIMAL:
866 case QUOTE_NONNUMERIC:
867 /* We only know about quoted in the copy phase.
868 */
869 if (copy_phase && *quoted) {
870 self->rec[rec_len] = dialect->quotechar;
871 rec_len++;
872 }
873 break;
874 case QUOTE_NONE:
875 break;
876 }
877 /* Copy/count field data.
878 */
879 for (i = 0;; i++) {
880 char c = field[i];
881
882 if (c == '\0')
883 break;
884 /* If in doublequote mode we escape quote chars with a
885 * quote.
886 */
887 if (dialect->quoting != QUOTE_NONE &&
888 c == dialect->quotechar && dialect->doublequote) {
889 if (copy_phase)
890 self->rec[rec_len] = dialect->quotechar;
891 *quoted = 1;
892 rec_len++;
893 }
894
895 /* Some special characters need to be escaped. If we have a
896 * quote character switch to quoted field instead of escaping
897 * individual characters.
898 */
899 if (!*quoted
900 && (c == dialect->delimiter ||
901 c == dialect->escapechar ||
902 c == '\n' || c == '\r')) {
903 if (dialect->quoting != QUOTE_NONE)
904 *quoted = 1;
905 else if (dialect->escapechar) {
906 if (copy_phase)
907 self->rec[rec_len] = dialect->escapechar;
908 rec_len++;
909 }
910 else {
911 PyErr_Format(error_obj,
912 "delimiter must be quoted or escaped");
913 return -1;
914 }
915 }
916 /* Copy field character into record buffer.
917 */
918 if (copy_phase)
919 self->rec[rec_len] = c;
920 rec_len++;
921 }
922
923 /* If field is empty check if it needs to be quoted.
924 */
925 if (i == 0 && quote_empty) {
926 if (dialect->quoting == QUOTE_NONE) {
927 PyErr_Format(error_obj,
928 "single empty field record must be quoted");
929 return -1;
930 } else
931 *quoted = 1;
932 }
933
934 /* Handle final quote character on field.
935 */
936 if (*quoted) {
937 if (copy_phase)
938 self->rec[rec_len] = dialect->quotechar;
939 else
940 /* Didn't know about leading quote until we found it
941 * necessary in field data - compensate for it now.
942 */
943 rec_len++;
944 rec_len++;
945 }
946
947 return rec_len;
948}
949
950static int
951join_check_rec_size(WriterObj *self, int rec_len)
952{
953 if (rec_len > self->rec_size) {
954 if (self->rec_size == 0) {
955 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
956 self->rec = PyMem_Malloc(self->rec_size);
957 }
958 else {
959 char *old_rec = self->rec;
960
961 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
962 self->rec = PyMem_Realloc(self->rec, self->rec_size);
963 if (self->rec == NULL)
964 PyMem_Free(old_rec);
965 }
966 if (self->rec == NULL) {
967 PyErr_NoMemory();
968 return 0;
969 }
970 }
971 return 1;
972}
973
974static int
975join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
976{
977 int rec_len;
978
979 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
980 if (rec_len < 0)
981 return 0;
982
983 /* grow record buffer if necessary */
984 if (!join_check_rec_size(self, rec_len))
985 return 0;
986
987 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
988 self->num_fields++;
989
990 return 1;
991}
992
993static int
994join_append_lineterminator(WriterObj *self)
995{
996 int terminator_len;
997
998 terminator_len = PyString_Size(self->dialect->lineterminator);
999
1000 /* grow record buffer if necessary */
1001 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1002 return 0;
1003
1004 memmove(self->rec + self->rec_len,
1005 PyString_AsString(self->dialect->lineterminator),
1006 terminator_len);
1007 self->rec_len += terminator_len;
1008
1009 return 1;
1010}
1011
1012PyDoc_STRVAR(csv_writerow_doc,
1013"join(sequence) -> string\n"
1014"\n"
1015"Construct a CSV record from a sequence of fields. Non-string\n"
1016"elements will be converted to string.");
1017
1018static PyObject *
1019csv_writerow(WriterObj *self, PyObject *seq)
1020{
1021 DialectObj *dialect = self->dialect;
1022 int len, i;
1023
1024 if (!PySequence_Check(seq))
1025 return PyErr_Format(error_obj, "sequence expected");
1026
1027 len = PySequence_Length(seq);
1028 if (len < 0)
1029 return NULL;
1030
1031 /* Join all fields in internal buffer.
1032 */
1033 join_reset(self);
1034 for (i = 0; i < len; i++) {
1035 PyObject *field;
1036 int append_ok;
1037 int quoted;
1038
1039 field = PySequence_GetItem(seq, i);
1040 if (field == NULL)
1041 return NULL;
1042
1043 quoted = 0;
1044 if (dialect->quoting == QUOTE_NONNUMERIC) {
1045 PyObject *num;
1046
1047 num = PyNumber_Float(field);
1048 if (num == NULL) {
1049 quoted = 1;
1050 PyErr_Clear();
1051 }
1052 else {
1053 Py_DECREF(num);
1054 }
1055 }
1056
1057 if (PyString_Check(field)) {
1058 append_ok = join_append(self, PyString_AsString(field),
1059 &quoted, len == 1);
1060 Py_DECREF(field);
1061 }
1062 else if (field == Py_None) {
1063 append_ok = join_append(self, "", &quoted, len == 1);
1064 Py_DECREF(field);
1065 }
1066 else {
1067 PyObject *str;
1068
1069 str = PyObject_Str(field);
1070 Py_DECREF(field);
1071 if (str == NULL)
1072 return NULL;
1073
1074 append_ok = join_append(self, PyString_AsString(str),
1075 &quoted, len == 1);
1076 Py_DECREF(str);
1077 }
1078 if (!append_ok)
1079 return NULL;
1080 }
1081
1082 /* Add line terminator.
1083 */
1084 if (!join_append_lineterminator(self))
1085 return 0;
1086
1087 return PyObject_CallFunction(self->writeline,
1088 "(s#)", self->rec, self->rec_len);
1089}
1090
1091static PyObject *
1092csv_writerows(WriterObj *self, PyObject *seqseq)
1093{
1094 PyObject *row_iter, *row_obj, *result;
1095
1096 row_iter = PyObject_GetIter(seqseq);
1097 if (row_iter == NULL) {
1098 PyErr_SetString(PyExc_TypeError,
1099 "writerows() argument must be iteratable");
1100 return NULL;
1101 }
1102 while ((row_obj = PyIter_Next(row_iter))) {
1103 result = csv_writerow(self, row_obj);
1104 Py_DECREF(row_obj);
1105 if (!result) {
1106 Py_DECREF(row_iter);
1107 return NULL;
1108 }
1109 else
1110 Py_DECREF(result);
1111 }
1112 Py_DECREF(row_iter);
1113 if (PyErr_Occurred())
1114 return NULL;
1115 Py_INCREF(Py_None);
1116 return Py_None;
1117}
1118
1119static struct PyMethodDef Writer_methods[] = {
1120 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1121 { "writerows", (PyCFunction)csv_writerows, METH_O},
1122 { NULL, NULL }
1123};
1124
1125#define W_OFF(x) offsetof(WriterObj, x)
1126
1127static struct PyMemberDef Writer_memberlist[] = {
1128 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1129 { NULL }
1130};
1131
1132static void
1133Writer_dealloc(WriterObj *self)
1134{
1135 Py_XDECREF(self->dialect);
1136 Py_XDECREF(self->writeline);
1137 PyMem_DEL(self);
1138}
1139
1140PyDoc_STRVAR(Writer_Type_doc,
1141"CSV writer\n"
1142"\n"
1143"Writer objects are responsible for generating tabular data\n"
1144"in CSV format from sequence input.\n"
1145);
1146
1147static PyTypeObject Writer_Type = {
1148 PyObject_HEAD_INIT(NULL)
1149 0, /*ob_size*/
1150 "_csv.writer", /*tp_name*/
1151 sizeof(WriterObj), /*tp_basicsize*/
1152 0, /*tp_itemsize*/
1153 /* methods */
1154 (destructor)Writer_dealloc, /*tp_dealloc*/
1155 (printfunc)0, /*tp_print*/
1156 (getattrfunc)0, /*tp_getattr*/
1157 (setattrfunc)0, /*tp_setattr*/
1158 (cmpfunc)0, /*tp_compare*/
1159 (reprfunc)0, /*tp_repr*/
1160 0, /*tp_as_number*/
1161 0, /*tp_as_sequence*/
1162 0, /*tp_as_mapping*/
1163 (hashfunc)0, /*tp_hash*/
1164 (ternaryfunc)0, /*tp_call*/
1165 (reprfunc)0, /*tp_str*/
1166 0, /*tp_getattro*/
1167 0, /*tp_setattro*/
1168 0, /*tp_as_buffer*/
1169 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
1170 Writer_Type_doc,
1171 0, /*tp_traverse*/
1172 0, /*tp_clear*/
1173 0, /*tp_richcompare*/
1174 0, /*tp_weaklistoffset*/
1175 (getiterfunc)0, /*tp_iter*/
1176 (getiterfunc)0, /*tp_iternext*/
1177 Writer_methods, /*tp_methods*/
1178 Writer_memberlist, /*tp_members*/
1179 0, /*tp_getset*/
1180};
1181
1182static PyObject *
1183csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1184{
1185 PyObject * output_file, * dialect = NULL, *ctor_args;
1186 WriterObj * self = PyObject_NEW(WriterObj, &Writer_Type);
1187
1188 if (!self)
1189 return NULL;
1190
1191 self->dialect = NULL;
1192 self->writeline = NULL;
1193
1194 self->rec = NULL;
1195 self->rec_size = 0;
1196 self->rec_len = 0;
1197 self->num_fields = 0;
1198
1199 if (!PyArg_ParseTuple(args, "O|O", &output_file, &dialect)) {
1200 Py_DECREF(self);
1201 return NULL;
1202 }
1203 self->writeline = PyObject_GetAttrString(output_file, "write");
1204 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1205 PyErr_SetString(PyExc_TypeError,
1206 "argument 1 must be an instance with a write method");
1207 Py_DECREF(self);
1208 return NULL;
1209 }
1210 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1211 if (ctor_args == NULL) {
1212 Py_DECREF(self);
1213 return NULL;
1214 }
1215 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1216 ctor_args, keyword_args);
1217 Py_DECREF(ctor_args);
1218 if (self->dialect == NULL) {
1219 Py_DECREF(self);
1220 return NULL;
1221 }
1222 return (PyObject *)self;
1223}
1224
1225/*
1226 * DIALECT REGISTRY
1227 */
1228static PyObject *
1229csv_list_dialects(PyObject *module, PyObject *args)
1230{
1231 return PyDict_Keys(dialects);
1232}
1233
1234static PyObject *
1235csv_register_dialect(PyObject *module, PyObject *args)
1236{
1237 PyObject *name_obj, *dialect_obj;
1238
1239 if (!PyArg_ParseTuple(args, "OO", &name_obj, &dialect_obj))
1240 return NULL;
1241 if (!PyString_Check(name_obj) && !PyUnicode_Check(name_obj)) {
1242 PyErr_SetString(PyExc_TypeError,
1243 "dialect name must be a string or unicode");
1244 return NULL;
1245 }
1246 Py_INCREF(dialect_obj);
1247 /* A class rather than an instance? Instanciate */
1248 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1249 PyObject * new_dia;
1250 new_dia = PyObject_CallFunction(dialect_obj, "");
1251 Py_DECREF(dialect_obj);
1252 if (new_dia == NULL)
1253 return NULL;
1254 dialect_obj = new_dia;
1255 }
1256 /* Make sure we finally have an instance */
1257 if (!PyInstance_Check(dialect_obj)) {
1258 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1259 Py_DECREF(dialect_obj);
1260 return NULL;
1261 }
1262 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1263 Py_DECREF(dialect_obj);
1264 return NULL;
1265 }
1266 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1267 Py_DECREF(dialect_obj);
1268 return NULL;
1269 }
1270 Py_DECREF(dialect_obj);
1271 Py_INCREF(Py_None);
1272 return Py_None;
1273}
1274
1275static PyObject *
1276csv_unregister_dialect(PyObject *module, PyObject *args)
1277{
1278 PyObject *name_obj;
1279
1280 if (!PyArg_ParseTuple(args, "O", &name_obj))
1281 return NULL;
1282 if (PyDict_DelItem(dialects, name_obj) < 0)
1283 return PyErr_Format(error_obj, "unknown dialect");
1284 Py_INCREF(Py_None);
1285 return Py_None;
1286}
1287
1288static PyObject *
1289csv_get_dialect(PyObject *module, PyObject *args)
1290{
1291 PyObject *name_obj;
1292
1293 if (!PyArg_ParseTuple(args, "O", &name_obj))
1294 return NULL;
1295 return get_dialect_from_registry(name_obj);
1296}
1297
1298/*
1299 * MODULE
1300 */
1301
1302PyDoc_STRVAR(csv_module_doc,
1303"CSV parsing and writing.\n"
1304"\n"
1305"This module provides classes that assist in the reading and writing\n"
1306"of Comma Separated Value (CSV) files, and implements the interface\n"
1307"described by PEP 305. Although many CSV files are simple to parse,\n"
1308"the format is not formally defined by a stable specification and\n"
1309"is subtle enough that parsing lines of a CSV file with something\n"
1310"like line.split(\",\") is bound to fail. The module supports three\n"
1311"basic APIs: reading, writing, and registration of dialects.\n"
1312"\n"
1313"\n"
1314"DIALECT REGISTRATION:\n"
1315"\n"
1316"Readers and writers support a dialect argument, which is a convenient\n"
1317"handle on a group of settings. When the dialect argument is a string,\n"
1318"it identifies one of the dialects previously registered with the module.\n"
1319"If it is a class or instance, the attributes of the argument are used as\n"
1320"the settings for the reader or writer:\n"
1321"\n"
1322" class excel:\n"
1323" delimiter = ','\n"
1324" quotechar = '\"'\n"
1325" escapechar = None\n"
1326" doublequote = True\n"
1327" skipinitialspace = False\n"
1328" lineterminator = '\r\n'\n"
1329" quoting = QUOTE_MINIMAL\n"
1330"\n"
1331"SETTINGS:\n"
1332"\n"
1333" * quotechar - specifies a one-character string to use as the \n"
1334" quoting character. It defaults to '\"'.\n"
1335" * delimiter - specifies a one-character string to use as the \n"
1336" field separator. It defaults to ','.\n"
1337" * skipinitialspace - specifies how to interpret whitespace which\n"
1338" immediately follows a delimiter. It defaults to False, which\n"
1339" means that whitespace immediately following a delimiter is part\n"
1340" of the following field.\n"
1341" * lineterminator - specifies the character sequence which should \n"
1342" terminate rows.\n"
1343" * quoting - controls when quotes should be generated by the writer.\n"
1344" It can take on any of the following module constants:\n"
1345"\n"
1346" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1347" field contains either the quotechar or the delimiter\n"
1348" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1349" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1350" fields which contain characters other than [+-0-9.].\n"
1351" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1352" * escapechar - specifies a one-character string used to escape \n"
1353" the delimiter when quoting is set to QUOTE_NONE.\n"
1354" * doublequote - controls the handling of quotes inside fields. When\n"
1355" True, two consecutive quotes are interpreted as one during read,\n"
1356" and when writing, each quote character embedded in the data is\n"
1357" written as two quotes\n");
1358
1359PyDoc_STRVAR(csv_reader_doc,
1360" csv_reader = reader(iterable [, dialect='excel']\n"
1361" [optional keyword args])\n"
1362" for row in csv_reader:\n"
1363" process(row)\n"
1364"\n"
1365"The \"iterable\" argument can be any object that returns a line\n"
1366"of input for each iteration, such as a file object or a list. The\n"
1367"optional \"dialect\" parameter is discussed below. The function\n"
1368"also accepts optional keyword arguments which override settings\n"
1369"provided by the dialect.\n"
1370"\n"
1371"The returned object is an iterator. Each iteration returns a row\n"
1372 "of the CSV file (which can span multiple input lines):\n");
1373
1374PyDoc_STRVAR(csv_writer_doc,
1375" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1376" [optional keyword args])\n"
1377" for row in csv_writer:\n"
1378" csv_writer.writerow(row)\n"
1379"\n"
1380" [or]\n"
1381"\n"
1382" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1383" [optional keyword args])\n"
1384" csv_writer.writerows(rows)\n"
1385"\n"
1386"The \"fileobj\" argument can be any object that supports the file API.\n");
1387
1388PyDoc_STRVAR(csv_list_dialects_doc,
1389"Return a list of all know dialect names.\n"
1390" names = csv.list_dialects()");
1391
1392PyDoc_STRVAR(csv_get_dialect_doc,
1393"Return the dialect instance associated with name.\n"
1394" dialect = csv.get_dialect(name)");
1395
1396PyDoc_STRVAR(csv_register_dialect_doc,
1397"Create a mapping from a string name to a dialect class.\n"
1398" dialect = csv.register_dialect(name, dialect)");
1399
1400PyDoc_STRVAR(csv_unregister_dialect_doc,
1401"Delete the name/dialect mapping associated with a string name.\n"
1402" csv.unregister_dialect(name)");
1403
1404static struct PyMethodDef csv_methods[] = {
1405 { "reader", (PyCFunction)csv_reader,
1406 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1407 { "writer", (PyCFunction)csv_writer,
1408 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1409 { "list_dialects", (PyCFunction)csv_list_dialects,
1410 METH_NOARGS, csv_list_dialects_doc},
1411 { "register_dialect", (PyCFunction)csv_register_dialect,
1412 METH_VARARGS, csv_register_dialect_doc},
1413 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1414 METH_VARARGS, csv_unregister_dialect_doc},
1415 { "get_dialect", (PyCFunction)csv_get_dialect,
1416 METH_VARARGS, csv_get_dialect_doc},
1417 { NULL, NULL }
1418};
1419
1420PyMODINIT_FUNC
1421init_csv(void)
1422{
1423 PyObject *module;
1424 PyObject *rev;
1425 PyObject *v;
1426 int res;
1427 StyleDesc *style;
1428
1429 if (PyType_Ready(&Dialect_Type) < 0)
1430 return;
1431
1432 if (PyType_Ready(&Reader_Type) < 0)
1433 return;
1434
1435 if (PyType_Ready(&Writer_Type) < 0)
1436 return;
1437
1438 /* Create the module and add the functions */
1439 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1440 if (module == NULL)
1441 return;
1442
1443 /* Add version to the module. */
1444 rev = PyString_FromString("1.0");
1445 if (rev == NULL)
1446 return;
1447 if (PyModule_AddObject(module, "__version__", rev) < 0)
1448 return;
1449
1450 /* Add _dialects dictionary */
1451 dialects = PyDict_New();
1452 if (dialects == NULL)
1453 return;
1454 if (PyModule_AddObject(module, "_dialects", dialects))
1455 return;
1456
1457 /* Add quote styles into dictionary */
1458 for (style = quote_styles; style->name; style++) {
1459 v = PyInt_FromLong(style->style);
1460 if (v == NULL)
1461 return;
1462 res = PyModule_AddObject(module, style->name, v);
1463 if (res < 0)
1464 return;
1465 }
1466
1467 /* Add the Dialect type */
1468 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1469 return;
1470
1471 /* Add the CSV exception object to the module. */
1472 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1473 if (error_obj == NULL)
1474 return;
1475 PyModule_AddObject(module, "Error", error_obj);
1476}