blob: e62999ffbfa88e022c248823265e103dc8fb5b0c [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
10**** (2003-03-23), it is intended that this code should work with Python
11**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
15#include "Python.h"
16#include "structmember.h"
17
Skip Montanaroa16b21f2003-03-23 14:32:54 +000018
Skip Montanarob4a04172003-03-20 23:29:12 +000019/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Fallback docstring macros, used only when the included headers did
 * not already define PyDoc_STRVAR.  PyDoc_STR yields the text itself
 * when WITH_DOC_STRINGS is defined and an empty literal otherwise. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */
30
#ifndef PyMODINIT_FUNC
/* Fallback definition: plain `void` when compiled as C, and
 * `extern "C" void` when compiled as C++. */
#ifdef __cplusplus
#define PyMODINIT_FUNC extern "C" void
#else
#define PyMODINIT_FUNC void
#endif
#endif
38/* end 2.2 compatibility macros */
39
40static PyObject *error_obj; /* CSV exception */
41static PyObject *dialects; /* Dialect registry */
42
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD
} ParserState;
47
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Associates a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* All known quoting policies.  The table ends with an entry whose
 * name is a null pointer; loops over it test `name` to stop. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { QUOTE_MINIMAL,    0 }
};
64
65typedef struct {
66 PyObject_HEAD
67
68 int doublequote; /* is " represented by ""? */
69 char delimiter; /* field separator */
70 char quotechar; /* quote character */
71 char escapechar; /* escape character */
72 int skipinitialspace; /* ignore spaces following delimiter? */
73 PyObject *lineterminator; /* string to write between records */
74 QuoteStyle quoting; /* style of quoting to write */
75
76 int strict; /* raise exception on bad CSV */
77} DialectObj;
78
79staticforward PyTypeObject Dialect_Type;
80
81typedef struct {
82 PyObject_HEAD
83
84 PyObject *input_iter; /* iterate over this for input lines */
85
86 DialectObj *dialect; /* parsing dialect */
87
88 PyObject *fields; /* field list for current record */
89 ParserState state; /* current CSV parse state */
90 char *field; /* build current field in here */
91 int field_size; /* size of allocated buffer */
92 int field_len; /* length of current field */
93 int had_parse_error; /* did we have a parse error? */
94} ReaderObj;
95
96staticforward PyTypeObject Reader_Type;
97
98#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
99
100typedef struct {
101 PyObject_HEAD
102
103 PyObject *writeline; /* write output lines to this file */
104
105 DialectObj *dialect; /* parsing dialect */
106
107 char *rec; /* buffer for parser.join */
108 int rec_size; /* size of allocated record */
109 int rec_len; /* length of record */
110 int num_fields; /* number of fields in record */
111} WriterObj;
112
113staticforward PyTypeObject Writer_Type;
114
115/*
116 * DIALECT class
117 */
118
119static PyObject *
120get_dialect_from_registry(PyObject * name_obj)
121{
122 PyObject *dialect_obj;
123
124 dialect_obj = PyDict_GetItem(dialects, name_obj);
125 if (dialect_obj == NULL)
126 return PyErr_Format(error_obj, "unknown dialect");
127 Py_INCREF(dialect_obj);
128 return dialect_obj;
129}
130
131static int
132check_delattr(PyObject *v)
133{
134 if (v == NULL) {
135 PyErr_SetString(PyExc_TypeError,
136 "Cannot delete attribute");
137 return -1;
138 }
139 return 0;
140}
141
142static PyObject *
143get_string(PyObject *str)
144{
145 Py_XINCREF(str);
146 return str;
147}
148
149static int
150set_string(PyObject **str, PyObject *v)
151{
152 if (check_delattr(v) < 0)
153 return -1;
154 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
155 PyErr_BadArgument();
156 return -1;
157 }
158 Py_XDECREF(*str);
159 Py_INCREF(v);
160 *str = v;
161 return 0;
162}
163
164static PyObject *
165get_nullchar_as_None(char c)
166{
167 if (c == '\0') {
168 Py_INCREF(Py_None);
169 return Py_None;
170 }
171 else
172 return PyString_FromStringAndSize((char*)&c, 1);
173}
174
175static int
176set_None_as_nullchar(char * addr, PyObject *v)
177{
178 if (check_delattr(v) < 0)
179 return -1;
180 if (v == Py_None)
181 *addr = '\0';
182 else if (!PyString_Check(v) || PyString_Size(v) != 1) {
183 PyErr_BadArgument();
184 return -1;
185 }
186 else
187 *addr = PyString_AsString(v)[0];
188 return 0;
189}
190
191static PyObject *
192Dialect_get_lineterminator(DialectObj *self)
193{
194 return get_string(self->lineterminator);
195}
196
197static int
198Dialect_set_lineterminator(DialectObj *self, PyObject *value)
199{
200 return set_string(&self->lineterminator, value);
201}
202
203static PyObject *
204Dialect_get_escapechar(DialectObj *self)
205{
206 return get_nullchar_as_None(self->escapechar);
207}
208
209static int
210Dialect_set_escapechar(DialectObj *self, PyObject *value)
211{
212 return set_None_as_nullchar(&self->escapechar, value);
213}
214
215static PyObject *
216Dialect_get_quoting(DialectObj *self)
217{
218 return PyInt_FromLong(self->quoting);
219}
220
221static int
222Dialect_set_quoting(DialectObj *self, PyObject *v)
223{
224 int quoting;
225 StyleDesc *qs = quote_styles;
226
227 if (check_delattr(v) < 0)
228 return -1;
229 if (!PyInt_Check(v)) {
230 PyErr_BadArgument();
231 return -1;
232 }
233 quoting = PyInt_AsLong(v);
234 for (qs = quote_styles; qs->name; qs++) {
235 if (qs->style == quoting) {
236 self->quoting = quoting;
237 return 0;
238 }
239 }
240 PyErr_BadArgument();
241 return -1;
242}
243
244static struct PyMethodDef Dialect_methods[] = {
245 { NULL, NULL }
246};
247
248#define D_OFF(x) offsetof(DialectObj, x)
249
250static struct PyMemberDef Dialect_memberlist[] = {
251 { "quotechar", T_CHAR, D_OFF(quotechar) },
252 { "delimiter", T_CHAR, D_OFF(delimiter) },
253 { "skipinitialspace", T_INT, D_OFF(skipinitialspace) },
254 { "doublequote", T_INT, D_OFF(doublequote) },
255 { "strict", T_INT, D_OFF(strict) },
256 { NULL }
257};
258
259static PyGetSetDef Dialect_getsetlist[] = {
260 { "escapechar", (getter)Dialect_get_escapechar,
261 (setter)Dialect_set_escapechar },
262 { "lineterminator", (getter)Dialect_get_lineterminator,
263 (setter)Dialect_set_lineterminator },
264 { "quoting", (getter)Dialect_get_quoting,
265 (setter)Dialect_set_quoting },
266 {NULL},
267};
268
269static void
270Dialect_dealloc(DialectObj *self)
271{
272 Py_XDECREF(self->lineterminator);
273 /*PyMem_DEL(self);*/
274 self->ob_type->tp_free((PyObject *)self);
275}
276
277static int
278dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
279{
280 PyObject *dialect = NULL, *name_obj, *value_obj;
281
282 self->quotechar = '"';
283 self->delimiter = ',';
284 self->escapechar = '\0';
285 self->skipinitialspace = 0;
286 Py_XDECREF(self->lineterminator);
287 self->lineterminator = PyString_FromString("\r\n");
288 if (self->lineterminator == NULL)
289 return -1;
290 self->quoting = QUOTE_MINIMAL;
291 self->doublequote = 1;
292 self->strict = 0;
293
294 if (!PyArg_ParseTuple(args, "|O", &dialect))
295 return -1;
296 Py_XINCREF(dialect);
297 if (kwargs != NULL) {
298 PyObject * key = PyString_FromString("dialect");
299 PyObject * d;
300
301 d = PyDict_GetItem(kwargs, key);
302 if (d) {
303 Py_INCREF(d);
304 Py_XDECREF(dialect);
305 PyDict_DelItem(kwargs, key);
306 dialect = d;
307 }
308 Py_DECREF(key);
309 }
310 if (dialect != NULL) {
311 int i;
312 PyObject * dir_list;
313
314 /* If dialect is a string, look it up in our registry */
315 if (PyString_Check(dialect) || PyUnicode_Check(dialect)) {
316 PyObject * new_dia;
317 new_dia = get_dialect_from_registry(dialect);
318 Py_DECREF(dialect);
319 if (new_dia == NULL)
320 return -1;
321 dialect = new_dia;
322 }
323 /* A class rather than an instance? Instanciate */
324 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
325 PyObject * new_dia;
326 new_dia = PyObject_CallFunction(dialect, "");
327 Py_DECREF(dialect);
328 if (new_dia == NULL)
329 return -1;
330 dialect = new_dia;
331 }
332 /* Make sure we finally have an instance */
333 if (!PyInstance_Check(dialect) ||
334 (dir_list = PyObject_Dir(dialect)) == NULL) {
335 PyErr_SetString(PyExc_TypeError,
336 "dialect must be an instance");
337 Py_DECREF(dialect);
338 return -1;
339 }
340 /* And extract the attributes */
341 for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
342 name_obj = PyList_GET_ITEM(dir_list, i);
343 if (PyString_AsString(name_obj)[0] == '_')
344 continue;
345 value_obj = PyObject_GetAttr(dialect, name_obj);
346 if (value_obj) {
347 if (PyObject_SetAttr((PyObject *)self,
348 name_obj, value_obj)) {
349 Py_DECREF(dir_list);
350 return -1;
351 }
352 Py_DECREF(value_obj);
353 }
354 }
355 Py_DECREF(dir_list);
356 Py_DECREF(dialect);
357 }
358 if (kwargs != NULL) {
359 int pos = 0;
360
361 while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
362 if (PyObject_SetAttr((PyObject *)self,
363 name_obj, value_obj))
364 return -1;
365 }
366 }
367 return 0;
368}
369
370static PyObject *
371dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
372{
373 DialectObj *self;
374 self = (DialectObj *)type->tp_alloc(type, 0);
375 if (self != NULL) {
376 self->lineterminator = NULL;
377 }
378 return (PyObject *)self;
379}
380
381
382PyDoc_STRVAR(Dialect_Type_doc,
383"CSV dialect\n"
384"\n"
385"The Dialect type records CSV parsing and generation options.\n");
386
387static PyTypeObject Dialect_Type = {
388 PyObject_HEAD_INIT(NULL)
389 0, /* ob_size */
390 "_csv.Dialect", /* tp_name */
391 sizeof(DialectObj), /* tp_basicsize */
392 0, /* tp_itemsize */
393 /* methods */
394 (destructor)Dialect_dealloc, /* tp_dealloc */
395 (printfunc)0, /* tp_print */
396 (getattrfunc)0, /* tp_getattr */
397 (setattrfunc)0, /* tp_setattr */
398 (cmpfunc)0, /* tp_compare */
399 (reprfunc)0, /* tp_repr */
400 0, /* tp_as_number */
401 0, /* tp_as_sequence */
402 0, /* tp_as_mapping */
403 (hashfunc)0, /* tp_hash */
404 (ternaryfunc)0, /* tp_call */
405 (reprfunc)0, /* tp_str */
406 0, /* tp_getattro */
407 0, /* tp_setattro */
408 0, /* tp_as_buffer */
409 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
410 Dialect_Type_doc, /* tp_doc */
411 0, /* tp_traverse */
412 0, /* tp_clear */
413 0, /* tp_richcompare */
414 0, /* tp_weaklistoffset */
415 0, /* tp_iter */
416 0, /* tp_iternext */
417 Dialect_methods, /* tp_methods */
418 Dialect_memberlist, /* tp_members */
419 Dialect_getsetlist, /* tp_getset */
420 0, /* tp_base */
421 0, /* tp_dict */
422 0, /* tp_descr_get */
423 0, /* tp_descr_set */
424 0, /* tp_dictoffset */
425 (initproc)dialect_init, /* tp_init */
426 PyType_GenericAlloc, /* tp_alloc */
427 dialect_new, /* tp_new */
428 0, /* tp_free */
429};
430
431static void
432parse_save_field(ReaderObj *self)
433{
434 PyObject *field;
435
436 field = PyString_FromStringAndSize(self->field, self->field_len);
437 if (field != NULL) {
438 PyList_Append(self->fields, field);
439 Py_XDECREF(field);
440 }
441 self->field_len = 0;
442}
443
444static int
445parse_grow_buff(ReaderObj *self)
446{
447 if (self->field_size == 0) {
448 self->field_size = 4096;
449 self->field = PyMem_Malloc(self->field_size);
450 }
451 else {
452 self->field_size *= 2;
453 self->field = PyMem_Realloc(self->field, self->field_size);
454 }
455 if (self->field == NULL) {
456 PyErr_NoMemory();
457 return 0;
458 }
459 return 1;
460}
461
462static void
463parse_add_char(ReaderObj *self, char c)
464{
465 if (self->field_len == self->field_size && !parse_grow_buff(self))
466 return;
467 self->field[self->field_len++] = c;
468}
469
470static void
471parse_process_char(ReaderObj *self, char c)
472{
473 DialectObj *dialect = self->dialect;
474
475 switch (self->state) {
476 case START_RECORD:
477 /* start of record */
478 if (c == '\n')
479 /* empty line - return [] */
480 break;
481 /* normal character - handle as START_FIELD */
482 self->state = START_FIELD;
483 /* fallthru */
484 case START_FIELD:
485 /* expecting field */
486 if (c == '\n') {
487 /* save empty field - return [fields] */
488 parse_save_field(self);
489 self->state = START_RECORD;
490 }
491 else if (c == dialect->quotechar) {
492 /* start quoted field */
493 self->state = IN_QUOTED_FIELD;
494 }
495 else if (c == dialect->escapechar) {
496 /* possible escaped character */
497 self->state = ESCAPED_CHAR;
498 }
499 else if (c == ' ' && dialect->skipinitialspace)
500 /* ignore space at start of field */
501 ;
502 else if (c == dialect->delimiter) {
503 /* save empty field */
504 parse_save_field(self);
505 }
506 else {
507 /* begin new unquoted field */
508 parse_add_char(self, c);
509 self->state = IN_FIELD;
510 }
511 break;
512
513 case ESCAPED_CHAR:
514 if (c != dialect->escapechar &&
515 c != dialect->delimiter &&
516 c != dialect->quotechar)
517 parse_add_char(self, dialect->escapechar);
518 parse_add_char(self, c);
519 self->state = IN_FIELD;
520 break;
521
522 case IN_FIELD:
523 /* in unquoted field */
524 if (c == '\n') {
525 /* end of line - return [fields] */
526 parse_save_field(self);
527 self->state = START_RECORD;
528 }
529 else if (c == dialect->escapechar) {
530 /* possible escaped character */
531 self->state = ESCAPED_CHAR;
532 }
533 else if (c == dialect->delimiter) {
534 /* save field - wait for new field */
535 parse_save_field(self);
536 self->state = START_FIELD;
537 }
538 else {
539 /* normal character - save in field */
540 parse_add_char(self, c);
541 }
542 break;
543
544 case IN_QUOTED_FIELD:
545 /* in quoted field */
546 if (c == '\n') {
547 /* end of line - save '\n' in field */
548 parse_add_char(self, '\n');
549 }
550 else if (c == dialect->escapechar) {
551 /* Possible escape character */
552 self->state = ESCAPE_IN_QUOTED_FIELD;
553 }
554 else if (c == dialect->quotechar) {
555 if (dialect->doublequote) {
556 /* doublequote; " represented by "" */
557 self->state = QUOTE_IN_QUOTED_FIELD;
558 }
559 else {
560 /* end of quote part of field */
561 self->state = IN_FIELD;
562 }
563 }
564 else {
565 /* normal character - save in field */
566 parse_add_char(self, c);
567 }
568 break;
569
570 case ESCAPE_IN_QUOTED_FIELD:
571 if (c != dialect->escapechar &&
572 c != dialect->delimiter &&
573 c != dialect->quotechar)
574 parse_add_char(self, dialect->escapechar);
575 parse_add_char(self, c);
576 self->state = IN_QUOTED_FIELD;
577 break;
578
579 case QUOTE_IN_QUOTED_FIELD:
580 /* doublequote - seen a quote in an quoted field */
581 if (dialect->quoting != QUOTE_NONE &&
582 c == dialect->quotechar) {
583 /* save "" as " */
584 parse_add_char(self, c);
585 self->state = IN_QUOTED_FIELD;
586 }
587 else if (c == dialect->delimiter) {
588 /* save field - wait for new field */
589 parse_save_field(self);
590 self->state = START_FIELD;
591 }
592 else if (c == '\n') {
593 /* end of line - return [fields] */
594 parse_save_field(self);
595 self->state = START_RECORD;
596 }
597 else if (!dialect->strict) {
598 parse_add_char(self, c);
599 self->state = IN_FIELD;
600 }
601 else {
602 /* illegal */
603 self->had_parse_error = 1;
604 PyErr_Format(error_obj, "%c expected after %c",
605 dialect->delimiter,
606 dialect->quotechar);
607 }
608 break;
609
610 }
611}
612
613/*
614 * READER
615 */
616#define R_OFF(x) offsetof(ReaderObj, x)
617
618static struct PyMemberDef Reader_memberlist[] = {
619 { "dialect", T_OBJECT, R_OFF(dialect), RO },
620 { NULL }
621};
622
623static PyObject *
624Reader_getiter(ReaderObj *self)
625{
626 Py_INCREF(self);
627 return (PyObject *)self;
628}
629
630static PyObject *
631Reader_iternext(ReaderObj *self)
632{
633 PyObject *lineobj;
634 PyObject *fields;
635 char *line;
636
637 do {
638 lineobj = PyIter_Next(self->input_iter);
639 if (lineobj == NULL) {
640 /* End of input OR exception */
641 if (!PyErr_Occurred() && self->field_len != 0)
642 return PyErr_Format(error_obj,
643 "newline inside string");
644 return NULL;
645 }
646
647 if (self->had_parse_error) {
648 if (self->fields) {
649 Py_XDECREF(self->fields);
650 }
651 self->fields = PyList_New(0);
652 self->field_len = 0;
653 self->state = START_RECORD;
654 self->had_parse_error = 0;
655 }
656 line = PyString_AsString(lineobj);
657
658 if (line == NULL) {
659 Py_DECREF(lineobj);
660 return NULL;
661 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000662 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000663 self->had_parse_error = 1;
664 Py_DECREF(lineobj);
665 return PyErr_Format(error_obj,
666 "string with NUL bytes");
667 }
668
669 /* Process line of text - send '\n' to processing code to
670 represent end of line. End of line which is not at end of
671 string is an error. */
672 while (*line) {
673 char c;
674
675 c = *line++;
676 if (c == '\r') {
677 c = *line++;
678 if (c == '\0')
679 /* macintosh end of line */
680 break;
681 if (c == '\n') {
682 c = *line++;
683 if (c == '\0')
684 /* DOS end of line */
685 break;
686 }
687 self->had_parse_error = 1;
688 Py_DECREF(lineobj);
689 return PyErr_Format(error_obj,
690 "newline inside string");
691 }
692 if (c == '\n') {
693 c = *line++;
694 if (c == '\0')
695 /* unix end of line */
696 break;
697 self->had_parse_error = 1;
698 Py_DECREF(lineobj);
699 return PyErr_Format(error_obj,
700 "newline inside string");
701 }
702 parse_process_char(self, c);
703 if (PyErr_Occurred()) {
704 Py_DECREF(lineobj);
705 return NULL;
706 }
707 }
708 parse_process_char(self, '\n');
709 Py_DECREF(lineobj);
710 } while (self->state != START_RECORD);
711
712 fields = self->fields;
713 self->fields = PyList_New(0);
714 return fields;
715}
716
717static void
718Reader_dealloc(ReaderObj *self)
719{
720 Py_XDECREF(self->dialect);
721 Py_XDECREF(self->input_iter);
722 Py_XDECREF(self->fields);
723 PyMem_DEL(self);
724}
725
726PyDoc_STRVAR(Reader_Type_doc,
727"CSV reader\n"
728"\n"
729"Reader objects are responsible for reading and parsing tabular data\n"
730"in CSV format.\n"
731);
732
733static struct PyMethodDef Reader_methods[] = {
734 { NULL, NULL }
735};
736
737static PyTypeObject Reader_Type = {
738 PyObject_HEAD_INIT(NULL)
739 0, /*ob_size*/
740 "_csv.reader", /*tp_name*/
741 sizeof(ReaderObj), /*tp_basicsize*/
742 0, /*tp_itemsize*/
743 /* methods */
744 (destructor)Reader_dealloc, /*tp_dealloc*/
745 (printfunc)0, /*tp_print*/
746 (getattrfunc)0, /*tp_getattr*/
747 (setattrfunc)0, /*tp_setattr*/
748 (cmpfunc)0, /*tp_compare*/
749 (reprfunc)0, /*tp_repr*/
750 0, /*tp_as_number*/
751 0, /*tp_as_sequence*/
752 0, /*tp_as_mapping*/
753 (hashfunc)0, /*tp_hash*/
754 (ternaryfunc)0, /*tp_call*/
755 (reprfunc)0, /*tp_str*/
756 0, /*tp_getattro*/
757 0, /*tp_setattro*/
758 0, /*tp_as_buffer*/
759 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
760 Reader_Type_doc, /*tp_doc*/
761 0, /*tp_traverse*/
762 0, /*tp_clear*/
763 0, /*tp_richcompare*/
764 0, /*tp_weaklistoffset*/
765 (getiterfunc)Reader_getiter, /*tp_iter*/
766 (getiterfunc)Reader_iternext, /*tp_iternext*/
767 Reader_methods, /*tp_methods*/
768 Reader_memberlist, /*tp_members*/
769 0, /*tp_getset*/
770
771};
772
773static PyObject *
774csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
775{
776 PyObject * iterator, * dialect = NULL, *ctor_args;
777 ReaderObj * self = PyObject_NEW(ReaderObj, &Reader_Type);
778
779 if (!self)
780 return NULL;
781
782 self->dialect = NULL;
783 self->input_iter = self->fields = NULL;
784
785 self->fields = NULL;
786 self->input_iter = NULL;
787 self->had_parse_error = 0;
788 self->field = NULL;
789 self->field_size = 0;
790 self->field_len = 0;
791 self->state = START_RECORD;
792
793 if (!PyArg_ParseTuple(args, "O|O", &iterator, &dialect)) {
794 Py_DECREF(self);
795 return NULL;
796 }
797 self->input_iter = PyObject_GetIter(iterator);
798 if (self->input_iter == NULL) {
799 PyErr_SetString(PyExc_TypeError,
800 "argument 1 must be an iterator");
801 Py_DECREF(self);
802 return NULL;
803 }
804 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
805 if (ctor_args == NULL) {
806 Py_DECREF(self);
807 return NULL;
808 }
809 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
810 ctor_args, keyword_args);
811 Py_DECREF(ctor_args);
812 if (self->dialect == NULL) {
813 Py_DECREF(self);
814 return NULL;
815 }
816 self->fields = PyList_New(0);
817 if (self->fields == NULL) {
818 Py_DECREF(self);
819 return NULL;
820 }
821
822 return (PyObject *)self;
823}
824
825/*
826 * WRITER
827 */
828/* ---------------------------------------------------------------- */
829static void
830join_reset(WriterObj *self)
831{
832 self->rec_len = 0;
833 self->num_fields = 0;
834}
835
836#define MEM_INCR 32768
837
838/* Calculate new record length or append field to record. Return new
839 * record length.
840 */
841static int
842join_append_data(WriterObj *self, char *field, int quote_empty,
843 int *quoted, int copy_phase)
844{
845 DialectObj *dialect = self->dialect;
846 int i, rec_len;
847
848 rec_len = self->rec_len;
849
850 /* If this is not the first field we need a field separator.
851 */
852 if (self->num_fields > 0) {
853 if (copy_phase)
854 self->rec[rec_len] = dialect->delimiter;
855 rec_len++;
856 }
857 /* Handle preceding quote.
858 */
859 switch (dialect->quoting) {
860 case QUOTE_ALL:
861 *quoted = 1;
862 if (copy_phase)
863 self->rec[rec_len] = dialect->quotechar;
864 rec_len++;
865 break;
866 case QUOTE_MINIMAL:
867 case QUOTE_NONNUMERIC:
868 /* We only know about quoted in the copy phase.
869 */
870 if (copy_phase && *quoted) {
871 self->rec[rec_len] = dialect->quotechar;
872 rec_len++;
873 }
874 break;
875 case QUOTE_NONE:
876 break;
877 }
878 /* Copy/count field data.
879 */
880 for (i = 0;; i++) {
881 char c = field[i];
882
883 if (c == '\0')
884 break;
885 /* If in doublequote mode we escape quote chars with a
886 * quote.
887 */
888 if (dialect->quoting != QUOTE_NONE &&
889 c == dialect->quotechar && dialect->doublequote) {
890 if (copy_phase)
891 self->rec[rec_len] = dialect->quotechar;
892 *quoted = 1;
893 rec_len++;
894 }
895
896 /* Some special characters need to be escaped. If we have a
897 * quote character switch to quoted field instead of escaping
898 * individual characters.
899 */
900 if (!*quoted
901 && (c == dialect->delimiter ||
902 c == dialect->escapechar ||
903 c == '\n' || c == '\r')) {
904 if (dialect->quoting != QUOTE_NONE)
905 *quoted = 1;
906 else if (dialect->escapechar) {
907 if (copy_phase)
908 self->rec[rec_len] = dialect->escapechar;
909 rec_len++;
910 }
911 else {
912 PyErr_Format(error_obj,
913 "delimiter must be quoted or escaped");
914 return -1;
915 }
916 }
917 /* Copy field character into record buffer.
918 */
919 if (copy_phase)
920 self->rec[rec_len] = c;
921 rec_len++;
922 }
923
924 /* If field is empty check if it needs to be quoted.
925 */
926 if (i == 0 && quote_empty) {
927 if (dialect->quoting == QUOTE_NONE) {
928 PyErr_Format(error_obj,
929 "single empty field record must be quoted");
930 return -1;
931 } else
932 *quoted = 1;
933 }
934
935 /* Handle final quote character on field.
936 */
937 if (*quoted) {
938 if (copy_phase)
939 self->rec[rec_len] = dialect->quotechar;
940 else
941 /* Didn't know about leading quote until we found it
942 * necessary in field data - compensate for it now.
943 */
944 rec_len++;
945 rec_len++;
946 }
947
948 return rec_len;
949}
950
951static int
952join_check_rec_size(WriterObj *self, int rec_len)
953{
954 if (rec_len > self->rec_size) {
955 if (self->rec_size == 0) {
956 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
957 self->rec = PyMem_Malloc(self->rec_size);
958 }
959 else {
960 char *old_rec = self->rec;
961
962 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
963 self->rec = PyMem_Realloc(self->rec, self->rec_size);
964 if (self->rec == NULL)
965 PyMem_Free(old_rec);
966 }
967 if (self->rec == NULL) {
968 PyErr_NoMemory();
969 return 0;
970 }
971 }
972 return 1;
973}
974
975static int
976join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
977{
978 int rec_len;
979
980 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
981 if (rec_len < 0)
982 return 0;
983
984 /* grow record buffer if necessary */
985 if (!join_check_rec_size(self, rec_len))
986 return 0;
987
988 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
989 self->num_fields++;
990
991 return 1;
992}
993
994static int
995join_append_lineterminator(WriterObj *self)
996{
997 int terminator_len;
998
999 terminator_len = PyString_Size(self->dialect->lineterminator);
1000
1001 /* grow record buffer if necessary */
1002 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1003 return 0;
1004
1005 memmove(self->rec + self->rec_len,
1006 PyString_AsString(self->dialect->lineterminator),
1007 terminator_len);
1008 self->rec_len += terminator_len;
1009
1010 return 1;
1011}
1012
1013PyDoc_STRVAR(csv_writerow_doc,
1014"join(sequence) -> string\n"
1015"\n"
1016"Construct a CSV record from a sequence of fields. Non-string\n"
1017"elements will be converted to string.");
1018
1019static PyObject *
1020csv_writerow(WriterObj *self, PyObject *seq)
1021{
1022 DialectObj *dialect = self->dialect;
1023 int len, i;
1024
1025 if (!PySequence_Check(seq))
1026 return PyErr_Format(error_obj, "sequence expected");
1027
1028 len = PySequence_Length(seq);
1029 if (len < 0)
1030 return NULL;
1031
1032 /* Join all fields in internal buffer.
1033 */
1034 join_reset(self);
1035 for (i = 0; i < len; i++) {
1036 PyObject *field;
1037 int append_ok;
1038 int quoted;
1039
1040 field = PySequence_GetItem(seq, i);
1041 if (field == NULL)
1042 return NULL;
1043
1044 quoted = 0;
1045 if (dialect->quoting == QUOTE_NONNUMERIC) {
1046 PyObject *num;
1047
1048 num = PyNumber_Float(field);
1049 if (num == NULL) {
1050 quoted = 1;
1051 PyErr_Clear();
1052 }
1053 else {
1054 Py_DECREF(num);
1055 }
1056 }
1057
1058 if (PyString_Check(field)) {
1059 append_ok = join_append(self, PyString_AsString(field),
1060 &quoted, len == 1);
1061 Py_DECREF(field);
1062 }
1063 else if (field == Py_None) {
1064 append_ok = join_append(self, "", &quoted, len == 1);
1065 Py_DECREF(field);
1066 }
1067 else {
1068 PyObject *str;
1069
1070 str = PyObject_Str(field);
1071 Py_DECREF(field);
1072 if (str == NULL)
1073 return NULL;
1074
1075 append_ok = join_append(self, PyString_AsString(str),
1076 &quoted, len == 1);
1077 Py_DECREF(str);
1078 }
1079 if (!append_ok)
1080 return NULL;
1081 }
1082
1083 /* Add line terminator.
1084 */
1085 if (!join_append_lineterminator(self))
1086 return 0;
1087
1088 return PyObject_CallFunction(self->writeline,
1089 "(s#)", self->rec, self->rec_len);
1090}
1091
1092static PyObject *
1093csv_writerows(WriterObj *self, PyObject *seqseq)
1094{
1095 PyObject *row_iter, *row_obj, *result;
1096
1097 row_iter = PyObject_GetIter(seqseq);
1098 if (row_iter == NULL) {
1099 PyErr_SetString(PyExc_TypeError,
1100 "writerows() argument must be iteratable");
1101 return NULL;
1102 }
1103 while ((row_obj = PyIter_Next(row_iter))) {
1104 result = csv_writerow(self, row_obj);
1105 Py_DECREF(row_obj);
1106 if (!result) {
1107 Py_DECREF(row_iter);
1108 return NULL;
1109 }
1110 else
1111 Py_DECREF(result);
1112 }
1113 Py_DECREF(row_iter);
1114 if (PyErr_Occurred())
1115 return NULL;
1116 Py_INCREF(Py_None);
1117 return Py_None;
1118}
1119
1120static struct PyMethodDef Writer_methods[] = {
1121 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1122 { "writerows", (PyCFunction)csv_writerows, METH_O},
1123 { NULL, NULL }
1124};
1125
1126#define W_OFF(x) offsetof(WriterObj, x)
1127
1128static struct PyMemberDef Writer_memberlist[] = {
1129 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1130 { NULL }
1131};
1132
1133static void
1134Writer_dealloc(WriterObj *self)
1135{
1136 Py_XDECREF(self->dialect);
1137 Py_XDECREF(self->writeline);
1138 PyMem_DEL(self);
1139}
1140
1141PyDoc_STRVAR(Writer_Type_doc,
1142"CSV writer\n"
1143"\n"
1144"Writer objects are responsible for generating tabular data\n"
1145"in CSV format from sequence input.\n"
1146);
1147
1148static PyTypeObject Writer_Type = {
1149 PyObject_HEAD_INIT(NULL)
1150 0, /*ob_size*/
1151 "_csv.writer", /*tp_name*/
1152 sizeof(WriterObj), /*tp_basicsize*/
1153 0, /*tp_itemsize*/
1154 /* methods */
1155 (destructor)Writer_dealloc, /*tp_dealloc*/
1156 (printfunc)0, /*tp_print*/
1157 (getattrfunc)0, /*tp_getattr*/
1158 (setattrfunc)0, /*tp_setattr*/
1159 (cmpfunc)0, /*tp_compare*/
1160 (reprfunc)0, /*tp_repr*/
1161 0, /*tp_as_number*/
1162 0, /*tp_as_sequence*/
1163 0, /*tp_as_mapping*/
1164 (hashfunc)0, /*tp_hash*/
1165 (ternaryfunc)0, /*tp_call*/
1166 (reprfunc)0, /*tp_str*/
1167 0, /*tp_getattro*/
1168 0, /*tp_setattro*/
1169 0, /*tp_as_buffer*/
1170 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
1171 Writer_Type_doc,
1172 0, /*tp_traverse*/
1173 0, /*tp_clear*/
1174 0, /*tp_richcompare*/
1175 0, /*tp_weaklistoffset*/
1176 (getiterfunc)0, /*tp_iter*/
1177 (getiterfunc)0, /*tp_iternext*/
1178 Writer_methods, /*tp_methods*/
1179 Writer_memberlist, /*tp_members*/
1180 0, /*tp_getset*/
1181};
1182
1183static PyObject *
1184csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1185{
1186 PyObject * output_file, * dialect = NULL, *ctor_args;
1187 WriterObj * self = PyObject_NEW(WriterObj, &Writer_Type);
1188
1189 if (!self)
1190 return NULL;
1191
1192 self->dialect = NULL;
1193 self->writeline = NULL;
1194
1195 self->rec = NULL;
1196 self->rec_size = 0;
1197 self->rec_len = 0;
1198 self->num_fields = 0;
1199
1200 if (!PyArg_ParseTuple(args, "O|O", &output_file, &dialect)) {
1201 Py_DECREF(self);
1202 return NULL;
1203 }
1204 self->writeline = PyObject_GetAttrString(output_file, "write");
1205 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1206 PyErr_SetString(PyExc_TypeError,
1207 "argument 1 must be an instance with a write method");
1208 Py_DECREF(self);
1209 return NULL;
1210 }
1211 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1212 if (ctor_args == NULL) {
1213 Py_DECREF(self);
1214 return NULL;
1215 }
1216 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1217 ctor_args, keyword_args);
1218 Py_DECREF(ctor_args);
1219 if (self->dialect == NULL) {
1220 Py_DECREF(self);
1221 return NULL;
1222 }
1223 return (PyObject *)self;
1224}
1225
1226/*
1227 * DIALECT REGISTRY
1228 */
1229static PyObject *
1230csv_list_dialects(PyObject *module, PyObject *args)
1231{
1232 return PyDict_Keys(dialects);
1233}
1234
1235static PyObject *
1236csv_register_dialect(PyObject *module, PyObject *args)
1237{
1238 PyObject *name_obj, *dialect_obj;
1239
1240 if (!PyArg_ParseTuple(args, "OO", &name_obj, &dialect_obj))
1241 return NULL;
1242 if (!PyString_Check(name_obj) && !PyUnicode_Check(name_obj)) {
1243 PyErr_SetString(PyExc_TypeError,
1244 "dialect name must be a string or unicode");
1245 return NULL;
1246 }
1247 Py_INCREF(dialect_obj);
1248 /* A class rather than an instance? Instanciate */
1249 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1250 PyObject * new_dia;
1251 new_dia = PyObject_CallFunction(dialect_obj, "");
1252 Py_DECREF(dialect_obj);
1253 if (new_dia == NULL)
1254 return NULL;
1255 dialect_obj = new_dia;
1256 }
1257 /* Make sure we finally have an instance */
1258 if (!PyInstance_Check(dialect_obj)) {
1259 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1260 Py_DECREF(dialect_obj);
1261 return NULL;
1262 }
1263 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1264 Py_DECREF(dialect_obj);
1265 return NULL;
1266 }
1267 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1268 Py_DECREF(dialect_obj);
1269 return NULL;
1270 }
1271 Py_DECREF(dialect_obj);
1272 Py_INCREF(Py_None);
1273 return Py_None;
1274}
1275
1276static PyObject *
1277csv_unregister_dialect(PyObject *module, PyObject *args)
1278{
1279 PyObject *name_obj;
1280
1281 if (!PyArg_ParseTuple(args, "O", &name_obj))
1282 return NULL;
1283 if (PyDict_DelItem(dialects, name_obj) < 0)
1284 return PyErr_Format(error_obj, "unknown dialect");
1285 Py_INCREF(Py_None);
1286 return Py_None;
1287}
1288
1289static PyObject *
1290csv_get_dialect(PyObject *module, PyObject *args)
1291{
1292 PyObject *name_obj;
1293
1294 if (!PyArg_ParseTuple(args, "O", &name_obj))
1295 return NULL;
1296 return get_dialect_from_registry(name_obj);
1297}
1298
1299/*
1300 * MODULE
1301 */
1302
1303PyDoc_STRVAR(csv_module_doc,
1304"CSV parsing and writing.\n"
1305"\n"
1306"This module provides classes that assist in the reading and writing\n"
1307"of Comma Separated Value (CSV) files, and implements the interface\n"
1308"described by PEP 305. Although many CSV files are simple to parse,\n"
1309"the format is not formally defined by a stable specification and\n"
1310"is subtle enough that parsing lines of a CSV file with something\n"
1311"like line.split(\",\") is bound to fail. The module supports three\n"
1312"basic APIs: reading, writing, and registration of dialects.\n"
1313"\n"
1314"\n"
1315"DIALECT REGISTRATION:\n"
1316"\n"
1317"Readers and writers support a dialect argument, which is a convenient\n"
1318"handle on a group of settings. When the dialect argument is a string,\n"
1319"it identifies one of the dialects previously registered with the module.\n"
1320"If it is a class or instance, the attributes of the argument are used as\n"
1321"the settings for the reader or writer:\n"
1322"\n"
1323" class excel:\n"
1324" delimiter = ','\n"
1325" quotechar = '\"'\n"
1326" escapechar = None\n"
1327" doublequote = True\n"
1328" skipinitialspace = False\n"
1329" lineterminator = '\r\n'\n"
1330" quoting = QUOTE_MINIMAL\n"
1331"\n"
1332"SETTINGS:\n"
1333"\n"
1334" * quotechar - specifies a one-character string to use as the \n"
1335" quoting character. It defaults to '\"'.\n"
1336" * delimiter - specifies a one-character string to use as the \n"
1337" field separator. It defaults to ','.\n"
1338" * skipinitialspace - specifies how to interpret whitespace which\n"
1339" immediately follows a delimiter. It defaults to False, which\n"
1340" means that whitespace immediately following a delimiter is part\n"
1341" of the following field.\n"
1342" * lineterminator - specifies the character sequence which should \n"
1343" terminate rows.\n"
1344" * quoting - controls when quotes should be generated by the writer.\n"
1345" It can take on any of the following module constants:\n"
1346"\n"
1347" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1348" field contains either the quotechar or the delimiter\n"
1349" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1350" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1351" fields which contain characters other than [+-0-9.].\n"
1352" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1353" * escapechar - specifies a one-character string used to escape \n"
1354" the delimiter when quoting is set to QUOTE_NONE.\n"
1355" * doublequote - controls the handling of quotes inside fields. When\n"
1356" True, two consecutive quotes are interpreted as one during read,\n"
1357" and when writing, each quote character embedded in the data is\n"
1358" written as two quotes\n");
1359
1360PyDoc_STRVAR(csv_reader_doc,
1361" csv_reader = reader(iterable [, dialect='excel']\n"
1362" [optional keyword args])\n"
1363" for row in csv_reader:\n"
1364" process(row)\n"
1365"\n"
1366"The \"iterable\" argument can be any object that returns a line\n"
1367"of input for each iteration, such as a file object or a list. The\n"
1368"optional \"dialect\" parameter is discussed below. The function\n"
1369"also accepts optional keyword arguments which override settings\n"
1370"provided by the dialect.\n"
1371"\n"
1372"The returned object is an iterator. Each iteration returns a row\n"
1373 "of the CSV file (which can span multiple input lines):\n");
1374
1375PyDoc_STRVAR(csv_writer_doc,
1376" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1377" [optional keyword args])\n"
1378" for row in csv_writer:\n"
1379" csv_writer.writerow(row)\n"
1380"\n"
1381" [or]\n"
1382"\n"
1383" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1384" [optional keyword args])\n"
1385" csv_writer.writerows(rows)\n"
1386"\n"
1387"The \"fileobj\" argument can be any object that supports the file API.\n");
1388
1389PyDoc_STRVAR(csv_list_dialects_doc,
1390"Return a list of all know dialect names.\n"
1391" names = csv.list_dialects()");
1392
1393PyDoc_STRVAR(csv_get_dialect_doc,
1394"Return the dialect instance associated with name.\n"
1395" dialect = csv.get_dialect(name)");
1396
1397PyDoc_STRVAR(csv_register_dialect_doc,
1398"Create a mapping from a string name to a dialect class.\n"
1399" dialect = csv.register_dialect(name, dialect)");
1400
1401PyDoc_STRVAR(csv_unregister_dialect_doc,
1402"Delete the name/dialect mapping associated with a string name.\n"
1403" csv.unregister_dialect(name)");
1404
1405static struct PyMethodDef csv_methods[] = {
1406 { "reader", (PyCFunction)csv_reader,
1407 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1408 { "writer", (PyCFunction)csv_writer,
1409 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1410 { "list_dialects", (PyCFunction)csv_list_dialects,
1411 METH_NOARGS, csv_list_dialects_doc},
1412 { "register_dialect", (PyCFunction)csv_register_dialect,
1413 METH_VARARGS, csv_register_dialect_doc},
1414 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1415 METH_VARARGS, csv_unregister_dialect_doc},
1416 { "get_dialect", (PyCFunction)csv_get_dialect,
1417 METH_VARARGS, csv_get_dialect_doc},
1418 { NULL, NULL }
1419};
1420
1421PyMODINIT_FUNC
1422init_csv(void)
1423{
1424 PyObject *module;
1425 PyObject *rev;
1426 PyObject *v;
1427 int res;
1428 StyleDesc *style;
1429
1430 if (PyType_Ready(&Dialect_Type) < 0)
1431 return;
1432
1433 if (PyType_Ready(&Reader_Type) < 0)
1434 return;
1435
1436 if (PyType_Ready(&Writer_Type) < 0)
1437 return;
1438
1439 /* Create the module and add the functions */
1440 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1441 if (module == NULL)
1442 return;
1443
1444 /* Add version to the module. */
1445 rev = PyString_FromString("1.0");
1446 if (rev == NULL)
1447 return;
1448 if (PyModule_AddObject(module, "__version__", rev) < 0)
1449 return;
1450
1451 /* Add _dialects dictionary */
1452 dialects = PyDict_New();
1453 if (dialects == NULL)
1454 return;
1455 if (PyModule_AddObject(module, "_dialects", dialects))
1456 return;
1457
1458 /* Add quote styles into dictionary */
1459 for (style = quote_styles; style->name; style++) {
1460 v = PyInt_FromLong(style->style);
1461 if (v == NULL)
1462 return;
1463 res = PyModule_AddObject(module, style->name, v);
1464 if (res < 0)
1465 return;
1466 }
1467
1468 /* Add the Dialect type */
1469 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1470 return;
1471
1472 /* Add the CSV exception object to the module. */
1473 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1474 if (error_obj == NULL)
1475 return;
1476 PyModule_AddObject(module, "Error", error_obj);
1477}