blob: 7a3bfc4502480d51b39bf135222af6168bd14512 [file] [log] [blame]
/* TODO:
*/
3
4#include "Python.h"
5#include "structmember.h"
6
/* begin 2.2 compatibility macros */
/*
 * Fallback definitions used only when the included Python headers have
 * not already provided the corresponding macro.
 */
#if !defined(PyDoc_STRVAR)
/* PyDoc_STR(str) keeps the text only when WITH_DOC_STRINGS is defined. */
#  if defined(WITH_DOC_STRINGS)
#    define PyDoc_STR(str) str
#  else
#    define PyDoc_STR(str) ""
#  endif
/* PyDoc_VAR declares the static buffer; PyDoc_STRVAR also initialises it. */
#  define PyDoc_VAR(name) static char name[]
#  define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* !defined(PyDoc_STRVAR) */

#if !defined(PyMODINIT_FUNC)
/* Return type (and linkage, under C++) of the module init function. */
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif /* !defined(PyMODINIT_FUNC) */
/* end 2.2 compatibility macros */
27
28static PyObject *error_obj; /* CSV exception */
29static PyObject *dialects; /* Dialect registry */
30
/* States of the character-driven CSV parser (see parse_process_char). */
typedef enum {
    START_RECORD,               /* nothing of the record consumed yet */
    START_FIELD,                /* expecting the first character of a field */
    ESCAPED_CHAR,               /* escape char seen outside a quoted field */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape char seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD       /* quote char seen inside a quoted field */
} ParserState;
35
/* Quoting policies a dialect can select; values are exposed as integers. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
39
40typedef struct {
41 QuoteStyle style;
42 char *name;
43} StyleDesc;
44
45static StyleDesc quote_styles[] = {
46 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
47 { QUOTE_ALL, "QUOTE_ALL" },
48 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
49 { QUOTE_NONE, "QUOTE_NONE" },
50 { 0 }
51};
52
53typedef struct {
54 PyObject_HEAD
55
56 int doublequote; /* is " represented by ""? */
57 char delimiter; /* field separator */
58 char quotechar; /* quote character */
59 char escapechar; /* escape character */
60 int skipinitialspace; /* ignore spaces following delimiter? */
61 PyObject *lineterminator; /* string to write between records */
62 QuoteStyle quoting; /* style of quoting to write */
63
64 int strict; /* raise exception on bad CSV */
65} DialectObj;
66
67staticforward PyTypeObject Dialect_Type;
68
69typedef struct {
70 PyObject_HEAD
71
72 PyObject *input_iter; /* iterate over this for input lines */
73
74 DialectObj *dialect; /* parsing dialect */
75
76 PyObject *fields; /* field list for current record */
77 ParserState state; /* current CSV parse state */
78 char *field; /* build current field in here */
79 int field_size; /* size of allocated buffer */
80 int field_len; /* length of current field */
81 int had_parse_error; /* did we have a parse error? */
82} ReaderObj;
83
84staticforward PyTypeObject Reader_Type;
85
86#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
87
88typedef struct {
89 PyObject_HEAD
90
91 PyObject *writeline; /* write output lines to this file */
92
93 DialectObj *dialect; /* parsing dialect */
94
95 char *rec; /* buffer for parser.join */
96 int rec_size; /* size of allocated record */
97 int rec_len; /* length of record */
98 int num_fields; /* number of fields in record */
99} WriterObj;
100
101staticforward PyTypeObject Writer_Type;
102
103/*
104 * DIALECT class
105 */
106
107static PyObject *
108get_dialect_from_registry(PyObject * name_obj)
109{
110 PyObject *dialect_obj;
111
112 dialect_obj = PyDict_GetItem(dialects, name_obj);
113 if (dialect_obj == NULL)
114 return PyErr_Format(error_obj, "unknown dialect");
115 Py_INCREF(dialect_obj);
116 return dialect_obj;
117}
118
119static int
120check_delattr(PyObject *v)
121{
122 if (v == NULL) {
123 PyErr_SetString(PyExc_TypeError,
124 "Cannot delete attribute");
125 return -1;
126 }
127 return 0;
128}
129
130static PyObject *
131get_string(PyObject *str)
132{
133 Py_XINCREF(str);
134 return str;
135}
136
137static int
138set_string(PyObject **str, PyObject *v)
139{
140 if (check_delattr(v) < 0)
141 return -1;
142 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
143 PyErr_BadArgument();
144 return -1;
145 }
146 Py_XDECREF(*str);
147 Py_INCREF(v);
148 *str = v;
149 return 0;
150}
151
152static PyObject *
153get_nullchar_as_None(char c)
154{
155 if (c == '\0') {
156 Py_INCREF(Py_None);
157 return Py_None;
158 }
159 else
160 return PyString_FromStringAndSize((char*)&c, 1);
161}
162
163static int
164set_None_as_nullchar(char * addr, PyObject *v)
165{
166 if (check_delattr(v) < 0)
167 return -1;
168 if (v == Py_None)
169 *addr = '\0';
170 else if (!PyString_Check(v) || PyString_Size(v) != 1) {
171 PyErr_BadArgument();
172 return -1;
173 }
174 else
175 *addr = PyString_AsString(v)[0];
176 return 0;
177}
178
179static PyObject *
180Dialect_get_lineterminator(DialectObj *self)
181{
182 return get_string(self->lineterminator);
183}
184
185static int
186Dialect_set_lineterminator(DialectObj *self, PyObject *value)
187{
188 return set_string(&self->lineterminator, value);
189}
190
191static PyObject *
192Dialect_get_escapechar(DialectObj *self)
193{
194 return get_nullchar_as_None(self->escapechar);
195}
196
197static int
198Dialect_set_escapechar(DialectObj *self, PyObject *value)
199{
200 return set_None_as_nullchar(&self->escapechar, value);
201}
202
203static PyObject *
204Dialect_get_quoting(DialectObj *self)
205{
206 return PyInt_FromLong(self->quoting);
207}
208
209static int
210Dialect_set_quoting(DialectObj *self, PyObject *v)
211{
212 int quoting;
213 StyleDesc *qs = quote_styles;
214
215 if (check_delattr(v) < 0)
216 return -1;
217 if (!PyInt_Check(v)) {
218 PyErr_BadArgument();
219 return -1;
220 }
221 quoting = PyInt_AsLong(v);
222 for (qs = quote_styles; qs->name; qs++) {
223 if (qs->style == quoting) {
224 self->quoting = quoting;
225 return 0;
226 }
227 }
228 PyErr_BadArgument();
229 return -1;
230}
231
232static struct PyMethodDef Dialect_methods[] = {
233 { NULL, NULL }
234};
235
236#define D_OFF(x) offsetof(DialectObj, x)
237
238static struct PyMemberDef Dialect_memberlist[] = {
239 { "quotechar", T_CHAR, D_OFF(quotechar) },
240 { "delimiter", T_CHAR, D_OFF(delimiter) },
241 { "skipinitialspace", T_INT, D_OFF(skipinitialspace) },
242 { "doublequote", T_INT, D_OFF(doublequote) },
243 { "strict", T_INT, D_OFF(strict) },
244 { NULL }
245};
246
247static PyGetSetDef Dialect_getsetlist[] = {
248 { "escapechar", (getter)Dialect_get_escapechar,
249 (setter)Dialect_set_escapechar },
250 { "lineterminator", (getter)Dialect_get_lineterminator,
251 (setter)Dialect_set_lineterminator },
252 { "quoting", (getter)Dialect_get_quoting,
253 (setter)Dialect_set_quoting },
254 {NULL},
255};
256
257static void
258Dialect_dealloc(DialectObj *self)
259{
260 Py_XDECREF(self->lineterminator);
261 /*PyMem_DEL(self);*/
262 self->ob_type->tp_free((PyObject *)self);
263}
264
265static int
266dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
267{
268 PyObject *dialect = NULL, *name_obj, *value_obj;
269
270 self->quotechar = '"';
271 self->delimiter = ',';
272 self->escapechar = '\0';
273 self->skipinitialspace = 0;
274 Py_XDECREF(self->lineterminator);
275 self->lineterminator = PyString_FromString("\r\n");
276 if (self->lineterminator == NULL)
277 return -1;
278 self->quoting = QUOTE_MINIMAL;
279 self->doublequote = 1;
280 self->strict = 0;
281
282 if (!PyArg_ParseTuple(args, "|O", &dialect))
283 return -1;
284 Py_XINCREF(dialect);
285 if (kwargs != NULL) {
286 PyObject * key = PyString_FromString("dialect");
287 PyObject * d;
288
289 d = PyDict_GetItem(kwargs, key);
290 if (d) {
291 Py_INCREF(d);
292 Py_XDECREF(dialect);
293 PyDict_DelItem(kwargs, key);
294 dialect = d;
295 }
296 Py_DECREF(key);
297 }
298 if (dialect != NULL) {
299 int i;
300 PyObject * dir_list;
301
302 /* If dialect is a string, look it up in our registry */
303 if (PyString_Check(dialect) || PyUnicode_Check(dialect)) {
304 PyObject * new_dia;
305 new_dia = get_dialect_from_registry(dialect);
306 Py_DECREF(dialect);
307 if (new_dia == NULL)
308 return -1;
309 dialect = new_dia;
310 }
311 /* A class rather than an instance? Instanciate */
312 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
313 PyObject * new_dia;
314 new_dia = PyObject_CallFunction(dialect, "");
315 Py_DECREF(dialect);
316 if (new_dia == NULL)
317 return -1;
318 dialect = new_dia;
319 }
320 /* Make sure we finally have an instance */
321 if (!PyInstance_Check(dialect) ||
322 (dir_list = PyObject_Dir(dialect)) == NULL) {
323 PyErr_SetString(PyExc_TypeError,
324 "dialect must be an instance");
325 Py_DECREF(dialect);
326 return -1;
327 }
328 /* And extract the attributes */
329 for (i = 0; i < PyList_GET_SIZE(dir_list); ++i) {
330 name_obj = PyList_GET_ITEM(dir_list, i);
331 if (PyString_AsString(name_obj)[0] == '_')
332 continue;
333 value_obj = PyObject_GetAttr(dialect, name_obj);
334 if (value_obj) {
335 if (PyObject_SetAttr((PyObject *)self,
336 name_obj, value_obj)) {
337 Py_DECREF(dir_list);
338 return -1;
339 }
340 Py_DECREF(value_obj);
341 }
342 }
343 Py_DECREF(dir_list);
344 Py_DECREF(dialect);
345 }
346 if (kwargs != NULL) {
347 int pos = 0;
348
349 while (PyDict_Next(kwargs, &pos, &name_obj, &value_obj)) {
350 if (PyObject_SetAttr((PyObject *)self,
351 name_obj, value_obj))
352 return -1;
353 }
354 }
355 return 0;
356}
357
358static PyObject *
359dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
360{
361 DialectObj *self;
362 self = (DialectObj *)type->tp_alloc(type, 0);
363 if (self != NULL) {
364 self->lineterminator = NULL;
365 }
366 return (PyObject *)self;
367}
368
369
370PyDoc_STRVAR(Dialect_Type_doc,
371"CSV dialect\n"
372"\n"
373"The Dialect type records CSV parsing and generation options.\n");
374
375static PyTypeObject Dialect_Type = {
376 PyObject_HEAD_INIT(NULL)
377 0, /* ob_size */
378 "_csv.Dialect", /* tp_name */
379 sizeof(DialectObj), /* tp_basicsize */
380 0, /* tp_itemsize */
381 /* methods */
382 (destructor)Dialect_dealloc, /* tp_dealloc */
383 (printfunc)0, /* tp_print */
384 (getattrfunc)0, /* tp_getattr */
385 (setattrfunc)0, /* tp_setattr */
386 (cmpfunc)0, /* tp_compare */
387 (reprfunc)0, /* tp_repr */
388 0, /* tp_as_number */
389 0, /* tp_as_sequence */
390 0, /* tp_as_mapping */
391 (hashfunc)0, /* tp_hash */
392 (ternaryfunc)0, /* tp_call */
393 (reprfunc)0, /* tp_str */
394 0, /* tp_getattro */
395 0, /* tp_setattro */
396 0, /* tp_as_buffer */
397 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
398 Dialect_Type_doc, /* tp_doc */
399 0, /* tp_traverse */
400 0, /* tp_clear */
401 0, /* tp_richcompare */
402 0, /* tp_weaklistoffset */
403 0, /* tp_iter */
404 0, /* tp_iternext */
405 Dialect_methods, /* tp_methods */
406 Dialect_memberlist, /* tp_members */
407 Dialect_getsetlist, /* tp_getset */
408 0, /* tp_base */
409 0, /* tp_dict */
410 0, /* tp_descr_get */
411 0, /* tp_descr_set */
412 0, /* tp_dictoffset */
413 (initproc)dialect_init, /* tp_init */
414 PyType_GenericAlloc, /* tp_alloc */
415 dialect_new, /* tp_new */
416 0, /* tp_free */
417};
418
419static void
420parse_save_field(ReaderObj *self)
421{
422 PyObject *field;
423
424 field = PyString_FromStringAndSize(self->field, self->field_len);
425 if (field != NULL) {
426 PyList_Append(self->fields, field);
427 Py_XDECREF(field);
428 }
429 self->field_len = 0;
430}
431
432static int
433parse_grow_buff(ReaderObj *self)
434{
435 if (self->field_size == 0) {
436 self->field_size = 4096;
437 self->field = PyMem_Malloc(self->field_size);
438 }
439 else {
440 self->field_size *= 2;
441 self->field = PyMem_Realloc(self->field, self->field_size);
442 }
443 if (self->field == NULL) {
444 PyErr_NoMemory();
445 return 0;
446 }
447 return 1;
448}
449
450static void
451parse_add_char(ReaderObj *self, char c)
452{
453 if (self->field_len == self->field_size && !parse_grow_buff(self))
454 return;
455 self->field[self->field_len++] = c;
456}
457
458static void
459parse_process_char(ReaderObj *self, char c)
460{
461 DialectObj *dialect = self->dialect;
462
463 switch (self->state) {
464 case START_RECORD:
465 /* start of record */
466 if (c == '\n')
467 /* empty line - return [] */
468 break;
469 /* normal character - handle as START_FIELD */
470 self->state = START_FIELD;
471 /* fallthru */
472 case START_FIELD:
473 /* expecting field */
474 if (c == '\n') {
475 /* save empty field - return [fields] */
476 parse_save_field(self);
477 self->state = START_RECORD;
478 }
479 else if (c == dialect->quotechar) {
480 /* start quoted field */
481 self->state = IN_QUOTED_FIELD;
482 }
483 else if (c == dialect->escapechar) {
484 /* possible escaped character */
485 self->state = ESCAPED_CHAR;
486 }
487 else if (c == ' ' && dialect->skipinitialspace)
488 /* ignore space at start of field */
489 ;
490 else if (c == dialect->delimiter) {
491 /* save empty field */
492 parse_save_field(self);
493 }
494 else {
495 /* begin new unquoted field */
496 parse_add_char(self, c);
497 self->state = IN_FIELD;
498 }
499 break;
500
501 case ESCAPED_CHAR:
502 if (c != dialect->escapechar &&
503 c != dialect->delimiter &&
504 c != dialect->quotechar)
505 parse_add_char(self, dialect->escapechar);
506 parse_add_char(self, c);
507 self->state = IN_FIELD;
508 break;
509
510 case IN_FIELD:
511 /* in unquoted field */
512 if (c == '\n') {
513 /* end of line - return [fields] */
514 parse_save_field(self);
515 self->state = START_RECORD;
516 }
517 else if (c == dialect->escapechar) {
518 /* possible escaped character */
519 self->state = ESCAPED_CHAR;
520 }
521 else if (c == dialect->delimiter) {
522 /* save field - wait for new field */
523 parse_save_field(self);
524 self->state = START_FIELD;
525 }
526 else {
527 /* normal character - save in field */
528 parse_add_char(self, c);
529 }
530 break;
531
532 case IN_QUOTED_FIELD:
533 /* in quoted field */
534 if (c == '\n') {
535 /* end of line - save '\n' in field */
536 parse_add_char(self, '\n');
537 }
538 else if (c == dialect->escapechar) {
539 /* Possible escape character */
540 self->state = ESCAPE_IN_QUOTED_FIELD;
541 }
542 else if (c == dialect->quotechar) {
543 if (dialect->doublequote) {
544 /* doublequote; " represented by "" */
545 self->state = QUOTE_IN_QUOTED_FIELD;
546 }
547 else {
548 /* end of quote part of field */
549 self->state = IN_FIELD;
550 }
551 }
552 else {
553 /* normal character - save in field */
554 parse_add_char(self, c);
555 }
556 break;
557
558 case ESCAPE_IN_QUOTED_FIELD:
559 if (c != dialect->escapechar &&
560 c != dialect->delimiter &&
561 c != dialect->quotechar)
562 parse_add_char(self, dialect->escapechar);
563 parse_add_char(self, c);
564 self->state = IN_QUOTED_FIELD;
565 break;
566
567 case QUOTE_IN_QUOTED_FIELD:
568 /* doublequote - seen a quote in an quoted field */
569 if (dialect->quoting != QUOTE_NONE &&
570 c == dialect->quotechar) {
571 /* save "" as " */
572 parse_add_char(self, c);
573 self->state = IN_QUOTED_FIELD;
574 }
575 else if (c == dialect->delimiter) {
576 /* save field - wait for new field */
577 parse_save_field(self);
578 self->state = START_FIELD;
579 }
580 else if (c == '\n') {
581 /* end of line - return [fields] */
582 parse_save_field(self);
583 self->state = START_RECORD;
584 }
585 else if (!dialect->strict) {
586 parse_add_char(self, c);
587 self->state = IN_FIELD;
588 }
589 else {
590 /* illegal */
591 self->had_parse_error = 1;
592 PyErr_Format(error_obj, "%c expected after %c",
593 dialect->delimiter,
594 dialect->quotechar);
595 }
596 break;
597
598 }
599}
600
601/*
602 * READER
603 */
604#define R_OFF(x) offsetof(ReaderObj, x)
605
606static struct PyMemberDef Reader_memberlist[] = {
607 { "dialect", T_OBJECT, R_OFF(dialect), RO },
608 { NULL }
609};
610
611static PyObject *
612Reader_getiter(ReaderObj *self)
613{
614 Py_INCREF(self);
615 return (PyObject *)self;
616}
617
618static PyObject *
619Reader_iternext(ReaderObj *self)
620{
621 PyObject *lineobj;
622 PyObject *fields;
623 char *line;
624
625 do {
626 lineobj = PyIter_Next(self->input_iter);
627 if (lineobj == NULL) {
628 /* End of input OR exception */
629 if (!PyErr_Occurred() && self->field_len != 0)
630 return PyErr_Format(error_obj,
631 "newline inside string");
632 return NULL;
633 }
634
635 if (self->had_parse_error) {
636 if (self->fields) {
637 Py_XDECREF(self->fields);
638 }
639 self->fields = PyList_New(0);
640 self->field_len = 0;
641 self->state = START_RECORD;
642 self->had_parse_error = 0;
643 }
644 line = PyString_AsString(lineobj);
645
646 if (line == NULL) {
647 Py_DECREF(lineobj);
648 return NULL;
649 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000650 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000651 self->had_parse_error = 1;
652 Py_DECREF(lineobj);
653 return PyErr_Format(error_obj,
654 "string with NUL bytes");
655 }
656
657 /* Process line of text - send '\n' to processing code to
658 represent end of line. End of line which is not at end of
659 string is an error. */
660 while (*line) {
661 char c;
662
663 c = *line++;
664 if (c == '\r') {
665 c = *line++;
666 if (c == '\0')
667 /* macintosh end of line */
668 break;
669 if (c == '\n') {
670 c = *line++;
671 if (c == '\0')
672 /* DOS end of line */
673 break;
674 }
675 self->had_parse_error = 1;
676 Py_DECREF(lineobj);
677 return PyErr_Format(error_obj,
678 "newline inside string");
679 }
680 if (c == '\n') {
681 c = *line++;
682 if (c == '\0')
683 /* unix end of line */
684 break;
685 self->had_parse_error = 1;
686 Py_DECREF(lineobj);
687 return PyErr_Format(error_obj,
688 "newline inside string");
689 }
690 parse_process_char(self, c);
691 if (PyErr_Occurred()) {
692 Py_DECREF(lineobj);
693 return NULL;
694 }
695 }
696 parse_process_char(self, '\n');
697 Py_DECREF(lineobj);
698 } while (self->state != START_RECORD);
699
700 fields = self->fields;
701 self->fields = PyList_New(0);
702 return fields;
703}
704
705static void
706Reader_dealloc(ReaderObj *self)
707{
708 Py_XDECREF(self->dialect);
709 Py_XDECREF(self->input_iter);
710 Py_XDECREF(self->fields);
711 PyMem_DEL(self);
712}
713
714PyDoc_STRVAR(Reader_Type_doc,
715"CSV reader\n"
716"\n"
717"Reader objects are responsible for reading and parsing tabular data\n"
718"in CSV format.\n"
719);
720
721static struct PyMethodDef Reader_methods[] = {
722 { NULL, NULL }
723};
724
725static PyTypeObject Reader_Type = {
726 PyObject_HEAD_INIT(NULL)
727 0, /*ob_size*/
728 "_csv.reader", /*tp_name*/
729 sizeof(ReaderObj), /*tp_basicsize*/
730 0, /*tp_itemsize*/
731 /* methods */
732 (destructor)Reader_dealloc, /*tp_dealloc*/
733 (printfunc)0, /*tp_print*/
734 (getattrfunc)0, /*tp_getattr*/
735 (setattrfunc)0, /*tp_setattr*/
736 (cmpfunc)0, /*tp_compare*/
737 (reprfunc)0, /*tp_repr*/
738 0, /*tp_as_number*/
739 0, /*tp_as_sequence*/
740 0, /*tp_as_mapping*/
741 (hashfunc)0, /*tp_hash*/
742 (ternaryfunc)0, /*tp_call*/
743 (reprfunc)0, /*tp_str*/
744 0, /*tp_getattro*/
745 0, /*tp_setattro*/
746 0, /*tp_as_buffer*/
747 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
748 Reader_Type_doc, /*tp_doc*/
749 0, /*tp_traverse*/
750 0, /*tp_clear*/
751 0, /*tp_richcompare*/
752 0, /*tp_weaklistoffset*/
753 (getiterfunc)Reader_getiter, /*tp_iter*/
754 (getiterfunc)Reader_iternext, /*tp_iternext*/
755 Reader_methods, /*tp_methods*/
756 Reader_memberlist, /*tp_members*/
757 0, /*tp_getset*/
758
759};
760
761static PyObject *
762csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
763{
764 PyObject * iterator, * dialect = NULL, *ctor_args;
765 ReaderObj * self = PyObject_NEW(ReaderObj, &Reader_Type);
766
767 if (!self)
768 return NULL;
769
770 self->dialect = NULL;
771 self->input_iter = self->fields = NULL;
772
773 self->fields = NULL;
774 self->input_iter = NULL;
775 self->had_parse_error = 0;
776 self->field = NULL;
777 self->field_size = 0;
778 self->field_len = 0;
779 self->state = START_RECORD;
780
781 if (!PyArg_ParseTuple(args, "O|O", &iterator, &dialect)) {
782 Py_DECREF(self);
783 return NULL;
784 }
785 self->input_iter = PyObject_GetIter(iterator);
786 if (self->input_iter == NULL) {
787 PyErr_SetString(PyExc_TypeError,
788 "argument 1 must be an iterator");
789 Py_DECREF(self);
790 return NULL;
791 }
792 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
793 if (ctor_args == NULL) {
794 Py_DECREF(self);
795 return NULL;
796 }
797 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
798 ctor_args, keyword_args);
799 Py_DECREF(ctor_args);
800 if (self->dialect == NULL) {
801 Py_DECREF(self);
802 return NULL;
803 }
804 self->fields = PyList_New(0);
805 if (self->fields == NULL) {
806 Py_DECREF(self);
807 return NULL;
808 }
809
810 return (PyObject *)self;
811}
812
813/*
814 * WRITER
815 */
816/* ---------------------------------------------------------------- */
817static void
818join_reset(WriterObj *self)
819{
820 self->rec_len = 0;
821 self->num_fields = 0;
822}
823
/* Granularity, in bytes, used when sizing the writer's record buffer. */
#define MEM_INCR (32 * 1024)
825
826/* Calculate new record length or append field to record. Return new
827 * record length.
828 */
829static int
830join_append_data(WriterObj *self, char *field, int quote_empty,
831 int *quoted, int copy_phase)
832{
833 DialectObj *dialect = self->dialect;
834 int i, rec_len;
835
836 rec_len = self->rec_len;
837
838 /* If this is not the first field we need a field separator.
839 */
840 if (self->num_fields > 0) {
841 if (copy_phase)
842 self->rec[rec_len] = dialect->delimiter;
843 rec_len++;
844 }
845 /* Handle preceding quote.
846 */
847 switch (dialect->quoting) {
848 case QUOTE_ALL:
849 *quoted = 1;
850 if (copy_phase)
851 self->rec[rec_len] = dialect->quotechar;
852 rec_len++;
853 break;
854 case QUOTE_MINIMAL:
855 case QUOTE_NONNUMERIC:
856 /* We only know about quoted in the copy phase.
857 */
858 if (copy_phase && *quoted) {
859 self->rec[rec_len] = dialect->quotechar;
860 rec_len++;
861 }
862 break;
863 case QUOTE_NONE:
864 break;
865 }
866 /* Copy/count field data.
867 */
868 for (i = 0;; i++) {
869 char c = field[i];
870
871 if (c == '\0')
872 break;
873 /* If in doublequote mode we escape quote chars with a
874 * quote.
875 */
876 if (dialect->quoting != QUOTE_NONE &&
877 c == dialect->quotechar && dialect->doublequote) {
878 if (copy_phase)
879 self->rec[rec_len] = dialect->quotechar;
880 *quoted = 1;
881 rec_len++;
882 }
883
884 /* Some special characters need to be escaped. If we have a
885 * quote character switch to quoted field instead of escaping
886 * individual characters.
887 */
888 if (!*quoted
889 && (c == dialect->delimiter ||
890 c == dialect->escapechar ||
891 c == '\n' || c == '\r')) {
892 if (dialect->quoting != QUOTE_NONE)
893 *quoted = 1;
894 else if (dialect->escapechar) {
895 if (copy_phase)
896 self->rec[rec_len] = dialect->escapechar;
897 rec_len++;
898 }
899 else {
900 PyErr_Format(error_obj,
901 "delimiter must be quoted or escaped");
902 return -1;
903 }
904 }
905 /* Copy field character into record buffer.
906 */
907 if (copy_phase)
908 self->rec[rec_len] = c;
909 rec_len++;
910 }
911
912 /* If field is empty check if it needs to be quoted.
913 */
914 if (i == 0 && quote_empty) {
915 if (dialect->quoting == QUOTE_NONE) {
916 PyErr_Format(error_obj,
917 "single empty field record must be quoted");
918 return -1;
919 } else
920 *quoted = 1;
921 }
922
923 /* Handle final quote character on field.
924 */
925 if (*quoted) {
926 if (copy_phase)
927 self->rec[rec_len] = dialect->quotechar;
928 else
929 /* Didn't know about leading quote until we found it
930 * necessary in field data - compensate for it now.
931 */
932 rec_len++;
933 rec_len++;
934 }
935
936 return rec_len;
937}
938
939static int
940join_check_rec_size(WriterObj *self, int rec_len)
941{
942 if (rec_len > self->rec_size) {
943 if (self->rec_size == 0) {
944 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
945 self->rec = PyMem_Malloc(self->rec_size);
946 }
947 else {
948 char *old_rec = self->rec;
949
950 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
951 self->rec = PyMem_Realloc(self->rec, self->rec_size);
952 if (self->rec == NULL)
953 PyMem_Free(old_rec);
954 }
955 if (self->rec == NULL) {
956 PyErr_NoMemory();
957 return 0;
958 }
959 }
960 return 1;
961}
962
963static int
964join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
965{
966 int rec_len;
967
968 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
969 if (rec_len < 0)
970 return 0;
971
972 /* grow record buffer if necessary */
973 if (!join_check_rec_size(self, rec_len))
974 return 0;
975
976 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
977 self->num_fields++;
978
979 return 1;
980}
981
982static int
983join_append_lineterminator(WriterObj *self)
984{
985 int terminator_len;
986
987 terminator_len = PyString_Size(self->dialect->lineterminator);
988
989 /* grow record buffer if necessary */
990 if (!join_check_rec_size(self, self->rec_len + terminator_len))
991 return 0;
992
993 memmove(self->rec + self->rec_len,
994 PyString_AsString(self->dialect->lineterminator),
995 terminator_len);
996 self->rec_len += terminator_len;
997
998 return 1;
999}
1000
1001PyDoc_STRVAR(csv_writerow_doc,
1002"join(sequence) -> string\n"
1003"\n"
1004"Construct a CSV record from a sequence of fields. Non-string\n"
1005"elements will be converted to string.");
1006
1007static PyObject *
1008csv_writerow(WriterObj *self, PyObject *seq)
1009{
1010 DialectObj *dialect = self->dialect;
1011 int len, i;
1012
1013 if (!PySequence_Check(seq))
1014 return PyErr_Format(error_obj, "sequence expected");
1015
1016 len = PySequence_Length(seq);
1017 if (len < 0)
1018 return NULL;
1019
1020 /* Join all fields in internal buffer.
1021 */
1022 join_reset(self);
1023 for (i = 0; i < len; i++) {
1024 PyObject *field;
1025 int append_ok;
1026 int quoted;
1027
1028 field = PySequence_GetItem(seq, i);
1029 if (field == NULL)
1030 return NULL;
1031
1032 quoted = 0;
1033 if (dialect->quoting == QUOTE_NONNUMERIC) {
1034 PyObject *num;
1035
1036 num = PyNumber_Float(field);
1037 if (num == NULL) {
1038 quoted = 1;
1039 PyErr_Clear();
1040 }
1041 else {
1042 Py_DECREF(num);
1043 }
1044 }
1045
1046 if (PyString_Check(field)) {
1047 append_ok = join_append(self, PyString_AsString(field),
1048 &quoted, len == 1);
1049 Py_DECREF(field);
1050 }
1051 else if (field == Py_None) {
1052 append_ok = join_append(self, "", &quoted, len == 1);
1053 Py_DECREF(field);
1054 }
1055 else {
1056 PyObject *str;
1057
1058 str = PyObject_Str(field);
1059 Py_DECREF(field);
1060 if (str == NULL)
1061 return NULL;
1062
1063 append_ok = join_append(self, PyString_AsString(str),
1064 &quoted, len == 1);
1065 Py_DECREF(str);
1066 }
1067 if (!append_ok)
1068 return NULL;
1069 }
1070
1071 /* Add line terminator.
1072 */
1073 if (!join_append_lineterminator(self))
1074 return 0;
1075
1076 return PyObject_CallFunction(self->writeline,
1077 "(s#)", self->rec, self->rec_len);
1078}
1079
1080static PyObject *
1081csv_writerows(WriterObj *self, PyObject *seqseq)
1082{
1083 PyObject *row_iter, *row_obj, *result;
1084
1085 row_iter = PyObject_GetIter(seqseq);
1086 if (row_iter == NULL) {
1087 PyErr_SetString(PyExc_TypeError,
1088 "writerows() argument must be iteratable");
1089 return NULL;
1090 }
1091 while ((row_obj = PyIter_Next(row_iter))) {
1092 result = csv_writerow(self, row_obj);
1093 Py_DECREF(row_obj);
1094 if (!result) {
1095 Py_DECREF(row_iter);
1096 return NULL;
1097 }
1098 else
1099 Py_DECREF(result);
1100 }
1101 Py_DECREF(row_iter);
1102 if (PyErr_Occurred())
1103 return NULL;
1104 Py_INCREF(Py_None);
1105 return Py_None;
1106}
1107
1108static struct PyMethodDef Writer_methods[] = {
1109 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1110 { "writerows", (PyCFunction)csv_writerows, METH_O},
1111 { NULL, NULL }
1112};
1113
1114#define W_OFF(x) offsetof(WriterObj, x)
1115
1116static struct PyMemberDef Writer_memberlist[] = {
1117 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1118 { NULL }
1119};
1120
1121static void
1122Writer_dealloc(WriterObj *self)
1123{
1124 Py_XDECREF(self->dialect);
1125 Py_XDECREF(self->writeline);
1126 PyMem_DEL(self);
1127}
1128
1129PyDoc_STRVAR(Writer_Type_doc,
1130"CSV writer\n"
1131"\n"
1132"Writer objects are responsible for generating tabular data\n"
1133"in CSV format from sequence input.\n"
1134);
1135
1136static PyTypeObject Writer_Type = {
1137 PyObject_HEAD_INIT(NULL)
1138 0, /*ob_size*/
1139 "_csv.writer", /*tp_name*/
1140 sizeof(WriterObj), /*tp_basicsize*/
1141 0, /*tp_itemsize*/
1142 /* methods */
1143 (destructor)Writer_dealloc, /*tp_dealloc*/
1144 (printfunc)0, /*tp_print*/
1145 (getattrfunc)0, /*tp_getattr*/
1146 (setattrfunc)0, /*tp_setattr*/
1147 (cmpfunc)0, /*tp_compare*/
1148 (reprfunc)0, /*tp_repr*/
1149 0, /*tp_as_number*/
1150 0, /*tp_as_sequence*/
1151 0, /*tp_as_mapping*/
1152 (hashfunc)0, /*tp_hash*/
1153 (ternaryfunc)0, /*tp_call*/
1154 (reprfunc)0, /*tp_str*/
1155 0, /*tp_getattro*/
1156 0, /*tp_setattro*/
1157 0, /*tp_as_buffer*/
1158 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
1159 Writer_Type_doc,
1160 0, /*tp_traverse*/
1161 0, /*tp_clear*/
1162 0, /*tp_richcompare*/
1163 0, /*tp_weaklistoffset*/
1164 (getiterfunc)0, /*tp_iter*/
1165 (getiterfunc)0, /*tp_iternext*/
1166 Writer_methods, /*tp_methods*/
1167 Writer_memberlist, /*tp_members*/
1168 0, /*tp_getset*/
1169};
1170
1171static PyObject *
1172csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1173{
1174 PyObject * output_file, * dialect = NULL, *ctor_args;
1175 WriterObj * self = PyObject_NEW(WriterObj, &Writer_Type);
1176
1177 if (!self)
1178 return NULL;
1179
1180 self->dialect = NULL;
1181 self->writeline = NULL;
1182
1183 self->rec = NULL;
1184 self->rec_size = 0;
1185 self->rec_len = 0;
1186 self->num_fields = 0;
1187
1188 if (!PyArg_ParseTuple(args, "O|O", &output_file, &dialect)) {
1189 Py_DECREF(self);
1190 return NULL;
1191 }
1192 self->writeline = PyObject_GetAttrString(output_file, "write");
1193 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1194 PyErr_SetString(PyExc_TypeError,
1195 "argument 1 must be an instance with a write method");
1196 Py_DECREF(self);
1197 return NULL;
1198 }
1199 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1200 if (ctor_args == NULL) {
1201 Py_DECREF(self);
1202 return NULL;
1203 }
1204 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1205 ctor_args, keyword_args);
1206 Py_DECREF(ctor_args);
1207 if (self->dialect == NULL) {
1208 Py_DECREF(self);
1209 return NULL;
1210 }
1211 return (PyObject *)self;
1212}
1213
1214/*
1215 * DIALECT REGISTRY
1216 */
1217static PyObject *
1218csv_list_dialects(PyObject *module, PyObject *args)
1219{
1220 return PyDict_Keys(dialects);
1221}
1222
1223static PyObject *
1224csv_register_dialect(PyObject *module, PyObject *args)
1225{
1226 PyObject *name_obj, *dialect_obj;
1227
1228 if (!PyArg_ParseTuple(args, "OO", &name_obj, &dialect_obj))
1229 return NULL;
1230 if (!PyString_Check(name_obj) && !PyUnicode_Check(name_obj)) {
1231 PyErr_SetString(PyExc_TypeError,
1232 "dialect name must be a string or unicode");
1233 return NULL;
1234 }
1235 Py_INCREF(dialect_obj);
1236 /* A class rather than an instance? Instanciate */
1237 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1238 PyObject * new_dia;
1239 new_dia = PyObject_CallFunction(dialect_obj, "");
1240 Py_DECREF(dialect_obj);
1241 if (new_dia == NULL)
1242 return NULL;
1243 dialect_obj = new_dia;
1244 }
1245 /* Make sure we finally have an instance */
1246 if (!PyInstance_Check(dialect_obj)) {
1247 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1248 Py_DECREF(dialect_obj);
1249 return NULL;
1250 }
1251 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1252 Py_DECREF(dialect_obj);
1253 return NULL;
1254 }
1255 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1256 Py_DECREF(dialect_obj);
1257 return NULL;
1258 }
1259 Py_DECREF(dialect_obj);
1260 Py_INCREF(Py_None);
1261 return Py_None;
1262}
1263
1264static PyObject *
1265csv_unregister_dialect(PyObject *module, PyObject *args)
1266{
1267 PyObject *name_obj;
1268
1269 if (!PyArg_ParseTuple(args, "O", &name_obj))
1270 return NULL;
1271 if (PyDict_DelItem(dialects, name_obj) < 0)
1272 return PyErr_Format(error_obj, "unknown dialect");
1273 Py_INCREF(Py_None);
1274 return Py_None;
1275}
1276
1277static PyObject *
1278csv_get_dialect(PyObject *module, PyObject *args)
1279{
1280 PyObject *name_obj;
1281
1282 if (!PyArg_ParseTuple(args, "O", &name_obj))
1283 return NULL;
1284 return get_dialect_from_registry(name_obj);
1285}
1286
1287/*
1288 * MODULE
1289 */
1290
1291PyDoc_STRVAR(csv_module_doc,
1292"CSV parsing and writing.\n"
1293"\n"
1294"This module provides classes that assist in the reading and writing\n"
1295"of Comma Separated Value (CSV) files, and implements the interface\n"
1296"described by PEP 305. Although many CSV files are simple to parse,\n"
1297"the format is not formally defined by a stable specification and\n"
1298"is subtle enough that parsing lines of a CSV file with something\n"
1299"like line.split(\",\") is bound to fail. The module supports three\n"
1300"basic APIs: reading, writing, and registration of dialects.\n"
1301"\n"
1302"\n"
1303"DIALECT REGISTRATION:\n"
1304"\n"
1305"Readers and writers support a dialect argument, which is a convenient\n"
1306"handle on a group of settings. When the dialect argument is a string,\n"
1307"it identifies one of the dialects previously registered with the module.\n"
1308"If it is a class or instance, the attributes of the argument are used as\n"
1309"the settings for the reader or writer:\n"
1310"\n"
1311" class excel:\n"
1312" delimiter = ','\n"
1313" quotechar = '\"'\n"
1314" escapechar = None\n"
1315" doublequote = True\n"
1316" skipinitialspace = False\n"
1317" lineterminator = '\r\n'\n"
1318" quoting = QUOTE_MINIMAL\n"
1319"\n"
1320"SETTINGS:\n"
1321"\n"
1322" * quotechar - specifies a one-character string to use as the \n"
1323" quoting character. It defaults to '\"'.\n"
1324" * delimiter - specifies a one-character string to use as the \n"
1325" field separator. It defaults to ','.\n"
1326" * skipinitialspace - specifies how to interpret whitespace which\n"
1327" immediately follows a delimiter. It defaults to False, which\n"
1328" means that whitespace immediately following a delimiter is part\n"
1329" of the following field.\n"
1330" * lineterminator - specifies the character sequence which should \n"
1331" terminate rows.\n"
1332" * quoting - controls when quotes should be generated by the writer.\n"
1333" It can take on any of the following module constants:\n"
1334"\n"
1335" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1336" field contains either the quotechar or the delimiter\n"
1337" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1338" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1339" fields which contain characters other than [+-0-9.].\n"
1340" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1341" * escapechar - specifies a one-character string used to escape \n"
1342" the delimiter when quoting is set to QUOTE_NONE.\n"
1343" * doublequote - controls the handling of quotes inside fields. When\n"
1344" True, two consecutive quotes are interpreted as one during read,\n"
1345" and when writing, each quote character embedded in the data is\n"
1346" written as two quotes\n");
1347
1348PyDoc_STRVAR(csv_reader_doc,
1349" csv_reader = reader(iterable [, dialect='excel']\n"
1350" [optional keyword args])\n"
1351" for row in csv_reader:\n"
1352" process(row)\n"
1353"\n"
1354"The \"iterable\" argument can be any object that returns a line\n"
1355"of input for each iteration, such as a file object or a list. The\n"
1356"optional \"dialect\" parameter is discussed below. The function\n"
1357"also accepts optional keyword arguments which override settings\n"
1358"provided by the dialect.\n"
1359"\n"
1360"The returned object is an iterator. Each iteration returns a row\n"
1361 "of the CSV file (which can span multiple input lines):\n");
1362
1363PyDoc_STRVAR(csv_writer_doc,
1364" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1365" [optional keyword args])\n"
1366" for row in csv_writer:\n"
1367" csv_writer.writerow(row)\n"
1368"\n"
1369" [or]\n"
1370"\n"
1371" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1372" [optional keyword args])\n"
1373" csv_writer.writerows(rows)\n"
1374"\n"
1375"The \"fileobj\" argument can be any object that supports the file API.\n");
1376
1377PyDoc_STRVAR(csv_list_dialects_doc,
1378"Return a list of all know dialect names.\n"
1379" names = csv.list_dialects()");
1380
1381PyDoc_STRVAR(csv_get_dialect_doc,
1382"Return the dialect instance associated with name.\n"
1383" dialect = csv.get_dialect(name)");
1384
1385PyDoc_STRVAR(csv_register_dialect_doc,
1386"Create a mapping from a string name to a dialect class.\n"
1387" dialect = csv.register_dialect(name, dialect)");
1388
1389PyDoc_STRVAR(csv_unregister_dialect_doc,
1390"Delete the name/dialect mapping associated with a string name.\n"
1391" csv.unregister_dialect(name)");
1392
1393static struct PyMethodDef csv_methods[] = {
1394 { "reader", (PyCFunction)csv_reader,
1395 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1396 { "writer", (PyCFunction)csv_writer,
1397 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1398 { "list_dialects", (PyCFunction)csv_list_dialects,
1399 METH_NOARGS, csv_list_dialects_doc},
1400 { "register_dialect", (PyCFunction)csv_register_dialect,
1401 METH_VARARGS, csv_register_dialect_doc},
1402 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1403 METH_VARARGS, csv_unregister_dialect_doc},
1404 { "get_dialect", (PyCFunction)csv_get_dialect,
1405 METH_VARARGS, csv_get_dialect_doc},
1406 { NULL, NULL }
1407};
1408
1409PyMODINIT_FUNC
1410init_csv(void)
1411{
1412 PyObject *module;
1413 PyObject *rev;
1414 PyObject *v;
1415 int res;
1416 StyleDesc *style;
1417
1418 if (PyType_Ready(&Dialect_Type) < 0)
1419 return;
1420
1421 if (PyType_Ready(&Reader_Type) < 0)
1422 return;
1423
1424 if (PyType_Ready(&Writer_Type) < 0)
1425 return;
1426
1427 /* Create the module and add the functions */
1428 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1429 if (module == NULL)
1430 return;
1431
1432 /* Add version to the module. */
1433 rev = PyString_FromString("1.0");
1434 if (rev == NULL)
1435 return;
1436 if (PyModule_AddObject(module, "__version__", rev) < 0)
1437 return;
1438
1439 /* Add _dialects dictionary */
1440 dialects = PyDict_New();
1441 if (dialects == NULL)
1442 return;
1443 if (PyModule_AddObject(module, "_dialects", dialects))
1444 return;
1445
1446 /* Add quote styles into dictionary */
1447 for (style = quote_styles; style->name; style++) {
1448 v = PyInt_FromLong(style->style);
1449 if (v == NULL)
1450 return;
1451 res = PyModule_AddObject(module, style->name, v);
1452 if (res < 0)
1453 return;
1454 }
1455
1456 /* Add the Dialect type */
1457 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1458 return;
1459
1460 /* Add the CSV exception object to the module. */
1461 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1462 if (error_obj == NULL)
1463 return;
1464 PyModule_AddObject(module, "Error", error_obj);
1465}