blob: ba49236defed79f61256dda27836ba145e1da13e [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Macros for inline documentation; only supplied when Python.h does not
 * already define PyDoc_STRVAR.  Docstrings collapse to "" unless
 * WITH_DOC_STRINGS is defined. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

/* Return-type/linkage decoration for the module init function. */
#ifndef PyMODINIT_FUNC
# ifdef __cplusplus
#  define PyMODINIT_FUNC extern "C" void
# else /* __cplusplus */
#  define PyMODINIT_FUNC void
# endif /* __cplusplus */
#endif
/* end 2.2 compatibility macros */
41
42static PyObject *error_obj; /* CSV exception */
43static PyObject *dialects; /* Dialect registry */
44
/* States of the reader's character-at-a-time parser. */
typedef enum {
        START_RECORD,           /* between records */
        START_FIELD,            /* expecting the first character of a field */
        ESCAPED_CHAR,           /* previous character was the escape char */
        IN_FIELD,               /* inside an unquoted field */
        IN_QUOTED_FIELD,        /* inside a quoted field */
        ESCAPE_IN_QUOTED_FIELD, /* escape char seen inside a quoted field */
        QUOTE_IN_QUOTED_FIELD   /* quote char seen inside a quoted field */
} ParserState;
49
/* Quoting policies a dialect may select. */
typedef enum {
        QUOTE_MINIMAL,
        QUOTE_ALL,
        QUOTE_NONNUMERIC,
        QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
        QuoteStyle style;
        char *name;
} StyleDesc;

/* Table of every known quoting policy; the entry whose name is NULL
 * terminates the table. */
static StyleDesc quote_styles[] = {
        { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
        { QUOTE_ALL,        "QUOTE_ALL" },
        { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
        { QUOTE_NONE,       "QUOTE_NONE" },
        { QUOTE_MINIMAL,    NULL }      /* sentinel: style 0, no name */
};
66
67typedef struct {
68 PyObject_HEAD
69
70 int doublequote; /* is " represented by ""? */
71 char delimiter; /* field separator */
72 char quotechar; /* quote character */
73 char escapechar; /* escape character */
74 int skipinitialspace; /* ignore spaces following delimiter? */
75 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000076 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000077
78 int strict; /* raise exception on bad CSV */
79} DialectObj;
80
81staticforward PyTypeObject Dialect_Type;
82
83typedef struct {
84 PyObject_HEAD
85
86 PyObject *input_iter; /* iterate over this for input lines */
87
88 DialectObj *dialect; /* parsing dialect */
89
90 PyObject *fields; /* field list for current record */
91 ParserState state; /* current CSV parse state */
92 char *field; /* build current field in here */
93 int field_size; /* size of allocated buffer */
94 int field_len; /* length of current field */
95 int had_parse_error; /* did we have a parse error? */
96} ReaderObj;
97
98staticforward PyTypeObject Reader_Type;
99
100#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
101
102typedef struct {
103 PyObject_HEAD
104
105 PyObject *writeline; /* write output lines to this file */
106
107 DialectObj *dialect; /* parsing dialect */
108
109 char *rec; /* buffer for parser.join */
110 int rec_size; /* size of allocated record */
111 int rec_len; /* length of record */
112 int num_fields; /* number of fields in record */
113} WriterObj;
114
115staticforward PyTypeObject Writer_Type;
116
117/*
118 * DIALECT class
119 */
120
121static PyObject *
122get_dialect_from_registry(PyObject * name_obj)
123{
124 PyObject *dialect_obj;
125
126 dialect_obj = PyDict_GetItem(dialects, name_obj);
127 if (dialect_obj == NULL)
128 return PyErr_Format(error_obj, "unknown dialect");
129 Py_INCREF(dialect_obj);
130 return dialect_obj;
131}
132
Skip Montanarob4a04172003-03-20 23:29:12 +0000133static PyObject *
134get_string(PyObject *str)
135{
136 Py_XINCREF(str);
137 return str;
138}
139
Skip Montanarob4a04172003-03-20 23:29:12 +0000140static PyObject *
141get_nullchar_as_None(char c)
142{
143 if (c == '\0') {
144 Py_INCREF(Py_None);
145 return Py_None;
146 }
147 else
148 return PyString_FromStringAndSize((char*)&c, 1);
149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
152Dialect_get_lineterminator(DialectObj *self)
153{
154 return get_string(self->lineterminator);
155}
156
Skip Montanarob4a04172003-03-20 23:29:12 +0000157static PyObject *
158Dialect_get_escapechar(DialectObj *self)
159{
160 return get_nullchar_as_None(self->escapechar);
161}
162
Andrew McNamara1196cf12005-01-07 04:42:45 +0000163static PyObject *
164Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000165{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000166 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000167}
168
169static PyObject *
170Dialect_get_quoting(DialectObj *self)
171{
172 return PyInt_FromLong(self->quoting);
173}
174
175static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000176_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000177{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178 if (src == NULL)
179 *target = dflt;
180 else
181 *target = PyObject_IsTrue(src);
182 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000183}
184
Andrew McNamara1196cf12005-01-07 04:42:45 +0000185static int
186_set_int(const char *name, int *target, PyObject *src, int dflt)
187{
188 if (src == NULL)
189 *target = dflt;
190 else {
191 if (!PyInt_Check(src)) {
192 PyErr_Format(PyExc_TypeError,
193 "\"%s\" must be an integer", name);
194 return -1;
195 }
196 *target = PyInt_AsLong(src);
197 }
198 return 0;
199}
200
201static int
202_set_char(const char *name, char *target, PyObject *src, char dflt)
203{
204 if (src == NULL)
205 *target = dflt;
206 else {
207 if (src == Py_None)
208 *target = '\0';
209 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
210 PyErr_Format(PyExc_TypeError,
211 "\"%s\" must be an 1-character string",
212 name);
213 return -1;
214 }
215 else {
216 char *s = PyString_AsString(src);
217 if (s == NULL)
218 return -1;
219 *target = s[0];
220 }
221 }
222 return 0;
223}
224
225static int
226_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
227{
228 if (src == NULL)
229 *target = PyString_FromString(dflt);
230 else {
231 if (src == Py_None)
232 *target = NULL;
233 else if (!PyString_Check(src)
234#ifdef Py_USING_UNICODE
235 && !PyUnicode_Check(src)
236#endif
237 ) {
238 PyErr_Format(PyExc_TypeError,
239 "\"%s\" must be an string", name);
240 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000241 }
242 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000243 Py_XDECREF(*target);
244 Py_INCREF(src);
245 *target = src;
246 }
247 }
248 return 0;
249}
250
251static int
252dialect_check_quoting(int quoting)
253{
254 StyleDesc *qs = quote_styles;
255
256 for (qs = quote_styles; qs->name; qs++) {
257 if (qs->style == quoting)
258 return 0;
259 }
260 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
261 return -1;
262}
Skip Montanarob4a04172003-03-20 23:29:12 +0000263
264#define D_OFF(x) offsetof(DialectObj, x)
265
266static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000267 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
268 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
269 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
270 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000271 { NULL }
272};
273
274static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000275 { "escapechar", (getter)Dialect_get_escapechar},
276 { "lineterminator", (getter)Dialect_get_lineterminator},
277 { "quotechar", (getter)Dialect_get_quotechar},
278 { "quoting", (getter)Dialect_get_quoting},
279 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000280};
281
282static void
283Dialect_dealloc(DialectObj *self)
284{
285 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000286 self->ob_type->tp_free((PyObject *)self);
287}
288
Andrew McNamara1196cf12005-01-07 04:42:45 +0000289/*
290 * Return a new reference to a dialect instance
291 *
292 * If given a string, looks up the name in our dialect registry
293 * If given a class, instantiate (which runs python validity checks)
294 * If given an instance, return a new reference to the instance
295 */
296static PyObject *
297dialect_instantiate(PyObject *dialect)
298{
299 Py_INCREF(dialect);
300 /* If dialect is a string, look it up in our registry */
301 if (PyString_Check(dialect)
302#ifdef Py_USING_UNICODE
303 || PyUnicode_Check(dialect)
304#endif
305 ) {
306 PyObject * new_dia;
307 new_dia = get_dialect_from_registry(dialect);
308 Py_DECREF(dialect);
309 return new_dia;
310 }
311 /* A class rather than an instance? Instantiate */
312 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
313 PyObject * new_dia;
314 new_dia = PyObject_CallFunction(dialect, "");
315 Py_DECREF(dialect);
316 return new_dia;
317 }
318 /* Make sure we finally have an instance */
319 if (!PyInstance_Check(dialect)) {
320 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
321 Py_DECREF(dialect);
322 return NULL;
323 }
324 return dialect;
325}
326
327static char *dialect_kws[] = {
328 "dialect",
329 "delimiter",
330 "doublequote",
331 "escapechar",
332 "lineterminator",
333 "quotechar",
334 "quoting",
335 "skipinitialspace",
336 "strict",
337 NULL
338};
339
Skip Montanarob4a04172003-03-20 23:29:12 +0000340static int
341dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
342{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000343 int ret = -1;
344 PyObject *dialect = NULL;
345 PyObject *delimiter = NULL;
346 PyObject *doublequote = NULL;
347 PyObject *escapechar = NULL;
348 PyObject *lineterminator = NULL;
349 PyObject *quotechar = NULL;
350 PyObject *quoting = NULL;
351 PyObject *skipinitialspace = NULL;
352 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000353
Andrew McNamara1196cf12005-01-07 04:42:45 +0000354 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
355 "|OOOOOOOOO", dialect_kws,
356 &dialect,
357 &delimiter,
358 &doublequote,
359 &escapechar,
360 &lineterminator,
361 &quotechar,
362 &quoting,
363 &skipinitialspace,
364 &strict))
Skip Montanarob4a04172003-03-20 23:29:12 +0000365 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000366
Andrew McNamara1196cf12005-01-07 04:42:45 +0000367 Py_XINCREF(delimiter);
368 Py_XINCREF(doublequote);
369 Py_XINCREF(escapechar);
370 Py_XINCREF(lineterminator);
371 Py_XINCREF(quotechar);
372 Py_XINCREF(quoting);
373 Py_XINCREF(skipinitialspace);
374 Py_XINCREF(strict);
375 if (dialect != NULL) {
376 dialect = dialect_instantiate(dialect);
377 if (dialect == NULL)
378 goto err;
379#define DIALECT_GETATTR(v, n) \
380 if (v == NULL) \
381 v = PyObject_GetAttrString(dialect, n)
Skip Montanarob4a04172003-03-20 23:29:12 +0000382
Andrew McNamara1196cf12005-01-07 04:42:45 +0000383 DIALECT_GETATTR(delimiter, "delimiter");
384 DIALECT_GETATTR(doublequote, "doublequote");
385 DIALECT_GETATTR(escapechar, "escapechar");
386 DIALECT_GETATTR(lineterminator, "lineterminator");
387 DIALECT_GETATTR(quotechar, "quotechar");
388 DIALECT_GETATTR(quoting, "quoting");
389 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
390 DIALECT_GETATTR(strict, "strict");
391 PyErr_Clear();
392 Py_DECREF(dialect);
393 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000394
Andrew McNamara1196cf12005-01-07 04:42:45 +0000395 /* check types and convert to C values */
396#define DIASET(meth, name, target, src, dflt) \
397 if (meth(name, target, src, dflt)) \
398 goto err
399 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
400 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
401 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
402 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
403 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
404 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
405 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
406 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000407
Andrew McNamara1196cf12005-01-07 04:42:45 +0000408 /* validate options */
409 if (dialect_check_quoting(self->quoting))
410 goto err;
411 if (self->delimiter == 0) {
412 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
413 goto err;
414 }
415 if (quotechar == Py_None && self->quoting != QUOTE_NONE)
416 self->quoting = QUOTE_NONE;
417 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
418 PyErr_SetString(PyExc_TypeError,
419 "quotechar must be set if quoting enabled");
420 goto err;
421 }
422 if (self->lineterminator == 0) {
423 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
424 goto err;
425 }
426
427 ret = 0;
428err:
429 Py_XDECREF(delimiter);
430 Py_XDECREF(doublequote);
431 Py_XDECREF(escapechar);
432 Py_XDECREF(lineterminator);
433 Py_XDECREF(quotechar);
434 Py_XDECREF(quoting);
435 Py_XDECREF(skipinitialspace);
436 Py_XDECREF(strict);
437 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000438}
439
440static PyObject *
441dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
442{
443 DialectObj *self;
444 self = (DialectObj *)type->tp_alloc(type, 0);
445 if (self != NULL) {
446 self->lineterminator = NULL;
447 }
448 return (PyObject *)self;
449}
450
451
452PyDoc_STRVAR(Dialect_Type_doc,
453"CSV dialect\n"
454"\n"
455"The Dialect type records CSV parsing and generation options.\n");
456
457static PyTypeObject Dialect_Type = {
458 PyObject_HEAD_INIT(NULL)
459 0, /* ob_size */
460 "_csv.Dialect", /* tp_name */
461 sizeof(DialectObj), /* tp_basicsize */
462 0, /* tp_itemsize */
463 /* methods */
464 (destructor)Dialect_dealloc, /* tp_dealloc */
465 (printfunc)0, /* tp_print */
466 (getattrfunc)0, /* tp_getattr */
467 (setattrfunc)0, /* tp_setattr */
468 (cmpfunc)0, /* tp_compare */
469 (reprfunc)0, /* tp_repr */
470 0, /* tp_as_number */
471 0, /* tp_as_sequence */
472 0, /* tp_as_mapping */
473 (hashfunc)0, /* tp_hash */
474 (ternaryfunc)0, /* tp_call */
475 (reprfunc)0, /* tp_str */
476 0, /* tp_getattro */
477 0, /* tp_setattro */
478 0, /* tp_as_buffer */
479 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
480 Dialect_Type_doc, /* tp_doc */
481 0, /* tp_traverse */
482 0, /* tp_clear */
483 0, /* tp_richcompare */
484 0, /* tp_weaklistoffset */
485 0, /* tp_iter */
486 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000487 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000488 Dialect_memberlist, /* tp_members */
489 Dialect_getsetlist, /* tp_getset */
490 0, /* tp_base */
491 0, /* tp_dict */
492 0, /* tp_descr_get */
493 0, /* tp_descr_set */
494 0, /* tp_dictoffset */
495 (initproc)dialect_init, /* tp_init */
496 PyType_GenericAlloc, /* tp_alloc */
497 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000498 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000499};
500
501static void
502parse_save_field(ReaderObj *self)
503{
504 PyObject *field;
505
506 field = PyString_FromStringAndSize(self->field, self->field_len);
507 if (field != NULL) {
508 PyList_Append(self->fields, field);
509 Py_XDECREF(field);
510 }
511 self->field_len = 0;
512}
513
514static int
515parse_grow_buff(ReaderObj *self)
516{
517 if (self->field_size == 0) {
518 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000519 if (self->field != NULL)
520 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000521 self->field = PyMem_Malloc(self->field_size);
522 }
523 else {
524 self->field_size *= 2;
525 self->field = PyMem_Realloc(self->field, self->field_size);
526 }
527 if (self->field == NULL) {
528 PyErr_NoMemory();
529 return 0;
530 }
531 return 1;
532}
533
534static void
535parse_add_char(ReaderObj *self, char c)
536{
537 if (self->field_len == self->field_size && !parse_grow_buff(self))
538 return;
539 self->field[self->field_len++] = c;
540}
541
542static void
543parse_process_char(ReaderObj *self, char c)
544{
545 DialectObj *dialect = self->dialect;
546
547 switch (self->state) {
548 case START_RECORD:
549 /* start of record */
550 if (c == '\n')
551 /* empty line - return [] */
552 break;
553 /* normal character - handle as START_FIELD */
554 self->state = START_FIELD;
555 /* fallthru */
556 case START_FIELD:
557 /* expecting field */
558 if (c == '\n') {
559 /* save empty field - return [fields] */
560 parse_save_field(self);
561 self->state = START_RECORD;
562 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000563 else if (c == dialect->quotechar &&
564 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000565 /* start quoted field */
566 self->state = IN_QUOTED_FIELD;
567 }
568 else if (c == dialect->escapechar) {
569 /* possible escaped character */
570 self->state = ESCAPED_CHAR;
571 }
572 else if (c == ' ' && dialect->skipinitialspace)
573 /* ignore space at start of field */
574 ;
575 else if (c == dialect->delimiter) {
576 /* save empty field */
577 parse_save_field(self);
578 }
579 else {
580 /* begin new unquoted field */
581 parse_add_char(self, c);
582 self->state = IN_FIELD;
583 }
584 break;
585
586 case ESCAPED_CHAR:
Skip Montanarob4a04172003-03-20 23:29:12 +0000587 parse_add_char(self, c);
588 self->state = IN_FIELD;
589 break;
590
591 case IN_FIELD:
592 /* in unquoted field */
593 if (c == '\n') {
594 /* end of line - return [fields] */
595 parse_save_field(self);
596 self->state = START_RECORD;
597 }
598 else if (c == dialect->escapechar) {
599 /* possible escaped character */
600 self->state = ESCAPED_CHAR;
601 }
602 else if (c == dialect->delimiter) {
603 /* save field - wait for new field */
604 parse_save_field(self);
605 self->state = START_FIELD;
606 }
607 else {
608 /* normal character - save in field */
609 parse_add_char(self, c);
610 }
611 break;
612
613 case IN_QUOTED_FIELD:
614 /* in quoted field */
615 if (c == '\n') {
616 /* end of line - save '\n' in field */
617 parse_add_char(self, '\n');
618 }
619 else if (c == dialect->escapechar) {
620 /* Possible escape character */
621 self->state = ESCAPE_IN_QUOTED_FIELD;
622 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000623 else if (c == dialect->quotechar &&
624 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000625 if (dialect->doublequote) {
626 /* doublequote; " represented by "" */
627 self->state = QUOTE_IN_QUOTED_FIELD;
628 }
629 else {
630 /* end of quote part of field */
631 self->state = IN_FIELD;
632 }
633 }
634 else {
635 /* normal character - save in field */
636 parse_add_char(self, c);
637 }
638 break;
639
640 case ESCAPE_IN_QUOTED_FIELD:
Skip Montanarob4a04172003-03-20 23:29:12 +0000641 parse_add_char(self, c);
642 self->state = IN_QUOTED_FIELD;
643 break;
644
645 case QUOTE_IN_QUOTED_FIELD:
646 /* doublequote - seen a quote in an quoted field */
647 if (dialect->quoting != QUOTE_NONE &&
648 c == dialect->quotechar) {
649 /* save "" as " */
650 parse_add_char(self, c);
651 self->state = IN_QUOTED_FIELD;
652 }
653 else if (c == dialect->delimiter) {
654 /* save field - wait for new field */
655 parse_save_field(self);
656 self->state = START_FIELD;
657 }
658 else if (c == '\n') {
659 /* end of line - return [fields] */
660 parse_save_field(self);
661 self->state = START_RECORD;
662 }
663 else if (!dialect->strict) {
664 parse_add_char(self, c);
665 self->state = IN_FIELD;
666 }
667 else {
668 /* illegal */
669 self->had_parse_error = 1;
670 PyErr_Format(error_obj, "%c expected after %c",
671 dialect->delimiter,
672 dialect->quotechar);
673 }
674 break;
675
676 }
677}
678
679/*
680 * READER
681 */
682#define R_OFF(x) offsetof(ReaderObj, x)
683
684static struct PyMemberDef Reader_memberlist[] = {
685 { "dialect", T_OBJECT, R_OFF(dialect), RO },
686 { NULL }
687};
688
689static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000690Reader_iternext(ReaderObj *self)
691{
692 PyObject *lineobj;
693 PyObject *fields;
694 char *line;
695
696 do {
697 lineobj = PyIter_Next(self->input_iter);
698 if (lineobj == NULL) {
699 /* End of input OR exception */
700 if (!PyErr_Occurred() && self->field_len != 0)
701 return PyErr_Format(error_obj,
702 "newline inside string");
703 return NULL;
704 }
705
706 if (self->had_parse_error) {
707 if (self->fields) {
708 Py_XDECREF(self->fields);
709 }
710 self->fields = PyList_New(0);
711 self->field_len = 0;
712 self->state = START_RECORD;
713 self->had_parse_error = 0;
714 }
715 line = PyString_AsString(lineobj);
716
717 if (line == NULL) {
718 Py_DECREF(lineobj);
719 return NULL;
720 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000721 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000722 self->had_parse_error = 1;
723 Py_DECREF(lineobj);
724 return PyErr_Format(error_obj,
725 "string with NUL bytes");
726 }
727
728 /* Process line of text - send '\n' to processing code to
729 represent end of line. End of line which is not at end of
730 string is an error. */
731 while (*line) {
732 char c;
733
734 c = *line++;
735 if (c == '\r') {
736 c = *line++;
737 if (c == '\0')
738 /* macintosh end of line */
739 break;
740 if (c == '\n') {
741 c = *line++;
742 if (c == '\0')
743 /* DOS end of line */
744 break;
745 }
746 self->had_parse_error = 1;
747 Py_DECREF(lineobj);
748 return PyErr_Format(error_obj,
749 "newline inside string");
750 }
751 if (c == '\n') {
752 c = *line++;
753 if (c == '\0')
754 /* unix end of line */
755 break;
756 self->had_parse_error = 1;
757 Py_DECREF(lineobj);
758 return PyErr_Format(error_obj,
759 "newline inside string");
760 }
761 parse_process_char(self, c);
762 if (PyErr_Occurred()) {
763 Py_DECREF(lineobj);
764 return NULL;
765 }
766 }
767 parse_process_char(self, '\n');
768 Py_DECREF(lineobj);
769 } while (self->state != START_RECORD);
770
771 fields = self->fields;
772 self->fields = PyList_New(0);
773 return fields;
774}
775
776static void
777Reader_dealloc(ReaderObj *self)
778{
Andrew McNamara77ead872005-01-10 02:09:41 +0000779 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000780 Py_XDECREF(self->dialect);
781 Py_XDECREF(self->input_iter);
782 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000783 if (self->field != NULL)
784 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000785 PyObject_GC_Del(self);
786}
787
788static int
789Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
790{
791 int err;
792#define VISIT(SLOT) \
793 if (SLOT) { \
794 err = visit((PyObject *)(SLOT), arg); \
795 if (err) \
796 return err; \
797 }
798 VISIT(self->dialect);
799 VISIT(self->input_iter);
800 VISIT(self->fields);
801 return 0;
802}
803
804static int
805Reader_clear(ReaderObj *self)
806{
807 Py_XDECREF(self->dialect);
808 Py_XDECREF(self->input_iter);
809 Py_XDECREF(self->fields);
810 self->dialect = NULL;
811 self->input_iter = NULL;
812 self->fields = NULL;
813 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000814}
815
816PyDoc_STRVAR(Reader_Type_doc,
817"CSV reader\n"
818"\n"
819"Reader objects are responsible for reading and parsing tabular data\n"
820"in CSV format.\n"
821);
822
823static struct PyMethodDef Reader_methods[] = {
824 { NULL, NULL }
825};
826
827static PyTypeObject Reader_Type = {
828 PyObject_HEAD_INIT(NULL)
829 0, /*ob_size*/
830 "_csv.reader", /*tp_name*/
831 sizeof(ReaderObj), /*tp_basicsize*/
832 0, /*tp_itemsize*/
833 /* methods */
834 (destructor)Reader_dealloc, /*tp_dealloc*/
835 (printfunc)0, /*tp_print*/
836 (getattrfunc)0, /*tp_getattr*/
837 (setattrfunc)0, /*tp_setattr*/
838 (cmpfunc)0, /*tp_compare*/
839 (reprfunc)0, /*tp_repr*/
840 0, /*tp_as_number*/
841 0, /*tp_as_sequence*/
842 0, /*tp_as_mapping*/
843 (hashfunc)0, /*tp_hash*/
844 (ternaryfunc)0, /*tp_call*/
845 (reprfunc)0, /*tp_str*/
846 0, /*tp_getattro*/
847 0, /*tp_setattro*/
848 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000849 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
850 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000851 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000852 (traverseproc)Reader_traverse, /*tp_traverse*/
853 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000854 0, /*tp_richcompare*/
855 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000856 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000857 (getiterfunc)Reader_iternext, /*tp_iternext*/
858 Reader_methods, /*tp_methods*/
859 Reader_memberlist, /*tp_members*/
860 0, /*tp_getset*/
861
862};
863
864static PyObject *
865csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
866{
867 PyObject * iterator, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000868 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000869
870 if (!self)
871 return NULL;
872
873 self->dialect = NULL;
874 self->input_iter = self->fields = NULL;
875
876 self->fields = NULL;
877 self->input_iter = NULL;
878 self->had_parse_error = 0;
879 self->field = NULL;
880 self->field_size = 0;
881 self->field_len = 0;
882 self->state = START_RECORD;
883
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000884 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000885 Py_DECREF(self);
886 return NULL;
887 }
888 self->input_iter = PyObject_GetIter(iterator);
889 if (self->input_iter == NULL) {
890 PyErr_SetString(PyExc_TypeError,
891 "argument 1 must be an iterator");
892 Py_DECREF(self);
893 return NULL;
894 }
895 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
896 if (ctor_args == NULL) {
897 Py_DECREF(self);
898 return NULL;
899 }
900 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
901 ctor_args, keyword_args);
902 Py_DECREF(ctor_args);
903 if (self->dialect == NULL) {
904 Py_DECREF(self);
905 return NULL;
906 }
907 self->fields = PyList_New(0);
908 if (self->fields == NULL) {
909 Py_DECREF(self);
910 return NULL;
911 }
912
Andrew McNamara77ead872005-01-10 02:09:41 +0000913 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000914 return (PyObject *)self;
915}
916
917/*
918 * WRITER
919 */
920/* ---------------------------------------------------------------- */
921static void
922join_reset(WriterObj *self)
923{
924 self->rec_len = 0;
925 self->num_fields = 0;
926}
927
928#define MEM_INCR 32768
929
930/* Calculate new record length or append field to record. Return new
931 * record length.
932 */
933static int
934join_append_data(WriterObj *self, char *field, int quote_empty,
935 int *quoted, int copy_phase)
936{
937 DialectObj *dialect = self->dialect;
938 int i, rec_len;
939
940 rec_len = self->rec_len;
941
942 /* If this is not the first field we need a field separator.
943 */
944 if (self->num_fields > 0) {
945 if (copy_phase)
946 self->rec[rec_len] = dialect->delimiter;
947 rec_len++;
948 }
949 /* Handle preceding quote.
950 */
951 switch (dialect->quoting) {
952 case QUOTE_ALL:
953 *quoted = 1;
954 if (copy_phase)
955 self->rec[rec_len] = dialect->quotechar;
956 rec_len++;
957 break;
958 case QUOTE_MINIMAL:
959 case QUOTE_NONNUMERIC:
960 /* We only know about quoted in the copy phase.
961 */
962 if (copy_phase && *quoted) {
963 self->rec[rec_len] = dialect->quotechar;
964 rec_len++;
965 }
966 break;
967 case QUOTE_NONE:
968 break;
969 }
970 /* Copy/count field data.
971 */
972 for (i = 0;; i++) {
973 char c = field[i];
974
975 if (c == '\0')
976 break;
977 /* If in doublequote mode we escape quote chars with a
978 * quote.
979 */
980 if (dialect->quoting != QUOTE_NONE &&
981 c == dialect->quotechar && dialect->doublequote) {
982 if (copy_phase)
983 self->rec[rec_len] = dialect->quotechar;
984 *quoted = 1;
985 rec_len++;
986 }
987
988 /* Some special characters need to be escaped. If we have a
989 * quote character switch to quoted field instead of escaping
990 * individual characters.
991 */
992 if (!*quoted
993 && (c == dialect->delimiter ||
994 c == dialect->escapechar ||
995 c == '\n' || c == '\r')) {
996 if (dialect->quoting != QUOTE_NONE)
997 *quoted = 1;
998 else if (dialect->escapechar) {
999 if (copy_phase)
1000 self->rec[rec_len] = dialect->escapechar;
1001 rec_len++;
1002 }
1003 else {
1004 PyErr_Format(error_obj,
1005 "delimiter must be quoted or escaped");
1006 return -1;
1007 }
1008 }
1009 /* Copy field character into record buffer.
1010 */
1011 if (copy_phase)
1012 self->rec[rec_len] = c;
1013 rec_len++;
1014 }
1015
1016 /* If field is empty check if it needs to be quoted.
1017 */
1018 if (i == 0 && quote_empty) {
1019 if (dialect->quoting == QUOTE_NONE) {
1020 PyErr_Format(error_obj,
1021 "single empty field record must be quoted");
1022 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001023 }
1024 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001025 *quoted = 1;
1026 }
1027
1028 /* Handle final quote character on field.
1029 */
1030 if (*quoted) {
1031 if (copy_phase)
1032 self->rec[rec_len] = dialect->quotechar;
1033 else
1034 /* Didn't know about leading quote until we found it
1035 * necessary in field data - compensate for it now.
1036 */
1037 rec_len++;
1038 rec_len++;
1039 }
1040
1041 return rec_len;
1042}
1043
1044static int
1045join_check_rec_size(WriterObj *self, int rec_len)
1046{
1047 if (rec_len > self->rec_size) {
1048 if (self->rec_size == 0) {
1049 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001050 if (self->rec != NULL)
1051 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001052 self->rec = PyMem_Malloc(self->rec_size);
1053 }
1054 else {
1055 char *old_rec = self->rec;
1056
1057 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1058 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1059 if (self->rec == NULL)
1060 PyMem_Free(old_rec);
1061 }
1062 if (self->rec == NULL) {
1063 PyErr_NoMemory();
1064 return 0;
1065 }
1066 }
1067 return 1;
1068}
1069
1070static int
1071join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1072{
1073 int rec_len;
1074
1075 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1076 if (rec_len < 0)
1077 return 0;
1078
1079 /* grow record buffer if necessary */
1080 if (!join_check_rec_size(self, rec_len))
1081 return 0;
1082
1083 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1084 self->num_fields++;
1085
1086 return 1;
1087}
1088
1089static int
1090join_append_lineterminator(WriterObj *self)
1091{
1092 int terminator_len;
1093
1094 terminator_len = PyString_Size(self->dialect->lineterminator);
1095
1096 /* grow record buffer if necessary */
1097 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1098 return 0;
1099
1100 memmove(self->rec + self->rec_len,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001101 /* should not be NULL */
Skip Montanarob4a04172003-03-20 23:29:12 +00001102 PyString_AsString(self->dialect->lineterminator),
1103 terminator_len);
1104 self->rec_len += terminator_len;
1105
1106 return 1;
1107}
1108
1109PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001110"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001111"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001112"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001113"elements will be converted to string.");
1114
1115static PyObject *
1116csv_writerow(WriterObj *self, PyObject *seq)
1117{
1118 DialectObj *dialect = self->dialect;
1119 int len, i;
1120
1121 if (!PySequence_Check(seq))
1122 return PyErr_Format(error_obj, "sequence expected");
1123
1124 len = PySequence_Length(seq);
1125 if (len < 0)
1126 return NULL;
1127
1128 /* Join all fields in internal buffer.
1129 */
1130 join_reset(self);
1131 for (i = 0; i < len; i++) {
1132 PyObject *field;
1133 int append_ok;
1134 int quoted;
1135
1136 field = PySequence_GetItem(seq, i);
1137 if (field == NULL)
1138 return NULL;
1139
1140 quoted = 0;
1141 if (dialect->quoting == QUOTE_NONNUMERIC) {
1142 PyObject *num;
1143
1144 num = PyNumber_Float(field);
1145 if (num == NULL) {
1146 quoted = 1;
1147 PyErr_Clear();
1148 }
1149 else {
1150 Py_DECREF(num);
1151 }
1152 }
1153
1154 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001155 append_ok = join_append(self,
1156 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001157 &quoted, len == 1);
1158 Py_DECREF(field);
1159 }
1160 else if (field == Py_None) {
1161 append_ok = join_append(self, "", &quoted, len == 1);
1162 Py_DECREF(field);
1163 }
1164 else {
1165 PyObject *str;
1166
1167 str = PyObject_Str(field);
1168 Py_DECREF(field);
1169 if (str == NULL)
1170 return NULL;
1171
Skip Montanaro577c7a72003-04-12 19:17:14 +00001172 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001173 &quoted, len == 1);
1174 Py_DECREF(str);
1175 }
1176 if (!append_ok)
1177 return NULL;
1178 }
1179
1180 /* Add line terminator.
1181 */
1182 if (!join_append_lineterminator(self))
1183 return 0;
1184
1185 return PyObject_CallFunction(self->writeline,
1186 "(s#)", self->rec, self->rec_len);
1187}
1188
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001189PyDoc_STRVAR(csv_writerows_doc,
1190"writerows(sequence of sequences)\n"
1191"\n"
1192"Construct and write a series of sequences to a csv file. Non-string\n"
1193"elements will be converted to string.");
1194
Skip Montanarob4a04172003-03-20 23:29:12 +00001195static PyObject *
1196csv_writerows(WriterObj *self, PyObject *seqseq)
1197{
1198 PyObject *row_iter, *row_obj, *result;
1199
1200 row_iter = PyObject_GetIter(seqseq);
1201 if (row_iter == NULL) {
1202 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001203 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001204 return NULL;
1205 }
1206 while ((row_obj = PyIter_Next(row_iter))) {
1207 result = csv_writerow(self, row_obj);
1208 Py_DECREF(row_obj);
1209 if (!result) {
1210 Py_DECREF(row_iter);
1211 return NULL;
1212 }
1213 else
1214 Py_DECREF(result);
1215 }
1216 Py_DECREF(row_iter);
1217 if (PyErr_Occurred())
1218 return NULL;
1219 Py_INCREF(Py_None);
1220 return Py_None;
1221}
1222
1223static struct PyMethodDef Writer_methods[] = {
1224 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001225 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001226 { NULL, NULL }
1227};
1228
1229#define W_OFF(x) offsetof(WriterObj, x)
1230
1231static struct PyMemberDef Writer_memberlist[] = {
1232 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1233 { NULL }
1234};
1235
1236static void
1237Writer_dealloc(WriterObj *self)
1238{
Andrew McNamara77ead872005-01-10 02:09:41 +00001239 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001240 Py_XDECREF(self->dialect);
1241 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001242 if (self->rec != NULL)
1243 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001244 PyObject_GC_Del(self);
1245}
1246
1247static int
1248Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1249{
1250 int err;
1251#define VISIT(SLOT) \
1252 if (SLOT) { \
1253 err = visit((PyObject *)(SLOT), arg); \
1254 if (err) \
1255 return err; \
1256 }
1257 VISIT(self->dialect);
1258 VISIT(self->writeline);
1259 return 0;
1260}
1261
1262static int
1263Writer_clear(WriterObj *self)
1264{
1265 Py_XDECREF(self->dialect);
1266 Py_XDECREF(self->writeline);
1267 self->dialect = NULL;
1268 self->writeline = NULL;
1269 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001270}
1271
1272PyDoc_STRVAR(Writer_Type_doc,
1273"CSV writer\n"
1274"\n"
1275"Writer objects are responsible for generating tabular data\n"
1276"in CSV format from sequence input.\n"
1277);
1278
1279static PyTypeObject Writer_Type = {
1280 PyObject_HEAD_INIT(NULL)
1281 0, /*ob_size*/
1282 "_csv.writer", /*tp_name*/
1283 sizeof(WriterObj), /*tp_basicsize*/
1284 0, /*tp_itemsize*/
1285 /* methods */
1286 (destructor)Writer_dealloc, /*tp_dealloc*/
1287 (printfunc)0, /*tp_print*/
1288 (getattrfunc)0, /*tp_getattr*/
1289 (setattrfunc)0, /*tp_setattr*/
1290 (cmpfunc)0, /*tp_compare*/
1291 (reprfunc)0, /*tp_repr*/
1292 0, /*tp_as_number*/
1293 0, /*tp_as_sequence*/
1294 0, /*tp_as_mapping*/
1295 (hashfunc)0, /*tp_hash*/
1296 (ternaryfunc)0, /*tp_call*/
1297 (reprfunc)0, /*tp_str*/
1298 0, /*tp_getattro*/
1299 0, /*tp_setattro*/
1300 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001301 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1302 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001303 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001304 (traverseproc)Writer_traverse, /*tp_traverse*/
1305 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001306 0, /*tp_richcompare*/
1307 0, /*tp_weaklistoffset*/
1308 (getiterfunc)0, /*tp_iter*/
1309 (getiterfunc)0, /*tp_iternext*/
1310 Writer_methods, /*tp_methods*/
1311 Writer_memberlist, /*tp_members*/
1312 0, /*tp_getset*/
1313};
1314
1315static PyObject *
1316csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1317{
1318 PyObject * output_file, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001319 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001320
1321 if (!self)
1322 return NULL;
1323
1324 self->dialect = NULL;
1325 self->writeline = NULL;
1326
1327 self->rec = NULL;
1328 self->rec_size = 0;
1329 self->rec_len = 0;
1330 self->num_fields = 0;
1331
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001332 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001333 Py_DECREF(self);
1334 return NULL;
1335 }
1336 self->writeline = PyObject_GetAttrString(output_file, "write");
1337 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1338 PyErr_SetString(PyExc_TypeError,
1339 "argument 1 must be an instance with a write method");
1340 Py_DECREF(self);
1341 return NULL;
1342 }
1343 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1344 if (ctor_args == NULL) {
1345 Py_DECREF(self);
1346 return NULL;
1347 }
1348 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1349 ctor_args, keyword_args);
1350 Py_DECREF(ctor_args);
1351 if (self->dialect == NULL) {
1352 Py_DECREF(self);
1353 return NULL;
1354 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001355 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001356 return (PyObject *)self;
1357}
1358
1359/*
1360 * DIALECT REGISTRY
1361 */
1362static PyObject *
1363csv_list_dialects(PyObject *module, PyObject *args)
1364{
1365 return PyDict_Keys(dialects);
1366}
1367
1368static PyObject *
1369csv_register_dialect(PyObject *module, PyObject *args)
1370{
1371 PyObject *name_obj, *dialect_obj;
1372
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001373 if (!PyArg_UnpackTuple(args, "", 2, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001374 return NULL;
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001375 if (!PyString_Check(name_obj)
1376#ifdef Py_USING_UNICODE
1377&& !PyUnicode_Check(name_obj)
1378#endif
1379) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001380 PyErr_SetString(PyExc_TypeError,
1381 "dialect name must be a string or unicode");
1382 return NULL;
1383 }
1384 Py_INCREF(dialect_obj);
Andrew McNamara1196cf12005-01-07 04:42:45 +00001385 /* A class rather than an instance? Instantiate */
Skip Montanarob4a04172003-03-20 23:29:12 +00001386 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1387 PyObject * new_dia;
1388 new_dia = PyObject_CallFunction(dialect_obj, "");
1389 Py_DECREF(dialect_obj);
1390 if (new_dia == NULL)
1391 return NULL;
1392 dialect_obj = new_dia;
1393 }
1394 /* Make sure we finally have an instance */
1395 if (!PyInstance_Check(dialect_obj)) {
1396 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1397 Py_DECREF(dialect_obj);
1398 return NULL;
1399 }
1400 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1401 Py_DECREF(dialect_obj);
1402 return NULL;
1403 }
1404 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1405 Py_DECREF(dialect_obj);
1406 return NULL;
1407 }
1408 Py_DECREF(dialect_obj);
1409 Py_INCREF(Py_None);
1410 return Py_None;
1411}
1412
1413static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001414csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001415{
Skip Montanarob4a04172003-03-20 23:29:12 +00001416 if (PyDict_DelItem(dialects, name_obj) < 0)
1417 return PyErr_Format(error_obj, "unknown dialect");
1418 Py_INCREF(Py_None);
1419 return Py_None;
1420}
1421
1422static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001423csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001424{
Skip Montanarob4a04172003-03-20 23:29:12 +00001425 return get_dialect_from_registry(name_obj);
1426}
1427
1428/*
1429 * MODULE
1430 */
1431
1432PyDoc_STRVAR(csv_module_doc,
1433"CSV parsing and writing.\n"
1434"\n"
1435"This module provides classes that assist in the reading and writing\n"
1436"of Comma Separated Value (CSV) files, and implements the interface\n"
1437"described by PEP 305. Although many CSV files are simple to parse,\n"
1438"the format is not formally defined by a stable specification and\n"
1439"is subtle enough that parsing lines of a CSV file with something\n"
1440"like line.split(\",\") is bound to fail. The module supports three\n"
1441"basic APIs: reading, writing, and registration of dialects.\n"
1442"\n"
1443"\n"
1444"DIALECT REGISTRATION:\n"
1445"\n"
1446"Readers and writers support a dialect argument, which is a convenient\n"
1447"handle on a group of settings. When the dialect argument is a string,\n"
1448"it identifies one of the dialects previously registered with the module.\n"
1449"If it is a class or instance, the attributes of the argument are used as\n"
1450"the settings for the reader or writer:\n"
1451"\n"
1452" class excel:\n"
1453" delimiter = ','\n"
1454" quotechar = '\"'\n"
1455" escapechar = None\n"
1456" doublequote = True\n"
1457" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001458" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001459" quoting = QUOTE_MINIMAL\n"
1460"\n"
1461"SETTINGS:\n"
1462"\n"
1463" * quotechar - specifies a one-character string to use as the \n"
1464" quoting character. It defaults to '\"'.\n"
1465" * delimiter - specifies a one-character string to use as the \n"
1466" field separator. It defaults to ','.\n"
1467" * skipinitialspace - specifies how to interpret whitespace which\n"
1468" immediately follows a delimiter. It defaults to False, which\n"
1469" means that whitespace immediately following a delimiter is part\n"
1470" of the following field.\n"
1471" * lineterminator - specifies the character sequence which should \n"
1472" terminate rows.\n"
1473" * quoting - controls when quotes should be generated by the writer.\n"
1474" It can take on any of the following module constants:\n"
1475"\n"
1476" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1477" field contains either the quotechar or the delimiter\n"
1478" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1479" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001480" fields which do not parse as integers or floating point\n"
1481" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001482" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1483" * escapechar - specifies a one-character string used to escape \n"
1484" the delimiter when quoting is set to QUOTE_NONE.\n"
1485" * doublequote - controls the handling of quotes inside fields. When\n"
1486" True, two consecutive quotes are interpreted as one during read,\n"
1487" and when writing, each quote character embedded in the data is\n"
1488" written as two quotes\n");
1489
1490PyDoc_STRVAR(csv_reader_doc,
1491" csv_reader = reader(iterable [, dialect='excel']\n"
1492" [optional keyword args])\n"
1493" for row in csv_reader:\n"
1494" process(row)\n"
1495"\n"
1496"The \"iterable\" argument can be any object that returns a line\n"
1497"of input for each iteration, such as a file object or a list. The\n"
1498"optional \"dialect\" parameter is discussed below. The function\n"
1499"also accepts optional keyword arguments which override settings\n"
1500"provided by the dialect.\n"
1501"\n"
1502"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001503"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001504
1505PyDoc_STRVAR(csv_writer_doc,
1506" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1507" [optional keyword args])\n"
1508" for row in csv_writer:\n"
1509" csv_writer.writerow(row)\n"
1510"\n"
1511" [or]\n"
1512"\n"
1513" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1514" [optional keyword args])\n"
1515" csv_writer.writerows(rows)\n"
1516"\n"
1517"The \"fileobj\" argument can be any object that supports the file API.\n");
1518
1519PyDoc_STRVAR(csv_list_dialects_doc,
1520"Return a list of all know dialect names.\n"
1521" names = csv.list_dialects()");
1522
1523PyDoc_STRVAR(csv_get_dialect_doc,
1524"Return the dialect instance associated with name.\n"
1525" dialect = csv.get_dialect(name)");
1526
1527PyDoc_STRVAR(csv_register_dialect_doc,
1528"Create a mapping from a string name to a dialect class.\n"
1529" dialect = csv.register_dialect(name, dialect)");
1530
1531PyDoc_STRVAR(csv_unregister_dialect_doc,
1532"Delete the name/dialect mapping associated with a string name.\n"
1533" csv.unregister_dialect(name)");
1534
1535static struct PyMethodDef csv_methods[] = {
1536 { "reader", (PyCFunction)csv_reader,
1537 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1538 { "writer", (PyCFunction)csv_writer,
1539 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1540 { "list_dialects", (PyCFunction)csv_list_dialects,
1541 METH_NOARGS, csv_list_dialects_doc},
1542 { "register_dialect", (PyCFunction)csv_register_dialect,
1543 METH_VARARGS, csv_register_dialect_doc},
1544 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001545 METH_O, csv_unregister_dialect_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001546 { "get_dialect", (PyCFunction)csv_get_dialect,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001547 METH_O, csv_get_dialect_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001548 { NULL, NULL }
1549};
1550
1551PyMODINIT_FUNC
1552init_csv(void)
1553{
1554 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001555 StyleDesc *style;
1556
1557 if (PyType_Ready(&Dialect_Type) < 0)
1558 return;
1559
1560 if (PyType_Ready(&Reader_Type) < 0)
1561 return;
1562
1563 if (PyType_Ready(&Writer_Type) < 0)
1564 return;
1565
1566 /* Create the module and add the functions */
1567 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1568 if (module == NULL)
1569 return;
1570
1571 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001572 if (PyModule_AddStringConstant(module, "__version__",
1573 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001574 return;
1575
1576 /* Add _dialects dictionary */
1577 dialects = PyDict_New();
1578 if (dialects == NULL)
1579 return;
1580 if (PyModule_AddObject(module, "_dialects", dialects))
1581 return;
1582
1583 /* Add quote styles into dictionary */
1584 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001585 if (PyModule_AddIntConstant(module, style->name,
1586 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001587 return;
1588 }
1589
1590 /* Add the Dialect type */
1591 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1592 return;
1593
1594 /* Add the CSV exception object to the module. */
1595 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1596 if (error_obj == NULL)
1597 return;
1598 PyModule_AddObject(module, "Error", error_obj);
1599}