blob: e7f60c178e8377aab695dbd2de344cc58d2309d9 [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
Skip Montanarob4a04172003-03-20 23:29:12 +000021/* begin 2.2 compatibility macros */
22#ifndef PyDoc_STRVAR
23/* Define macros for inline documentation. */
24#define PyDoc_VAR(name) static char name[]
25#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
26#ifdef WITH_DOC_STRINGS
27#define PyDoc_STR(str) str
28#else
29#define PyDoc_STR(str) ""
30#endif
31#endif /* ifndef PyDoc_STRVAR */
32
33#ifndef PyMODINIT_FUNC
34# if defined(__cplusplus)
35# define PyMODINIT_FUNC extern "C" void
36# else /* __cplusplus */
37# define PyMODINIT_FUNC void
38# endif /* __cplusplus */
39#endif
40/* end 2.2 compatibility macros */
41
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000042#define IS_BASESTRING(o) \
43 PyObject_TypeCheck(o, &PyBaseString_Type)
44
Skip Montanarob4a04172003-03-20 23:29:12 +000045static PyObject *error_obj; /* CSV exception */
46static PyObject *dialects; /* Dialect registry */
47
/* States of the reader's character-at-a-time parser (see
 * parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD
} ParserState;
52
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
56
57typedef struct {
58 QuoteStyle style;
59 char *name;
60} StyleDesc;
61
62static StyleDesc quote_styles[] = {
63 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
64 { QUOTE_ALL, "QUOTE_ALL" },
65 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
66 { QUOTE_NONE, "QUOTE_NONE" },
67 { 0 }
68};
69
70typedef struct {
71 PyObject_HEAD
72
73 int doublequote; /* is " represented by ""? */
74 char delimiter; /* field separator */
75 char quotechar; /* quote character */
76 char escapechar; /* escape character */
77 int skipinitialspace; /* ignore spaces following delimiter? */
78 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000079 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000080
81 int strict; /* raise exception on bad CSV */
82} DialectObj;
83
84staticforward PyTypeObject Dialect_Type;
85
86typedef struct {
87 PyObject_HEAD
88
89 PyObject *input_iter; /* iterate over this for input lines */
90
91 DialectObj *dialect; /* parsing dialect */
92
93 PyObject *fields; /* field list for current record */
94 ParserState state; /* current CSV parse state */
95 char *field; /* build current field in here */
96 int field_size; /* size of allocated buffer */
97 int field_len; /* length of current field */
98 int had_parse_error; /* did we have a parse error? */
99} ReaderObj;
100
101staticforward PyTypeObject Reader_Type;
102
103#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
104
105typedef struct {
106 PyObject_HEAD
107
108 PyObject *writeline; /* write output lines to this file */
109
110 DialectObj *dialect; /* parsing dialect */
111
112 char *rec; /* buffer for parser.join */
113 int rec_size; /* size of allocated record */
114 int rec_len; /* length of record */
115 int num_fields; /* number of fields in record */
116} WriterObj;
117
118staticforward PyTypeObject Writer_Type;
119
120/*
121 * DIALECT class
122 */
123
124static PyObject *
125get_dialect_from_registry(PyObject * name_obj)
126{
127 PyObject *dialect_obj;
128
129 dialect_obj = PyDict_GetItem(dialects, name_obj);
130 if (dialect_obj == NULL)
131 return PyErr_Format(error_obj, "unknown dialect");
132 Py_INCREF(dialect_obj);
133 return dialect_obj;
134}
135
Skip Montanarob4a04172003-03-20 23:29:12 +0000136static PyObject *
137get_string(PyObject *str)
138{
139 Py_XINCREF(str);
140 return str;
141}
142
Skip Montanarob4a04172003-03-20 23:29:12 +0000143static PyObject *
144get_nullchar_as_None(char c)
145{
146 if (c == '\0') {
147 Py_INCREF(Py_None);
148 return Py_None;
149 }
150 else
151 return PyString_FromStringAndSize((char*)&c, 1);
152}
153
Skip Montanarob4a04172003-03-20 23:29:12 +0000154static PyObject *
155Dialect_get_lineterminator(DialectObj *self)
156{
157 return get_string(self->lineterminator);
158}
159
Skip Montanarob4a04172003-03-20 23:29:12 +0000160static PyObject *
161Dialect_get_escapechar(DialectObj *self)
162{
163 return get_nullchar_as_None(self->escapechar);
164}
165
Andrew McNamara1196cf12005-01-07 04:42:45 +0000166static PyObject *
167Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000168{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000169 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
172static PyObject *
173Dialect_get_quoting(DialectObj *self)
174{
175 return PyInt_FromLong(self->quoting);
176}
177
178static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000179_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000181 if (src == NULL)
182 *target = dflt;
183 else
184 *target = PyObject_IsTrue(src);
185 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Andrew McNamara1196cf12005-01-07 04:42:45 +0000188static int
189_set_int(const char *name, int *target, PyObject *src, int dflt)
190{
191 if (src == NULL)
192 *target = dflt;
193 else {
194 if (!PyInt_Check(src)) {
195 PyErr_Format(PyExc_TypeError,
196 "\"%s\" must be an integer", name);
197 return -1;
198 }
199 *target = PyInt_AsLong(src);
200 }
201 return 0;
202}
203
204static int
205_set_char(const char *name, char *target, PyObject *src, char dflt)
206{
207 if (src == NULL)
208 *target = dflt;
209 else {
210 if (src == Py_None)
211 *target = '\0';
212 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
213 PyErr_Format(PyExc_TypeError,
214 "\"%s\" must be an 1-character string",
215 name);
216 return -1;
217 }
218 else {
219 char *s = PyString_AsString(src);
220 if (s == NULL)
221 return -1;
222 *target = s[0];
223 }
224 }
225 return 0;
226}
227
228static int
229_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
230{
231 if (src == NULL)
232 *target = PyString_FromString(dflt);
233 else {
234 if (src == Py_None)
235 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000236 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000237 PyErr_Format(PyExc_TypeError,
238 "\"%s\" must be an string", name);
239 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000240 }
241 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000242 Py_XDECREF(*target);
243 Py_INCREF(src);
244 *target = src;
245 }
246 }
247 return 0;
248}
249
250static int
251dialect_check_quoting(int quoting)
252{
253 StyleDesc *qs = quote_styles;
254
255 for (qs = quote_styles; qs->name; qs++) {
256 if (qs->style == quoting)
257 return 0;
258 }
259 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
260 return -1;
261}
Skip Montanarob4a04172003-03-20 23:29:12 +0000262
263#define D_OFF(x) offsetof(DialectObj, x)
264
265static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000266 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
267 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
268 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
269 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000270 { NULL }
271};
272
273static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000274 { "escapechar", (getter)Dialect_get_escapechar},
275 { "lineterminator", (getter)Dialect_get_lineterminator},
276 { "quotechar", (getter)Dialect_get_quotechar},
277 { "quoting", (getter)Dialect_get_quoting},
278 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000279};
280
281static void
282Dialect_dealloc(DialectObj *self)
283{
284 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000285 self->ob_type->tp_free((PyObject *)self);
286}
287
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288/*
289 * Return a new reference to a dialect instance
290 *
291 * If given a string, looks up the name in our dialect registry
292 * If given a class, instantiate (which runs python validity checks)
293 * If given an instance, return a new reference to the instance
294 */
295static PyObject *
296dialect_instantiate(PyObject *dialect)
297{
298 Py_INCREF(dialect);
299 /* If dialect is a string, look it up in our registry */
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000300 if (IS_BASESTRING(dialect)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000301 PyObject * new_dia;
302 new_dia = get_dialect_from_registry(dialect);
303 Py_DECREF(dialect);
304 return new_dia;
305 }
306 /* A class rather than an instance? Instantiate */
307 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
308 PyObject * new_dia;
309 new_dia = PyObject_CallFunction(dialect, "");
310 Py_DECREF(dialect);
311 return new_dia;
312 }
313 /* Make sure we finally have an instance */
314 if (!PyInstance_Check(dialect)) {
315 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
316 Py_DECREF(dialect);
317 return NULL;
318 }
319 return dialect;
320}
321
322static char *dialect_kws[] = {
323 "dialect",
324 "delimiter",
325 "doublequote",
326 "escapechar",
327 "lineterminator",
328 "quotechar",
329 "quoting",
330 "skipinitialspace",
331 "strict",
332 NULL
333};
334
Skip Montanarob4a04172003-03-20 23:29:12 +0000335static int
336dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
337{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000338 int ret = -1;
339 PyObject *dialect = NULL;
340 PyObject *delimiter = NULL;
341 PyObject *doublequote = NULL;
342 PyObject *escapechar = NULL;
343 PyObject *lineterminator = NULL;
344 PyObject *quotechar = NULL;
345 PyObject *quoting = NULL;
346 PyObject *skipinitialspace = NULL;
347 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000348
Andrew McNamara1196cf12005-01-07 04:42:45 +0000349 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
350 "|OOOOOOOOO", dialect_kws,
351 &dialect,
352 &delimiter,
353 &doublequote,
354 &escapechar,
355 &lineterminator,
356 &quotechar,
357 &quoting,
358 &skipinitialspace,
359 &strict))
Skip Montanarob4a04172003-03-20 23:29:12 +0000360 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000361
Andrew McNamara1196cf12005-01-07 04:42:45 +0000362 Py_XINCREF(delimiter);
363 Py_XINCREF(doublequote);
364 Py_XINCREF(escapechar);
365 Py_XINCREF(lineterminator);
366 Py_XINCREF(quotechar);
367 Py_XINCREF(quoting);
368 Py_XINCREF(skipinitialspace);
369 Py_XINCREF(strict);
370 if (dialect != NULL) {
371 dialect = dialect_instantiate(dialect);
372 if (dialect == NULL)
373 goto err;
374#define DIALECT_GETATTR(v, n) \
375 if (v == NULL) \
376 v = PyObject_GetAttrString(dialect, n)
Skip Montanarob4a04172003-03-20 23:29:12 +0000377
Andrew McNamara1196cf12005-01-07 04:42:45 +0000378 DIALECT_GETATTR(delimiter, "delimiter");
379 DIALECT_GETATTR(doublequote, "doublequote");
380 DIALECT_GETATTR(escapechar, "escapechar");
381 DIALECT_GETATTR(lineterminator, "lineterminator");
382 DIALECT_GETATTR(quotechar, "quotechar");
383 DIALECT_GETATTR(quoting, "quoting");
384 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
385 DIALECT_GETATTR(strict, "strict");
386 PyErr_Clear();
387 Py_DECREF(dialect);
388 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000389
Andrew McNamara1196cf12005-01-07 04:42:45 +0000390 /* check types and convert to C values */
391#define DIASET(meth, name, target, src, dflt) \
392 if (meth(name, target, src, dflt)) \
393 goto err
394 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
395 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
396 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
397 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
398 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
399 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
400 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
401 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000402
Andrew McNamara1196cf12005-01-07 04:42:45 +0000403 /* validate options */
404 if (dialect_check_quoting(self->quoting))
405 goto err;
406 if (self->delimiter == 0) {
407 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
408 goto err;
409 }
410 if (quotechar == Py_None && self->quoting != QUOTE_NONE)
411 self->quoting = QUOTE_NONE;
412 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
413 PyErr_SetString(PyExc_TypeError,
414 "quotechar must be set if quoting enabled");
415 goto err;
416 }
417 if (self->lineterminator == 0) {
418 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
419 goto err;
420 }
421
422 ret = 0;
423err:
424 Py_XDECREF(delimiter);
425 Py_XDECREF(doublequote);
426 Py_XDECREF(escapechar);
427 Py_XDECREF(lineterminator);
428 Py_XDECREF(quotechar);
429 Py_XDECREF(quoting);
430 Py_XDECREF(skipinitialspace);
431 Py_XDECREF(strict);
432 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000433}
434
435static PyObject *
436dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
437{
438 DialectObj *self;
439 self = (DialectObj *)type->tp_alloc(type, 0);
440 if (self != NULL) {
441 self->lineterminator = NULL;
442 }
443 return (PyObject *)self;
444}
445
446
447PyDoc_STRVAR(Dialect_Type_doc,
448"CSV dialect\n"
449"\n"
450"The Dialect type records CSV parsing and generation options.\n");
451
452static PyTypeObject Dialect_Type = {
453 PyObject_HEAD_INIT(NULL)
454 0, /* ob_size */
455 "_csv.Dialect", /* tp_name */
456 sizeof(DialectObj), /* tp_basicsize */
457 0, /* tp_itemsize */
458 /* methods */
459 (destructor)Dialect_dealloc, /* tp_dealloc */
460 (printfunc)0, /* tp_print */
461 (getattrfunc)0, /* tp_getattr */
462 (setattrfunc)0, /* tp_setattr */
463 (cmpfunc)0, /* tp_compare */
464 (reprfunc)0, /* tp_repr */
465 0, /* tp_as_number */
466 0, /* tp_as_sequence */
467 0, /* tp_as_mapping */
468 (hashfunc)0, /* tp_hash */
469 (ternaryfunc)0, /* tp_call */
470 (reprfunc)0, /* tp_str */
471 0, /* tp_getattro */
472 0, /* tp_setattro */
473 0, /* tp_as_buffer */
474 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
475 Dialect_Type_doc, /* tp_doc */
476 0, /* tp_traverse */
477 0, /* tp_clear */
478 0, /* tp_richcompare */
479 0, /* tp_weaklistoffset */
480 0, /* tp_iter */
481 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000482 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000483 Dialect_memberlist, /* tp_members */
484 Dialect_getsetlist, /* tp_getset */
485 0, /* tp_base */
486 0, /* tp_dict */
487 0, /* tp_descr_get */
488 0, /* tp_descr_set */
489 0, /* tp_dictoffset */
490 (initproc)dialect_init, /* tp_init */
491 PyType_GenericAlloc, /* tp_alloc */
492 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000493 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000494};
495
496static void
497parse_save_field(ReaderObj *self)
498{
499 PyObject *field;
500
501 field = PyString_FromStringAndSize(self->field, self->field_len);
502 if (field != NULL) {
503 PyList_Append(self->fields, field);
504 Py_XDECREF(field);
505 }
506 self->field_len = 0;
507}
508
509static int
510parse_grow_buff(ReaderObj *self)
511{
512 if (self->field_size == 0) {
513 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000514 if (self->field != NULL)
515 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000516 self->field = PyMem_Malloc(self->field_size);
517 }
518 else {
519 self->field_size *= 2;
520 self->field = PyMem_Realloc(self->field, self->field_size);
521 }
522 if (self->field == NULL) {
523 PyErr_NoMemory();
524 return 0;
525 }
526 return 1;
527}
528
529static void
530parse_add_char(ReaderObj *self, char c)
531{
532 if (self->field_len == self->field_size && !parse_grow_buff(self))
533 return;
534 self->field[self->field_len++] = c;
535}
536
537static void
538parse_process_char(ReaderObj *self, char c)
539{
540 DialectObj *dialect = self->dialect;
541
542 switch (self->state) {
543 case START_RECORD:
544 /* start of record */
545 if (c == '\n')
546 /* empty line - return [] */
547 break;
548 /* normal character - handle as START_FIELD */
549 self->state = START_FIELD;
550 /* fallthru */
551 case START_FIELD:
552 /* expecting field */
553 if (c == '\n') {
554 /* save empty field - return [fields] */
555 parse_save_field(self);
556 self->state = START_RECORD;
557 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000558 else if (c == dialect->quotechar &&
559 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000560 /* start quoted field */
561 self->state = IN_QUOTED_FIELD;
562 }
563 else if (c == dialect->escapechar) {
564 /* possible escaped character */
565 self->state = ESCAPED_CHAR;
566 }
567 else if (c == ' ' && dialect->skipinitialspace)
568 /* ignore space at start of field */
569 ;
570 else if (c == dialect->delimiter) {
571 /* save empty field */
572 parse_save_field(self);
573 }
574 else {
575 /* begin new unquoted field */
576 parse_add_char(self, c);
577 self->state = IN_FIELD;
578 }
579 break;
580
581 case ESCAPED_CHAR:
Skip Montanarob4a04172003-03-20 23:29:12 +0000582 parse_add_char(self, c);
583 self->state = IN_FIELD;
584 break;
585
586 case IN_FIELD:
587 /* in unquoted field */
588 if (c == '\n') {
589 /* end of line - return [fields] */
590 parse_save_field(self);
591 self->state = START_RECORD;
592 }
593 else if (c == dialect->escapechar) {
594 /* possible escaped character */
595 self->state = ESCAPED_CHAR;
596 }
597 else if (c == dialect->delimiter) {
598 /* save field - wait for new field */
599 parse_save_field(self);
600 self->state = START_FIELD;
601 }
602 else {
603 /* normal character - save in field */
604 parse_add_char(self, c);
605 }
606 break;
607
608 case IN_QUOTED_FIELD:
609 /* in quoted field */
610 if (c == '\n') {
611 /* end of line - save '\n' in field */
612 parse_add_char(self, '\n');
613 }
614 else if (c == dialect->escapechar) {
615 /* Possible escape character */
616 self->state = ESCAPE_IN_QUOTED_FIELD;
617 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000618 else if (c == dialect->quotechar &&
619 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000620 if (dialect->doublequote) {
621 /* doublequote; " represented by "" */
622 self->state = QUOTE_IN_QUOTED_FIELD;
623 }
624 else {
625 /* end of quote part of field */
626 self->state = IN_FIELD;
627 }
628 }
629 else {
630 /* normal character - save in field */
631 parse_add_char(self, c);
632 }
633 break;
634
635 case ESCAPE_IN_QUOTED_FIELD:
Skip Montanarob4a04172003-03-20 23:29:12 +0000636 parse_add_char(self, c);
637 self->state = IN_QUOTED_FIELD;
638 break;
639
640 case QUOTE_IN_QUOTED_FIELD:
641 /* doublequote - seen a quote in an quoted field */
642 if (dialect->quoting != QUOTE_NONE &&
643 c == dialect->quotechar) {
644 /* save "" as " */
645 parse_add_char(self, c);
646 self->state = IN_QUOTED_FIELD;
647 }
648 else if (c == dialect->delimiter) {
649 /* save field - wait for new field */
650 parse_save_field(self);
651 self->state = START_FIELD;
652 }
653 else if (c == '\n') {
654 /* end of line - return [fields] */
655 parse_save_field(self);
656 self->state = START_RECORD;
657 }
658 else if (!dialect->strict) {
659 parse_add_char(self, c);
660 self->state = IN_FIELD;
661 }
662 else {
663 /* illegal */
664 self->had_parse_error = 1;
665 PyErr_Format(error_obj, "%c expected after %c",
666 dialect->delimiter,
667 dialect->quotechar);
668 }
669 break;
670
671 }
672}
673
674/*
675 * READER
676 */
677#define R_OFF(x) offsetof(ReaderObj, x)
678
679static struct PyMemberDef Reader_memberlist[] = {
680 { "dialect", T_OBJECT, R_OFF(dialect), RO },
681 { NULL }
682};
683
684static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000685Reader_iternext(ReaderObj *self)
686{
687 PyObject *lineobj;
688 PyObject *fields;
689 char *line;
690
691 do {
692 lineobj = PyIter_Next(self->input_iter);
693 if (lineobj == NULL) {
694 /* End of input OR exception */
695 if (!PyErr_Occurred() && self->field_len != 0)
696 return PyErr_Format(error_obj,
697 "newline inside string");
698 return NULL;
699 }
700
701 if (self->had_parse_error) {
702 if (self->fields) {
703 Py_XDECREF(self->fields);
704 }
705 self->fields = PyList_New(0);
706 self->field_len = 0;
707 self->state = START_RECORD;
708 self->had_parse_error = 0;
709 }
710 line = PyString_AsString(lineobj);
711
712 if (line == NULL) {
713 Py_DECREF(lineobj);
714 return NULL;
715 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000716 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000717 self->had_parse_error = 1;
718 Py_DECREF(lineobj);
719 return PyErr_Format(error_obj,
720 "string with NUL bytes");
721 }
722
723 /* Process line of text - send '\n' to processing code to
724 represent end of line. End of line which is not at end of
725 string is an error. */
726 while (*line) {
727 char c;
728
729 c = *line++;
730 if (c == '\r') {
731 c = *line++;
732 if (c == '\0')
733 /* macintosh end of line */
734 break;
735 if (c == '\n') {
736 c = *line++;
737 if (c == '\0')
738 /* DOS end of line */
739 break;
740 }
741 self->had_parse_error = 1;
742 Py_DECREF(lineobj);
743 return PyErr_Format(error_obj,
744 "newline inside string");
745 }
746 if (c == '\n') {
747 c = *line++;
748 if (c == '\0')
749 /* unix end of line */
750 break;
751 self->had_parse_error = 1;
752 Py_DECREF(lineobj);
753 return PyErr_Format(error_obj,
754 "newline inside string");
755 }
756 parse_process_char(self, c);
757 if (PyErr_Occurred()) {
758 Py_DECREF(lineobj);
759 return NULL;
760 }
761 }
762 parse_process_char(self, '\n');
763 Py_DECREF(lineobj);
764 } while (self->state != START_RECORD);
765
766 fields = self->fields;
767 self->fields = PyList_New(0);
768 return fields;
769}
770
771static void
772Reader_dealloc(ReaderObj *self)
773{
Andrew McNamara77ead872005-01-10 02:09:41 +0000774 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000775 Py_XDECREF(self->dialect);
776 Py_XDECREF(self->input_iter);
777 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000778 if (self->field != NULL)
779 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000780 PyObject_GC_Del(self);
781}
782
783static int
784Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
785{
786 int err;
787#define VISIT(SLOT) \
788 if (SLOT) { \
789 err = visit((PyObject *)(SLOT), arg); \
790 if (err) \
791 return err; \
792 }
793 VISIT(self->dialect);
794 VISIT(self->input_iter);
795 VISIT(self->fields);
796 return 0;
797}
798
799static int
800Reader_clear(ReaderObj *self)
801{
802 Py_XDECREF(self->dialect);
803 Py_XDECREF(self->input_iter);
804 Py_XDECREF(self->fields);
805 self->dialect = NULL;
806 self->input_iter = NULL;
807 self->fields = NULL;
808 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000809}
810
811PyDoc_STRVAR(Reader_Type_doc,
812"CSV reader\n"
813"\n"
814"Reader objects are responsible for reading and parsing tabular data\n"
815"in CSV format.\n"
816);
817
818static struct PyMethodDef Reader_methods[] = {
819 { NULL, NULL }
820};
821
822static PyTypeObject Reader_Type = {
823 PyObject_HEAD_INIT(NULL)
824 0, /*ob_size*/
825 "_csv.reader", /*tp_name*/
826 sizeof(ReaderObj), /*tp_basicsize*/
827 0, /*tp_itemsize*/
828 /* methods */
829 (destructor)Reader_dealloc, /*tp_dealloc*/
830 (printfunc)0, /*tp_print*/
831 (getattrfunc)0, /*tp_getattr*/
832 (setattrfunc)0, /*tp_setattr*/
833 (cmpfunc)0, /*tp_compare*/
834 (reprfunc)0, /*tp_repr*/
835 0, /*tp_as_number*/
836 0, /*tp_as_sequence*/
837 0, /*tp_as_mapping*/
838 (hashfunc)0, /*tp_hash*/
839 (ternaryfunc)0, /*tp_call*/
840 (reprfunc)0, /*tp_str*/
841 0, /*tp_getattro*/
842 0, /*tp_setattro*/
843 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000844 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
845 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000846 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000847 (traverseproc)Reader_traverse, /*tp_traverse*/
848 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000849 0, /*tp_richcompare*/
850 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000851 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000852 (getiterfunc)Reader_iternext, /*tp_iternext*/
853 Reader_methods, /*tp_methods*/
854 Reader_memberlist, /*tp_members*/
855 0, /*tp_getset*/
856
857};
858
859static PyObject *
860csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
861{
862 PyObject * iterator, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000863 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000864
865 if (!self)
866 return NULL;
867
868 self->dialect = NULL;
869 self->input_iter = self->fields = NULL;
870
871 self->fields = NULL;
872 self->input_iter = NULL;
873 self->had_parse_error = 0;
874 self->field = NULL;
875 self->field_size = 0;
876 self->field_len = 0;
877 self->state = START_RECORD;
878
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000879 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000880 Py_DECREF(self);
881 return NULL;
882 }
883 self->input_iter = PyObject_GetIter(iterator);
884 if (self->input_iter == NULL) {
885 PyErr_SetString(PyExc_TypeError,
886 "argument 1 must be an iterator");
887 Py_DECREF(self);
888 return NULL;
889 }
890 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
891 if (ctor_args == NULL) {
892 Py_DECREF(self);
893 return NULL;
894 }
895 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
896 ctor_args, keyword_args);
897 Py_DECREF(ctor_args);
898 if (self->dialect == NULL) {
899 Py_DECREF(self);
900 return NULL;
901 }
902 self->fields = PyList_New(0);
903 if (self->fields == NULL) {
904 Py_DECREF(self);
905 return NULL;
906 }
907
Andrew McNamara77ead872005-01-10 02:09:41 +0000908 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000909 return (PyObject *)self;
910}
911
912/*
913 * WRITER
914 */
915/* ---------------------------------------------------------------- */
916static void
917join_reset(WriterObj *self)
918{
919 self->rec_len = 0;
920 self->num_fields = 0;
921}
922
923#define MEM_INCR 32768
924
925/* Calculate new record length or append field to record. Return new
926 * record length.
927 */
928static int
929join_append_data(WriterObj *self, char *field, int quote_empty,
930 int *quoted, int copy_phase)
931{
932 DialectObj *dialect = self->dialect;
933 int i, rec_len;
934
935 rec_len = self->rec_len;
936
937 /* If this is not the first field we need a field separator.
938 */
939 if (self->num_fields > 0) {
940 if (copy_phase)
941 self->rec[rec_len] = dialect->delimiter;
942 rec_len++;
943 }
944 /* Handle preceding quote.
945 */
946 switch (dialect->quoting) {
947 case QUOTE_ALL:
948 *quoted = 1;
949 if (copy_phase)
950 self->rec[rec_len] = dialect->quotechar;
951 rec_len++;
952 break;
953 case QUOTE_MINIMAL:
954 case QUOTE_NONNUMERIC:
955 /* We only know about quoted in the copy phase.
956 */
957 if (copy_phase && *quoted) {
958 self->rec[rec_len] = dialect->quotechar;
959 rec_len++;
960 }
961 break;
962 case QUOTE_NONE:
963 break;
964 }
965 /* Copy/count field data.
966 */
967 for (i = 0;; i++) {
968 char c = field[i];
969
970 if (c == '\0')
971 break;
972 /* If in doublequote mode we escape quote chars with a
973 * quote.
974 */
975 if (dialect->quoting != QUOTE_NONE &&
976 c == dialect->quotechar && dialect->doublequote) {
977 if (copy_phase)
978 self->rec[rec_len] = dialect->quotechar;
979 *quoted = 1;
980 rec_len++;
981 }
982
983 /* Some special characters need to be escaped. If we have a
984 * quote character switch to quoted field instead of escaping
985 * individual characters.
986 */
987 if (!*quoted
988 && (c == dialect->delimiter ||
989 c == dialect->escapechar ||
990 c == '\n' || c == '\r')) {
991 if (dialect->quoting != QUOTE_NONE)
992 *quoted = 1;
993 else if (dialect->escapechar) {
994 if (copy_phase)
995 self->rec[rec_len] = dialect->escapechar;
996 rec_len++;
997 }
998 else {
999 PyErr_Format(error_obj,
1000 "delimiter must be quoted or escaped");
1001 return -1;
1002 }
1003 }
1004 /* Copy field character into record buffer.
1005 */
1006 if (copy_phase)
1007 self->rec[rec_len] = c;
1008 rec_len++;
1009 }
1010
1011 /* If field is empty check if it needs to be quoted.
1012 */
1013 if (i == 0 && quote_empty) {
1014 if (dialect->quoting == QUOTE_NONE) {
1015 PyErr_Format(error_obj,
1016 "single empty field record must be quoted");
1017 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001018 }
1019 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001020 *quoted = 1;
1021 }
1022
1023 /* Handle final quote character on field.
1024 */
1025 if (*quoted) {
1026 if (copy_phase)
1027 self->rec[rec_len] = dialect->quotechar;
1028 else
1029 /* Didn't know about leading quote until we found it
1030 * necessary in field data - compensate for it now.
1031 */
1032 rec_len++;
1033 rec_len++;
1034 }
1035
1036 return rec_len;
1037}
1038
1039static int
1040join_check_rec_size(WriterObj *self, int rec_len)
1041{
1042 if (rec_len > self->rec_size) {
1043 if (self->rec_size == 0) {
1044 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001045 if (self->rec != NULL)
1046 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001047 self->rec = PyMem_Malloc(self->rec_size);
1048 }
1049 else {
1050 char *old_rec = self->rec;
1051
1052 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1053 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1054 if (self->rec == NULL)
1055 PyMem_Free(old_rec);
1056 }
1057 if (self->rec == NULL) {
1058 PyErr_NoMemory();
1059 return 0;
1060 }
1061 }
1062 return 1;
1063}
1064
1065static int
1066join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1067{
1068 int rec_len;
1069
1070 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1071 if (rec_len < 0)
1072 return 0;
1073
1074 /* grow record buffer if necessary */
1075 if (!join_check_rec_size(self, rec_len))
1076 return 0;
1077
1078 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1079 self->num_fields++;
1080
1081 return 1;
1082}
1083
1084static int
1085join_append_lineterminator(WriterObj *self)
1086{
1087 int terminator_len;
1088
1089 terminator_len = PyString_Size(self->dialect->lineterminator);
1090
1091 /* grow record buffer if necessary */
1092 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1093 return 0;
1094
1095 memmove(self->rec + self->rec_len,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001096 /* should not be NULL */
Skip Montanarob4a04172003-03-20 23:29:12 +00001097 PyString_AsString(self->dialect->lineterminator),
1098 terminator_len);
1099 self->rec_len += terminator_len;
1100
1101 return 1;
1102}
1103
1104PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001105"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001106"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001107"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001108"elements will be converted to string.");
1109
1110static PyObject *
1111csv_writerow(WriterObj *self, PyObject *seq)
1112{
1113 DialectObj *dialect = self->dialect;
1114 int len, i;
1115
1116 if (!PySequence_Check(seq))
1117 return PyErr_Format(error_obj, "sequence expected");
1118
1119 len = PySequence_Length(seq);
1120 if (len < 0)
1121 return NULL;
1122
1123 /* Join all fields in internal buffer.
1124 */
1125 join_reset(self);
1126 for (i = 0; i < len; i++) {
1127 PyObject *field;
1128 int append_ok;
1129 int quoted;
1130
1131 field = PySequence_GetItem(seq, i);
1132 if (field == NULL)
1133 return NULL;
1134
1135 quoted = 0;
1136 if (dialect->quoting == QUOTE_NONNUMERIC) {
1137 PyObject *num;
1138
1139 num = PyNumber_Float(field);
1140 if (num == NULL) {
1141 quoted = 1;
1142 PyErr_Clear();
1143 }
1144 else {
1145 Py_DECREF(num);
1146 }
1147 }
1148
1149 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001150 append_ok = join_append(self,
1151 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001152 &quoted, len == 1);
1153 Py_DECREF(field);
1154 }
1155 else if (field == Py_None) {
1156 append_ok = join_append(self, "", &quoted, len == 1);
1157 Py_DECREF(field);
1158 }
1159 else {
1160 PyObject *str;
1161
1162 str = PyObject_Str(field);
1163 Py_DECREF(field);
1164 if (str == NULL)
1165 return NULL;
1166
Skip Montanaro577c7a72003-04-12 19:17:14 +00001167 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001168 &quoted, len == 1);
1169 Py_DECREF(str);
1170 }
1171 if (!append_ok)
1172 return NULL;
1173 }
1174
1175 /* Add line terminator.
1176 */
1177 if (!join_append_lineterminator(self))
1178 return 0;
1179
1180 return PyObject_CallFunction(self->writeline,
1181 "(s#)", self->rec, self->rec_len);
1182}
1183
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001184PyDoc_STRVAR(csv_writerows_doc,
1185"writerows(sequence of sequences)\n"
1186"\n"
1187"Construct and write a series of sequences to a csv file. Non-string\n"
1188"elements will be converted to string.");
1189
Skip Montanarob4a04172003-03-20 23:29:12 +00001190static PyObject *
1191csv_writerows(WriterObj *self, PyObject *seqseq)
1192{
1193 PyObject *row_iter, *row_obj, *result;
1194
1195 row_iter = PyObject_GetIter(seqseq);
1196 if (row_iter == NULL) {
1197 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001198 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001199 return NULL;
1200 }
1201 while ((row_obj = PyIter_Next(row_iter))) {
1202 result = csv_writerow(self, row_obj);
1203 Py_DECREF(row_obj);
1204 if (!result) {
1205 Py_DECREF(row_iter);
1206 return NULL;
1207 }
1208 else
1209 Py_DECREF(result);
1210 }
1211 Py_DECREF(row_iter);
1212 if (PyErr_Occurred())
1213 return NULL;
1214 Py_INCREF(Py_None);
1215 return Py_None;
1216}
1217
/* Method table for writer objects.  Both methods are registered with
 * METH_O; the { NULL, NULL } entry terminates the table. */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1223
/* Byte offset of member x inside WriterObj. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Member table for writer objects: exposes the "dialect" slot as a
 * read-only (RO) object attribute.  The { NULL } entry ends the table. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), RO },
    { NULL }
};
1230
1231static void
1232Writer_dealloc(WriterObj *self)
1233{
Andrew McNamara77ead872005-01-10 02:09:41 +00001234 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001235 Py_XDECREF(self->dialect);
1236 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001237 if (self->rec != NULL)
1238 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001239 PyObject_GC_Del(self);
1240}
1241
1242static int
1243Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1244{
1245 int err;
1246#define VISIT(SLOT) \
1247 if (SLOT) { \
1248 err = visit((PyObject *)(SLOT), arg); \
1249 if (err) \
1250 return err; \
1251 }
1252 VISIT(self->dialect);
1253 VISIT(self->writeline);
1254 return 0;
1255}
1256
1257static int
1258Writer_clear(WriterObj *self)
1259{
1260 Py_XDECREF(self->dialect);
1261 Py_XDECREF(self->writeline);
1262 self->dialect = NULL;
1263 self->writeline = NULL;
1264 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001265}
1266
1267PyDoc_STRVAR(Writer_Type_doc,
1268"CSV writer\n"
1269"\n"
1270"Writer objects are responsible for generating tabular data\n"
1271"in CSV format from sequence input.\n"
1272);
1273
1274static PyTypeObject Writer_Type = {
1275 PyObject_HEAD_INIT(NULL)
1276 0, /*ob_size*/
1277 "_csv.writer", /*tp_name*/
1278 sizeof(WriterObj), /*tp_basicsize*/
1279 0, /*tp_itemsize*/
1280 /* methods */
1281 (destructor)Writer_dealloc, /*tp_dealloc*/
1282 (printfunc)0, /*tp_print*/
1283 (getattrfunc)0, /*tp_getattr*/
1284 (setattrfunc)0, /*tp_setattr*/
1285 (cmpfunc)0, /*tp_compare*/
1286 (reprfunc)0, /*tp_repr*/
1287 0, /*tp_as_number*/
1288 0, /*tp_as_sequence*/
1289 0, /*tp_as_mapping*/
1290 (hashfunc)0, /*tp_hash*/
1291 (ternaryfunc)0, /*tp_call*/
1292 (reprfunc)0, /*tp_str*/
1293 0, /*tp_getattro*/
1294 0, /*tp_setattro*/
1295 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001296 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1297 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001298 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001299 (traverseproc)Writer_traverse, /*tp_traverse*/
1300 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001301 0, /*tp_richcompare*/
1302 0, /*tp_weaklistoffset*/
1303 (getiterfunc)0, /*tp_iter*/
1304 (getiterfunc)0, /*tp_iternext*/
1305 Writer_methods, /*tp_methods*/
1306 Writer_memberlist, /*tp_members*/
1307 0, /*tp_getset*/
1308};
1309
1310static PyObject *
1311csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1312{
1313 PyObject * output_file, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001314 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001315
1316 if (!self)
1317 return NULL;
1318
1319 self->dialect = NULL;
1320 self->writeline = NULL;
1321
1322 self->rec = NULL;
1323 self->rec_size = 0;
1324 self->rec_len = 0;
1325 self->num_fields = 0;
1326
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001327 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001328 Py_DECREF(self);
1329 return NULL;
1330 }
1331 self->writeline = PyObject_GetAttrString(output_file, "write");
1332 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1333 PyErr_SetString(PyExc_TypeError,
1334 "argument 1 must be an instance with a write method");
1335 Py_DECREF(self);
1336 return NULL;
1337 }
1338 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1339 if (ctor_args == NULL) {
1340 Py_DECREF(self);
1341 return NULL;
1342 }
1343 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1344 ctor_args, keyword_args);
1345 Py_DECREF(ctor_args);
1346 if (self->dialect == NULL) {
1347 Py_DECREF(self);
1348 return NULL;
1349 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001350 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001351 return (PyObject *)self;
1352}
1353
1354/*
1355 * DIALECT REGISTRY
1356 */
1357static PyObject *
1358csv_list_dialects(PyObject *module, PyObject *args)
1359{
1360 return PyDict_Keys(dialects);
1361}
1362
1363static PyObject *
1364csv_register_dialect(PyObject *module, PyObject *args)
1365{
1366 PyObject *name_obj, *dialect_obj;
1367
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001368 if (!PyArg_UnpackTuple(args, "", 2, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001369 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001370 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001371 PyErr_SetString(PyExc_TypeError,
1372 "dialect name must be a string or unicode");
1373 return NULL;
1374 }
1375 Py_INCREF(dialect_obj);
Andrew McNamara1196cf12005-01-07 04:42:45 +00001376 /* A class rather than an instance? Instantiate */
Skip Montanarob4a04172003-03-20 23:29:12 +00001377 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1378 PyObject * new_dia;
1379 new_dia = PyObject_CallFunction(dialect_obj, "");
1380 Py_DECREF(dialect_obj);
1381 if (new_dia == NULL)
1382 return NULL;
1383 dialect_obj = new_dia;
1384 }
1385 /* Make sure we finally have an instance */
1386 if (!PyInstance_Check(dialect_obj)) {
1387 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1388 Py_DECREF(dialect_obj);
1389 return NULL;
1390 }
1391 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1392 Py_DECREF(dialect_obj);
1393 return NULL;
1394 }
1395 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1396 Py_DECREF(dialect_obj);
1397 return NULL;
1398 }
1399 Py_DECREF(dialect_obj);
1400 Py_INCREF(Py_None);
1401 return Py_None;
1402}
1403
1404static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001405csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001406{
Skip Montanarob4a04172003-03-20 23:29:12 +00001407 if (PyDict_DelItem(dialects, name_obj) < 0)
1408 return PyErr_Format(error_obj, "unknown dialect");
1409 Py_INCREF(Py_None);
1410 return Py_None;
1411}
1412
1413static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001414csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001415{
Skip Montanarob4a04172003-03-20 23:29:12 +00001416 return get_dialect_from_registry(name_obj);
1417}
1418
1419/*
1420 * MODULE
1421 */
1422
/* Module docstring; handed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1480
/* Docstring attached to the module-level "reader" entry in csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001495
1496PyDoc_STRVAR(csv_writer_doc,
1497" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1498" [optional keyword args])\n"
1499" for row in csv_writer:\n"
1500" csv_writer.writerow(row)\n"
1501"\n"
1502" [or]\n"
1503"\n"
1504" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1505" [optional keyword args])\n"
1506" csv_writer.writerows(rows)\n"
1507"\n"
1508"The \"fileobj\" argument can be any object that supports the file API.\n");
1509
1510PyDoc_STRVAR(csv_list_dialects_doc,
1511"Return a list of all know dialect names.\n"
1512" names = csv.list_dialects()");
1513
/* Docstring attached to the module-level "get_dialect" entry. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1517
1518PyDoc_STRVAR(csv_register_dialect_doc,
1519"Create a mapping from a string name to a dialect class.\n"
1520" dialect = csv.register_dialect(name, dialect)");
1521
/* Docstring attached to the module-level "unregister_dialect" entry. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1525
/* Module-level function table, handed to Py_InitModule3() in init_csv().
 * reader/writer accept positional and keyword arguments; list_dialects
 * takes none; register_dialect takes a positional tuple; the remaining
 * two take exactly one object (METH_O).  { NULL, NULL } ends the table. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { NULL, NULL }
};
1541
1542PyMODINIT_FUNC
1543init_csv(void)
1544{
1545 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001546 StyleDesc *style;
1547
1548 if (PyType_Ready(&Dialect_Type) < 0)
1549 return;
1550
1551 if (PyType_Ready(&Reader_Type) < 0)
1552 return;
1553
1554 if (PyType_Ready(&Writer_Type) < 0)
1555 return;
1556
1557 /* Create the module and add the functions */
1558 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1559 if (module == NULL)
1560 return;
1561
1562 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001563 if (PyModule_AddStringConstant(module, "__version__",
1564 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001565 return;
1566
1567 /* Add _dialects dictionary */
1568 dialects = PyDict_New();
1569 if (dialects == NULL)
1570 return;
1571 if (PyModule_AddObject(module, "_dialects", dialects))
1572 return;
1573
1574 /* Add quote styles into dictionary */
1575 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001576 if (PyModule_AddIntConstant(module, style->name,
1577 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001578 return;
1579 }
1580
1581 /* Add the Dialect type */
1582 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1583 return;
1584
1585 /* Add the CSV exception object to the module. */
1586 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1587 if (error_obj == NULL)
1588 return;
1589 PyModule_AddObject(module, "Error", error_obj);
1590}