blob: c47e9d607a22b3ee9f0162625ddcf41c69cfbcb4 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module. Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
Skip Montanarob4a04172003-03-20 23:29:12 +000021/* begin 2.2 compatibility macros */
22#ifndef PyDoc_STRVAR
23/* Define macros for inline documentation. */
24#define PyDoc_VAR(name) static char name[]
25#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
26#ifdef WITH_DOC_STRINGS
27#define PyDoc_STR(str) str
28#else
29#define PyDoc_STR(str) ""
30#endif
31#endif /* ifndef PyDoc_STRVAR */
32
33#ifndef PyMODINIT_FUNC
34# if defined(__cplusplus)
35# define PyMODINIT_FUNC extern "C" void
36# else /* __cplusplus */
37# define PyMODINIT_FUNC void
38# endif /* __cplusplus */
39#endif
40/* end 2.2 compatibility macros */
41
42static PyObject *error_obj; /* CSV exception */
43static PyObject *dialects; /* Dialect registry */
44
/* States of the reader's character-at-a-time state machine
 * (see parse_process_char).  The numeric order is the declaration order.
 */
typedef enum {
    START_RECORD,               /* nothing of the record consumed yet */
    START_FIELD,                /* positioned at the start of a field */
    ESCAPED_CHAR,               /* escapechar seen outside quotes */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escapechar seen inside quotes */
    QUOTE_IN_QUOTED_FIELD       /* quotechar seen inside quotes */
} ParserState;
49
/* Quoting policies a dialect may select; the values are stored in
 * DialectObj.quoting and validated against the quote_styles table.
 */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
53
54typedef struct {
55 QuoteStyle style;
56 char *name;
57} StyleDesc;
58
59static StyleDesc quote_styles[] = {
60 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
61 { QUOTE_ALL, "QUOTE_ALL" },
62 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
63 { QUOTE_NONE, "QUOTE_NONE" },
64 { 0 }
65};
66
67typedef struct {
68 PyObject_HEAD
69
70 int doublequote; /* is " represented by ""? */
71 char delimiter; /* field separator */
72 char quotechar; /* quote character */
73 char escapechar; /* escape character */
74 int skipinitialspace; /* ignore spaces following delimiter? */
75 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000076 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000077
78 int strict; /* raise exception on bad CSV */
79} DialectObj;
80
81staticforward PyTypeObject Dialect_Type;
82
83typedef struct {
84 PyObject_HEAD
85
86 PyObject *input_iter; /* iterate over this for input lines */
87
88 DialectObj *dialect; /* parsing dialect */
89
90 PyObject *fields; /* field list for current record */
91 ParserState state; /* current CSV parse state */
92 char *field; /* build current field in here */
93 int field_size; /* size of allocated buffer */
94 int field_len; /* length of current field */
95 int had_parse_error; /* did we have a parse error? */
96} ReaderObj;
97
98staticforward PyTypeObject Reader_Type;
99
100#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
101
102typedef struct {
103 PyObject_HEAD
104
105 PyObject *writeline; /* write output lines to this file */
106
107 DialectObj *dialect; /* parsing dialect */
108
109 char *rec; /* buffer for parser.join */
110 int rec_size; /* size of allocated record */
111 int rec_len; /* length of record */
112 int num_fields; /* number of fields in record */
113} WriterObj;
114
115staticforward PyTypeObject Writer_Type;
116
117/*
118 * DIALECT class
119 */
120
121static PyObject *
122get_dialect_from_registry(PyObject * name_obj)
123{
124 PyObject *dialect_obj;
125
126 dialect_obj = PyDict_GetItem(dialects, name_obj);
127 if (dialect_obj == NULL)
128 return PyErr_Format(error_obj, "unknown dialect");
129 Py_INCREF(dialect_obj);
130 return dialect_obj;
131}
132
Skip Montanarob4a04172003-03-20 23:29:12 +0000133static PyObject *
134get_string(PyObject *str)
135{
136 Py_XINCREF(str);
137 return str;
138}
139
Skip Montanarob4a04172003-03-20 23:29:12 +0000140static PyObject *
141get_nullchar_as_None(char c)
142{
143 if (c == '\0') {
144 Py_INCREF(Py_None);
145 return Py_None;
146 }
147 else
148 return PyString_FromStringAndSize((char*)&c, 1);
149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
152Dialect_get_lineterminator(DialectObj *self)
153{
154 return get_string(self->lineterminator);
155}
156
Skip Montanarob4a04172003-03-20 23:29:12 +0000157static PyObject *
158Dialect_get_escapechar(DialectObj *self)
159{
160 return get_nullchar_as_None(self->escapechar);
161}
162
Andrew McNamara1196cf12005-01-07 04:42:45 +0000163static PyObject *
164Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000165{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000166 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000167}
168
169static PyObject *
170Dialect_get_quoting(DialectObj *self)
171{
172 return PyInt_FromLong(self->quoting);
173}
174
175static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000176_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000177{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178 if (src == NULL)
179 *target = dflt;
180 else
181 *target = PyObject_IsTrue(src);
182 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000183}
184
Andrew McNamara1196cf12005-01-07 04:42:45 +0000185static int
186_set_int(const char *name, int *target, PyObject *src, int dflt)
187{
188 if (src == NULL)
189 *target = dflt;
190 else {
191 if (!PyInt_Check(src)) {
192 PyErr_Format(PyExc_TypeError,
193 "\"%s\" must be an integer", name);
194 return -1;
195 }
196 *target = PyInt_AsLong(src);
197 }
198 return 0;
199}
200
201static int
202_set_char(const char *name, char *target, PyObject *src, char dflt)
203{
204 if (src == NULL)
205 *target = dflt;
206 else {
207 if (src == Py_None)
208 *target = '\0';
209 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
210 PyErr_Format(PyExc_TypeError,
211 "\"%s\" must be an 1-character string",
212 name);
213 return -1;
214 }
215 else {
216 char *s = PyString_AsString(src);
217 if (s == NULL)
218 return -1;
219 *target = s[0];
220 }
221 }
222 return 0;
223}
224
225static int
226_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
227{
228 if (src == NULL)
229 *target = PyString_FromString(dflt);
230 else {
231 if (src == Py_None)
232 *target = NULL;
233 else if (!PyString_Check(src)
234#ifdef Py_USING_UNICODE
235 && !PyUnicode_Check(src)
236#endif
237 ) {
238 PyErr_Format(PyExc_TypeError,
239 "\"%s\" must be an string", name);
240 return -1;
241 } else {
242 Py_XDECREF(*target);
243 Py_INCREF(src);
244 *target = src;
245 }
246 }
247 return 0;
248}
249
250static int
251dialect_check_quoting(int quoting)
252{
253 StyleDesc *qs = quote_styles;
254
255 for (qs = quote_styles; qs->name; qs++) {
256 if (qs->style == quoting)
257 return 0;
258 }
259 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
260 return -1;
261}
Skip Montanarob4a04172003-03-20 23:29:12 +0000262
263#define D_OFF(x) offsetof(DialectObj, x)
264
265static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000266 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
267 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
268 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
269 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000270 { NULL }
271};
272
273static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000274 { "escapechar", (getter)Dialect_get_escapechar},
275 { "lineterminator", (getter)Dialect_get_lineterminator},
276 { "quotechar", (getter)Dialect_get_quotechar},
277 { "quoting", (getter)Dialect_get_quoting},
278 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000279};
280
281static void
282Dialect_dealloc(DialectObj *self)
283{
284 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000285 self->ob_type->tp_free((PyObject *)self);
286}
287
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288/*
289 * Return a new reference to a dialect instance
290 *
291 * If given a string, looks up the name in our dialect registry
292 * If given a class, instantiate (which runs python validity checks)
293 * If given an instance, return a new reference to the instance
294 */
295static PyObject *
296dialect_instantiate(PyObject *dialect)
297{
298 Py_INCREF(dialect);
299 /* If dialect is a string, look it up in our registry */
300 if (PyString_Check(dialect)
301#ifdef Py_USING_UNICODE
302 || PyUnicode_Check(dialect)
303#endif
304 ) {
305 PyObject * new_dia;
306 new_dia = get_dialect_from_registry(dialect);
307 Py_DECREF(dialect);
308 return new_dia;
309 }
310 /* A class rather than an instance? Instantiate */
311 if (PyObject_TypeCheck(dialect, &PyClass_Type)) {
312 PyObject * new_dia;
313 new_dia = PyObject_CallFunction(dialect, "");
314 Py_DECREF(dialect);
315 return new_dia;
316 }
317 /* Make sure we finally have an instance */
318 if (!PyInstance_Check(dialect)) {
319 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
320 Py_DECREF(dialect);
321 return NULL;
322 }
323 return dialect;
324}
325
326static char *dialect_kws[] = {
327 "dialect",
328 "delimiter",
329 "doublequote",
330 "escapechar",
331 "lineterminator",
332 "quotechar",
333 "quoting",
334 "skipinitialspace",
335 "strict",
336 NULL
337};
338
Skip Montanarob4a04172003-03-20 23:29:12 +0000339static int
340dialect_init(DialectObj * self, PyObject * args, PyObject * kwargs)
341{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000342 int ret = -1;
343 PyObject *dialect = NULL;
344 PyObject *delimiter = NULL;
345 PyObject *doublequote = NULL;
346 PyObject *escapechar = NULL;
347 PyObject *lineterminator = NULL;
348 PyObject *quotechar = NULL;
349 PyObject *quoting = NULL;
350 PyObject *skipinitialspace = NULL;
351 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000352
Andrew McNamara1196cf12005-01-07 04:42:45 +0000353 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
354 "|OOOOOOOOO", dialect_kws,
355 &dialect,
356 &delimiter,
357 &doublequote,
358 &escapechar,
359 &lineterminator,
360 &quotechar,
361 &quoting,
362 &skipinitialspace,
363 &strict))
Skip Montanarob4a04172003-03-20 23:29:12 +0000364 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000365
Andrew McNamara1196cf12005-01-07 04:42:45 +0000366 Py_XINCREF(delimiter);
367 Py_XINCREF(doublequote);
368 Py_XINCREF(escapechar);
369 Py_XINCREF(lineterminator);
370 Py_XINCREF(quotechar);
371 Py_XINCREF(quoting);
372 Py_XINCREF(skipinitialspace);
373 Py_XINCREF(strict);
374 if (dialect != NULL) {
375 dialect = dialect_instantiate(dialect);
376 if (dialect == NULL)
377 goto err;
378#define DIALECT_GETATTR(v, n) \
379 if (v == NULL) \
380 v = PyObject_GetAttrString(dialect, n)
Skip Montanarob4a04172003-03-20 23:29:12 +0000381
Andrew McNamara1196cf12005-01-07 04:42:45 +0000382 DIALECT_GETATTR(delimiter, "delimiter");
383 DIALECT_GETATTR(doublequote, "doublequote");
384 DIALECT_GETATTR(escapechar, "escapechar");
385 DIALECT_GETATTR(lineterminator, "lineterminator");
386 DIALECT_GETATTR(quotechar, "quotechar");
387 DIALECT_GETATTR(quoting, "quoting");
388 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
389 DIALECT_GETATTR(strict, "strict");
390 PyErr_Clear();
391 Py_DECREF(dialect);
392 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000393
Andrew McNamara1196cf12005-01-07 04:42:45 +0000394 /* check types and convert to C values */
395#define DIASET(meth, name, target, src, dflt) \
396 if (meth(name, target, src, dflt)) \
397 goto err
398 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
399 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
400 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
401 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
402 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
403 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
404 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
405 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000406
Andrew McNamara1196cf12005-01-07 04:42:45 +0000407 /* validate options */
408 if (dialect_check_quoting(self->quoting))
409 goto err;
410 if (self->delimiter == 0) {
411 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
412 goto err;
413 }
414 if (quotechar == Py_None && self->quoting != QUOTE_NONE)
415 self->quoting = QUOTE_NONE;
416 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
417 PyErr_SetString(PyExc_TypeError,
418 "quotechar must be set if quoting enabled");
419 goto err;
420 }
421 if (self->lineterminator == 0) {
422 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
423 goto err;
424 }
425
426 ret = 0;
427err:
428 Py_XDECREF(delimiter);
429 Py_XDECREF(doublequote);
430 Py_XDECREF(escapechar);
431 Py_XDECREF(lineterminator);
432 Py_XDECREF(quotechar);
433 Py_XDECREF(quoting);
434 Py_XDECREF(skipinitialspace);
435 Py_XDECREF(strict);
436 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000437}
438
439static PyObject *
440dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
441{
442 DialectObj *self;
443 self = (DialectObj *)type->tp_alloc(type, 0);
444 if (self != NULL) {
445 self->lineterminator = NULL;
446 }
447 return (PyObject *)self;
448}
449
450
451PyDoc_STRVAR(Dialect_Type_doc,
452"CSV dialect\n"
453"\n"
454"The Dialect type records CSV parsing and generation options.\n");
455
456static PyTypeObject Dialect_Type = {
457 PyObject_HEAD_INIT(NULL)
458 0, /* ob_size */
459 "_csv.Dialect", /* tp_name */
460 sizeof(DialectObj), /* tp_basicsize */
461 0, /* tp_itemsize */
462 /* methods */
463 (destructor)Dialect_dealloc, /* tp_dealloc */
464 (printfunc)0, /* tp_print */
465 (getattrfunc)0, /* tp_getattr */
466 (setattrfunc)0, /* tp_setattr */
467 (cmpfunc)0, /* tp_compare */
468 (reprfunc)0, /* tp_repr */
469 0, /* tp_as_number */
470 0, /* tp_as_sequence */
471 0, /* tp_as_mapping */
472 (hashfunc)0, /* tp_hash */
473 (ternaryfunc)0, /* tp_call */
474 (reprfunc)0, /* tp_str */
475 0, /* tp_getattro */
476 0, /* tp_setattro */
477 0, /* tp_as_buffer */
478 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
479 Dialect_Type_doc, /* tp_doc */
480 0, /* tp_traverse */
481 0, /* tp_clear */
482 0, /* tp_richcompare */
483 0, /* tp_weaklistoffset */
484 0, /* tp_iter */
485 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000486 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000487 Dialect_memberlist, /* tp_members */
488 Dialect_getsetlist, /* tp_getset */
489 0, /* tp_base */
490 0, /* tp_dict */
491 0, /* tp_descr_get */
492 0, /* tp_descr_set */
493 0, /* tp_dictoffset */
494 (initproc)dialect_init, /* tp_init */
495 PyType_GenericAlloc, /* tp_alloc */
496 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000497 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000498};
499
500static void
501parse_save_field(ReaderObj *self)
502{
503 PyObject *field;
504
505 field = PyString_FromStringAndSize(self->field, self->field_len);
506 if (field != NULL) {
507 PyList_Append(self->fields, field);
508 Py_XDECREF(field);
509 }
510 self->field_len = 0;
511}
512
513static int
514parse_grow_buff(ReaderObj *self)
515{
516 if (self->field_size == 0) {
517 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000518 if (self->field != NULL)
519 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000520 self->field = PyMem_Malloc(self->field_size);
521 }
522 else {
523 self->field_size *= 2;
524 self->field = PyMem_Realloc(self->field, self->field_size);
525 }
526 if (self->field == NULL) {
527 PyErr_NoMemory();
528 return 0;
529 }
530 return 1;
531}
532
533static void
534parse_add_char(ReaderObj *self, char c)
535{
536 if (self->field_len == self->field_size && !parse_grow_buff(self))
537 return;
538 self->field[self->field_len++] = c;
539}
540
541static void
542parse_process_char(ReaderObj *self, char c)
543{
544 DialectObj *dialect = self->dialect;
545
546 switch (self->state) {
547 case START_RECORD:
548 /* start of record */
549 if (c == '\n')
550 /* empty line - return [] */
551 break;
552 /* normal character - handle as START_FIELD */
553 self->state = START_FIELD;
554 /* fallthru */
555 case START_FIELD:
556 /* expecting field */
557 if (c == '\n') {
558 /* save empty field - return [fields] */
559 parse_save_field(self);
560 self->state = START_RECORD;
561 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000562 else if (c == dialect->quotechar &&
563 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000564 /* start quoted field */
565 self->state = IN_QUOTED_FIELD;
566 }
567 else if (c == dialect->escapechar) {
568 /* possible escaped character */
569 self->state = ESCAPED_CHAR;
570 }
571 else if (c == ' ' && dialect->skipinitialspace)
572 /* ignore space at start of field */
573 ;
574 else if (c == dialect->delimiter) {
575 /* save empty field */
576 parse_save_field(self);
577 }
578 else {
579 /* begin new unquoted field */
580 parse_add_char(self, c);
581 self->state = IN_FIELD;
582 }
583 break;
584
585 case ESCAPED_CHAR:
586 if (c != dialect->escapechar &&
587 c != dialect->delimiter &&
588 c != dialect->quotechar)
589 parse_add_char(self, dialect->escapechar);
590 parse_add_char(self, c);
591 self->state = IN_FIELD;
592 break;
593
594 case IN_FIELD:
595 /* in unquoted field */
596 if (c == '\n') {
597 /* end of line - return [fields] */
598 parse_save_field(self);
599 self->state = START_RECORD;
600 }
601 else if (c == dialect->escapechar) {
602 /* possible escaped character */
603 self->state = ESCAPED_CHAR;
604 }
605 else if (c == dialect->delimiter) {
606 /* save field - wait for new field */
607 parse_save_field(self);
608 self->state = START_FIELD;
609 }
610 else {
611 /* normal character - save in field */
612 parse_add_char(self, c);
613 }
614 break;
615
616 case IN_QUOTED_FIELD:
617 /* in quoted field */
618 if (c == '\n') {
619 /* end of line - save '\n' in field */
620 parse_add_char(self, '\n');
621 }
622 else if (c == dialect->escapechar) {
623 /* Possible escape character */
624 self->state = ESCAPE_IN_QUOTED_FIELD;
625 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000626 else if (c == dialect->quotechar &&
627 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000628 if (dialect->doublequote) {
629 /* doublequote; " represented by "" */
630 self->state = QUOTE_IN_QUOTED_FIELD;
631 }
632 else {
633 /* end of quote part of field */
634 self->state = IN_FIELD;
635 }
636 }
637 else {
638 /* normal character - save in field */
639 parse_add_char(self, c);
640 }
641 break;
642
643 case ESCAPE_IN_QUOTED_FIELD:
644 if (c != dialect->escapechar &&
645 c != dialect->delimiter &&
646 c != dialect->quotechar)
647 parse_add_char(self, dialect->escapechar);
648 parse_add_char(self, c);
649 self->state = IN_QUOTED_FIELD;
650 break;
651
652 case QUOTE_IN_QUOTED_FIELD:
653 /* doublequote - seen a quote in an quoted field */
654 if (dialect->quoting != QUOTE_NONE &&
655 c == dialect->quotechar) {
656 /* save "" as " */
657 parse_add_char(self, c);
658 self->state = IN_QUOTED_FIELD;
659 }
660 else if (c == dialect->delimiter) {
661 /* save field - wait for new field */
662 parse_save_field(self);
663 self->state = START_FIELD;
664 }
665 else if (c == '\n') {
666 /* end of line - return [fields] */
667 parse_save_field(self);
668 self->state = START_RECORD;
669 }
670 else if (!dialect->strict) {
671 parse_add_char(self, c);
672 self->state = IN_FIELD;
673 }
674 else {
675 /* illegal */
676 self->had_parse_error = 1;
677 PyErr_Format(error_obj, "%c expected after %c",
678 dialect->delimiter,
679 dialect->quotechar);
680 }
681 break;
682
683 }
684}
685
686/*
687 * READER
688 */
689#define R_OFF(x) offsetof(ReaderObj, x)
690
691static struct PyMemberDef Reader_memberlist[] = {
692 { "dialect", T_OBJECT, R_OFF(dialect), RO },
693 { NULL }
694};
695
696static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000697Reader_iternext(ReaderObj *self)
698{
699 PyObject *lineobj;
700 PyObject *fields;
701 char *line;
702
703 do {
704 lineobj = PyIter_Next(self->input_iter);
705 if (lineobj == NULL) {
706 /* End of input OR exception */
707 if (!PyErr_Occurred() && self->field_len != 0)
708 return PyErr_Format(error_obj,
709 "newline inside string");
710 return NULL;
711 }
712
713 if (self->had_parse_error) {
714 if (self->fields) {
715 Py_XDECREF(self->fields);
716 }
717 self->fields = PyList_New(0);
718 self->field_len = 0;
719 self->state = START_RECORD;
720 self->had_parse_error = 0;
721 }
722 line = PyString_AsString(lineobj);
723
724 if (line == NULL) {
725 Py_DECREF(lineobj);
726 return NULL;
727 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000728 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000729 self->had_parse_error = 1;
730 Py_DECREF(lineobj);
731 return PyErr_Format(error_obj,
732 "string with NUL bytes");
733 }
734
735 /* Process line of text - send '\n' to processing code to
736 represent end of line. End of line which is not at end of
737 string is an error. */
738 while (*line) {
739 char c;
740
741 c = *line++;
742 if (c == '\r') {
743 c = *line++;
744 if (c == '\0')
745 /* macintosh end of line */
746 break;
747 if (c == '\n') {
748 c = *line++;
749 if (c == '\0')
750 /* DOS end of line */
751 break;
752 }
753 self->had_parse_error = 1;
754 Py_DECREF(lineobj);
755 return PyErr_Format(error_obj,
756 "newline inside string");
757 }
758 if (c == '\n') {
759 c = *line++;
760 if (c == '\0')
761 /* unix end of line */
762 break;
763 self->had_parse_error = 1;
764 Py_DECREF(lineobj);
765 return PyErr_Format(error_obj,
766 "newline inside string");
767 }
768 parse_process_char(self, c);
769 if (PyErr_Occurred()) {
770 Py_DECREF(lineobj);
771 return NULL;
772 }
773 }
774 parse_process_char(self, '\n');
775 Py_DECREF(lineobj);
776 } while (self->state != START_RECORD);
777
778 fields = self->fields;
779 self->fields = PyList_New(0);
780 return fields;
781}
782
783static void
784Reader_dealloc(ReaderObj *self)
785{
786 Py_XDECREF(self->dialect);
787 Py_XDECREF(self->input_iter);
788 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000789 if (self->field != NULL)
790 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000791 PyObject_GC_Del(self);
792}
793
794static int
795Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
796{
797 int err;
798#define VISIT(SLOT) \
799 if (SLOT) { \
800 err = visit((PyObject *)(SLOT), arg); \
801 if (err) \
802 return err; \
803 }
804 VISIT(self->dialect);
805 VISIT(self->input_iter);
806 VISIT(self->fields);
807 return 0;
808}
809
810static int
811Reader_clear(ReaderObj *self)
812{
813 Py_XDECREF(self->dialect);
814 Py_XDECREF(self->input_iter);
815 Py_XDECREF(self->fields);
816 self->dialect = NULL;
817 self->input_iter = NULL;
818 self->fields = NULL;
819 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000820}
821
822PyDoc_STRVAR(Reader_Type_doc,
823"CSV reader\n"
824"\n"
825"Reader objects are responsible for reading and parsing tabular data\n"
826"in CSV format.\n"
827);
828
829static struct PyMethodDef Reader_methods[] = {
830 { NULL, NULL }
831};
832
833static PyTypeObject Reader_Type = {
834 PyObject_HEAD_INIT(NULL)
835 0, /*ob_size*/
836 "_csv.reader", /*tp_name*/
837 sizeof(ReaderObj), /*tp_basicsize*/
838 0, /*tp_itemsize*/
839 /* methods */
840 (destructor)Reader_dealloc, /*tp_dealloc*/
841 (printfunc)0, /*tp_print*/
842 (getattrfunc)0, /*tp_getattr*/
843 (setattrfunc)0, /*tp_setattr*/
844 (cmpfunc)0, /*tp_compare*/
845 (reprfunc)0, /*tp_repr*/
846 0, /*tp_as_number*/
847 0, /*tp_as_sequence*/
848 0, /*tp_as_mapping*/
849 (hashfunc)0, /*tp_hash*/
850 (ternaryfunc)0, /*tp_call*/
851 (reprfunc)0, /*tp_str*/
852 0, /*tp_getattro*/
853 0, /*tp_setattro*/
854 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000855 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
856 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000857 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000858 (traverseproc)Reader_traverse, /*tp_traverse*/
859 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000860 0, /*tp_richcompare*/
861 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000862 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000863 (getiterfunc)Reader_iternext, /*tp_iternext*/
864 Reader_methods, /*tp_methods*/
865 Reader_memberlist, /*tp_members*/
866 0, /*tp_getset*/
867
868};
869
870static PyObject *
871csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
872{
873 PyObject * iterator, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000874 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000875
876 if (!self)
877 return NULL;
878
879 self->dialect = NULL;
880 self->input_iter = self->fields = NULL;
881
882 self->fields = NULL;
883 self->input_iter = NULL;
884 self->had_parse_error = 0;
885 self->field = NULL;
886 self->field_size = 0;
887 self->field_len = 0;
888 self->state = START_RECORD;
889
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000890 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000891 Py_DECREF(self);
892 return NULL;
893 }
894 self->input_iter = PyObject_GetIter(iterator);
895 if (self->input_iter == NULL) {
896 PyErr_SetString(PyExc_TypeError,
897 "argument 1 must be an iterator");
898 Py_DECREF(self);
899 return NULL;
900 }
901 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
902 if (ctor_args == NULL) {
903 Py_DECREF(self);
904 return NULL;
905 }
906 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
907 ctor_args, keyword_args);
908 Py_DECREF(ctor_args);
909 if (self->dialect == NULL) {
910 Py_DECREF(self);
911 return NULL;
912 }
913 self->fields = PyList_New(0);
914 if (self->fields == NULL) {
915 Py_DECREF(self);
916 return NULL;
917 }
918
919 return (PyObject *)self;
920}
921
922/*
923 * WRITER
924 */
925/* ---------------------------------------------------------------- */
926static void
927join_reset(WriterObj *self)
928{
929 self->rec_len = 0;
930 self->num_fields = 0;
931}
932
933#define MEM_INCR 32768
934
935/* Calculate new record length or append field to record. Return new
936 * record length.
937 */
938static int
939join_append_data(WriterObj *self, char *field, int quote_empty,
940 int *quoted, int copy_phase)
941{
942 DialectObj *dialect = self->dialect;
943 int i, rec_len;
944
945 rec_len = self->rec_len;
946
947 /* If this is not the first field we need a field separator.
948 */
949 if (self->num_fields > 0) {
950 if (copy_phase)
951 self->rec[rec_len] = dialect->delimiter;
952 rec_len++;
953 }
954 /* Handle preceding quote.
955 */
956 switch (dialect->quoting) {
957 case QUOTE_ALL:
958 *quoted = 1;
959 if (copy_phase)
960 self->rec[rec_len] = dialect->quotechar;
961 rec_len++;
962 break;
963 case QUOTE_MINIMAL:
964 case QUOTE_NONNUMERIC:
965 /* We only know about quoted in the copy phase.
966 */
967 if (copy_phase && *quoted) {
968 self->rec[rec_len] = dialect->quotechar;
969 rec_len++;
970 }
971 break;
972 case QUOTE_NONE:
973 break;
974 }
975 /* Copy/count field data.
976 */
977 for (i = 0;; i++) {
978 char c = field[i];
979
980 if (c == '\0')
981 break;
982 /* If in doublequote mode we escape quote chars with a
983 * quote.
984 */
985 if (dialect->quoting != QUOTE_NONE &&
986 c == dialect->quotechar && dialect->doublequote) {
987 if (copy_phase)
988 self->rec[rec_len] = dialect->quotechar;
989 *quoted = 1;
990 rec_len++;
991 }
992
993 /* Some special characters need to be escaped. If we have a
994 * quote character switch to quoted field instead of escaping
995 * individual characters.
996 */
997 if (!*quoted
998 && (c == dialect->delimiter ||
999 c == dialect->escapechar ||
1000 c == '\n' || c == '\r')) {
1001 if (dialect->quoting != QUOTE_NONE)
1002 *quoted = 1;
1003 else if (dialect->escapechar) {
1004 if (copy_phase)
1005 self->rec[rec_len] = dialect->escapechar;
1006 rec_len++;
1007 }
1008 else {
1009 PyErr_Format(error_obj,
1010 "delimiter must be quoted or escaped");
1011 return -1;
1012 }
1013 }
1014 /* Copy field character into record buffer.
1015 */
1016 if (copy_phase)
1017 self->rec[rec_len] = c;
1018 rec_len++;
1019 }
1020
1021 /* If field is empty check if it needs to be quoted.
1022 */
1023 if (i == 0 && quote_empty) {
1024 if (dialect->quoting == QUOTE_NONE) {
1025 PyErr_Format(error_obj,
1026 "single empty field record must be quoted");
1027 return -1;
1028 } else
1029 *quoted = 1;
1030 }
1031
1032 /* Handle final quote character on field.
1033 */
1034 if (*quoted) {
1035 if (copy_phase)
1036 self->rec[rec_len] = dialect->quotechar;
1037 else
1038 /* Didn't know about leading quote until we found it
1039 * necessary in field data - compensate for it now.
1040 */
1041 rec_len++;
1042 rec_len++;
1043 }
1044
1045 return rec_len;
1046}
1047
1048static int
1049join_check_rec_size(WriterObj *self, int rec_len)
1050{
1051 if (rec_len > self->rec_size) {
1052 if (self->rec_size == 0) {
1053 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001054 if (self->rec != NULL)
1055 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001056 self->rec = PyMem_Malloc(self->rec_size);
1057 }
1058 else {
1059 char *old_rec = self->rec;
1060
1061 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1062 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1063 if (self->rec == NULL)
1064 PyMem_Free(old_rec);
1065 }
1066 if (self->rec == NULL) {
1067 PyErr_NoMemory();
1068 return 0;
1069 }
1070 }
1071 return 1;
1072}
1073
1074static int
1075join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1076{
1077 int rec_len;
1078
1079 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1080 if (rec_len < 0)
1081 return 0;
1082
1083 /* grow record buffer if necessary */
1084 if (!join_check_rec_size(self, rec_len))
1085 return 0;
1086
1087 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1088 self->num_fields++;
1089
1090 return 1;
1091}
1092
1093static int
1094join_append_lineterminator(WriterObj *self)
1095{
1096 int terminator_len;
1097
1098 terminator_len = PyString_Size(self->dialect->lineterminator);
1099
1100 /* grow record buffer if necessary */
1101 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1102 return 0;
1103
1104 memmove(self->rec + self->rec_len,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001105 /* should not be NULL */
Skip Montanarob4a04172003-03-20 23:29:12 +00001106 PyString_AsString(self->dialect->lineterminator),
1107 terminator_len);
1108 self->rec_len += terminator_len;
1109
1110 return 1;
1111}
1112
1113PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001114"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001115"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001116"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001117"elements will be converted to string.");
1118
1119static PyObject *
1120csv_writerow(WriterObj *self, PyObject *seq)
1121{
1122 DialectObj *dialect = self->dialect;
1123 int len, i;
1124
1125 if (!PySequence_Check(seq))
1126 return PyErr_Format(error_obj, "sequence expected");
1127
1128 len = PySequence_Length(seq);
1129 if (len < 0)
1130 return NULL;
1131
1132 /* Join all fields in internal buffer.
1133 */
1134 join_reset(self);
1135 for (i = 0; i < len; i++) {
1136 PyObject *field;
1137 int append_ok;
1138 int quoted;
1139
1140 field = PySequence_GetItem(seq, i);
1141 if (field == NULL)
1142 return NULL;
1143
1144 quoted = 0;
1145 if (dialect->quoting == QUOTE_NONNUMERIC) {
1146 PyObject *num;
1147
1148 num = PyNumber_Float(field);
1149 if (num == NULL) {
1150 quoted = 1;
1151 PyErr_Clear();
1152 }
1153 else {
1154 Py_DECREF(num);
1155 }
1156 }
1157
1158 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001159 append_ok = join_append(self,
1160 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001161 &quoted, len == 1);
1162 Py_DECREF(field);
1163 }
1164 else if (field == Py_None) {
1165 append_ok = join_append(self, "", &quoted, len == 1);
1166 Py_DECREF(field);
1167 }
1168 else {
1169 PyObject *str;
1170
1171 str = PyObject_Str(field);
1172 Py_DECREF(field);
1173 if (str == NULL)
1174 return NULL;
1175
Skip Montanaro577c7a72003-04-12 19:17:14 +00001176 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001177 &quoted, len == 1);
1178 Py_DECREF(str);
1179 }
1180 if (!append_ok)
1181 return NULL;
1182 }
1183
1184 /* Add line terminator.
1185 */
1186 if (!join_append_lineterminator(self))
1187 return 0;
1188
1189 return PyObject_CallFunction(self->writeline,
1190 "(s#)", self->rec, self->rec_len);
1191}
1192
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001193PyDoc_STRVAR(csv_writerows_doc,
1194"writerows(sequence of sequences)\n"
1195"\n"
1196"Construct and write a series of sequences to a csv file. Non-string\n"
1197"elements will be converted to string.");
1198
Skip Montanarob4a04172003-03-20 23:29:12 +00001199static PyObject *
1200csv_writerows(WriterObj *self, PyObject *seqseq)
1201{
1202 PyObject *row_iter, *row_obj, *result;
1203
1204 row_iter = PyObject_GetIter(seqseq);
1205 if (row_iter == NULL) {
1206 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001207 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001208 return NULL;
1209 }
1210 while ((row_obj = PyIter_Next(row_iter))) {
1211 result = csv_writerow(self, row_obj);
1212 Py_DECREF(row_obj);
1213 if (!result) {
1214 Py_DECREF(row_iter);
1215 return NULL;
1216 }
1217 else
1218 Py_DECREF(result);
1219 }
1220 Py_DECREF(row_iter);
1221 if (PyErr_Occurred())
1222 return NULL;
1223 Py_INCREF(Py_None);
1224 return Py_None;
1225}
1226
/* Methods of writer objects.  Both are METH_O: each receives exactly one
 * argument object. */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }      /* sentinel */
};

/* Offset of member x inside WriterObj, for the member table below. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Data attributes of writer objects: "dialect" is exposed read-only (RO). */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), RO },
    { NULL }            /* sentinel */
};
1239
1240static void
1241Writer_dealloc(WriterObj *self)
1242{
1243 Py_XDECREF(self->dialect);
1244 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001245 if (self->rec != NULL)
1246 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001247 PyObject_GC_Del(self);
1248}
1249
1250static int
1251Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1252{
1253 int err;
1254#define VISIT(SLOT) \
1255 if (SLOT) { \
1256 err = visit((PyObject *)(SLOT), arg); \
1257 if (err) \
1258 return err; \
1259 }
1260 VISIT(self->dialect);
1261 VISIT(self->writeline);
1262 return 0;
1263}
1264
1265static int
1266Writer_clear(WriterObj *self)
1267{
1268 Py_XDECREF(self->dialect);
1269 Py_XDECREF(self->writeline);
1270 self->dialect = NULL;
1271 self->writeline = NULL;
1272 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001273}
1274
/* Docstring of the writer type (used as tp_doc below). */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);

/*
 * Type object for writer objects.  The initializer is positional, so the
 * order of the entries must follow the PyTypeObject slot order.  The type
 * takes part in cyclic GC (Py_TPFLAGS_HAVE_GC with tp_traverse/tp_clear)
 * and leaves every slot it does not need at 0.
 */
static PyTypeObject Writer_Type = {
    PyObject_HEAD_INIT(NULL)
    0,                                      /*ob_size*/
    "_csv.writer",                          /*tp_name*/
    sizeof(WriterObj),                      /*tp_basicsize*/
    0,                                      /*tp_itemsize*/
    /* methods */
    (destructor)Writer_dealloc,             /*tp_dealloc*/
    (printfunc)0,                           /*tp_print*/
    (getattrfunc)0,                         /*tp_getattr*/
    (setattrfunc)0,                         /*tp_setattr*/
    (cmpfunc)0,                             /*tp_compare*/
    (reprfunc)0,                            /*tp_repr*/
    0,                                      /*tp_as_number*/
    0,                                      /*tp_as_sequence*/
    0,                                      /*tp_as_mapping*/
    (hashfunc)0,                            /*tp_hash*/
    (ternaryfunc)0,                         /*tp_call*/
    (reprfunc)0,                            /*tp_str*/
    0,                                      /*tp_getattro*/
    0,                                      /*tp_setattro*/
    0,                                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                 /*tp_flags*/
    Writer_Type_doc,                        /*tp_doc*/
    (traverseproc)Writer_traverse,          /*tp_traverse*/
    (inquiry)Writer_clear,                  /*tp_clear*/
    0,                                      /*tp_richcompare*/
    0,                                      /*tp_weaklistoffset*/
    (getiterfunc)0,                         /*tp_iter*/
    (getiterfunc)0,                         /*tp_iternext*/
    Writer_methods,                         /*tp_methods*/
    Writer_memberlist,                      /*tp_members*/
    0,                                      /*tp_getset*/
};
1317
1318static PyObject *
1319csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1320{
1321 PyObject * output_file, * dialect = NULL, *ctor_args;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001322 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001323
1324 if (!self)
1325 return NULL;
1326
1327 self->dialect = NULL;
1328 self->writeline = NULL;
1329
1330 self->rec = NULL;
1331 self->rec_size = 0;
1332 self->rec_len = 0;
1333 self->num_fields = 0;
1334
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001335 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001336 Py_DECREF(self);
1337 return NULL;
1338 }
1339 self->writeline = PyObject_GetAttrString(output_file, "write");
1340 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1341 PyErr_SetString(PyExc_TypeError,
1342 "argument 1 must be an instance with a write method");
1343 Py_DECREF(self);
1344 return NULL;
1345 }
1346 ctor_args = Py_BuildValue(dialect ? "(O)" : "()", dialect);
1347 if (ctor_args == NULL) {
1348 Py_DECREF(self);
1349 return NULL;
1350 }
1351 self->dialect = (DialectObj *)PyObject_Call((PyObject *)&Dialect_Type,
1352 ctor_args, keyword_args);
1353 Py_DECREF(ctor_args);
1354 if (self->dialect == NULL) {
1355 Py_DECREF(self);
1356 return NULL;
1357 }
1358 return (PyObject *)self;
1359}
1360
1361/*
1362 * DIALECT REGISTRY
1363 */
/* _csv.list_dialects(): return the keys of the dialect registry dict. */
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
    return PyDict_Keys(dialects);
}
1369
1370static PyObject *
1371csv_register_dialect(PyObject *module, PyObject *args)
1372{
1373 PyObject *name_obj, *dialect_obj;
1374
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001375 if (!PyArg_UnpackTuple(args, "", 2, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001376 return NULL;
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001377 if (!PyString_Check(name_obj)
1378#ifdef Py_USING_UNICODE
1379&& !PyUnicode_Check(name_obj)
1380#endif
1381) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001382 PyErr_SetString(PyExc_TypeError,
1383 "dialect name must be a string or unicode");
1384 return NULL;
1385 }
1386 Py_INCREF(dialect_obj);
Andrew McNamara1196cf12005-01-07 04:42:45 +00001387 /* A class rather than an instance? Instantiate */
Skip Montanarob4a04172003-03-20 23:29:12 +00001388 if (PyObject_TypeCheck(dialect_obj, &PyClass_Type)) {
1389 PyObject * new_dia;
1390 new_dia = PyObject_CallFunction(dialect_obj, "");
1391 Py_DECREF(dialect_obj);
1392 if (new_dia == NULL)
1393 return NULL;
1394 dialect_obj = new_dia;
1395 }
1396 /* Make sure we finally have an instance */
1397 if (!PyInstance_Check(dialect_obj)) {
1398 PyErr_SetString(PyExc_TypeError, "dialect must be an instance");
1399 Py_DECREF(dialect_obj);
1400 return NULL;
1401 }
1402 if (PyObject_SetAttrString(dialect_obj, "_name", name_obj) < 0) {
1403 Py_DECREF(dialect_obj);
1404 return NULL;
1405 }
1406 if (PyDict_SetItem(dialects, name_obj, dialect_obj) < 0) {
1407 Py_DECREF(dialect_obj);
1408 return NULL;
1409 }
1410 Py_DECREF(dialect_obj);
1411 Py_INCREF(Py_None);
1412 return Py_None;
1413}
1414
1415static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001416csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001417{
Skip Montanarob4a04172003-03-20 23:29:12 +00001418 if (PyDict_DelItem(dialects, name_obj) < 0)
1419 return PyErr_Format(error_obj, "unknown dialect");
1420 Py_INCREF(Py_None);
1421 return Py_None;
1422}
1423
/* _csv.get_dialect(name): thin wrapper that returns the result of
 * get_dialect_from_registry(name_obj) unchanged. */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1429
1430/*
1431 * MODULE
1432 */
1433
/* Module docstring; passed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1491
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001506
1507PyDoc_STRVAR(csv_writer_doc,
1508" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509" [optional keyword args])\n"
1510" for row in csv_writer:\n"
1511" csv_writer.writerow(row)\n"
1512"\n"
1513" [or]\n"
1514"\n"
1515" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1516" [optional keyword args])\n"
1517" csv_writer.writerows(rows)\n"
1518"\n"
1519"The \"fileobj\" argument can be any object that supports the file API.\n");
1520
1521PyDoc_STRVAR(csv_list_dialects_doc,
1522"Return a list of all know dialect names.\n"
1523" names = csv.list_dialects()");
1524
/* Docstring of the module-level get_dialect() function. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");

/* Docstring of the module-level register_dialect() function. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name, dialect)");

/* Docstring of the module-level unregister_dialect() function. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1536
/*
 * Module-level function table handed to Py_InitModule3() in init_csv().
 * reader() and writer() accept positional and keyword arguments;
 * list_dialects() takes none; register_dialect() takes a positional tuple;
 * unregister_dialect() and get_dialect() take exactly one object (METH_O).
 */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { NULL, NULL }      /* sentinel */
};
1552
1553PyMODINIT_FUNC
1554init_csv(void)
1555{
1556 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001557 StyleDesc *style;
1558
1559 if (PyType_Ready(&Dialect_Type) < 0)
1560 return;
1561
1562 if (PyType_Ready(&Reader_Type) < 0)
1563 return;
1564
1565 if (PyType_Ready(&Writer_Type) < 0)
1566 return;
1567
1568 /* Create the module and add the functions */
1569 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1570 if (module == NULL)
1571 return;
1572
1573 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001574 if (PyModule_AddStringConstant(module, "__version__",
1575 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001576 return;
1577
1578 /* Add _dialects dictionary */
1579 dialects = PyDict_New();
1580 if (dialects == NULL)
1581 return;
1582 if (PyModule_AddObject(module, "_dialects", dialects))
1583 return;
1584
1585 /* Add quote styles into dictionary */
1586 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001587 if (PyModule_AddIntConstant(module, style->name,
1588 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001589 return;
1590 }
1591
1592 /* Add the Dialect type */
1593 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1594 return;
1595
1596 /* Add the CSV exception object to the module. */
1597 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1598 if (error_obj == NULL)
1599 return;
1600 PyModule_AddObject(module, "Error", error_obj);
1601}