blob: 69417145066f788590c8edba4b8226dd1282649d [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers, supplied here only when the Python headers lack them. */
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str)          str
#else
/* Docstrings disabled: every docstring collapses to the empty string. */
#define PyDoc_STR(str)          ""
#endif
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Declaration prefix for the module init function. */
#ifdef __cplusplus
#define PyMODINIT_FUNC extern "C" void
#else /* !__cplusplus */
#define PyMODINIT_FUNC void
#endif /* __cplusplus */
#endif
/* end 2.2 compatibility macros */
41
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000042#define IS_BASESTRING(o) \
43 PyObject_TypeCheck(o, &PyBaseString_Type)
44
Skip Montanarob4a04172003-03-20 23:29:12 +000045static PyObject *error_obj; /* CSV exception */
46static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000047static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000048
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD
} ParserState;

/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of known quoting policies; the entry with a NULL name ends it. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { QUOTE_MINIMAL,    NULL }          /* sentinel (all-zero entry) */
};
70
71typedef struct {
72 PyObject_HEAD
73
74 int doublequote; /* is " represented by ""? */
75 char delimiter; /* field separator */
76 char quotechar; /* quote character */
77 char escapechar; /* escape character */
78 int skipinitialspace; /* ignore spaces following delimiter? */
79 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000080 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000081
82 int strict; /* raise exception on bad CSV */
83} DialectObj;
84
85staticforward PyTypeObject Dialect_Type;
86
87typedef struct {
88 PyObject_HEAD
89
90 PyObject *input_iter; /* iterate over this for input lines */
91
92 DialectObj *dialect; /* parsing dialect */
93
94 PyObject *fields; /* field list for current record */
95 ParserState state; /* current CSV parse state */
96 char *field; /* build current field in here */
97 int field_size; /* size of allocated buffer */
98 int field_len; /* length of current field */
99 int had_parse_error; /* did we have a parse error? */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000100 int numeric_field; /* treat field as numeric */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101} ReaderObj;
102
103staticforward PyTypeObject Reader_Type;
104
105#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
106
107typedef struct {
108 PyObject_HEAD
109
110 PyObject *writeline; /* write output lines to this file */
111
112 DialectObj *dialect; /* parsing dialect */
113
114 char *rec; /* buffer for parser.join */
115 int rec_size; /* size of allocated record */
116 int rec_len; /* length of record */
117 int num_fields; /* number of fields in record */
118} WriterObj;
119
120staticforward PyTypeObject Writer_Type;
121
/*
 * DIALECT class
 */
125
126static PyObject *
127get_dialect_from_registry(PyObject * name_obj)
128{
129 PyObject *dialect_obj;
130
131 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000132 if (dialect_obj == NULL) {
133 if (!PyErr_Occurred())
134 PyErr_Format(error_obj, "unknown dialect");
135 }
136 else
137 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000138 return dialect_obj;
139}
140
Skip Montanarob4a04172003-03-20 23:29:12 +0000141static PyObject *
142get_string(PyObject *str)
143{
144 Py_XINCREF(str);
145 return str;
146}
147
Skip Montanarob4a04172003-03-20 23:29:12 +0000148static PyObject *
149get_nullchar_as_None(char c)
150{
151 if (c == '\0') {
152 Py_INCREF(Py_None);
153 return Py_None;
154 }
155 else
156 return PyString_FromStringAndSize((char*)&c, 1);
157}
158
Skip Montanarob4a04172003-03-20 23:29:12 +0000159static PyObject *
160Dialect_get_lineterminator(DialectObj *self)
161{
162 return get_string(self->lineterminator);
163}
164
Skip Montanarob4a04172003-03-20 23:29:12 +0000165static PyObject *
166Dialect_get_escapechar(DialectObj *self)
167{
168 return get_nullchar_as_None(self->escapechar);
169}
170
Andrew McNamara1196cf12005-01-07 04:42:45 +0000171static PyObject *
172Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000173{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000174 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000175}
176
177static PyObject *
178Dialect_get_quoting(DialectObj *self)
179{
180 return PyInt_FromLong(self->quoting);
181}
182
183static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000185{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000186 if (src == NULL)
187 *target = dflt;
188 else
189 *target = PyObject_IsTrue(src);
190 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000191}
192
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193static int
194_set_int(const char *name, int *target, PyObject *src, int dflt)
195{
196 if (src == NULL)
197 *target = dflt;
198 else {
199 if (!PyInt_Check(src)) {
200 PyErr_Format(PyExc_TypeError,
201 "\"%s\" must be an integer", name);
202 return -1;
203 }
204 *target = PyInt_AsLong(src);
205 }
206 return 0;
207}
208
209static int
210_set_char(const char *name, char *target, PyObject *src, char dflt)
211{
212 if (src == NULL)
213 *target = dflt;
214 else {
Andrew McNamaraa8292632005-01-10 12:25:11 +0000215 if (src == Py_None || PyString_Size(src) == 0)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000216 *target = '\0';
217 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an 1-character string",
220 name);
221 return -1;
222 }
223 else {
224 char *s = PyString_AsString(src);
225 if (s == NULL)
226 return -1;
227 *target = s[0];
228 }
229 }
230 return 0;
231}
232
233static int
234_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
235{
236 if (src == NULL)
237 *target = PyString_FromString(dflt);
238 else {
239 if (src == Py_None)
240 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000241 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000242 PyErr_Format(PyExc_TypeError,
243 "\"%s\" must be an string", name);
244 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000245 }
246 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000247 Py_XDECREF(*target);
248 Py_INCREF(src);
249 *target = src;
250 }
251 }
252 return 0;
253}
254
255static int
256dialect_check_quoting(int quoting)
257{
258 StyleDesc *qs = quote_styles;
259
260 for (qs = quote_styles; qs->name; qs++) {
261 if (qs->style == quoting)
262 return 0;
263 }
264 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
265 return -1;
266}
Skip Montanarob4a04172003-03-20 23:29:12 +0000267
268#define D_OFF(x) offsetof(DialectObj, x)
269
270static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000271 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
272 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
273 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
274 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000275 { NULL }
276};
277
278static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000279 { "escapechar", (getter)Dialect_get_escapechar},
280 { "lineterminator", (getter)Dialect_get_lineterminator},
281 { "quotechar", (getter)Dialect_get_quotechar},
282 { "quoting", (getter)Dialect_get_quoting},
283 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000284};
285
286static void
287Dialect_dealloc(DialectObj *self)
288{
289 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000290 self->ob_type->tp_free((PyObject *)self);
291}
292
Andrew McNamara1196cf12005-01-07 04:42:45 +0000293static char *dialect_kws[] = {
294 "dialect",
295 "delimiter",
296 "doublequote",
297 "escapechar",
298 "lineterminator",
299 "quotechar",
300 "quoting",
301 "skipinitialspace",
302 "strict",
303 NULL
304};
305
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000306static PyObject *
307dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000308{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000309 DialectObj *self;
310 PyObject *ret = NULL;
311 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000312 PyObject *delimiter = NULL;
313 PyObject *doublequote = NULL;
314 PyObject *escapechar = NULL;
315 PyObject *lineterminator = NULL;
316 PyObject *quotechar = NULL;
317 PyObject *quoting = NULL;
318 PyObject *skipinitialspace = NULL;
319 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000320
Andrew McNamara1196cf12005-01-07 04:42:45 +0000321 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
322 "|OOOOOOOOO", dialect_kws,
323 &dialect,
324 &delimiter,
325 &doublequote,
326 &escapechar,
327 &lineterminator,
328 &quotechar,
329 &quoting,
330 &skipinitialspace,
331 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000332 return NULL;
333
334 if (dialect != NULL) {
335 if (IS_BASESTRING(dialect)) {
336 dialect = get_dialect_from_registry(dialect);
337 if (dialect == NULL)
338 return NULL;
339 }
340 else
341 Py_INCREF(dialect);
342 /* Can we reuse this instance? */
343 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
344 delimiter == 0 &&
345 doublequote == 0 &&
346 escapechar == 0 &&
347 lineterminator == 0 &&
348 quotechar == 0 &&
349 quoting == 0 &&
350 skipinitialspace == 0 &&
351 strict == 0)
352 return dialect;
353 }
354
355 self = (DialectObj *)type->tp_alloc(type, 0);
356 if (self == NULL) {
357 Py_XDECREF(dialect);
358 return NULL;
359 }
360 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000361
Andrew McNamara1196cf12005-01-07 04:42:45 +0000362 Py_XINCREF(delimiter);
363 Py_XINCREF(doublequote);
364 Py_XINCREF(escapechar);
365 Py_XINCREF(lineterminator);
366 Py_XINCREF(quotechar);
367 Py_XINCREF(quoting);
368 Py_XINCREF(skipinitialspace);
369 Py_XINCREF(strict);
370 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000371#define DIALECT_GETATTR(v, n) \
372 if (v == NULL) \
373 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000374 DIALECT_GETATTR(delimiter, "delimiter");
375 DIALECT_GETATTR(doublequote, "doublequote");
376 DIALECT_GETATTR(escapechar, "escapechar");
377 DIALECT_GETATTR(lineterminator, "lineterminator");
378 DIALECT_GETATTR(quotechar, "quotechar");
379 DIALECT_GETATTR(quoting, "quoting");
380 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
381 DIALECT_GETATTR(strict, "strict");
382 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000383 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000384
Andrew McNamara1196cf12005-01-07 04:42:45 +0000385 /* check types and convert to C values */
386#define DIASET(meth, name, target, src, dflt) \
387 if (meth(name, target, src, dflt)) \
388 goto err
389 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
390 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
391 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
392 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
393 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
394 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
395 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
396 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000397
Andrew McNamara1196cf12005-01-07 04:42:45 +0000398 /* validate options */
399 if (dialect_check_quoting(self->quoting))
400 goto err;
401 if (self->delimiter == 0) {
402 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
403 goto err;
404 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000405 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000406 self->quoting = QUOTE_NONE;
407 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
408 PyErr_SetString(PyExc_TypeError,
409 "quotechar must be set if quoting enabled");
410 goto err;
411 }
412 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000413 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000414 goto err;
415 }
416
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000417 ret = (PyObject *)self;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000418err:
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000419 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000420 Py_XDECREF(delimiter);
421 Py_XDECREF(doublequote);
422 Py_XDECREF(escapechar);
423 Py_XDECREF(lineterminator);
424 Py_XDECREF(quotechar);
425 Py_XDECREF(quoting);
426 Py_XDECREF(skipinitialspace);
427 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000428 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000429}
430
431
432PyDoc_STRVAR(Dialect_Type_doc,
433"CSV dialect\n"
434"\n"
435"The Dialect type records CSV parsing and generation options.\n");
436
437static PyTypeObject Dialect_Type = {
438 PyObject_HEAD_INIT(NULL)
439 0, /* ob_size */
440 "_csv.Dialect", /* tp_name */
441 sizeof(DialectObj), /* tp_basicsize */
442 0, /* tp_itemsize */
443 /* methods */
444 (destructor)Dialect_dealloc, /* tp_dealloc */
445 (printfunc)0, /* tp_print */
446 (getattrfunc)0, /* tp_getattr */
447 (setattrfunc)0, /* tp_setattr */
448 (cmpfunc)0, /* tp_compare */
449 (reprfunc)0, /* tp_repr */
450 0, /* tp_as_number */
451 0, /* tp_as_sequence */
452 0, /* tp_as_mapping */
453 (hashfunc)0, /* tp_hash */
454 (ternaryfunc)0, /* tp_call */
455 (reprfunc)0, /* tp_str */
456 0, /* tp_getattro */
457 0, /* tp_setattro */
458 0, /* tp_as_buffer */
459 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
460 Dialect_Type_doc, /* tp_doc */
461 0, /* tp_traverse */
462 0, /* tp_clear */
463 0, /* tp_richcompare */
464 0, /* tp_weaklistoffset */
465 0, /* tp_iter */
466 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000467 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000468 Dialect_memberlist, /* tp_members */
469 Dialect_getsetlist, /* tp_getset */
470 0, /* tp_base */
471 0, /* tp_dict */
472 0, /* tp_descr_get */
473 0, /* tp_descr_set */
474 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000475 0, /* tp_init */
476 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000477 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000478 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000479};
480
Andrew McNamara91b97462005-01-11 01:07:23 +0000481/*
482 * Return an instance of the dialect type, given a Python instance or kwarg
483 * description of the dialect
484 */
485static PyObject *
486_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
487{
488 PyObject *ctor_args;
489 PyObject *dialect;
490
491 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
492 if (ctor_args == NULL)
493 return NULL;
494 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
495 Py_DECREF(ctor_args);
496 return dialect;
497}
498
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000499static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000500parse_save_field(ReaderObj *self)
501{
502 PyObject *field;
503
504 field = PyString_FromStringAndSize(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000505 if (field == NULL)
506 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000507 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000508 if (self->numeric_field) {
509 PyObject *tmp;
510
511 self->numeric_field = 0;
512 tmp = PyNumber_Float(field);
513 if (tmp == NULL) {
514 Py_DECREF(field);
515 return -1;
516 }
517 Py_DECREF(field);
518 field = tmp;
519 }
520 PyList_Append(self->fields, field);
521 Py_DECREF(field);
522 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000523}
524
525static int
526parse_grow_buff(ReaderObj *self)
527{
528 if (self->field_size == 0) {
529 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000530 if (self->field != NULL)
531 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000532 self->field = PyMem_Malloc(self->field_size);
533 }
534 else {
535 self->field_size *= 2;
536 self->field = PyMem_Realloc(self->field, self->field_size);
537 }
538 if (self->field == NULL) {
539 PyErr_NoMemory();
540 return 0;
541 }
542 return 1;
543}
544
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000545static int
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000546parse_reset(ReaderObj *self)
547{
548 if (self->fields) {
549 Py_DECREF(self->fields);
550 }
551 self->fields = PyList_New(0);
552 if (self->fields == NULL)
553 return -1;
554 self->field_len = 0;
555 self->state = START_RECORD;
556 self->had_parse_error = 0;
557 self->numeric_field = 0;
558 return 0;
559}
560
561static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000562parse_add_char(ReaderObj *self, char c)
563{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000564 if (self->field_len >= field_limit) {
565 PyErr_Format(error_obj, "field larger than field limit (%ld)",
566 field_limit);
567 return -1;
568 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000569 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000570 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000571 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000572 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000573}
574
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000575static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000576parse_process_char(ReaderObj *self, char c)
577{
578 DialectObj *dialect = self->dialect;
579
580 switch (self->state) {
581 case START_RECORD:
582 /* start of record */
583 if (c == '\n')
584 /* empty line - return [] */
585 break;
586 /* normal character - handle as START_FIELD */
587 self->state = START_FIELD;
588 /* fallthru */
589 case START_FIELD:
590 /* expecting field */
591 if (c == '\n') {
592 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000593 if (parse_save_field(self) < 0)
594 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000595 self->state = START_RECORD;
596 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000597 else if (c == dialect->quotechar &&
598 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000599 /* start quoted field */
600 self->state = IN_QUOTED_FIELD;
601 }
602 else if (c == dialect->escapechar) {
603 /* possible escaped character */
604 self->state = ESCAPED_CHAR;
605 }
606 else if (c == ' ' && dialect->skipinitialspace)
607 /* ignore space at start of field */
608 ;
609 else if (c == dialect->delimiter) {
610 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000611 if (parse_save_field(self) < 0)
612 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000613 }
614 else {
615 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000616 if (dialect->quoting == QUOTE_NONNUMERIC)
617 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000618 if (parse_add_char(self, c) < 0)
619 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000620 self->state = IN_FIELD;
621 }
622 break;
623
624 case ESCAPED_CHAR:
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000625 if (parse_add_char(self, c) < 0)
626 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000627 self->state = IN_FIELD;
628 break;
629
630 case IN_FIELD:
631 /* in unquoted field */
632 if (c == '\n') {
633 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000634 if (parse_save_field(self) < 0)
635 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000636 self->state = START_RECORD;
637 }
638 else if (c == dialect->escapechar) {
639 /* possible escaped character */
640 self->state = ESCAPED_CHAR;
641 }
642 else if (c == dialect->delimiter) {
643 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000644 if (parse_save_field(self) < 0)
645 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000646 self->state = START_FIELD;
647 }
648 else {
649 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000650 if (parse_add_char(self, c) < 0)
651 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652 }
653 break;
654
655 case IN_QUOTED_FIELD:
656 /* in quoted field */
657 if (c == '\n') {
658 /* end of line - save '\n' in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000659 if (parse_add_char(self, '\n') < 0)
660 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000661 }
662 else if (c == dialect->escapechar) {
663 /* Possible escape character */
664 self->state = ESCAPE_IN_QUOTED_FIELD;
665 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000666 else if (c == dialect->quotechar &&
667 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000668 if (dialect->doublequote) {
669 /* doublequote; " represented by "" */
670 self->state = QUOTE_IN_QUOTED_FIELD;
671 }
672 else {
673 /* end of quote part of field */
674 self->state = IN_FIELD;
675 }
676 }
677 else {
678 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000679 if (parse_add_char(self, c) < 0)
680 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000681 }
682 break;
683
684 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000685 if (parse_add_char(self, c) < 0)
686 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000687 self->state = IN_QUOTED_FIELD;
688 break;
689
690 case QUOTE_IN_QUOTED_FIELD:
691 /* doublequote - seen a quote in an quoted field */
692 if (dialect->quoting != QUOTE_NONE &&
693 c == dialect->quotechar) {
694 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000695 if (parse_add_char(self, c) < 0)
696 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000697 self->state = IN_QUOTED_FIELD;
698 }
699 else if (c == dialect->delimiter) {
700 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000701 if (parse_save_field(self) < 0)
702 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000703 self->state = START_FIELD;
704 }
705 else if (c == '\n') {
706 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000707 if (parse_save_field(self) < 0)
708 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000709 self->state = START_RECORD;
710 }
711 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000712 if (parse_add_char(self, c) < 0)
713 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000714 self->state = IN_FIELD;
715 }
716 else {
717 /* illegal */
718 self->had_parse_error = 1;
719 PyErr_Format(error_obj, "%c expected after %c",
720 dialect->delimiter,
721 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000722 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000723 }
724 break;
725
726 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000727 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000728}
729
/*
 * READER
 */
733#define R_OFF(x) offsetof(ReaderObj, x)
734
735static struct PyMemberDef Reader_memberlist[] = {
736 { "dialect", T_OBJECT, R_OFF(dialect), RO },
737 { NULL }
738};
739
740static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000741Reader_iternext(ReaderObj *self)
742{
743 PyObject *lineobj;
744 PyObject *fields;
745 char *line;
746
747 do {
748 lineobj = PyIter_Next(self->input_iter);
749 if (lineobj == NULL) {
750 /* End of input OR exception */
751 if (!PyErr_Occurred() && self->field_len != 0)
752 return PyErr_Format(error_obj,
753 "newline inside string");
754 return NULL;
755 }
756
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000757 if (self->had_parse_error)
758 if (parse_reset(self) < 0) {
759 Py_DECREF(lineobj);
760 return NULL;
761 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000762 line = PyString_AsString(lineobj);
763
764 if (line == NULL) {
765 Py_DECREF(lineobj);
766 return NULL;
767 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000768 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000769 self->had_parse_error = 1;
770 Py_DECREF(lineobj);
771 return PyErr_Format(error_obj,
772 "string with NUL bytes");
773 }
774
775 /* Process line of text - send '\n' to processing code to
776 represent end of line. End of line which is not at end of
777 string is an error. */
778 while (*line) {
779 char c;
780
781 c = *line++;
782 if (c == '\r') {
783 c = *line++;
784 if (c == '\0')
785 /* macintosh end of line */
786 break;
787 if (c == '\n') {
788 c = *line++;
789 if (c == '\0')
790 /* DOS end of line */
791 break;
792 }
793 self->had_parse_error = 1;
794 Py_DECREF(lineobj);
795 return PyErr_Format(error_obj,
796 "newline inside string");
797 }
798 if (c == '\n') {
799 c = *line++;
800 if (c == '\0')
801 /* unix end of line */
802 break;
803 self->had_parse_error = 1;
804 Py_DECREF(lineobj);
805 return PyErr_Format(error_obj,
806 "newline inside string");
807 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000808 if (parse_process_char(self, c) < 0) {
809 Py_DECREF(lineobj);
810 return NULL;
811 }
812 }
813 if (parse_process_char(self, '\n') < 0) {
814 Py_DECREF(lineobj);
815 return NULL;
816 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000817 Py_DECREF(lineobj);
818 } while (self->state != START_RECORD);
819
820 fields = self->fields;
821 self->fields = PyList_New(0);
822 return fields;
823}
824
825static void
826Reader_dealloc(ReaderObj *self)
827{
Andrew McNamara77ead872005-01-10 02:09:41 +0000828 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000829 Py_XDECREF(self->dialect);
830 Py_XDECREF(self->input_iter);
831 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000832 if (self->field != NULL)
833 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000834 PyObject_GC_Del(self);
835}
836
837static int
838Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
839{
840 int err;
841#define VISIT(SLOT) \
842 if (SLOT) { \
843 err = visit((PyObject *)(SLOT), arg); \
844 if (err) \
845 return err; \
846 }
847 VISIT(self->dialect);
848 VISIT(self->input_iter);
849 VISIT(self->fields);
850 return 0;
851}
852
853static int
854Reader_clear(ReaderObj *self)
855{
856 Py_XDECREF(self->dialect);
857 Py_XDECREF(self->input_iter);
858 Py_XDECREF(self->fields);
859 self->dialect = NULL;
860 self->input_iter = NULL;
861 self->fields = NULL;
862 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000863}
864
865PyDoc_STRVAR(Reader_Type_doc,
866"CSV reader\n"
867"\n"
868"Reader objects are responsible for reading and parsing tabular data\n"
869"in CSV format.\n"
870);
871
872static struct PyMethodDef Reader_methods[] = {
873 { NULL, NULL }
874};
875
876static PyTypeObject Reader_Type = {
877 PyObject_HEAD_INIT(NULL)
878 0, /*ob_size*/
879 "_csv.reader", /*tp_name*/
880 sizeof(ReaderObj), /*tp_basicsize*/
881 0, /*tp_itemsize*/
882 /* methods */
883 (destructor)Reader_dealloc, /*tp_dealloc*/
884 (printfunc)0, /*tp_print*/
885 (getattrfunc)0, /*tp_getattr*/
886 (setattrfunc)0, /*tp_setattr*/
887 (cmpfunc)0, /*tp_compare*/
888 (reprfunc)0, /*tp_repr*/
889 0, /*tp_as_number*/
890 0, /*tp_as_sequence*/
891 0, /*tp_as_mapping*/
892 (hashfunc)0, /*tp_hash*/
893 (ternaryfunc)0, /*tp_call*/
894 (reprfunc)0, /*tp_str*/
895 0, /*tp_getattro*/
896 0, /*tp_setattro*/
897 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000898 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
899 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000900 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000901 (traverseproc)Reader_traverse, /*tp_traverse*/
902 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000903 0, /*tp_richcompare*/
904 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000905 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000906 (getiterfunc)Reader_iternext, /*tp_iternext*/
907 Reader_methods, /*tp_methods*/
908 Reader_memberlist, /*tp_members*/
909 0, /*tp_getset*/
910
911};
912
913static PyObject *
914csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
915{
Andrew McNamara91b97462005-01-11 01:07:23 +0000916 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000917 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000918
919 if (!self)
920 return NULL;
921
922 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000923 self->fields = NULL;
924 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000925 self->field = NULL;
926 self->field_size = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000927
928 if (parse_reset(self) < 0) {
929 Py_DECREF(self);
930 return NULL;
931 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000932
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000933 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000934 Py_DECREF(self);
935 return NULL;
936 }
937 self->input_iter = PyObject_GetIter(iterator);
938 if (self->input_iter == NULL) {
939 PyErr_SetString(PyExc_TypeError,
940 "argument 1 must be an iterator");
941 Py_DECREF(self);
942 return NULL;
943 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000944 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000945 if (self->dialect == NULL) {
946 Py_DECREF(self);
947 return NULL;
948 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000949
Andrew McNamara77ead872005-01-10 02:09:41 +0000950 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000951 return (PyObject *)self;
952}
953
/*
 * WRITER
 */
/* ---------------------------------------------------------------- */
958static void
959join_reset(WriterObj *self)
960{
961 self->rec_len = 0;
962 self->num_fields = 0;
963}
964
965#define MEM_INCR 32768
966
967/* Calculate new record length or append field to record. Return new
968 * record length.
969 */
970static int
971join_append_data(WriterObj *self, char *field, int quote_empty,
972 int *quoted, int copy_phase)
973{
974 DialectObj *dialect = self->dialect;
975 int i, rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000976 char *lineterm;
977
978#define ADDCH(c) \
979 do {\
980 if (copy_phase) \
981 self->rec[rec_len] = c;\
982 rec_len++;\
983 } while(0)
984
985 lineterm = PyString_AsString(dialect->lineterminator);
986 if (lineterm == NULL)
987 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000988
989 rec_len = self->rec_len;
990
Andrew McNamarac89f2842005-01-12 07:44:42 +0000991 /* If this is not the first field we need a field separator */
992 if (self->num_fields > 0)
993 ADDCH(dialect->delimiter);
994
995 /* Handle preceding quote */
996 if (copy_phase && *quoted)
997 ADDCH(dialect->quotechar);
998
999 /* Copy/count field data */
Skip Montanarob4a04172003-03-20 23:29:12 +00001000 for (i = 0;; i++) {
1001 char c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +00001002 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001003
1004 if (c == '\0')
1005 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001006
Andrew McNamarac89f2842005-01-12 07:44:42 +00001007 if (c == dialect->delimiter ||
1008 c == dialect->escapechar ||
1009 c == dialect->quotechar ||
1010 strchr(lineterm, c)) {
1011 if (dialect->quoting == QUOTE_NONE)
1012 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001013 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001014 if (c == dialect->quotechar) {
1015 if (dialect->doublequote)
1016 ADDCH(dialect->quotechar);
1017 else
1018 want_escape = 1;
1019 }
1020 if (!want_escape)
1021 *quoted = 1;
1022 }
1023 if (want_escape) {
1024 if (!dialect->escapechar) {
1025 PyErr_Format(error_obj,
1026 "need to escape, but no escapechar set");
1027 return -1;
1028 }
1029 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001030 }
1031 }
1032 /* Copy field character into record buffer.
1033 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001034 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001035 }
1036
1037 /* If field is empty check if it needs to be quoted.
1038 */
1039 if (i == 0 && quote_empty) {
1040 if (dialect->quoting == QUOTE_NONE) {
1041 PyErr_Format(error_obj,
1042 "single empty field record must be quoted");
1043 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001044 }
1045 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001046 *quoted = 1;
1047 }
1048
Skip Montanarob4a04172003-03-20 23:29:12 +00001049 if (*quoted) {
1050 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001051 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001052 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001053 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001054 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001055 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001056#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001057}
1058
1059static int
1060join_check_rec_size(WriterObj *self, int rec_len)
1061{
1062 if (rec_len > self->rec_size) {
1063 if (self->rec_size == 0) {
1064 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001065 if (self->rec != NULL)
1066 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001067 self->rec = PyMem_Malloc(self->rec_size);
1068 }
1069 else {
1070 char *old_rec = self->rec;
1071
1072 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1073 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1074 if (self->rec == NULL)
1075 PyMem_Free(old_rec);
1076 }
1077 if (self->rec == NULL) {
1078 PyErr_NoMemory();
1079 return 0;
1080 }
1081 }
1082 return 1;
1083}
1084
1085static int
1086join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1087{
1088 int rec_len;
1089
1090 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1091 if (rec_len < 0)
1092 return 0;
1093
1094 /* grow record buffer if necessary */
1095 if (!join_check_rec_size(self, rec_len))
1096 return 0;
1097
1098 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1099 self->num_fields++;
1100
1101 return 1;
1102}
1103
1104static int
1105join_append_lineterminator(WriterObj *self)
1106{
1107 int terminator_len;
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001108 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001109
1110 terminator_len = PyString_Size(self->dialect->lineterminator);
1111
1112 /* grow record buffer if necessary */
1113 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1114 return 0;
1115
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001116 terminator = PyString_AsString(self->dialect->lineterminator);
1117 if (terminator == NULL)
1118 return 0;
1119 memmove(self->rec + self->rec_len, terminator, terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001120 self->rec_len += terminator_len;
1121
1122 return 1;
1123}
1124
1125PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001126"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001127"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001128"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001129"elements will be converted to string.");
1130
1131static PyObject *
1132csv_writerow(WriterObj *self, PyObject *seq)
1133{
1134 DialectObj *dialect = self->dialect;
1135 int len, i;
1136
1137 if (!PySequence_Check(seq))
1138 return PyErr_Format(error_obj, "sequence expected");
1139
1140 len = PySequence_Length(seq);
1141 if (len < 0)
1142 return NULL;
1143
1144 /* Join all fields in internal buffer.
1145 */
1146 join_reset(self);
1147 for (i = 0; i < len; i++) {
1148 PyObject *field;
1149 int append_ok;
1150 int quoted;
1151
1152 field = PySequence_GetItem(seq, i);
1153 if (field == NULL)
1154 return NULL;
1155
Andrew McNamarac89f2842005-01-12 07:44:42 +00001156 switch (dialect->quoting) {
1157 case QUOTE_NONNUMERIC:
1158 quoted = !PyNumber_Check(field);
1159 break;
1160 case QUOTE_ALL:
1161 quoted = 1;
1162 break;
1163 default:
1164 quoted = 0;
1165 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166 }
1167
1168 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001169 append_ok = join_append(self,
1170 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001171 &quoted, len == 1);
1172 Py_DECREF(field);
1173 }
1174 else if (field == Py_None) {
1175 append_ok = join_append(self, "", &quoted, len == 1);
1176 Py_DECREF(field);
1177 }
1178 else {
1179 PyObject *str;
1180
1181 str = PyObject_Str(field);
1182 Py_DECREF(field);
1183 if (str == NULL)
1184 return NULL;
1185
Skip Montanaro577c7a72003-04-12 19:17:14 +00001186 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001187 &quoted, len == 1);
1188 Py_DECREF(str);
1189 }
1190 if (!append_ok)
1191 return NULL;
1192 }
1193
1194 /* Add line terminator.
1195 */
1196 if (!join_append_lineterminator(self))
1197 return 0;
1198
1199 return PyObject_CallFunction(self->writeline,
1200 "(s#)", self->rec, self->rec_len);
1201}
1202
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001203PyDoc_STRVAR(csv_writerows_doc,
1204"writerows(sequence of sequences)\n"
1205"\n"
1206"Construct and write a series of sequences to a csv file. Non-string\n"
1207"elements will be converted to string.");
1208
Skip Montanarob4a04172003-03-20 23:29:12 +00001209static PyObject *
1210csv_writerows(WriterObj *self, PyObject *seqseq)
1211{
1212 PyObject *row_iter, *row_obj, *result;
1213
1214 row_iter = PyObject_GetIter(seqseq);
1215 if (row_iter == NULL) {
1216 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001217 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001218 return NULL;
1219 }
1220 while ((row_obj = PyIter_Next(row_iter))) {
1221 result = csv_writerow(self, row_obj);
1222 Py_DECREF(row_obj);
1223 if (!result) {
1224 Py_DECREF(row_iter);
1225 return NULL;
1226 }
1227 else
1228 Py_DECREF(result);
1229 }
1230 Py_DECREF(row_iter);
1231 if (PyErr_Occurred())
1232 return NULL;
1233 Py_INCREF(Py_None);
1234 return Py_None;
1235}
1236
1237static struct PyMethodDef Writer_methods[] = {
1238 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001239 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001240 { NULL, NULL }
1241};
1242
1243#define W_OFF(x) offsetof(WriterObj, x)
1244
1245static struct PyMemberDef Writer_memberlist[] = {
1246 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1247 { NULL }
1248};
1249
1250static void
1251Writer_dealloc(WriterObj *self)
1252{
Andrew McNamara77ead872005-01-10 02:09:41 +00001253 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001254 Py_XDECREF(self->dialect);
1255 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001256 if (self->rec != NULL)
1257 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001258 PyObject_GC_Del(self);
1259}
1260
1261static int
1262Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1263{
1264 int err;
1265#define VISIT(SLOT) \
1266 if (SLOT) { \
1267 err = visit((PyObject *)(SLOT), arg); \
1268 if (err) \
1269 return err; \
1270 }
1271 VISIT(self->dialect);
1272 VISIT(self->writeline);
1273 return 0;
1274}
1275
1276static int
1277Writer_clear(WriterObj *self)
1278{
1279 Py_XDECREF(self->dialect);
1280 Py_XDECREF(self->writeline);
1281 self->dialect = NULL;
1282 self->writeline = NULL;
1283 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001284}
1285
1286PyDoc_STRVAR(Writer_Type_doc,
1287"CSV writer\n"
1288"\n"
1289"Writer objects are responsible for generating tabular data\n"
1290"in CSV format from sequence input.\n"
1291);
1292
1293static PyTypeObject Writer_Type = {
1294 PyObject_HEAD_INIT(NULL)
1295 0, /*ob_size*/
1296 "_csv.writer", /*tp_name*/
1297 sizeof(WriterObj), /*tp_basicsize*/
1298 0, /*tp_itemsize*/
1299 /* methods */
1300 (destructor)Writer_dealloc, /*tp_dealloc*/
1301 (printfunc)0, /*tp_print*/
1302 (getattrfunc)0, /*tp_getattr*/
1303 (setattrfunc)0, /*tp_setattr*/
1304 (cmpfunc)0, /*tp_compare*/
1305 (reprfunc)0, /*tp_repr*/
1306 0, /*tp_as_number*/
1307 0, /*tp_as_sequence*/
1308 0, /*tp_as_mapping*/
1309 (hashfunc)0, /*tp_hash*/
1310 (ternaryfunc)0, /*tp_call*/
1311 (reprfunc)0, /*tp_str*/
1312 0, /*tp_getattro*/
1313 0, /*tp_setattro*/
1314 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001315 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1316 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001317 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001318 (traverseproc)Writer_traverse, /*tp_traverse*/
1319 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001320 0, /*tp_richcompare*/
1321 0, /*tp_weaklistoffset*/
1322 (getiterfunc)0, /*tp_iter*/
1323 (getiterfunc)0, /*tp_iternext*/
1324 Writer_methods, /*tp_methods*/
1325 Writer_memberlist, /*tp_members*/
1326 0, /*tp_getset*/
1327};
1328
1329static PyObject *
1330csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1331{
Andrew McNamara91b97462005-01-11 01:07:23 +00001332 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001333 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001334
1335 if (!self)
1336 return NULL;
1337
1338 self->dialect = NULL;
1339 self->writeline = NULL;
1340
1341 self->rec = NULL;
1342 self->rec_size = 0;
1343 self->rec_len = 0;
1344 self->num_fields = 0;
1345
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001346 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001347 Py_DECREF(self);
1348 return NULL;
1349 }
1350 self->writeline = PyObject_GetAttrString(output_file, "write");
1351 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1352 PyErr_SetString(PyExc_TypeError,
1353 "argument 1 must be an instance with a write method");
1354 Py_DECREF(self);
1355 return NULL;
1356 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001357 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001358 if (self->dialect == NULL) {
1359 Py_DECREF(self);
1360 return NULL;
1361 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001362 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001363 return (PyObject *)self;
1364}
1365
1366/*
1367 * DIALECT REGISTRY
1368 */
1369static PyObject *
1370csv_list_dialects(PyObject *module, PyObject *args)
1371{
1372 return PyDict_Keys(dialects);
1373}
1374
1375static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001376csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001377{
Andrew McNamara86625972005-01-11 01:28:33 +00001378 PyObject *name_obj, *dialect_obj = NULL;
1379 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001380
Andrew McNamara86625972005-01-11 01:28:33 +00001381 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001382 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001383 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001384 PyErr_SetString(PyExc_TypeError,
1385 "dialect name must be a string or unicode");
1386 return NULL;
1387 }
Andrew McNamara86625972005-01-11 01:28:33 +00001388 dialect = _call_dialect(dialect_obj, kwargs);
1389 if (dialect == NULL)
1390 return NULL;
1391 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1392 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001393 return NULL;
1394 }
Andrew McNamara86625972005-01-11 01:28:33 +00001395 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001396 Py_INCREF(Py_None);
1397 return Py_None;
1398}
1399
1400static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001401csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001402{
Skip Montanarob4a04172003-03-20 23:29:12 +00001403 if (PyDict_DelItem(dialects, name_obj) < 0)
1404 return PyErr_Format(error_obj, "unknown dialect");
1405 Py_INCREF(Py_None);
1406 return Py_None;
1407}
1408
1409static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001410csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001411{
Skip Montanarob4a04172003-03-20 23:29:12 +00001412 return get_dialect_from_registry(name_obj);
1413}
1414
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001415static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001416csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001417{
1418 PyObject *new_limit = NULL;
1419 long old_limit = field_limit;
1420
Andrew McNamara31d88962005-01-12 03:45:10 +00001421 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001422 return NULL;
1423 if (new_limit != NULL) {
1424 if (!PyInt_Check(new_limit)) {
1425 PyErr_Format(PyExc_TypeError,
1426 "limit must be an integer");
1427 return NULL;
1428 }
1429 field_limit = PyInt_AsLong(new_limit);
1430 }
1431 return PyInt_FromLong(old_limit);
1432}
1433
Skip Montanarob4a04172003-03-20 23:29:12 +00001434/*
1435 * MODULE
1436 */
1437
1438PyDoc_STRVAR(csv_module_doc,
1439"CSV parsing and writing.\n"
1440"\n"
1441"This module provides classes that assist in the reading and writing\n"
1442"of Comma Separated Value (CSV) files, and implements the interface\n"
1443"described by PEP 305. Although many CSV files are simple to parse,\n"
1444"the format is not formally defined by a stable specification and\n"
1445"is subtle enough that parsing lines of a CSV file with something\n"
1446"like line.split(\",\") is bound to fail. The module supports three\n"
1447"basic APIs: reading, writing, and registration of dialects.\n"
1448"\n"
1449"\n"
1450"DIALECT REGISTRATION:\n"
1451"\n"
1452"Readers and writers support a dialect argument, which is a convenient\n"
1453"handle on a group of settings. When the dialect argument is a string,\n"
1454"it identifies one of the dialects previously registered with the module.\n"
1455"If it is a class or instance, the attributes of the argument are used as\n"
1456"the settings for the reader or writer:\n"
1457"\n"
1458" class excel:\n"
1459" delimiter = ','\n"
1460" quotechar = '\"'\n"
1461" escapechar = None\n"
1462" doublequote = True\n"
1463" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001464" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001465" quoting = QUOTE_MINIMAL\n"
1466"\n"
1467"SETTINGS:\n"
1468"\n"
1469" * quotechar - specifies a one-character string to use as the \n"
1470" quoting character. It defaults to '\"'.\n"
1471" * delimiter - specifies a one-character string to use as the \n"
1472" field separator. It defaults to ','.\n"
1473" * skipinitialspace - specifies how to interpret whitespace which\n"
1474" immediately follows a delimiter. It defaults to False, which\n"
1475" means that whitespace immediately following a delimiter is part\n"
1476" of the following field.\n"
1477" * lineterminator - specifies the character sequence which should \n"
1478" terminate rows.\n"
1479" * quoting - controls when quotes should be generated by the writer.\n"
1480" It can take on any of the following module constants:\n"
1481"\n"
1482" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1483" field contains either the quotechar or the delimiter\n"
1484" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1485" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001486" fields which do not parse as integers or floating point\n"
1487" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001488" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1489" * escapechar - specifies a one-character string used to escape \n"
1490" the delimiter when quoting is set to QUOTE_NONE.\n"
1491" * doublequote - controls the handling of quotes inside fields. When\n"
1492" True, two consecutive quotes are interpreted as one during read,\n"
1493" and when writing, each quote character embedded in the data is\n"
1494" written as two quotes\n");
1495
1496PyDoc_STRVAR(csv_reader_doc,
1497" csv_reader = reader(iterable [, dialect='excel']\n"
1498" [optional keyword args])\n"
1499" for row in csv_reader:\n"
1500" process(row)\n"
1501"\n"
1502"The \"iterable\" argument can be any object that returns a line\n"
1503"of input for each iteration, such as a file object or a list. The\n"
1504"optional \"dialect\" parameter is discussed below. The function\n"
1505"also accepts optional keyword arguments which override settings\n"
1506"provided by the dialect.\n"
1507"\n"
1508"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001509"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001510
1511PyDoc_STRVAR(csv_writer_doc,
1512" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1513" [optional keyword args])\n"
1514" for row in csv_writer:\n"
1515" csv_writer.writerow(row)\n"
1516"\n"
1517" [or]\n"
1518"\n"
1519" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1520" [optional keyword args])\n"
1521" csv_writer.writerows(rows)\n"
1522"\n"
1523"The \"fileobj\" argument can be any object that supports the file API.\n");
1524
1525PyDoc_STRVAR(csv_list_dialects_doc,
1526"Return a list of all know dialect names.\n"
1527" names = csv.list_dialects()");
1528
1529PyDoc_STRVAR(csv_get_dialect_doc,
1530"Return the dialect instance associated with name.\n"
1531" dialect = csv.get_dialect(name)");
1532
1533PyDoc_STRVAR(csv_register_dialect_doc,
1534"Create a mapping from a string name to a dialect class.\n"
1535" dialect = csv.register_dialect(name, dialect)");
1536
1537PyDoc_STRVAR(csv_unregister_dialect_doc,
1538"Delete the name/dialect mapping associated with a string name.\n"
1539" csv.unregister_dialect(name)");
1540
Andrew McNamara31d88962005-01-12 03:45:10 +00001541PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001542"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001543" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001544"\n"
1545"Returns old limit. If limit is not given, no new limit is set and\n"
1546"the old limit is returned");
1547
Skip Montanarob4a04172003-03-20 23:29:12 +00001548static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001549 { "reader", (PyCFunction)csv_reader,
1550 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1551 { "writer", (PyCFunction)csv_writer,
1552 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1553 { "list_dialects", (PyCFunction)csv_list_dialects,
1554 METH_NOARGS, csv_list_dialects_doc},
1555 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001556 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001557 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1558 METH_O, csv_unregister_dialect_doc},
1559 { "get_dialect", (PyCFunction)csv_get_dialect,
1560 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001561 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1562 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001563 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001564};
1565
1566PyMODINIT_FUNC
1567init_csv(void)
1568{
1569 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001570 StyleDesc *style;
1571
1572 if (PyType_Ready(&Dialect_Type) < 0)
1573 return;
1574
1575 if (PyType_Ready(&Reader_Type) < 0)
1576 return;
1577
1578 if (PyType_Ready(&Writer_Type) < 0)
1579 return;
1580
1581 /* Create the module and add the functions */
1582 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1583 if (module == NULL)
1584 return;
1585
1586 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001587 if (PyModule_AddStringConstant(module, "__version__",
1588 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001589 return;
1590
1591 /* Add _dialects dictionary */
1592 dialects = PyDict_New();
1593 if (dialects == NULL)
1594 return;
1595 if (PyModule_AddObject(module, "_dialects", dialects))
1596 return;
1597
1598 /* Add quote styles into dictionary */
1599 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001600 if (PyModule_AddIntConstant(module, style->name,
1601 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001602 return;
1603 }
1604
1605 /* Add the Dialect type */
1606 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1607 return;
1608
1609 /* Add the CSV exception object to the module. */
1610 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1611 if (error_obj == NULL)
1612 return;
1613 PyModule_AddObject(module, "Error", error_obj);
1614}