blob: 88c72481d8901af566b0ad7bf79d9cb8123a8b90 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
/* Version string of this extension module. */
#define MODULE_VERSION "1.0"
16
#include "Python.h"
#include "structmember.h"

#include <limits.h>
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */

/* Docstring helpers: only defined here when the Python headers in use
 * do not already provide them. */
#ifndef PyDoc_STRVAR
#  define PyDoc_VAR(name) static char name[]
#  define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#  ifdef WITH_DOC_STRINGS
#    define PyDoc_STR(str) str
#  else
     /* Docstrings disabled: every docstring collapses to "". */
#    define PyDoc_STR(str) ""
#  endif
#endif /* ifndef PyDoc_STRVAR */

/* Declaration prefix for the module init function; adds C linkage when
 * the file is compiled as C++. */
#ifndef PyMODINIT_FUNC
#  if defined(__cplusplus)
#    define PyMODINIT_FUNC extern "C" void
#  else /* __cplusplus */
#    define PyMODINIT_FUNC void
#  endif /* __cplusplus */
#endif

/* end 2.2 compatibility macros */
41
/* True when `o` is an instance of PyBaseString_Type (or a subtype). */
#define IS_BASESTRING(o) PyObject_TypeCheck(o, &PyBaseString_Type)
44
Skip Montanarob4a04172003-03-20 23:29:12 +000045static PyObject *error_obj; /* CSV exception */
46static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000047static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000048
/* States of the reader's character-at-a-time parser
 * (see parse_process_char). */
typedef enum {
    START_RECORD,           /* nothing of the current record seen yet */
    START_FIELD,            /* expecting the first character of a field */
    ESCAPED_CHAR,           /* escape character seen in an unquoted field */
    IN_FIELD,               /* inside an unquoted field */
    IN_QUOTED_FIELD,        /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD, /* escape character seen in a quoted field */
    QUOTE_IN_QUOTED_FIELD,  /* quote character seen in a quoted field */
    EAT_CRNL                /* consuming line-ending characters */
} ParserState;
54
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of all known quoting styles; the entry whose name is NULL
 * terminates the table. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
71
72typedef struct {
73 PyObject_HEAD
74
75 int doublequote; /* is " represented by ""? */
76 char delimiter; /* field separator */
77 char quotechar; /* quote character */
78 char escapechar; /* escape character */
79 int skipinitialspace; /* ignore spaces following delimiter? */
80 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000081 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000082
83 int strict; /* raise exception on bad CSV */
84} DialectObj;
85
86staticforward PyTypeObject Dialect_Type;
87
88typedef struct {
89 PyObject_HEAD
90
91 PyObject *input_iter; /* iterate over this for input lines */
92
93 DialectObj *dialect; /* parsing dialect */
94
95 PyObject *fields; /* field list for current record */
96 ParserState state; /* current CSV parse state */
97 char *field; /* build current field in here */
98 int field_size; /* size of allocated buffer */
99 int field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000100 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000101 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} ReaderObj;
103
104staticforward PyTypeObject Reader_Type;
105
106#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
107
108typedef struct {
109 PyObject_HEAD
110
111 PyObject *writeline; /* write output lines to this file */
112
113 DialectObj *dialect; /* parsing dialect */
114
115 char *rec; /* buffer for parser.join */
116 int rec_size; /* size of allocated record */
117 int rec_len; /* length of record */
118 int num_fields; /* number of fields in record */
119} WriterObj;
120
121staticforward PyTypeObject Writer_Type;
122
123/*
124 * DIALECT class
125 */
126
127static PyObject *
128get_dialect_from_registry(PyObject * name_obj)
129{
130 PyObject *dialect_obj;
131
132 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000133 if (dialect_obj == NULL) {
134 if (!PyErr_Occurred())
135 PyErr_Format(error_obj, "unknown dialect");
136 }
137 else
138 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000139 return dialect_obj;
140}
141
Skip Montanarob4a04172003-03-20 23:29:12 +0000142static PyObject *
143get_string(PyObject *str)
144{
145 Py_XINCREF(str);
146 return str;
147}
148
Skip Montanarob4a04172003-03-20 23:29:12 +0000149static PyObject *
150get_nullchar_as_None(char c)
151{
152 if (c == '\0') {
153 Py_INCREF(Py_None);
154 return Py_None;
155 }
156 else
157 return PyString_FromStringAndSize((char*)&c, 1);
158}
159
Skip Montanarob4a04172003-03-20 23:29:12 +0000160static PyObject *
161Dialect_get_lineterminator(DialectObj *self)
162{
163 return get_string(self->lineterminator);
164}
165
Skip Montanarob4a04172003-03-20 23:29:12 +0000166static PyObject *
167Dialect_get_escapechar(DialectObj *self)
168{
169 return get_nullchar_as_None(self->escapechar);
170}
171
Andrew McNamara1196cf12005-01-07 04:42:45 +0000172static PyObject *
173Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000174{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000175 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
178static PyObject *
179Dialect_get_quoting(DialectObj *self)
180{
181 return PyInt_FromLong(self->quoting);
182}
183
184static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000185_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000187 if (src == NULL)
188 *target = dflt;
189 else
190 *target = PyObject_IsTrue(src);
191 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static int
195_set_int(const char *name, int *target, PyObject *src, int dflt)
196{
197 if (src == NULL)
198 *target = dflt;
199 else {
200 if (!PyInt_Check(src)) {
201 PyErr_Format(PyExc_TypeError,
202 "\"%s\" must be an integer", name);
203 return -1;
204 }
205 *target = PyInt_AsLong(src);
206 }
207 return 0;
208}
209
210static int
211_set_char(const char *name, char *target, PyObject *src, char dflt)
212{
213 if (src == NULL)
214 *target = dflt;
215 else {
Andrew McNamaraa8292632005-01-10 12:25:11 +0000216 if (src == Py_None || PyString_Size(src) == 0)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000217 *target = '\0';
218 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
219 PyErr_Format(PyExc_TypeError,
220 "\"%s\" must be an 1-character string",
221 name);
222 return -1;
223 }
224 else {
225 char *s = PyString_AsString(src);
226 if (s == NULL)
227 return -1;
228 *target = s[0];
229 }
230 }
231 return 0;
232}
233
234static int
235_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
236{
237 if (src == NULL)
238 *target = PyString_FromString(dflt);
239 else {
240 if (src == Py_None)
241 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000242 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be an string", name);
245 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000246 }
247 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000248 Py_XDECREF(*target);
249 Py_INCREF(src);
250 *target = src;
251 }
252 }
253 return 0;
254}
255
256static int
257dialect_check_quoting(int quoting)
258{
259 StyleDesc *qs = quote_styles;
260
261 for (qs = quote_styles; qs->name; qs++) {
262 if (qs->style == quoting)
263 return 0;
264 }
265 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
266 return -1;
267}
Skip Montanarob4a04172003-03-20 23:29:12 +0000268
269#define D_OFF(x) offsetof(DialectObj, x)
270
271static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000272 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
273 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
274 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
275 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000276 { NULL }
277};
278
279static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000280 { "escapechar", (getter)Dialect_get_escapechar},
281 { "lineterminator", (getter)Dialect_get_lineterminator},
282 { "quotechar", (getter)Dialect_get_quotechar},
283 { "quoting", (getter)Dialect_get_quoting},
284 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000285};
286
287static void
288Dialect_dealloc(DialectObj *self)
289{
290 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000291 self->ob_type->tp_free((PyObject *)self);
292}
293
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000294static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295 "dialect",
296 "delimiter",
297 "doublequote",
298 "escapechar",
299 "lineterminator",
300 "quotechar",
301 "quoting",
302 "skipinitialspace",
303 "strict",
304 NULL
305};
306
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000307static PyObject *
308dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000309{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000310 DialectObj *self;
311 PyObject *ret = NULL;
312 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000313 PyObject *delimiter = NULL;
314 PyObject *doublequote = NULL;
315 PyObject *escapechar = NULL;
316 PyObject *lineterminator = NULL;
317 PyObject *quotechar = NULL;
318 PyObject *quoting = NULL;
319 PyObject *skipinitialspace = NULL;
320 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000321
Andrew McNamara1196cf12005-01-07 04:42:45 +0000322 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
323 "|OOOOOOOOO", dialect_kws,
324 &dialect,
325 &delimiter,
326 &doublequote,
327 &escapechar,
328 &lineterminator,
329 &quotechar,
330 &quoting,
331 &skipinitialspace,
332 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000333 return NULL;
334
335 if (dialect != NULL) {
336 if (IS_BASESTRING(dialect)) {
337 dialect = get_dialect_from_registry(dialect);
338 if (dialect == NULL)
339 return NULL;
340 }
341 else
342 Py_INCREF(dialect);
343 /* Can we reuse this instance? */
344 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
345 delimiter == 0 &&
346 doublequote == 0 &&
347 escapechar == 0 &&
348 lineterminator == 0 &&
349 quotechar == 0 &&
350 quoting == 0 &&
351 skipinitialspace == 0 &&
352 strict == 0)
353 return dialect;
354 }
355
356 self = (DialectObj *)type->tp_alloc(type, 0);
357 if (self == NULL) {
358 Py_XDECREF(dialect);
359 return NULL;
360 }
361 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000362
Andrew McNamara1196cf12005-01-07 04:42:45 +0000363 Py_XINCREF(delimiter);
364 Py_XINCREF(doublequote);
365 Py_XINCREF(escapechar);
366 Py_XINCREF(lineterminator);
367 Py_XINCREF(quotechar);
368 Py_XINCREF(quoting);
369 Py_XINCREF(skipinitialspace);
370 Py_XINCREF(strict);
371 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000372#define DIALECT_GETATTR(v, n) \
373 if (v == NULL) \
374 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000375 DIALECT_GETATTR(delimiter, "delimiter");
376 DIALECT_GETATTR(doublequote, "doublequote");
377 DIALECT_GETATTR(escapechar, "escapechar");
378 DIALECT_GETATTR(lineterminator, "lineterminator");
379 DIALECT_GETATTR(quotechar, "quotechar");
380 DIALECT_GETATTR(quoting, "quoting");
381 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
382 DIALECT_GETATTR(strict, "strict");
383 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000384 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000385
Andrew McNamara1196cf12005-01-07 04:42:45 +0000386 /* check types and convert to C values */
387#define DIASET(meth, name, target, src, dflt) \
388 if (meth(name, target, src, dflt)) \
389 goto err
390 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
391 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
392 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
393 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
394 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
395 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
396 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
397 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000398
Andrew McNamara1196cf12005-01-07 04:42:45 +0000399 /* validate options */
400 if (dialect_check_quoting(self->quoting))
401 goto err;
402 if (self->delimiter == 0) {
403 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
404 goto err;
405 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000406 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000407 self->quoting = QUOTE_NONE;
408 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
409 PyErr_SetString(PyExc_TypeError,
410 "quotechar must be set if quoting enabled");
411 goto err;
412 }
413 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000414 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000415 goto err;
416 }
417
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000418 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000419 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000420err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000421 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000422 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000423 Py_XDECREF(delimiter);
424 Py_XDECREF(doublequote);
425 Py_XDECREF(escapechar);
426 Py_XDECREF(lineterminator);
427 Py_XDECREF(quotechar);
428 Py_XDECREF(quoting);
429 Py_XDECREF(skipinitialspace);
430 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000431 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000432}
433
434
435PyDoc_STRVAR(Dialect_Type_doc,
436"CSV dialect\n"
437"\n"
438"The Dialect type records CSV parsing and generation options.\n");
439
440static PyTypeObject Dialect_Type = {
441 PyObject_HEAD_INIT(NULL)
442 0, /* ob_size */
443 "_csv.Dialect", /* tp_name */
444 sizeof(DialectObj), /* tp_basicsize */
445 0, /* tp_itemsize */
446 /* methods */
447 (destructor)Dialect_dealloc, /* tp_dealloc */
448 (printfunc)0, /* tp_print */
449 (getattrfunc)0, /* tp_getattr */
450 (setattrfunc)0, /* tp_setattr */
451 (cmpfunc)0, /* tp_compare */
452 (reprfunc)0, /* tp_repr */
453 0, /* tp_as_number */
454 0, /* tp_as_sequence */
455 0, /* tp_as_mapping */
456 (hashfunc)0, /* tp_hash */
457 (ternaryfunc)0, /* tp_call */
458 (reprfunc)0, /* tp_str */
459 0, /* tp_getattro */
460 0, /* tp_setattro */
461 0, /* tp_as_buffer */
462 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
463 Dialect_Type_doc, /* tp_doc */
464 0, /* tp_traverse */
465 0, /* tp_clear */
466 0, /* tp_richcompare */
467 0, /* tp_weaklistoffset */
468 0, /* tp_iter */
469 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000470 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000471 Dialect_memberlist, /* tp_members */
472 Dialect_getsetlist, /* tp_getset */
473 0, /* tp_base */
474 0, /* tp_dict */
475 0, /* tp_descr_get */
476 0, /* tp_descr_set */
477 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000478 0, /* tp_init */
479 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000480 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000481 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000482};
483
Andrew McNamara91b97462005-01-11 01:07:23 +0000484/*
485 * Return an instance of the dialect type, given a Python instance or kwarg
486 * description of the dialect
487 */
488static PyObject *
489_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
490{
491 PyObject *ctor_args;
492 PyObject *dialect;
493
494 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
495 if (ctor_args == NULL)
496 return NULL;
497 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
498 Py_DECREF(ctor_args);
499 return dialect;
500}
501
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000502/*
503 * READER
504 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000505static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000506parse_save_field(ReaderObj *self)
507{
508 PyObject *field;
509
510 field = PyString_FromStringAndSize(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000511 if (field == NULL)
512 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000513 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000514 if (self->numeric_field) {
515 PyObject *tmp;
516
517 self->numeric_field = 0;
518 tmp = PyNumber_Float(field);
519 if (tmp == NULL) {
520 Py_DECREF(field);
521 return -1;
522 }
523 Py_DECREF(field);
524 field = tmp;
525 }
526 PyList_Append(self->fields, field);
527 Py_DECREF(field);
528 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000529}
530
531static int
532parse_grow_buff(ReaderObj *self)
533{
534 if (self->field_size == 0) {
535 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000536 if (self->field != NULL)
537 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000538 self->field = PyMem_Malloc(self->field_size);
539 }
540 else {
541 self->field_size *= 2;
542 self->field = PyMem_Realloc(self->field, self->field_size);
543 }
544 if (self->field == NULL) {
545 PyErr_NoMemory();
546 return 0;
547 }
548 return 1;
549}
550
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000551static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000552parse_add_char(ReaderObj *self, char c)
553{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000554 if (self->field_len >= field_limit) {
555 PyErr_Format(error_obj, "field larger than field limit (%ld)",
556 field_limit);
557 return -1;
558 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000559 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000560 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000561 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000562 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000563}
564
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000565static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000566parse_process_char(ReaderObj *self, char c)
567{
568 DialectObj *dialect = self->dialect;
569
570 switch (self->state) {
571 case START_RECORD:
572 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000573 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000574 /* empty line - return [] */
575 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000576 else if (c == '\n' || c == '\r') {
577 self->state = EAT_CRNL;
578 break;
579 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000580 /* normal character - handle as START_FIELD */
581 self->state = START_FIELD;
582 /* fallthru */
583 case START_FIELD:
584 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000585 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000586 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000587 if (parse_save_field(self) < 0)
588 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000589 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000590 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000591 else if (c == dialect->quotechar &&
592 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000593 /* start quoted field */
594 self->state = IN_QUOTED_FIELD;
595 }
596 else if (c == dialect->escapechar) {
597 /* possible escaped character */
598 self->state = ESCAPED_CHAR;
599 }
600 else if (c == ' ' && dialect->skipinitialspace)
601 /* ignore space at start of field */
602 ;
603 else if (c == dialect->delimiter) {
604 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000605 if (parse_save_field(self) < 0)
606 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000607 }
608 else {
609 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000610 if (dialect->quoting == QUOTE_NONNUMERIC)
611 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000612 if (parse_add_char(self, c) < 0)
613 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000614 self->state = IN_FIELD;
615 }
616 break;
617
618 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000619 if (c == '\0')
620 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000621 if (parse_add_char(self, c) < 0)
622 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000623 self->state = IN_FIELD;
624 break;
625
626 case IN_FIELD:
627 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000628 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000629 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000630 if (parse_save_field(self) < 0)
631 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000632 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == dialect->delimiter) {
639 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000640 if (parse_save_field(self) < 0)
641 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000642 self->state = START_FIELD;
643 }
644 else {
645 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000646 if (parse_add_char(self, c) < 0)
647 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000648 }
649 break;
650
651 case IN_QUOTED_FIELD:
652 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000653 if (c == '\0')
654 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000655 else if (c == dialect->escapechar) {
656 /* Possible escape character */
657 self->state = ESCAPE_IN_QUOTED_FIELD;
658 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000659 else if (c == dialect->quotechar &&
660 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000661 if (dialect->doublequote) {
662 /* doublequote; " represented by "" */
663 self->state = QUOTE_IN_QUOTED_FIELD;
664 }
665 else {
666 /* end of quote part of field */
667 self->state = IN_FIELD;
668 }
669 }
670 else {
671 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000672 if (parse_add_char(self, c) < 0)
673 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000674 }
675 break;
676
677 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000678 if (c == '\0')
679 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000680 if (parse_add_char(self, c) < 0)
681 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000682 self->state = IN_QUOTED_FIELD;
683 break;
684
685 case QUOTE_IN_QUOTED_FIELD:
686 /* doublequote - seen a quote in an quoted field */
687 if (dialect->quoting != QUOTE_NONE &&
688 c == dialect->quotechar) {
689 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000690 if (parse_add_char(self, c) < 0)
691 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000692 self->state = IN_QUOTED_FIELD;
693 }
694 else if (c == dialect->delimiter) {
695 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000696 if (parse_save_field(self) < 0)
697 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000698 self->state = START_FIELD;
699 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000700 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000701 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000702 if (parse_save_field(self) < 0)
703 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000704 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000705 }
706 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000707 if (parse_add_char(self, c) < 0)
708 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000709 self->state = IN_FIELD;
710 }
711 else {
712 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000713 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000714 dialect->delimiter,
715 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000716 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000717 }
718 break;
719
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000720 case EAT_CRNL:
721 if (c == '\n' || c == '\r')
722 ;
723 else if (c == '\0')
724 self->state = START_RECORD;
725 else {
726 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
727 return -1;
728 }
729 break;
730
Skip Montanarob4a04172003-03-20 23:29:12 +0000731 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000732 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000733}
734
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000735static int
736parse_reset(ReaderObj *self)
737{
738 Py_XDECREF(self->fields);
739 self->fields = PyList_New(0);
740 if (self->fields == NULL)
741 return -1;
742 self->field_len = 0;
743 self->state = START_RECORD;
744 self->numeric_field = 0;
745 return 0;
746}
Skip Montanarob4a04172003-03-20 23:29:12 +0000747
748static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000749Reader_iternext(ReaderObj *self)
750{
751 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000752 PyObject *fields = NULL;
753 char *line, c;
754 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000756 if (parse_reset(self) < 0)
757 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000758 do {
759 lineobj = PyIter_Next(self->input_iter);
760 if (lineobj == NULL) {
761 /* End of input OR exception */
762 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000763 PyErr_Format(error_obj,
764 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000765 return NULL;
766 }
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000767 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000768
Skip Montanarob4a04172003-03-20 23:29:12 +0000769 line = PyString_AsString(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000770 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000771
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000772 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000773 Py_DECREF(lineobj);
774 return NULL;
775 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000776 while (linelen--) {
777 c = *line++;
778 if (c == '\0') {
779 Py_DECREF(lineobj);
780 PyErr_Format(error_obj,
781 "line contains NULL byte");
782 goto err;
783 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000784 if (parse_process_char(self, c) < 0) {
785 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000786 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000787 }
788 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000789 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000790 if (parse_process_char(self, 0) < 0)
791 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000792 } while (self->state != START_RECORD);
793
794 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000795 self->fields = NULL;
796err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000797 return fields;
798}
799
800static void
801Reader_dealloc(ReaderObj *self)
802{
Andrew McNamara77ead872005-01-10 02:09:41 +0000803 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000804 Py_XDECREF(self->dialect);
805 Py_XDECREF(self->input_iter);
806 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000807 if (self->field != NULL)
808 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000809 PyObject_GC_Del(self);
810}
811
812static int
813Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
814{
815 int err;
816#define VISIT(SLOT) \
817 if (SLOT) { \
818 err = visit((PyObject *)(SLOT), arg); \
819 if (err) \
820 return err; \
821 }
822 VISIT(self->dialect);
823 VISIT(self->input_iter);
824 VISIT(self->fields);
825 return 0;
826}
827
828static int
829Reader_clear(ReaderObj *self)
830{
Thomas Woutersedf17d82006-04-15 17:28:34 +0000831 Py_CLEAR(self->dialect);
832 Py_CLEAR(self->input_iter);
833 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000834 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000835}
836
837PyDoc_STRVAR(Reader_Type_doc,
838"CSV reader\n"
839"\n"
840"Reader objects are responsible for reading and parsing tabular data\n"
841"in CSV format.\n"
842);
843
844static struct PyMethodDef Reader_methods[] = {
845 { NULL, NULL }
846};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000847#define R_OFF(x) offsetof(ReaderObj, x)
848
849static struct PyMemberDef Reader_memberlist[] = {
850 { "dialect", T_OBJECT, R_OFF(dialect), RO },
851 { "line_num", T_ULONG, R_OFF(line_num), RO },
852 { NULL }
853};
854
Skip Montanarob4a04172003-03-20 23:29:12 +0000855
856static PyTypeObject Reader_Type = {
857 PyObject_HEAD_INIT(NULL)
858 0, /*ob_size*/
859 "_csv.reader", /*tp_name*/
860 sizeof(ReaderObj), /*tp_basicsize*/
861 0, /*tp_itemsize*/
862 /* methods */
863 (destructor)Reader_dealloc, /*tp_dealloc*/
864 (printfunc)0, /*tp_print*/
865 (getattrfunc)0, /*tp_getattr*/
866 (setattrfunc)0, /*tp_setattr*/
867 (cmpfunc)0, /*tp_compare*/
868 (reprfunc)0, /*tp_repr*/
869 0, /*tp_as_number*/
870 0, /*tp_as_sequence*/
871 0, /*tp_as_mapping*/
872 (hashfunc)0, /*tp_hash*/
873 (ternaryfunc)0, /*tp_call*/
874 (reprfunc)0, /*tp_str*/
875 0, /*tp_getattro*/
876 0, /*tp_setattro*/
877 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000878 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
879 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000880 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000881 (traverseproc)Reader_traverse, /*tp_traverse*/
882 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000883 0, /*tp_richcompare*/
884 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000885 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000886 (getiterfunc)Reader_iternext, /*tp_iternext*/
887 Reader_methods, /*tp_methods*/
888 Reader_memberlist, /*tp_members*/
889 0, /*tp_getset*/
890
891};
892
893static PyObject *
894csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
895{
Andrew McNamara91b97462005-01-11 01:07:23 +0000896 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000897 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000898
899 if (!self)
900 return NULL;
901
902 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000903 self->fields = NULL;
904 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000905 self->field = NULL;
906 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000907 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000908
909 if (parse_reset(self) < 0) {
910 Py_DECREF(self);
911 return NULL;
912 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000913
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000914 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000915 Py_DECREF(self);
916 return NULL;
917 }
918 self->input_iter = PyObject_GetIter(iterator);
919 if (self->input_iter == NULL) {
920 PyErr_SetString(PyExc_TypeError,
921 "argument 1 must be an iterator");
922 Py_DECREF(self);
923 return NULL;
924 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000925 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000926 if (self->dialect == NULL) {
927 Py_DECREF(self);
928 return NULL;
929 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
Andrew McNamara77ead872005-01-10 02:09:41 +0000931 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000932 return (PyObject *)self;
933}
934
935/*
936 * WRITER
937 */
938/* ---------------------------------------------------------------- */
939static void
940join_reset(WriterObj *self)
941{
942 self->rec_len = 0;
943 self->num_fields = 0;
944}
945
946#define MEM_INCR 32768
947
948/* Calculate new record length or append field to record. Return new
949 * record length.
950 */
951static int
952join_append_data(WriterObj *self, char *field, int quote_empty,
953 int *quoted, int copy_phase)
954{
955 DialectObj *dialect = self->dialect;
956 int i, rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000957 char *lineterm;
958
959#define ADDCH(c) \
960 do {\
961 if (copy_phase) \
962 self->rec[rec_len] = c;\
963 rec_len++;\
964 } while(0)
965
966 lineterm = PyString_AsString(dialect->lineterminator);
967 if (lineterm == NULL)
968 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000969
970 rec_len = self->rec_len;
971
Andrew McNamarac89f2842005-01-12 07:44:42 +0000972 /* If this is not the first field we need a field separator */
973 if (self->num_fields > 0)
974 ADDCH(dialect->delimiter);
975
976 /* Handle preceding quote */
977 if (copy_phase && *quoted)
978 ADDCH(dialect->quotechar);
979
980 /* Copy/count field data */
Skip Montanarob4a04172003-03-20 23:29:12 +0000981 for (i = 0;; i++) {
982 char c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +0000983 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000984
985 if (c == '\0')
986 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000987
Andrew McNamarac89f2842005-01-12 07:44:42 +0000988 if (c == dialect->delimiter ||
989 c == dialect->escapechar ||
990 c == dialect->quotechar ||
991 strchr(lineterm, c)) {
992 if (dialect->quoting == QUOTE_NONE)
993 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000994 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000995 if (c == dialect->quotechar) {
996 if (dialect->doublequote)
997 ADDCH(dialect->quotechar);
998 else
999 want_escape = 1;
1000 }
1001 if (!want_escape)
1002 *quoted = 1;
1003 }
1004 if (want_escape) {
1005 if (!dialect->escapechar) {
1006 PyErr_Format(error_obj,
1007 "need to escape, but no escapechar set");
1008 return -1;
1009 }
1010 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001011 }
1012 }
1013 /* Copy field character into record buffer.
1014 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001015 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001016 }
1017
1018 /* If field is empty check if it needs to be quoted.
1019 */
1020 if (i == 0 && quote_empty) {
1021 if (dialect->quoting == QUOTE_NONE) {
1022 PyErr_Format(error_obj,
1023 "single empty field record must be quoted");
1024 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001025 }
1026 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001027 *quoted = 1;
1028 }
1029
Skip Montanarob4a04172003-03-20 23:29:12 +00001030 if (*quoted) {
1031 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001032 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001033 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001034 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001035 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001036 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001037#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001038}
1039
1040static int
1041join_check_rec_size(WriterObj *self, int rec_len)
1042{
1043 if (rec_len > self->rec_size) {
1044 if (self->rec_size == 0) {
1045 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001046 if (self->rec != NULL)
1047 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001048 self->rec = PyMem_Malloc(self->rec_size);
1049 }
1050 else {
1051 char *old_rec = self->rec;
1052
1053 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1054 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1055 if (self->rec == NULL)
1056 PyMem_Free(old_rec);
1057 }
1058 if (self->rec == NULL) {
1059 PyErr_NoMemory();
1060 return 0;
1061 }
1062 }
1063 return 1;
1064}
1065
1066static int
1067join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1068{
1069 int rec_len;
1070
1071 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1072 if (rec_len < 0)
1073 return 0;
1074
1075 /* grow record buffer if necessary */
1076 if (!join_check_rec_size(self, rec_len))
1077 return 0;
1078
1079 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1080 self->num_fields++;
1081
1082 return 1;
1083}
1084
1085static int
1086join_append_lineterminator(WriterObj *self)
1087{
1088 int terminator_len;
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001089 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001090
1091 terminator_len = PyString_Size(self->dialect->lineterminator);
1092
1093 /* grow record buffer if necessary */
1094 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1095 return 0;
1096
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001097 terminator = PyString_AsString(self->dialect->lineterminator);
1098 if (terminator == NULL)
1099 return 0;
1100 memmove(self->rec + self->rec_len, terminator, terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001101 self->rec_len += terminator_len;
1102
1103 return 1;
1104}
1105
1106PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001107"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001108"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001109"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001110"elements will be converted to string.");
1111
1112static PyObject *
1113csv_writerow(WriterObj *self, PyObject *seq)
1114{
1115 DialectObj *dialect = self->dialect;
1116 int len, i;
1117
1118 if (!PySequence_Check(seq))
1119 return PyErr_Format(error_obj, "sequence expected");
1120
1121 len = PySequence_Length(seq);
1122 if (len < 0)
1123 return NULL;
1124
1125 /* Join all fields in internal buffer.
1126 */
1127 join_reset(self);
1128 for (i = 0; i < len; i++) {
1129 PyObject *field;
1130 int append_ok;
1131 int quoted;
1132
1133 field = PySequence_GetItem(seq, i);
1134 if (field == NULL)
1135 return NULL;
1136
Andrew McNamarac89f2842005-01-12 07:44:42 +00001137 switch (dialect->quoting) {
1138 case QUOTE_NONNUMERIC:
1139 quoted = !PyNumber_Check(field);
1140 break;
1141 case QUOTE_ALL:
1142 quoted = 1;
1143 break;
1144 default:
1145 quoted = 0;
1146 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001147 }
1148
1149 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001150 append_ok = join_append(self,
1151 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001152 &quoted, len == 1);
1153 Py_DECREF(field);
1154 }
1155 else if (field == Py_None) {
1156 append_ok = join_append(self, "", &quoted, len == 1);
1157 Py_DECREF(field);
1158 }
1159 else {
1160 PyObject *str;
1161
1162 str = PyObject_Str(field);
1163 Py_DECREF(field);
1164 if (str == NULL)
1165 return NULL;
1166
Skip Montanaro577c7a72003-04-12 19:17:14 +00001167 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001168 &quoted, len == 1);
1169 Py_DECREF(str);
1170 }
1171 if (!append_ok)
1172 return NULL;
1173 }
1174
1175 /* Add line terminator.
1176 */
1177 if (!join_append_lineterminator(self))
1178 return 0;
1179
1180 return PyObject_CallFunction(self->writeline,
1181 "(s#)", self->rec, self->rec_len);
1182}
1183
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001184PyDoc_STRVAR(csv_writerows_doc,
1185"writerows(sequence of sequences)\n"
1186"\n"
1187"Construct and write a series of sequences to a csv file. Non-string\n"
1188"elements will be converted to string.");
1189
Skip Montanarob4a04172003-03-20 23:29:12 +00001190static PyObject *
1191csv_writerows(WriterObj *self, PyObject *seqseq)
1192{
1193 PyObject *row_iter, *row_obj, *result;
1194
1195 row_iter = PyObject_GetIter(seqseq);
1196 if (row_iter == NULL) {
1197 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001198 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001199 return NULL;
1200 }
1201 while ((row_obj = PyIter_Next(row_iter))) {
1202 result = csv_writerow(self, row_obj);
1203 Py_DECREF(row_obj);
1204 if (!result) {
1205 Py_DECREF(row_iter);
1206 return NULL;
1207 }
1208 else
1209 Py_DECREF(result);
1210 }
1211 Py_DECREF(row_iter);
1212 if (PyErr_Occurred())
1213 return NULL;
1214 Py_INCREF(Py_None);
1215 return Py_None;
1216}
1217
1218static struct PyMethodDef Writer_methods[] = {
1219 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001220 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001221 { NULL, NULL }
1222};
1223
1224#define W_OFF(x) offsetof(WriterObj, x)
1225
1226static struct PyMemberDef Writer_memberlist[] = {
1227 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1228 { NULL }
1229};
1230
1231static void
1232Writer_dealloc(WriterObj *self)
1233{
Andrew McNamara77ead872005-01-10 02:09:41 +00001234 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001235 Py_XDECREF(self->dialect);
1236 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001237 if (self->rec != NULL)
1238 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001239 PyObject_GC_Del(self);
1240}
1241
1242static int
1243Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1244{
1245 int err;
1246#define VISIT(SLOT) \
1247 if (SLOT) { \
1248 err = visit((PyObject *)(SLOT), arg); \
1249 if (err) \
1250 return err; \
1251 }
1252 VISIT(self->dialect);
1253 VISIT(self->writeline);
1254 return 0;
1255}
1256
1257static int
1258Writer_clear(WriterObj *self)
1259{
Thomas Woutersedf17d82006-04-15 17:28:34 +00001260 Py_CLEAR(self->dialect);
1261 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001262 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001263}
1264
1265PyDoc_STRVAR(Writer_Type_doc,
1266"CSV writer\n"
1267"\n"
1268"Writer objects are responsible for generating tabular data\n"
1269"in CSV format from sequence input.\n"
1270);
1271
1272static PyTypeObject Writer_Type = {
1273 PyObject_HEAD_INIT(NULL)
1274 0, /*ob_size*/
1275 "_csv.writer", /*tp_name*/
1276 sizeof(WriterObj), /*tp_basicsize*/
1277 0, /*tp_itemsize*/
1278 /* methods */
1279 (destructor)Writer_dealloc, /*tp_dealloc*/
1280 (printfunc)0, /*tp_print*/
1281 (getattrfunc)0, /*tp_getattr*/
1282 (setattrfunc)0, /*tp_setattr*/
1283 (cmpfunc)0, /*tp_compare*/
1284 (reprfunc)0, /*tp_repr*/
1285 0, /*tp_as_number*/
1286 0, /*tp_as_sequence*/
1287 0, /*tp_as_mapping*/
1288 (hashfunc)0, /*tp_hash*/
1289 (ternaryfunc)0, /*tp_call*/
1290 (reprfunc)0, /*tp_str*/
1291 0, /*tp_getattro*/
1292 0, /*tp_setattro*/
1293 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001294 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1295 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001296 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001297 (traverseproc)Writer_traverse, /*tp_traverse*/
1298 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001299 0, /*tp_richcompare*/
1300 0, /*tp_weaklistoffset*/
1301 (getiterfunc)0, /*tp_iter*/
1302 (getiterfunc)0, /*tp_iternext*/
1303 Writer_methods, /*tp_methods*/
1304 Writer_memberlist, /*tp_members*/
1305 0, /*tp_getset*/
1306};
1307
1308static PyObject *
1309csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1310{
Andrew McNamara91b97462005-01-11 01:07:23 +00001311 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001312 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001313
1314 if (!self)
1315 return NULL;
1316
1317 self->dialect = NULL;
1318 self->writeline = NULL;
1319
1320 self->rec = NULL;
1321 self->rec_size = 0;
1322 self->rec_len = 0;
1323 self->num_fields = 0;
1324
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001325 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001326 Py_DECREF(self);
1327 return NULL;
1328 }
1329 self->writeline = PyObject_GetAttrString(output_file, "write");
1330 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1331 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001332 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001333 Py_DECREF(self);
1334 return NULL;
1335 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001336 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001337 if (self->dialect == NULL) {
1338 Py_DECREF(self);
1339 return NULL;
1340 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001341 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001342 return (PyObject *)self;
1343}
1344
1345/*
1346 * DIALECT REGISTRY
1347 */
1348static PyObject *
1349csv_list_dialects(PyObject *module, PyObject *args)
1350{
1351 return PyDict_Keys(dialects);
1352}
1353
1354static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001355csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001356{
Andrew McNamara86625972005-01-11 01:28:33 +00001357 PyObject *name_obj, *dialect_obj = NULL;
1358 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001359
Andrew McNamara86625972005-01-11 01:28:33 +00001360 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001361 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001362 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001363 PyErr_SetString(PyExc_TypeError,
1364 "dialect name must be a string or unicode");
1365 return NULL;
1366 }
Andrew McNamara86625972005-01-11 01:28:33 +00001367 dialect = _call_dialect(dialect_obj, kwargs);
1368 if (dialect == NULL)
1369 return NULL;
1370 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1371 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001372 return NULL;
1373 }
Andrew McNamara86625972005-01-11 01:28:33 +00001374 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001375 Py_INCREF(Py_None);
1376 return Py_None;
1377}
1378
1379static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001380csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001381{
Skip Montanarob4a04172003-03-20 23:29:12 +00001382 if (PyDict_DelItem(dialects, name_obj) < 0)
1383 return PyErr_Format(error_obj, "unknown dialect");
1384 Py_INCREF(Py_None);
1385 return Py_None;
1386}
1387
1388static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001389csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001390{
Skip Montanarob4a04172003-03-20 23:29:12 +00001391 return get_dialect_from_registry(name_obj);
1392}
1393
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001394static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001395csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001396{
1397 PyObject *new_limit = NULL;
1398 long old_limit = field_limit;
1399
Andrew McNamara31d88962005-01-12 03:45:10 +00001400 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001401 return NULL;
1402 if (new_limit != NULL) {
1403 if (!PyInt_Check(new_limit)) {
1404 PyErr_Format(PyExc_TypeError,
1405 "limit must be an integer");
1406 return NULL;
1407 }
1408 field_limit = PyInt_AsLong(new_limit);
1409 }
1410 return PyInt_FromLong(old_limit);
1411}
1412
Skip Montanarob4a04172003-03-20 23:29:12 +00001413/*
1414 * MODULE
1415 */
1416
/* Module docstring; passed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1474
/* Docstring attached to the module-level "reader" entry in csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001489
/* Docstring attached to the module-level "writer" entry in csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1503
1504PyDoc_STRVAR(csv_list_dialects_doc,
1505"Return a list of all know dialect names.\n"
1506" names = csv.list_dialects()");
1507
/* Docstring attached to the "get_dialect" entry in csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1511
/* Docstring attached to the "register_dialect" entry in csv_methods.
 * NOTE(review): the usage line shows a return value, but
 * csv_register_dialect() returns None -- consider correcting the text.
 */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name, dialect)");
1515
/* Docstring attached to the "unregister_dialect" entry in csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1519
/* Docstring attached to the "field_size_limit" entry in csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1526
Skip Montanarob4a04172003-03-20 23:29:12 +00001527static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001528 { "reader", (PyCFunction)csv_reader,
1529 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1530 { "writer", (PyCFunction)csv_writer,
1531 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1532 { "list_dialects", (PyCFunction)csv_list_dialects,
1533 METH_NOARGS, csv_list_dialects_doc},
1534 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001535 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001536 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1537 METH_O, csv_unregister_dialect_doc},
1538 { "get_dialect", (PyCFunction)csv_get_dialect,
1539 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001540 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1541 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001542 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001543};
1544
1545PyMODINIT_FUNC
1546init_csv(void)
1547{
1548 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001549 StyleDesc *style;
1550
1551 if (PyType_Ready(&Dialect_Type) < 0)
1552 return;
1553
1554 if (PyType_Ready(&Reader_Type) < 0)
1555 return;
1556
1557 if (PyType_Ready(&Writer_Type) < 0)
1558 return;
1559
1560 /* Create the module and add the functions */
1561 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1562 if (module == NULL)
1563 return;
1564
1565 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001566 if (PyModule_AddStringConstant(module, "__version__",
1567 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001568 return;
1569
1570 /* Add _dialects dictionary */
1571 dialects = PyDict_New();
1572 if (dialects == NULL)
1573 return;
1574 if (PyModule_AddObject(module, "_dialects", dialects))
1575 return;
1576
1577 /* Add quote styles into dictionary */
1578 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001579 if (PyModule_AddIntConstant(module, style->name,
1580 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001581 return;
1582 }
1583
1584 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001585 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001586 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1587 return;
1588
1589 /* Add the CSV exception object to the module. */
1590 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1591 if (error_obj == NULL)
1592 return;
1593 PyModule_AddObject(module, "Error", error_obj);
1594}