blob: da5ae0d238d6774b9a46f73d3e43b50591e0bfa2 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module. Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
/* Version string of this extension module. */
#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/*
 * Docstring helpers, supplied here only when the Python headers did not
 * already define PyDoc_STRVAR.  PyDoc_STR() collapses the text to an empty
 * string unless WITH_DOC_STRINGS is defined.
 */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

/* Declaration prefix for the module init function (C linkage under C++). */
#ifndef PyMODINIT_FUNC
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif
/* end 2.2 compatibility macros */
41
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000042#define IS_BASESTRING(o) \
43 PyObject_TypeCheck(o, &PyBaseString_Type)
44
Skip Montanarob4a04172003-03-20 23:29:12 +000045static PyObject *error_obj; /* CSV exception */
46static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000047static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000048
49typedef enum {
50 START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
Andrew McNamaraf69d94f2005-01-13 11:30:54 +000051 IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
52 EAT_CRNL
Skip Montanarob4a04172003-03-20 23:29:12 +000053} ParserState;
54
55typedef enum {
56 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
57} QuoteStyle;
58
59typedef struct {
60 QuoteStyle style;
61 char *name;
62} StyleDesc;
63
64static StyleDesc quote_styles[] = {
65 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
66 { QUOTE_ALL, "QUOTE_ALL" },
67 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
68 { QUOTE_NONE, "QUOTE_NONE" },
69 { 0 }
70};
71
72typedef struct {
73 PyObject_HEAD
74
75 int doublequote; /* is " represented by ""? */
76 char delimiter; /* field separator */
77 char quotechar; /* quote character */
78 char escapechar; /* escape character */
79 int skipinitialspace; /* ignore spaces following delimiter? */
80 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000081 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000082
83 int strict; /* raise exception on bad CSV */
84} DialectObj;
85
86staticforward PyTypeObject Dialect_Type;
87
88typedef struct {
89 PyObject_HEAD
90
91 PyObject *input_iter; /* iterate over this for input lines */
92
93 DialectObj *dialect; /* parsing dialect */
94
95 PyObject *fields; /* field list for current record */
96 ParserState state; /* current CSV parse state */
97 char *field; /* build current field in here */
98 int field_size; /* size of allocated buffer */
99 int field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000100 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000101 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} ReaderObj;
103
104staticforward PyTypeObject Reader_Type;
105
106#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
107
108typedef struct {
109 PyObject_HEAD
110
111 PyObject *writeline; /* write output lines to this file */
112
113 DialectObj *dialect; /* parsing dialect */
114
115 char *rec; /* buffer for parser.join */
116 int rec_size; /* size of allocated record */
117 int rec_len; /* length of record */
118 int num_fields; /* number of fields in record */
119} WriterObj;
120
121staticforward PyTypeObject Writer_Type;
122
123/*
124 * DIALECT class
125 */
126
127static PyObject *
128get_dialect_from_registry(PyObject * name_obj)
129{
130 PyObject *dialect_obj;
131
132 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000133 if (dialect_obj == NULL) {
134 if (!PyErr_Occurred())
135 PyErr_Format(error_obj, "unknown dialect");
136 }
137 else
138 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000139 return dialect_obj;
140}
141
Skip Montanarob4a04172003-03-20 23:29:12 +0000142static PyObject *
143get_string(PyObject *str)
144{
145 Py_XINCREF(str);
146 return str;
147}
148
Skip Montanarob4a04172003-03-20 23:29:12 +0000149static PyObject *
150get_nullchar_as_None(char c)
151{
152 if (c == '\0') {
153 Py_INCREF(Py_None);
154 return Py_None;
155 }
156 else
157 return PyString_FromStringAndSize((char*)&c, 1);
158}
159
Skip Montanarob4a04172003-03-20 23:29:12 +0000160static PyObject *
161Dialect_get_lineterminator(DialectObj *self)
162{
163 return get_string(self->lineterminator);
164}
165
Skip Montanarob4a04172003-03-20 23:29:12 +0000166static PyObject *
167Dialect_get_escapechar(DialectObj *self)
168{
169 return get_nullchar_as_None(self->escapechar);
170}
171
Andrew McNamara1196cf12005-01-07 04:42:45 +0000172static PyObject *
173Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000174{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000175 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
178static PyObject *
179Dialect_get_quoting(DialectObj *self)
180{
181 return PyInt_FromLong(self->quoting);
182}
183
184static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000185_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000187 if (src == NULL)
188 *target = dflt;
189 else
190 *target = PyObject_IsTrue(src);
191 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static int
195_set_int(const char *name, int *target, PyObject *src, int dflt)
196{
197 if (src == NULL)
198 *target = dflt;
199 else {
200 if (!PyInt_Check(src)) {
201 PyErr_Format(PyExc_TypeError,
202 "\"%s\" must be an integer", name);
203 return -1;
204 }
205 *target = PyInt_AsLong(src);
206 }
207 return 0;
208}
209
210static int
211_set_char(const char *name, char *target, PyObject *src, char dflt)
212{
213 if (src == NULL)
214 *target = dflt;
215 else {
Andrew McNamaraa8292632005-01-10 12:25:11 +0000216 if (src == Py_None || PyString_Size(src) == 0)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000217 *target = '\0';
218 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
219 PyErr_Format(PyExc_TypeError,
220 "\"%s\" must be an 1-character string",
221 name);
222 return -1;
223 }
224 else {
225 char *s = PyString_AsString(src);
226 if (s == NULL)
227 return -1;
228 *target = s[0];
229 }
230 }
231 return 0;
232}
233
234static int
235_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
236{
237 if (src == NULL)
238 *target = PyString_FromString(dflt);
239 else {
240 if (src == Py_None)
241 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000242 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be an string", name);
245 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000246 }
247 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000248 Py_XDECREF(*target);
249 Py_INCREF(src);
250 *target = src;
251 }
252 }
253 return 0;
254}
255
256static int
257dialect_check_quoting(int quoting)
258{
259 StyleDesc *qs = quote_styles;
260
261 for (qs = quote_styles; qs->name; qs++) {
262 if (qs->style == quoting)
263 return 0;
264 }
265 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
266 return -1;
267}
Skip Montanarob4a04172003-03-20 23:29:12 +0000268
269#define D_OFF(x) offsetof(DialectObj, x)
270
271static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000272 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
273 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
274 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
275 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000276 { NULL }
277};
278
279static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000280 { "escapechar", (getter)Dialect_get_escapechar},
281 { "lineterminator", (getter)Dialect_get_lineterminator},
282 { "quotechar", (getter)Dialect_get_quotechar},
283 { "quoting", (getter)Dialect_get_quoting},
284 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000285};
286
287static void
288Dialect_dealloc(DialectObj *self)
289{
290 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000291 self->ob_type->tp_free((PyObject *)self);
292}
293
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000294static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295 "dialect",
296 "delimiter",
297 "doublequote",
298 "escapechar",
299 "lineterminator",
300 "quotechar",
301 "quoting",
302 "skipinitialspace",
303 "strict",
304 NULL
305};
306
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000307static PyObject *
308dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000309{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000310 DialectObj *self;
311 PyObject *ret = NULL;
312 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000313 PyObject *delimiter = NULL;
314 PyObject *doublequote = NULL;
315 PyObject *escapechar = NULL;
316 PyObject *lineterminator = NULL;
317 PyObject *quotechar = NULL;
318 PyObject *quoting = NULL;
319 PyObject *skipinitialspace = NULL;
320 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000321
Andrew McNamara1196cf12005-01-07 04:42:45 +0000322 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
323 "|OOOOOOOOO", dialect_kws,
324 &dialect,
325 &delimiter,
326 &doublequote,
327 &escapechar,
328 &lineterminator,
329 &quotechar,
330 &quoting,
331 &skipinitialspace,
332 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000333 return NULL;
334
335 if (dialect != NULL) {
336 if (IS_BASESTRING(dialect)) {
337 dialect = get_dialect_from_registry(dialect);
338 if (dialect == NULL)
339 return NULL;
340 }
341 else
342 Py_INCREF(dialect);
343 /* Can we reuse this instance? */
344 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
345 delimiter == 0 &&
346 doublequote == 0 &&
347 escapechar == 0 &&
348 lineterminator == 0 &&
349 quotechar == 0 &&
350 quoting == 0 &&
351 skipinitialspace == 0 &&
352 strict == 0)
353 return dialect;
354 }
355
356 self = (DialectObj *)type->tp_alloc(type, 0);
357 if (self == NULL) {
358 Py_XDECREF(dialect);
359 return NULL;
360 }
361 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000362
Andrew McNamara1196cf12005-01-07 04:42:45 +0000363 Py_XINCREF(delimiter);
364 Py_XINCREF(doublequote);
365 Py_XINCREF(escapechar);
366 Py_XINCREF(lineterminator);
367 Py_XINCREF(quotechar);
368 Py_XINCREF(quoting);
369 Py_XINCREF(skipinitialspace);
370 Py_XINCREF(strict);
371 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000372#define DIALECT_GETATTR(v, n) \
373 if (v == NULL) \
374 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000375 DIALECT_GETATTR(delimiter, "delimiter");
376 DIALECT_GETATTR(doublequote, "doublequote");
377 DIALECT_GETATTR(escapechar, "escapechar");
378 DIALECT_GETATTR(lineterminator, "lineterminator");
379 DIALECT_GETATTR(quotechar, "quotechar");
380 DIALECT_GETATTR(quoting, "quoting");
381 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
382 DIALECT_GETATTR(strict, "strict");
383 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000384 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000385
Andrew McNamara1196cf12005-01-07 04:42:45 +0000386 /* check types and convert to C values */
387#define DIASET(meth, name, target, src, dflt) \
388 if (meth(name, target, src, dflt)) \
389 goto err
390 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
391 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
392 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
393 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
394 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
395 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
396 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
397 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000398
Andrew McNamara1196cf12005-01-07 04:42:45 +0000399 /* validate options */
400 if (dialect_check_quoting(self->quoting))
401 goto err;
402 if (self->delimiter == 0) {
403 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
404 goto err;
405 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000406 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000407 self->quoting = QUOTE_NONE;
408 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
409 PyErr_SetString(PyExc_TypeError,
410 "quotechar must be set if quoting enabled");
411 goto err;
412 }
413 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000414 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000415 goto err;
416 }
417
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000418 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000419 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000420err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000421 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000422 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000423 Py_XDECREF(delimiter);
424 Py_XDECREF(doublequote);
425 Py_XDECREF(escapechar);
426 Py_XDECREF(lineterminator);
427 Py_XDECREF(quotechar);
428 Py_XDECREF(quoting);
429 Py_XDECREF(skipinitialspace);
430 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000431 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000432}
433
434
435PyDoc_STRVAR(Dialect_Type_doc,
436"CSV dialect\n"
437"\n"
438"The Dialect type records CSV parsing and generation options.\n");
439
440static PyTypeObject Dialect_Type = {
441 PyObject_HEAD_INIT(NULL)
442 0, /* ob_size */
443 "_csv.Dialect", /* tp_name */
444 sizeof(DialectObj), /* tp_basicsize */
445 0, /* tp_itemsize */
446 /* methods */
447 (destructor)Dialect_dealloc, /* tp_dealloc */
448 (printfunc)0, /* tp_print */
449 (getattrfunc)0, /* tp_getattr */
450 (setattrfunc)0, /* tp_setattr */
451 (cmpfunc)0, /* tp_compare */
452 (reprfunc)0, /* tp_repr */
453 0, /* tp_as_number */
454 0, /* tp_as_sequence */
455 0, /* tp_as_mapping */
456 (hashfunc)0, /* tp_hash */
457 (ternaryfunc)0, /* tp_call */
458 (reprfunc)0, /* tp_str */
459 0, /* tp_getattro */
460 0, /* tp_setattro */
461 0, /* tp_as_buffer */
462 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
463 Dialect_Type_doc, /* tp_doc */
464 0, /* tp_traverse */
465 0, /* tp_clear */
466 0, /* tp_richcompare */
467 0, /* tp_weaklistoffset */
468 0, /* tp_iter */
469 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000470 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000471 Dialect_memberlist, /* tp_members */
472 Dialect_getsetlist, /* tp_getset */
473 0, /* tp_base */
474 0, /* tp_dict */
475 0, /* tp_descr_get */
476 0, /* tp_descr_set */
477 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000478 0, /* tp_init */
479 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000480 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000481 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000482};
483
Andrew McNamara91b97462005-01-11 01:07:23 +0000484/*
485 * Return an instance of the dialect type, given a Python instance or kwarg
486 * description of the dialect
487 */
488static PyObject *
489_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
490{
491 PyObject *ctor_args;
492 PyObject *dialect;
493
494 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
495 if (ctor_args == NULL)
496 return NULL;
497 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
498 Py_DECREF(ctor_args);
499 return dialect;
500}
501
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000502/*
503 * READER
504 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000505static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000506parse_save_field(ReaderObj *self)
507{
508 PyObject *field;
509
510 field = PyString_FromStringAndSize(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000511 if (field == NULL)
512 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000513 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000514 if (self->numeric_field) {
515 PyObject *tmp;
516
517 self->numeric_field = 0;
518 tmp = PyNumber_Float(field);
519 if (tmp == NULL) {
520 Py_DECREF(field);
521 return -1;
522 }
523 Py_DECREF(field);
524 field = tmp;
525 }
526 PyList_Append(self->fields, field);
527 Py_DECREF(field);
528 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000529}
530
531static int
532parse_grow_buff(ReaderObj *self)
533{
534 if (self->field_size == 0) {
535 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000536 if (self->field != NULL)
537 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000538 self->field = PyMem_Malloc(self->field_size);
539 }
540 else {
541 self->field_size *= 2;
542 self->field = PyMem_Realloc(self->field, self->field_size);
543 }
544 if (self->field == NULL) {
545 PyErr_NoMemory();
546 return 0;
547 }
548 return 1;
549}
550
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000551static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000552parse_add_char(ReaderObj *self, char c)
553{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000554 if (self->field_len >= field_limit) {
555 PyErr_Format(error_obj, "field larger than field limit (%ld)",
556 field_limit);
557 return -1;
558 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000559 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000560 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000561 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000562 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000563}
564
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000565static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000566parse_process_char(ReaderObj *self, char c)
567{
568 DialectObj *dialect = self->dialect;
569
570 switch (self->state) {
571 case START_RECORD:
572 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000573 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000574 /* empty line - return [] */
575 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000576 else if (c == '\n' || c == '\r') {
577 self->state = EAT_CRNL;
578 break;
579 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000580 /* normal character - handle as START_FIELD */
581 self->state = START_FIELD;
582 /* fallthru */
583 case START_FIELD:
584 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000585 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000586 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000587 if (parse_save_field(self) < 0)
588 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000589 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000590 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000591 else if (c == dialect->quotechar &&
592 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000593 /* start quoted field */
594 self->state = IN_QUOTED_FIELD;
595 }
596 else if (c == dialect->escapechar) {
597 /* possible escaped character */
598 self->state = ESCAPED_CHAR;
599 }
600 else if (c == ' ' && dialect->skipinitialspace)
601 /* ignore space at start of field */
602 ;
603 else if (c == dialect->delimiter) {
604 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000605 if (parse_save_field(self) < 0)
606 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000607 }
608 else {
609 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000610 if (dialect->quoting == QUOTE_NONNUMERIC)
611 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000612 if (parse_add_char(self, c) < 0)
613 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000614 self->state = IN_FIELD;
615 }
616 break;
617
618 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000619 if (c == '\0')
620 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000621 if (parse_add_char(self, c) < 0)
622 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000623 self->state = IN_FIELD;
624 break;
625
626 case IN_FIELD:
627 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000628 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000629 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000630 if (parse_save_field(self) < 0)
631 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000632 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == dialect->delimiter) {
639 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000640 if (parse_save_field(self) < 0)
641 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000642 self->state = START_FIELD;
643 }
644 else {
645 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000646 if (parse_add_char(self, c) < 0)
647 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000648 }
649 break;
650
651 case IN_QUOTED_FIELD:
652 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000653 if (c == '\0')
654 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000655 else if (c == dialect->escapechar) {
656 /* Possible escape character */
657 self->state = ESCAPE_IN_QUOTED_FIELD;
658 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000659 else if (c == dialect->quotechar &&
660 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000661 if (dialect->doublequote) {
662 /* doublequote; " represented by "" */
663 self->state = QUOTE_IN_QUOTED_FIELD;
664 }
665 else {
666 /* end of quote part of field */
667 self->state = IN_FIELD;
668 }
669 }
670 else {
671 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000672 if (parse_add_char(self, c) < 0)
673 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000674 }
675 break;
676
677 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000678 if (c == '\0')
679 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000680 if (parse_add_char(self, c) < 0)
681 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000682 self->state = IN_QUOTED_FIELD;
683 break;
684
685 case QUOTE_IN_QUOTED_FIELD:
686 /* doublequote - seen a quote in an quoted field */
687 if (dialect->quoting != QUOTE_NONE &&
688 c == dialect->quotechar) {
689 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000690 if (parse_add_char(self, c) < 0)
691 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000692 self->state = IN_QUOTED_FIELD;
693 }
694 else if (c == dialect->delimiter) {
695 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000696 if (parse_save_field(self) < 0)
697 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000698 self->state = START_FIELD;
699 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000700 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000701 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000702 if (parse_save_field(self) < 0)
703 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000704 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000705 }
706 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000707 if (parse_add_char(self, c) < 0)
708 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000709 self->state = IN_FIELD;
710 }
711 else {
712 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000713 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000714 dialect->delimiter,
715 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000716 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000717 }
718 break;
719
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000720 case EAT_CRNL:
721 if (c == '\n' || c == '\r')
722 ;
723 else if (c == '\0')
724 self->state = START_RECORD;
725 else {
726 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
727 return -1;
728 }
729 break;
730
Skip Montanarob4a04172003-03-20 23:29:12 +0000731 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000732 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000733}
734
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000735static int
736parse_reset(ReaderObj *self)
737{
738 Py_XDECREF(self->fields);
739 self->fields = PyList_New(0);
740 if (self->fields == NULL)
741 return -1;
742 self->field_len = 0;
743 self->state = START_RECORD;
744 self->numeric_field = 0;
745 return 0;
746}
Skip Montanarob4a04172003-03-20 23:29:12 +0000747
748static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000749Reader_iternext(ReaderObj *self)
750{
751 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000752 PyObject *fields = NULL;
753 char *line, c;
754 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000756 if (parse_reset(self) < 0)
757 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000758 do {
759 lineobj = PyIter_Next(self->input_iter);
760 if (lineobj == NULL) {
761 /* End of input OR exception */
762 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000763 PyErr_Format(error_obj,
764 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000765 return NULL;
766 }
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000767 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000768
Skip Montanarob4a04172003-03-20 23:29:12 +0000769 line = PyString_AsString(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000770 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000771
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000772 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000773 Py_DECREF(lineobj);
774 return NULL;
775 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000776 while (linelen--) {
777 c = *line++;
778 if (c == '\0') {
779 Py_DECREF(lineobj);
780 PyErr_Format(error_obj,
781 "line contains NULL byte");
782 goto err;
783 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000784 if (parse_process_char(self, c) < 0) {
785 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000786 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000787 }
788 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000789 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000790 if (parse_process_char(self, 0) < 0)
791 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000792 } while (self->state != START_RECORD);
793
794 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000795 self->fields = NULL;
796err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000797 return fields;
798}
799
800static void
801Reader_dealloc(ReaderObj *self)
802{
Andrew McNamara77ead872005-01-10 02:09:41 +0000803 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000804 Py_XDECREF(self->dialect);
805 Py_XDECREF(self->input_iter);
806 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000807 if (self->field != NULL)
808 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000809 PyObject_GC_Del(self);
810}
811
812static int
813Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
814{
815 int err;
816#define VISIT(SLOT) \
817 if (SLOT) { \
818 err = visit((PyObject *)(SLOT), arg); \
819 if (err) \
820 return err; \
821 }
822 VISIT(self->dialect);
823 VISIT(self->input_iter);
824 VISIT(self->fields);
825 return 0;
826}
827
828static int
829Reader_clear(ReaderObj *self)
830{
831 Py_XDECREF(self->dialect);
832 Py_XDECREF(self->input_iter);
833 Py_XDECREF(self->fields);
834 self->dialect = NULL;
835 self->input_iter = NULL;
836 self->fields = NULL;
837 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000838}
839
840PyDoc_STRVAR(Reader_Type_doc,
841"CSV reader\n"
842"\n"
843"Reader objects are responsible for reading and parsing tabular data\n"
844"in CSV format.\n"
845);
846
847static struct PyMethodDef Reader_methods[] = {
848 { NULL, NULL }
849};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000850#define R_OFF(x) offsetof(ReaderObj, x)
851
852static struct PyMemberDef Reader_memberlist[] = {
853 { "dialect", T_OBJECT, R_OFF(dialect), RO },
854 { "line_num", T_ULONG, R_OFF(line_num), RO },
855 { NULL }
856};
857
Skip Montanarob4a04172003-03-20 23:29:12 +0000858
859static PyTypeObject Reader_Type = {
860 PyObject_HEAD_INIT(NULL)
861 0, /*ob_size*/
862 "_csv.reader", /*tp_name*/
863 sizeof(ReaderObj), /*tp_basicsize*/
864 0, /*tp_itemsize*/
865 /* methods */
866 (destructor)Reader_dealloc, /*tp_dealloc*/
867 (printfunc)0, /*tp_print*/
868 (getattrfunc)0, /*tp_getattr*/
869 (setattrfunc)0, /*tp_setattr*/
870 (cmpfunc)0, /*tp_compare*/
871 (reprfunc)0, /*tp_repr*/
872 0, /*tp_as_number*/
873 0, /*tp_as_sequence*/
874 0, /*tp_as_mapping*/
875 (hashfunc)0, /*tp_hash*/
876 (ternaryfunc)0, /*tp_call*/
877 (reprfunc)0, /*tp_str*/
878 0, /*tp_getattro*/
879 0, /*tp_setattro*/
880 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000881 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
882 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000883 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000884 (traverseproc)Reader_traverse, /*tp_traverse*/
885 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000886 0, /*tp_richcompare*/
887 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000888 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000889 (getiterfunc)Reader_iternext, /*tp_iternext*/
890 Reader_methods, /*tp_methods*/
891 Reader_memberlist, /*tp_members*/
892 0, /*tp_getset*/
893
894};
895
896static PyObject *
897csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
898{
Andrew McNamara91b97462005-01-11 01:07:23 +0000899 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000900 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000901
902 if (!self)
903 return NULL;
904
905 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000906 self->fields = NULL;
907 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000908 self->field = NULL;
909 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000910 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000911
912 if (parse_reset(self) < 0) {
913 Py_DECREF(self);
914 return NULL;
915 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000916
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000917 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000918 Py_DECREF(self);
919 return NULL;
920 }
921 self->input_iter = PyObject_GetIter(iterator);
922 if (self->input_iter == NULL) {
923 PyErr_SetString(PyExc_TypeError,
924 "argument 1 must be an iterator");
925 Py_DECREF(self);
926 return NULL;
927 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000928 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000929 if (self->dialect == NULL) {
930 Py_DECREF(self);
931 return NULL;
932 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000933
Andrew McNamara77ead872005-01-10 02:09:41 +0000934 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000935 return (PyObject *)self;
936}
937
938/*
939 * WRITER
940 */
941/* ---------------------------------------------------------------- */
942static void
943join_reset(WriterObj *self)
944{
945 self->rec_len = 0;
946 self->num_fields = 0;
947}
948
/* The writer's record buffer is sized in multiples of this many bytes. */
#define MEM_INCR (32 * 1024)
950
951/* Calculate new record length or append field to record. Return new
952 * record length.
953 */
954static int
955join_append_data(WriterObj *self, char *field, int quote_empty,
956 int *quoted, int copy_phase)
957{
958 DialectObj *dialect = self->dialect;
959 int i, rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000960 char *lineterm;
961
962#define ADDCH(c) \
963 do {\
964 if (copy_phase) \
965 self->rec[rec_len] = c;\
966 rec_len++;\
967 } while(0)
968
969 lineterm = PyString_AsString(dialect->lineterminator);
970 if (lineterm == NULL)
971 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000972
973 rec_len = self->rec_len;
974
Andrew McNamarac89f2842005-01-12 07:44:42 +0000975 /* If this is not the first field we need a field separator */
976 if (self->num_fields > 0)
977 ADDCH(dialect->delimiter);
978
979 /* Handle preceding quote */
980 if (copy_phase && *quoted)
981 ADDCH(dialect->quotechar);
982
983 /* Copy/count field data */
Skip Montanarob4a04172003-03-20 23:29:12 +0000984 for (i = 0;; i++) {
985 char c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +0000986 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000987
988 if (c == '\0')
989 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000990
Andrew McNamarac89f2842005-01-12 07:44:42 +0000991 if (c == dialect->delimiter ||
992 c == dialect->escapechar ||
993 c == dialect->quotechar ||
994 strchr(lineterm, c)) {
995 if (dialect->quoting == QUOTE_NONE)
996 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000997 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000998 if (c == dialect->quotechar) {
999 if (dialect->doublequote)
1000 ADDCH(dialect->quotechar);
1001 else
1002 want_escape = 1;
1003 }
1004 if (!want_escape)
1005 *quoted = 1;
1006 }
1007 if (want_escape) {
1008 if (!dialect->escapechar) {
1009 PyErr_Format(error_obj,
1010 "need to escape, but no escapechar set");
1011 return -1;
1012 }
1013 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001014 }
1015 }
1016 /* Copy field character into record buffer.
1017 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001018 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001019 }
1020
1021 /* If field is empty check if it needs to be quoted.
1022 */
1023 if (i == 0 && quote_empty) {
1024 if (dialect->quoting == QUOTE_NONE) {
1025 PyErr_Format(error_obj,
1026 "single empty field record must be quoted");
1027 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001028 }
1029 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001030 *quoted = 1;
1031 }
1032
Skip Montanarob4a04172003-03-20 23:29:12 +00001033 if (*quoted) {
1034 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001035 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001036 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001037 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001038 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001039 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001040#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001041}
1042
1043static int
1044join_check_rec_size(WriterObj *self, int rec_len)
1045{
1046 if (rec_len > self->rec_size) {
1047 if (self->rec_size == 0) {
1048 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001049 if (self->rec != NULL)
1050 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001051 self->rec = PyMem_Malloc(self->rec_size);
1052 }
1053 else {
1054 char *old_rec = self->rec;
1055
1056 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1057 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1058 if (self->rec == NULL)
1059 PyMem_Free(old_rec);
1060 }
1061 if (self->rec == NULL) {
1062 PyErr_NoMemory();
1063 return 0;
1064 }
1065 }
1066 return 1;
1067}
1068
1069static int
1070join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1071{
1072 int rec_len;
1073
1074 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1075 if (rec_len < 0)
1076 return 0;
1077
1078 /* grow record buffer if necessary */
1079 if (!join_check_rec_size(self, rec_len))
1080 return 0;
1081
1082 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1083 self->num_fields++;
1084
1085 return 1;
1086}
1087
1088static int
1089join_append_lineterminator(WriterObj *self)
1090{
1091 int terminator_len;
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001092 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001093
1094 terminator_len = PyString_Size(self->dialect->lineterminator);
1095
1096 /* grow record buffer if necessary */
1097 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1098 return 0;
1099
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001100 terminator = PyString_AsString(self->dialect->lineterminator);
1101 if (terminator == NULL)
1102 return 0;
1103 memmove(self->rec + self->rec_len, terminator, terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001104 self->rec_len += terminator_len;
1105
1106 return 1;
1107}
1108
1109PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001110"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001111"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001112"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001113"elements will be converted to string.");
1114
1115static PyObject *
1116csv_writerow(WriterObj *self, PyObject *seq)
1117{
1118 DialectObj *dialect = self->dialect;
1119 int len, i;
1120
1121 if (!PySequence_Check(seq))
1122 return PyErr_Format(error_obj, "sequence expected");
1123
1124 len = PySequence_Length(seq);
1125 if (len < 0)
1126 return NULL;
1127
1128 /* Join all fields in internal buffer.
1129 */
1130 join_reset(self);
1131 for (i = 0; i < len; i++) {
1132 PyObject *field;
1133 int append_ok;
1134 int quoted;
1135
1136 field = PySequence_GetItem(seq, i);
1137 if (field == NULL)
1138 return NULL;
1139
Andrew McNamarac89f2842005-01-12 07:44:42 +00001140 switch (dialect->quoting) {
1141 case QUOTE_NONNUMERIC:
1142 quoted = !PyNumber_Check(field);
1143 break;
1144 case QUOTE_ALL:
1145 quoted = 1;
1146 break;
1147 default:
1148 quoted = 0;
1149 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150 }
1151
1152 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001153 append_ok = join_append(self,
1154 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001155 &quoted, len == 1);
1156 Py_DECREF(field);
1157 }
1158 else if (field == Py_None) {
1159 append_ok = join_append(self, "", &quoted, len == 1);
1160 Py_DECREF(field);
1161 }
1162 else {
1163 PyObject *str;
1164
1165 str = PyObject_Str(field);
1166 Py_DECREF(field);
1167 if (str == NULL)
1168 return NULL;
1169
Skip Montanaro577c7a72003-04-12 19:17:14 +00001170 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001171 &quoted, len == 1);
1172 Py_DECREF(str);
1173 }
1174 if (!append_ok)
1175 return NULL;
1176 }
1177
1178 /* Add line terminator.
1179 */
1180 if (!join_append_lineterminator(self))
1181 return 0;
1182
1183 return PyObject_CallFunction(self->writeline,
1184 "(s#)", self->rec, self->rec_len);
1185}
1186
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001187PyDoc_STRVAR(csv_writerows_doc,
1188"writerows(sequence of sequences)\n"
1189"\n"
1190"Construct and write a series of sequences to a csv file. Non-string\n"
1191"elements will be converted to string.");
1192
Skip Montanarob4a04172003-03-20 23:29:12 +00001193static PyObject *
1194csv_writerows(WriterObj *self, PyObject *seqseq)
1195{
1196 PyObject *row_iter, *row_obj, *result;
1197
1198 row_iter = PyObject_GetIter(seqseq);
1199 if (row_iter == NULL) {
1200 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001201 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001202 return NULL;
1203 }
1204 while ((row_obj = PyIter_Next(row_iter))) {
1205 result = csv_writerow(self, row_obj);
1206 Py_DECREF(row_obj);
1207 if (!result) {
1208 Py_DECREF(row_iter);
1209 return NULL;
1210 }
1211 else
1212 Py_DECREF(result);
1213 }
1214 Py_DECREF(row_iter);
1215 if (PyErr_Occurred())
1216 return NULL;
1217 Py_INCREF(Py_None);
1218 return Py_None;
1219}
1220
1221static struct PyMethodDef Writer_methods[] = {
1222 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001223 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001224 { NULL, NULL }
1225};
1226
1227#define W_OFF(x) offsetof(WriterObj, x)
1228
1229static struct PyMemberDef Writer_memberlist[] = {
1230 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1231 { NULL }
1232};
1233
1234static void
1235Writer_dealloc(WriterObj *self)
1236{
Andrew McNamara77ead872005-01-10 02:09:41 +00001237 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001238 Py_XDECREF(self->dialect);
1239 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001240 if (self->rec != NULL)
1241 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001242 PyObject_GC_Del(self);
1243}
1244
1245static int
1246Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1247{
1248 int err;
1249#define VISIT(SLOT) \
1250 if (SLOT) { \
1251 err = visit((PyObject *)(SLOT), arg); \
1252 if (err) \
1253 return err; \
1254 }
1255 VISIT(self->dialect);
1256 VISIT(self->writeline);
1257 return 0;
1258}
1259
1260static int
1261Writer_clear(WriterObj *self)
1262{
1263 Py_XDECREF(self->dialect);
1264 Py_XDECREF(self->writeline);
1265 self->dialect = NULL;
1266 self->writeline = NULL;
1267 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001268}
1269
1270PyDoc_STRVAR(Writer_Type_doc,
1271"CSV writer\n"
1272"\n"
1273"Writer objects are responsible for generating tabular data\n"
1274"in CSV format from sequence input.\n"
1275);
1276
1277static PyTypeObject Writer_Type = {
1278 PyObject_HEAD_INIT(NULL)
1279 0, /*ob_size*/
1280 "_csv.writer", /*tp_name*/
1281 sizeof(WriterObj), /*tp_basicsize*/
1282 0, /*tp_itemsize*/
1283 /* methods */
1284 (destructor)Writer_dealloc, /*tp_dealloc*/
1285 (printfunc)0, /*tp_print*/
1286 (getattrfunc)0, /*tp_getattr*/
1287 (setattrfunc)0, /*tp_setattr*/
1288 (cmpfunc)0, /*tp_compare*/
1289 (reprfunc)0, /*tp_repr*/
1290 0, /*tp_as_number*/
1291 0, /*tp_as_sequence*/
1292 0, /*tp_as_mapping*/
1293 (hashfunc)0, /*tp_hash*/
1294 (ternaryfunc)0, /*tp_call*/
1295 (reprfunc)0, /*tp_str*/
1296 0, /*tp_getattro*/
1297 0, /*tp_setattro*/
1298 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001299 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1300 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001301 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001302 (traverseproc)Writer_traverse, /*tp_traverse*/
1303 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001304 0, /*tp_richcompare*/
1305 0, /*tp_weaklistoffset*/
1306 (getiterfunc)0, /*tp_iter*/
1307 (getiterfunc)0, /*tp_iternext*/
1308 Writer_methods, /*tp_methods*/
1309 Writer_memberlist, /*tp_members*/
1310 0, /*tp_getset*/
1311};
1312
1313static PyObject *
1314csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1315{
Andrew McNamara91b97462005-01-11 01:07:23 +00001316 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001317 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001318
1319 if (!self)
1320 return NULL;
1321
1322 self->dialect = NULL;
1323 self->writeline = NULL;
1324
1325 self->rec = NULL;
1326 self->rec_size = 0;
1327 self->rec_len = 0;
1328 self->num_fields = 0;
1329
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001330 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001331 Py_DECREF(self);
1332 return NULL;
1333 }
1334 self->writeline = PyObject_GetAttrString(output_file, "write");
1335 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1336 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001337 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001338 Py_DECREF(self);
1339 return NULL;
1340 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001341 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001342 if (self->dialect == NULL) {
1343 Py_DECREF(self);
1344 return NULL;
1345 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001346 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001347 return (PyObject *)self;
1348}
1349
1350/*
1351 * DIALECT REGISTRY
1352 */
1353static PyObject *
1354csv_list_dialects(PyObject *module, PyObject *args)
1355{
1356 return PyDict_Keys(dialects);
1357}
1358
1359static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001360csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001361{
Andrew McNamara86625972005-01-11 01:28:33 +00001362 PyObject *name_obj, *dialect_obj = NULL;
1363 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001364
Andrew McNamara86625972005-01-11 01:28:33 +00001365 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001366 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001367 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001368 PyErr_SetString(PyExc_TypeError,
1369 "dialect name must be a string or unicode");
1370 return NULL;
1371 }
Andrew McNamara86625972005-01-11 01:28:33 +00001372 dialect = _call_dialect(dialect_obj, kwargs);
1373 if (dialect == NULL)
1374 return NULL;
1375 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1376 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001377 return NULL;
1378 }
Andrew McNamara86625972005-01-11 01:28:33 +00001379 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001380 Py_INCREF(Py_None);
1381 return Py_None;
1382}
1383
1384static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001385csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001386{
Skip Montanarob4a04172003-03-20 23:29:12 +00001387 if (PyDict_DelItem(dialects, name_obj) < 0)
1388 return PyErr_Format(error_obj, "unknown dialect");
1389 Py_INCREF(Py_None);
1390 return Py_None;
1391}
1392
1393static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001394csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001395{
Skip Montanarob4a04172003-03-20 23:29:12 +00001396 return get_dialect_from_registry(name_obj);
1397}
1398
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001399static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001400csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001401{
1402 PyObject *new_limit = NULL;
1403 long old_limit = field_limit;
1404
Andrew McNamara31d88962005-01-12 03:45:10 +00001405 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001406 return NULL;
1407 if (new_limit != NULL) {
1408 if (!PyInt_Check(new_limit)) {
1409 PyErr_Format(PyExc_TypeError,
1410 "limit must be an integer");
1411 return NULL;
1412 }
1413 field_limit = PyInt_AsLong(new_limit);
1414 }
1415 return PyInt_FromLong(old_limit);
1416}
1417
Skip Montanarob4a04172003-03-20 23:29:12 +00001418/*
1419 * MODULE
1420 */
1421
1422PyDoc_STRVAR(csv_module_doc,
1423"CSV parsing and writing.\n"
1424"\n"
1425"This module provides classes that assist in the reading and writing\n"
1426"of Comma Separated Value (CSV) files, and implements the interface\n"
1427"described by PEP 305. Although many CSV files are simple to parse,\n"
1428"the format is not formally defined by a stable specification and\n"
1429"is subtle enough that parsing lines of a CSV file with something\n"
1430"like line.split(\",\") is bound to fail. The module supports three\n"
1431"basic APIs: reading, writing, and registration of dialects.\n"
1432"\n"
1433"\n"
1434"DIALECT REGISTRATION:\n"
1435"\n"
1436"Readers and writers support a dialect argument, which is a convenient\n"
1437"handle on a group of settings. When the dialect argument is a string,\n"
1438"it identifies one of the dialects previously registered with the module.\n"
1439"If it is a class or instance, the attributes of the argument are used as\n"
1440"the settings for the reader or writer:\n"
1441"\n"
1442" class excel:\n"
1443" delimiter = ','\n"
1444" quotechar = '\"'\n"
1445" escapechar = None\n"
1446" doublequote = True\n"
1447" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001448" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001449" quoting = QUOTE_MINIMAL\n"
1450"\n"
1451"SETTINGS:\n"
1452"\n"
1453" * quotechar - specifies a one-character string to use as the \n"
1454" quoting character. It defaults to '\"'.\n"
1455" * delimiter - specifies a one-character string to use as the \n"
1456" field separator. It defaults to ','.\n"
1457" * skipinitialspace - specifies how to interpret whitespace which\n"
1458" immediately follows a delimiter. It defaults to False, which\n"
1459" means that whitespace immediately following a delimiter is part\n"
1460" of the following field.\n"
1461" * lineterminator - specifies the character sequence which should \n"
1462" terminate rows.\n"
1463" * quoting - controls when quotes should be generated by the writer.\n"
1464" It can take on any of the following module constants:\n"
1465"\n"
1466" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1467" field contains either the quotechar or the delimiter\n"
1468" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1469" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001470" fields which do not parse as integers or floating point\n"
1471" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001472" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1473" * escapechar - specifies a one-character string used to escape \n"
1474" the delimiter when quoting is set to QUOTE_NONE.\n"
1475" * doublequote - controls the handling of quotes inside fields. When\n"
1476" True, two consecutive quotes are interpreted as one during read,\n"
1477" and when writing, each quote character embedded in the data is\n"
1478" written as two quotes\n");
1479
1480PyDoc_STRVAR(csv_reader_doc,
1481" csv_reader = reader(iterable [, dialect='excel']\n"
1482" [optional keyword args])\n"
1483" for row in csv_reader:\n"
1484" process(row)\n"
1485"\n"
1486"The \"iterable\" argument can be any object that returns a line\n"
1487"of input for each iteration, such as a file object or a list. The\n"
1488"optional \"dialect\" parameter is discussed below. The function\n"
1489"also accepts optional keyword arguments which override settings\n"
1490"provided by the dialect.\n"
1491"\n"
1492"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001493"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001494
1495PyDoc_STRVAR(csv_writer_doc,
1496" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1497" [optional keyword args])\n"
1498" for row in csv_writer:\n"
1499" csv_writer.writerow(row)\n"
1500"\n"
1501" [or]\n"
1502"\n"
1503" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1504" [optional keyword args])\n"
1505" csv_writer.writerows(rows)\n"
1506"\n"
1507"The \"fileobj\" argument can be any object that supports the file API.\n");
1508
1509PyDoc_STRVAR(csv_list_dialects_doc,
1510"Return a list of all know dialect names.\n"
1511" names = csv.list_dialects()");
1512
1513PyDoc_STRVAR(csv_get_dialect_doc,
1514"Return the dialect instance associated with name.\n"
1515" dialect = csv.get_dialect(name)");
1516
1517PyDoc_STRVAR(csv_register_dialect_doc,
1518"Create a mapping from a string name to a dialect class.\n"
1519" dialect = csv.register_dialect(name, dialect)");
1520
1521PyDoc_STRVAR(csv_unregister_dialect_doc,
1522"Delete the name/dialect mapping associated with a string name.\n"
1523" csv.unregister_dialect(name)");
1524
Andrew McNamara31d88962005-01-12 03:45:10 +00001525PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001526"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001527" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001528"\n"
1529"Returns old limit. If limit is not given, no new limit is set and\n"
1530"the old limit is returned");
1531
Skip Montanarob4a04172003-03-20 23:29:12 +00001532static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001533 { "reader", (PyCFunction)csv_reader,
1534 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1535 { "writer", (PyCFunction)csv_writer,
1536 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1537 { "list_dialects", (PyCFunction)csv_list_dialects,
1538 METH_NOARGS, csv_list_dialects_doc},
1539 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001540 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001541 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1542 METH_O, csv_unregister_dialect_doc},
1543 { "get_dialect", (PyCFunction)csv_get_dialect,
1544 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001545 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1546 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001547 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001548};
1549
1550PyMODINIT_FUNC
1551init_csv(void)
1552{
1553 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001554 StyleDesc *style;
1555
1556 if (PyType_Ready(&Dialect_Type) < 0)
1557 return;
1558
1559 if (PyType_Ready(&Reader_Type) < 0)
1560 return;
1561
1562 if (PyType_Ready(&Writer_Type) < 0)
1563 return;
1564
1565 /* Create the module and add the functions */
1566 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1567 if (module == NULL)
1568 return;
1569
1570 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001571 if (PyModule_AddStringConstant(module, "__version__",
1572 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001573 return;
1574
1575 /* Add _dialects dictionary */
1576 dialects = PyDict_New();
1577 if (dialects == NULL)
1578 return;
1579 if (PyModule_AddObject(module, "_dialects", dialects))
1580 return;
1581
1582 /* Add quote styles into dictionary */
1583 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001584 if (PyModule_AddIntConstant(module, style->name,
1585 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001586 return;
1587 }
1588
1589 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001590 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001591 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1592 return;
1593
1594 /* Add the CSV exception object to the module. */
1595 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1596 if (error_obj == NULL)
1597 return;
1598 PyModule_AddObject(module, "Error", error_obj);
1599}