blob: 03e715a41b7516970bcc91f8a341a32762715a3f [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers for interpreters whose headers do not supply them.
 * PyDoc_STR() keeps the text only when WITH_DOC_STRINGS is defined;
 * otherwise the docstring collapses to an empty string. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Return type of the module init function; C++ builds add C linkage. */
#if defined(__cplusplus)
#define PyMODINIT_FUNC extern "C" void
#else /* __cplusplus */
#define PyMODINIT_FUNC void
#endif /* __cplusplus */
#endif
/* end 2.2 compatibility macros */
41
/* Non-zero when `o` passes PyObject_TypeCheck against PyBaseString_Type.
 * The argument and the expansion are parenthesized so the macro composes
 * safely inside larger expressions. */
#define IS_BASESTRING(o) \
    (PyObject_TypeCheck((o), &PyBaseString_Type))
44
Skip Montanarob4a04172003-03-20 23:29:12 +000045static PyObject *error_obj; /* CSV exception */
46static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000047static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000048
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting the first character of a field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD       /* quote character seen inside quotes */
} ParserState;
53
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,      /* quote a field only when its contents require it */
    QUOTE_ALL,          /* quote every field */
    QUOTE_NONNUMERIC,   /* NOTE(review): per-field decision is made outside
                           the code shown here -- confirm against the writer */
    QUOTE_NONE          /* never quote */
} QuoteStyle;
57
58typedef struct {
59 QuoteStyle style;
60 char *name;
61} StyleDesc;
62
63static StyleDesc quote_styles[] = {
64 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
65 { QUOTE_ALL, "QUOTE_ALL" },
66 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
67 { QUOTE_NONE, "QUOTE_NONE" },
68 { 0 }
69};
70
71typedef struct {
72 PyObject_HEAD
73
74 int doublequote; /* is " represented by ""? */
75 char delimiter; /* field separator */
76 char quotechar; /* quote character */
77 char escapechar; /* escape character */
78 int skipinitialspace; /* ignore spaces following delimiter? */
79 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000080 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000081
82 int strict; /* raise exception on bad CSV */
83} DialectObj;
84
85staticforward PyTypeObject Dialect_Type;
86
87typedef struct {
88 PyObject_HEAD
89
90 PyObject *input_iter; /* iterate over this for input lines */
91
92 DialectObj *dialect; /* parsing dialect */
93
94 PyObject *fields; /* field list for current record */
95 ParserState state; /* current CSV parse state */
96 char *field; /* build current field in here */
97 int field_size; /* size of allocated buffer */
98 int field_len; /* length of current field */
99 int had_parse_error; /* did we have a parse error? */
100} ReaderObj;
101
102staticforward PyTypeObject Reader_Type;
103
104#define ReaderObject_Check(v) ((v)->ob_type == &Reader_Type)
105
106typedef struct {
107 PyObject_HEAD
108
109 PyObject *writeline; /* write output lines to this file */
110
111 DialectObj *dialect; /* parsing dialect */
112
113 char *rec; /* buffer for parser.join */
114 int rec_size; /* size of allocated record */
115 int rec_len; /* length of record */
116 int num_fields; /* number of fields in record */
117} WriterObj;
118
119staticforward PyTypeObject Writer_Type;
120
/*
 * DIALECT class
 */
124
125static PyObject *
126get_dialect_from_registry(PyObject * name_obj)
127{
128 PyObject *dialect_obj;
129
130 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000131 if (dialect_obj == NULL) {
132 if (!PyErr_Occurred())
133 PyErr_Format(error_obj, "unknown dialect");
134 }
135 else
136 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000137 return dialect_obj;
138}
139
Skip Montanarob4a04172003-03-20 23:29:12 +0000140static PyObject *
141get_string(PyObject *str)
142{
143 Py_XINCREF(str);
144 return str;
145}
146
Skip Montanarob4a04172003-03-20 23:29:12 +0000147static PyObject *
148get_nullchar_as_None(char c)
149{
150 if (c == '\0') {
151 Py_INCREF(Py_None);
152 return Py_None;
153 }
154 else
155 return PyString_FromStringAndSize((char*)&c, 1);
156}
157
Skip Montanarob4a04172003-03-20 23:29:12 +0000158static PyObject *
159Dialect_get_lineterminator(DialectObj *self)
160{
161 return get_string(self->lineterminator);
162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165Dialect_get_escapechar(DialectObj *self)
166{
167 return get_nullchar_as_None(self->escapechar);
168}
169
Andrew McNamara1196cf12005-01-07 04:42:45 +0000170static PyObject *
171Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000172{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000173 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000174}
175
176static PyObject *
177Dialect_get_quoting(DialectObj *self)
178{
179 return PyInt_FromLong(self->quoting);
180}
181
182static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000183_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000184{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000185 if (src == NULL)
186 *target = dflt;
187 else
188 *target = PyObject_IsTrue(src);
189 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
Andrew McNamara1196cf12005-01-07 04:42:45 +0000192static int
193_set_int(const char *name, int *target, PyObject *src, int dflt)
194{
195 if (src == NULL)
196 *target = dflt;
197 else {
198 if (!PyInt_Check(src)) {
199 PyErr_Format(PyExc_TypeError,
200 "\"%s\" must be an integer", name);
201 return -1;
202 }
203 *target = PyInt_AsLong(src);
204 }
205 return 0;
206}
207
208static int
209_set_char(const char *name, char *target, PyObject *src, char dflt)
210{
211 if (src == NULL)
212 *target = dflt;
213 else {
Andrew McNamaraa8292632005-01-10 12:25:11 +0000214 if (src == Py_None || PyString_Size(src) == 0)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000215 *target = '\0';
216 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
217 PyErr_Format(PyExc_TypeError,
218 "\"%s\" must be an 1-character string",
219 name);
220 return -1;
221 }
222 else {
223 char *s = PyString_AsString(src);
224 if (s == NULL)
225 return -1;
226 *target = s[0];
227 }
228 }
229 return 0;
230}
231
232static int
233_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
234{
235 if (src == NULL)
236 *target = PyString_FromString(dflt);
237 else {
238 if (src == Py_None)
239 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000240 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be an string", name);
243 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000244 }
245 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000246 Py_XDECREF(*target);
247 Py_INCREF(src);
248 *target = src;
249 }
250 }
251 return 0;
252}
253
254static int
255dialect_check_quoting(int quoting)
256{
257 StyleDesc *qs = quote_styles;
258
259 for (qs = quote_styles; qs->name; qs++) {
260 if (qs->style == quoting)
261 return 0;
262 }
263 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
264 return -1;
265}
Skip Montanarob4a04172003-03-20 23:29:12 +0000266
267#define D_OFF(x) offsetof(DialectObj, x)
268
269static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000270 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
271 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
272 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
273 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000274 { NULL }
275};
276
277static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000278 { "escapechar", (getter)Dialect_get_escapechar},
279 { "lineterminator", (getter)Dialect_get_lineterminator},
280 { "quotechar", (getter)Dialect_get_quotechar},
281 { "quoting", (getter)Dialect_get_quoting},
282 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000283};
284
285static void
286Dialect_dealloc(DialectObj *self)
287{
288 Py_XDECREF(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000289 self->ob_type->tp_free((PyObject *)self);
290}
291
Andrew McNamara1196cf12005-01-07 04:42:45 +0000292static char *dialect_kws[] = {
293 "dialect",
294 "delimiter",
295 "doublequote",
296 "escapechar",
297 "lineterminator",
298 "quotechar",
299 "quoting",
300 "skipinitialspace",
301 "strict",
302 NULL
303};
304
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000305static PyObject *
306dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000307{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000308 DialectObj *self;
309 PyObject *ret = NULL;
310 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000311 PyObject *delimiter = NULL;
312 PyObject *doublequote = NULL;
313 PyObject *escapechar = NULL;
314 PyObject *lineterminator = NULL;
315 PyObject *quotechar = NULL;
316 PyObject *quoting = NULL;
317 PyObject *skipinitialspace = NULL;
318 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000319
Andrew McNamara1196cf12005-01-07 04:42:45 +0000320 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
321 "|OOOOOOOOO", dialect_kws,
322 &dialect,
323 &delimiter,
324 &doublequote,
325 &escapechar,
326 &lineterminator,
327 &quotechar,
328 &quoting,
329 &skipinitialspace,
330 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000331 return NULL;
332
333 if (dialect != NULL) {
334 if (IS_BASESTRING(dialect)) {
335 dialect = get_dialect_from_registry(dialect);
336 if (dialect == NULL)
337 return NULL;
338 }
339 else
340 Py_INCREF(dialect);
341 /* Can we reuse this instance? */
342 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
343 delimiter == 0 &&
344 doublequote == 0 &&
345 escapechar == 0 &&
346 lineterminator == 0 &&
347 quotechar == 0 &&
348 quoting == 0 &&
349 skipinitialspace == 0 &&
350 strict == 0)
351 return dialect;
352 }
353
354 self = (DialectObj *)type->tp_alloc(type, 0);
355 if (self == NULL) {
356 Py_XDECREF(dialect);
357 return NULL;
358 }
359 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000360
Andrew McNamara1196cf12005-01-07 04:42:45 +0000361 Py_XINCREF(delimiter);
362 Py_XINCREF(doublequote);
363 Py_XINCREF(escapechar);
364 Py_XINCREF(lineterminator);
365 Py_XINCREF(quotechar);
366 Py_XINCREF(quoting);
367 Py_XINCREF(skipinitialspace);
368 Py_XINCREF(strict);
369 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000370#define DIALECT_GETATTR(v, n) \
371 if (v == NULL) \
372 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000373 DIALECT_GETATTR(delimiter, "delimiter");
374 DIALECT_GETATTR(doublequote, "doublequote");
375 DIALECT_GETATTR(escapechar, "escapechar");
376 DIALECT_GETATTR(lineterminator, "lineterminator");
377 DIALECT_GETATTR(quotechar, "quotechar");
378 DIALECT_GETATTR(quoting, "quoting");
379 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
380 DIALECT_GETATTR(strict, "strict");
381 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000382 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000383
Andrew McNamara1196cf12005-01-07 04:42:45 +0000384 /* check types and convert to C values */
385#define DIASET(meth, name, target, src, dflt) \
386 if (meth(name, target, src, dflt)) \
387 goto err
388 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
389 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
390 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
391 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
392 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
393 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
394 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
395 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000396
Andrew McNamara1196cf12005-01-07 04:42:45 +0000397 /* validate options */
398 if (dialect_check_quoting(self->quoting))
399 goto err;
400 if (self->delimiter == 0) {
401 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
402 goto err;
403 }
404 if (quotechar == Py_None && self->quoting != QUOTE_NONE)
405 self->quoting = QUOTE_NONE;
406 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
407 PyErr_SetString(PyExc_TypeError,
408 "quotechar must be set if quoting enabled");
409 goto err;
410 }
411 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000412 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413 goto err;
414 }
415
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000416 ret = (PyObject *)self;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000417err:
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000418 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000419 Py_XDECREF(delimiter);
420 Py_XDECREF(doublequote);
421 Py_XDECREF(escapechar);
422 Py_XDECREF(lineterminator);
423 Py_XDECREF(quotechar);
424 Py_XDECREF(quoting);
425 Py_XDECREF(skipinitialspace);
426 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000427 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000428}
429
430
431PyDoc_STRVAR(Dialect_Type_doc,
432"CSV dialect\n"
433"\n"
434"The Dialect type records CSV parsing and generation options.\n");
435
436static PyTypeObject Dialect_Type = {
437 PyObject_HEAD_INIT(NULL)
438 0, /* ob_size */
439 "_csv.Dialect", /* tp_name */
440 sizeof(DialectObj), /* tp_basicsize */
441 0, /* tp_itemsize */
442 /* methods */
443 (destructor)Dialect_dealloc, /* tp_dealloc */
444 (printfunc)0, /* tp_print */
445 (getattrfunc)0, /* tp_getattr */
446 (setattrfunc)0, /* tp_setattr */
447 (cmpfunc)0, /* tp_compare */
448 (reprfunc)0, /* tp_repr */
449 0, /* tp_as_number */
450 0, /* tp_as_sequence */
451 0, /* tp_as_mapping */
452 (hashfunc)0, /* tp_hash */
453 (ternaryfunc)0, /* tp_call */
454 (reprfunc)0, /* tp_str */
455 0, /* tp_getattro */
456 0, /* tp_setattro */
457 0, /* tp_as_buffer */
458 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
459 Dialect_Type_doc, /* tp_doc */
460 0, /* tp_traverse */
461 0, /* tp_clear */
462 0, /* tp_richcompare */
463 0, /* tp_weaklistoffset */
464 0, /* tp_iter */
465 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000466 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000467 Dialect_memberlist, /* tp_members */
468 Dialect_getsetlist, /* tp_getset */
469 0, /* tp_base */
470 0, /* tp_dict */
471 0, /* tp_descr_get */
472 0, /* tp_descr_set */
473 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000474 0, /* tp_init */
475 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000476 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000477 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000478};
479
Andrew McNamara91b97462005-01-11 01:07:23 +0000480/*
481 * Return an instance of the dialect type, given a Python instance or kwarg
482 * description of the dialect
483 */
484static PyObject *
485_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
486{
487 PyObject *ctor_args;
488 PyObject *dialect;
489
490 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
491 if (ctor_args == NULL)
492 return NULL;
493 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
494 Py_DECREF(ctor_args);
495 return dialect;
496}
497
Skip Montanarob4a04172003-03-20 23:29:12 +0000498static void
499parse_save_field(ReaderObj *self)
500{
501 PyObject *field;
502
503 field = PyString_FromStringAndSize(self->field, self->field_len);
504 if (field != NULL) {
505 PyList_Append(self->fields, field);
506 Py_XDECREF(field);
507 }
508 self->field_len = 0;
509}
510
511static int
512parse_grow_buff(ReaderObj *self)
513{
514 if (self->field_size == 0) {
515 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000516 if (self->field != NULL)
517 PyMem_Free(self->field);
Skip Montanarob4a04172003-03-20 23:29:12 +0000518 self->field = PyMem_Malloc(self->field_size);
519 }
520 else {
521 self->field_size *= 2;
522 self->field = PyMem_Realloc(self->field, self->field_size);
523 }
524 if (self->field == NULL) {
525 PyErr_NoMemory();
526 return 0;
527 }
528 return 1;
529}
530
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000531static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000532parse_add_char(ReaderObj *self, char c)
533{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000534 if (self->field_len >= field_limit) {
535 PyErr_Format(error_obj, "field larger than field limit (%ld)",
536 field_limit);
537 return -1;
538 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000539 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000540 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000541 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000542 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000543}
544
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000545static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000546parse_process_char(ReaderObj *self, char c)
547{
548 DialectObj *dialect = self->dialect;
549
550 switch (self->state) {
551 case START_RECORD:
552 /* start of record */
553 if (c == '\n')
554 /* empty line - return [] */
555 break;
556 /* normal character - handle as START_FIELD */
557 self->state = START_FIELD;
558 /* fallthru */
559 case START_FIELD:
560 /* expecting field */
561 if (c == '\n') {
562 /* save empty field - return [fields] */
563 parse_save_field(self);
564 self->state = START_RECORD;
565 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000566 else if (c == dialect->quotechar &&
567 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000568 /* start quoted field */
569 self->state = IN_QUOTED_FIELD;
570 }
571 else if (c == dialect->escapechar) {
572 /* possible escaped character */
573 self->state = ESCAPED_CHAR;
574 }
575 else if (c == ' ' && dialect->skipinitialspace)
576 /* ignore space at start of field */
577 ;
578 else if (c == dialect->delimiter) {
579 /* save empty field */
580 parse_save_field(self);
581 }
582 else {
583 /* begin new unquoted field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000584 if (parse_add_char(self, c) < 0)
585 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000586 self->state = IN_FIELD;
587 }
588 break;
589
590 case ESCAPED_CHAR:
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000591 if (parse_add_char(self, c) < 0)
592 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000593 self->state = IN_FIELD;
594 break;
595
596 case IN_FIELD:
597 /* in unquoted field */
598 if (c == '\n') {
599 /* end of line - return [fields] */
600 parse_save_field(self);
601 self->state = START_RECORD;
602 }
603 else if (c == dialect->escapechar) {
604 /* possible escaped character */
605 self->state = ESCAPED_CHAR;
606 }
607 else if (c == dialect->delimiter) {
608 /* save field - wait for new field */
609 parse_save_field(self);
610 self->state = START_FIELD;
611 }
612 else {
613 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000614 if (parse_add_char(self, c) < 0)
615 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000616 }
617 break;
618
619 case IN_QUOTED_FIELD:
620 /* in quoted field */
621 if (c == '\n') {
622 /* end of line - save '\n' in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000623 if (parse_add_char(self, '\n') < 0)
624 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000625 }
626 else if (c == dialect->escapechar) {
627 /* Possible escape character */
628 self->state = ESCAPE_IN_QUOTED_FIELD;
629 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000630 else if (c == dialect->quotechar &&
631 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000632 if (dialect->doublequote) {
633 /* doublequote; " represented by "" */
634 self->state = QUOTE_IN_QUOTED_FIELD;
635 }
636 else {
637 /* end of quote part of field */
638 self->state = IN_FIELD;
639 }
640 }
641 else {
642 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000643 if (parse_add_char(self, c) < 0)
644 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000645 }
646 break;
647
648 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000649 if (parse_add_char(self, c) < 0)
650 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000651 self->state = IN_QUOTED_FIELD;
652 break;
653
654 case QUOTE_IN_QUOTED_FIELD:
655 /* doublequote - seen a quote in an quoted field */
656 if (dialect->quoting != QUOTE_NONE &&
657 c == dialect->quotechar) {
658 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000659 if (parse_add_char(self, c) < 0)
660 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000661 self->state = IN_QUOTED_FIELD;
662 }
663 else if (c == dialect->delimiter) {
664 /* save field - wait for new field */
665 parse_save_field(self);
666 self->state = START_FIELD;
667 }
668 else if (c == '\n') {
669 /* end of line - return [fields] */
670 parse_save_field(self);
671 self->state = START_RECORD;
672 }
673 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000674 if (parse_add_char(self, c) < 0)
675 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000676 self->state = IN_FIELD;
677 }
678 else {
679 /* illegal */
680 self->had_parse_error = 1;
681 PyErr_Format(error_obj, "%c expected after %c",
682 dialect->delimiter,
683 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000684 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000685 }
686 break;
687
688 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000689 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000690}
691
/*
 * READER
 */
695#define R_OFF(x) offsetof(ReaderObj, x)
696
697static struct PyMemberDef Reader_memberlist[] = {
698 { "dialect", T_OBJECT, R_OFF(dialect), RO },
699 { NULL }
700};
701
702static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000703Reader_iternext(ReaderObj *self)
704{
705 PyObject *lineobj;
706 PyObject *fields;
707 char *line;
708
709 do {
710 lineobj = PyIter_Next(self->input_iter);
711 if (lineobj == NULL) {
712 /* End of input OR exception */
713 if (!PyErr_Occurred() && self->field_len != 0)
714 return PyErr_Format(error_obj,
715 "newline inside string");
716 return NULL;
717 }
718
719 if (self->had_parse_error) {
720 if (self->fields) {
721 Py_XDECREF(self->fields);
722 }
723 self->fields = PyList_New(0);
724 self->field_len = 0;
725 self->state = START_RECORD;
726 self->had_parse_error = 0;
727 }
728 line = PyString_AsString(lineobj);
729
730 if (line == NULL) {
731 Py_DECREF(lineobj);
732 return NULL;
733 }
Tim Petersef4b7ed2003-03-21 01:35:28 +0000734 if (strlen(line) < (size_t)PyString_GET_SIZE(lineobj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000735 self->had_parse_error = 1;
736 Py_DECREF(lineobj);
737 return PyErr_Format(error_obj,
738 "string with NUL bytes");
739 }
740
741 /* Process line of text - send '\n' to processing code to
742 represent end of line. End of line which is not at end of
743 string is an error. */
744 while (*line) {
745 char c;
746
747 c = *line++;
748 if (c == '\r') {
749 c = *line++;
750 if (c == '\0')
751 /* macintosh end of line */
752 break;
753 if (c == '\n') {
754 c = *line++;
755 if (c == '\0')
756 /* DOS end of line */
757 break;
758 }
759 self->had_parse_error = 1;
760 Py_DECREF(lineobj);
761 return PyErr_Format(error_obj,
762 "newline inside string");
763 }
764 if (c == '\n') {
765 c = *line++;
766 if (c == '\0')
767 /* unix end of line */
768 break;
769 self->had_parse_error = 1;
770 Py_DECREF(lineobj);
771 return PyErr_Format(error_obj,
772 "newline inside string");
773 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000774 if (parse_process_char(self, c) < 0) {
775 Py_DECREF(lineobj);
776 return NULL;
777 }
778 }
779 if (parse_process_char(self, '\n') < 0) {
780 Py_DECREF(lineobj);
781 return NULL;
782 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000783 Py_DECREF(lineobj);
784 } while (self->state != START_RECORD);
785
786 fields = self->fields;
787 self->fields = PyList_New(0);
788 return fields;
789}
790
791static void
792Reader_dealloc(ReaderObj *self)
793{
Andrew McNamara77ead872005-01-10 02:09:41 +0000794 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000795 Py_XDECREF(self->dialect);
796 Py_XDECREF(self->input_iter);
797 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000798 if (self->field != NULL)
799 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000800 PyObject_GC_Del(self);
801}
802
803static int
804Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
805{
806 int err;
807#define VISIT(SLOT) \
808 if (SLOT) { \
809 err = visit((PyObject *)(SLOT), arg); \
810 if (err) \
811 return err; \
812 }
813 VISIT(self->dialect);
814 VISIT(self->input_iter);
815 VISIT(self->fields);
816 return 0;
817}
818
819static int
820Reader_clear(ReaderObj *self)
821{
822 Py_XDECREF(self->dialect);
823 Py_XDECREF(self->input_iter);
824 Py_XDECREF(self->fields);
825 self->dialect = NULL;
826 self->input_iter = NULL;
827 self->fields = NULL;
828 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000829}
830
831PyDoc_STRVAR(Reader_Type_doc,
832"CSV reader\n"
833"\n"
834"Reader objects are responsible for reading and parsing tabular data\n"
835"in CSV format.\n"
836);
837
838static struct PyMethodDef Reader_methods[] = {
839 { NULL, NULL }
840};
841
842static PyTypeObject Reader_Type = {
843 PyObject_HEAD_INIT(NULL)
844 0, /*ob_size*/
845 "_csv.reader", /*tp_name*/
846 sizeof(ReaderObj), /*tp_basicsize*/
847 0, /*tp_itemsize*/
848 /* methods */
849 (destructor)Reader_dealloc, /*tp_dealloc*/
850 (printfunc)0, /*tp_print*/
851 (getattrfunc)0, /*tp_getattr*/
852 (setattrfunc)0, /*tp_setattr*/
853 (cmpfunc)0, /*tp_compare*/
854 (reprfunc)0, /*tp_repr*/
855 0, /*tp_as_number*/
856 0, /*tp_as_sequence*/
857 0, /*tp_as_mapping*/
858 (hashfunc)0, /*tp_hash*/
859 (ternaryfunc)0, /*tp_call*/
860 (reprfunc)0, /*tp_str*/
861 0, /*tp_getattro*/
862 0, /*tp_setattro*/
863 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000864 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
865 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000866 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000867 (traverseproc)Reader_traverse, /*tp_traverse*/
868 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000869 0, /*tp_richcompare*/
870 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000871 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000872 (getiterfunc)Reader_iternext, /*tp_iternext*/
873 Reader_methods, /*tp_methods*/
874 Reader_memberlist, /*tp_members*/
875 0, /*tp_getset*/
876
877};
878
879static PyObject *
880csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
881{
Andrew McNamara91b97462005-01-11 01:07:23 +0000882 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000883 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000884
885 if (!self)
886 return NULL;
887
888 self->dialect = NULL;
889 self->input_iter = self->fields = NULL;
890
891 self->fields = NULL;
892 self->input_iter = NULL;
893 self->had_parse_error = 0;
894 self->field = NULL;
895 self->field_size = 0;
896 self->field_len = 0;
897 self->state = START_RECORD;
898
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000899 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000900 Py_DECREF(self);
901 return NULL;
902 }
903 self->input_iter = PyObject_GetIter(iterator);
904 if (self->input_iter == NULL) {
905 PyErr_SetString(PyExc_TypeError,
906 "argument 1 must be an iterator");
907 Py_DECREF(self);
908 return NULL;
909 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000910 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000911 if (self->dialect == NULL) {
912 Py_DECREF(self);
913 return NULL;
914 }
915 self->fields = PyList_New(0);
916 if (self->fields == NULL) {
917 Py_DECREF(self);
918 return NULL;
919 }
920
Andrew McNamara77ead872005-01-10 02:09:41 +0000921 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000922 return (PyObject *)self;
923}
924
/*
 * WRITER
 */
/* ---------------------------------------------------------------- */
929static void
930join_reset(WriterObj *self)
931{
932 self->rec_len = 0;
933 self->num_fields = 0;
934}
935
/* Granularity, in bytes, to which the record buffer size is rounded. */
#define MEM_INCR 32768
937
938/* Calculate new record length or append field to record. Return new
939 * record length.
940 */
941static int
942join_append_data(WriterObj *self, char *field, int quote_empty,
943 int *quoted, int copy_phase)
944{
945 DialectObj *dialect = self->dialect;
946 int i, rec_len;
947
948 rec_len = self->rec_len;
949
950 /* If this is not the first field we need a field separator.
951 */
952 if (self->num_fields > 0) {
953 if (copy_phase)
954 self->rec[rec_len] = dialect->delimiter;
955 rec_len++;
956 }
957 /* Handle preceding quote.
958 */
959 switch (dialect->quoting) {
960 case QUOTE_ALL:
961 *quoted = 1;
962 if (copy_phase)
963 self->rec[rec_len] = dialect->quotechar;
964 rec_len++;
965 break;
966 case QUOTE_MINIMAL:
967 case QUOTE_NONNUMERIC:
968 /* We only know about quoted in the copy phase.
969 */
970 if (copy_phase && *quoted) {
971 self->rec[rec_len] = dialect->quotechar;
972 rec_len++;
973 }
974 break;
975 case QUOTE_NONE:
976 break;
977 }
978 /* Copy/count field data.
979 */
980 for (i = 0;; i++) {
981 char c = field[i];
982
983 if (c == '\0')
984 break;
985 /* If in doublequote mode we escape quote chars with a
986 * quote.
987 */
988 if (dialect->quoting != QUOTE_NONE &&
989 c == dialect->quotechar && dialect->doublequote) {
990 if (copy_phase)
991 self->rec[rec_len] = dialect->quotechar;
992 *quoted = 1;
993 rec_len++;
994 }
995
996 /* Some special characters need to be escaped. If we have a
997 * quote character switch to quoted field instead of escaping
998 * individual characters.
999 */
1000 if (!*quoted
1001 && (c == dialect->delimiter ||
1002 c == dialect->escapechar ||
1003 c == '\n' || c == '\r')) {
1004 if (dialect->quoting != QUOTE_NONE)
1005 *quoted = 1;
1006 else if (dialect->escapechar) {
1007 if (copy_phase)
1008 self->rec[rec_len] = dialect->escapechar;
1009 rec_len++;
1010 }
1011 else {
1012 PyErr_Format(error_obj,
1013 "delimiter must be quoted or escaped");
1014 return -1;
1015 }
1016 }
1017 /* Copy field character into record buffer.
1018 */
1019 if (copy_phase)
1020 self->rec[rec_len] = c;
1021 rec_len++;
1022 }
1023
1024 /* If field is empty check if it needs to be quoted.
1025 */
1026 if (i == 0 && quote_empty) {
1027 if (dialect->quoting == QUOTE_NONE) {
1028 PyErr_Format(error_obj,
1029 "single empty field record must be quoted");
1030 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001031 }
1032 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001033 *quoted = 1;
1034 }
1035
1036 /* Handle final quote character on field.
1037 */
1038 if (*quoted) {
1039 if (copy_phase)
1040 self->rec[rec_len] = dialect->quotechar;
1041 else
1042 /* Didn't know about leading quote until we found it
1043 * necessary in field data - compensate for it now.
1044 */
1045 rec_len++;
1046 rec_len++;
1047 }
1048
1049 return rec_len;
1050}
1051
1052static int
1053join_check_rec_size(WriterObj *self, int rec_len)
1054{
1055 if (rec_len > self->rec_size) {
1056 if (self->rec_size == 0) {
1057 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001058 if (self->rec != NULL)
1059 PyMem_Free(self->rec);
Skip Montanarob4a04172003-03-20 23:29:12 +00001060 self->rec = PyMem_Malloc(self->rec_size);
1061 }
1062 else {
1063 char *old_rec = self->rec;
1064
1065 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1066 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1067 if (self->rec == NULL)
1068 PyMem_Free(old_rec);
1069 }
1070 if (self->rec == NULL) {
1071 PyErr_NoMemory();
1072 return 0;
1073 }
1074 }
1075 return 1;
1076}
1077
1078static int
1079join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1080{
1081 int rec_len;
1082
1083 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1084 if (rec_len < 0)
1085 return 0;
1086
1087 /* grow record buffer if necessary */
1088 if (!join_check_rec_size(self, rec_len))
1089 return 0;
1090
1091 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1092 self->num_fields++;
1093
1094 return 1;
1095}
1096
1097static int
1098join_append_lineterminator(WriterObj *self)
1099{
1100 int terminator_len;
1101
1102 terminator_len = PyString_Size(self->dialect->lineterminator);
1103
1104 /* grow record buffer if necessary */
1105 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1106 return 0;
1107
1108 memmove(self->rec + self->rec_len,
Skip Montanaro577c7a72003-04-12 19:17:14 +00001109 /* should not be NULL */
Skip Montanarob4a04172003-03-20 23:29:12 +00001110 PyString_AsString(self->dialect->lineterminator),
1111 terminator_len);
1112 self->rec_len += terminator_len;
1113
1114 return 1;
1115}
1116
1117PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001118"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001119"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001120"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001121"elements will be converted to string.");
1122
1123static PyObject *
1124csv_writerow(WriterObj *self, PyObject *seq)
1125{
1126 DialectObj *dialect = self->dialect;
1127 int len, i;
1128
1129 if (!PySequence_Check(seq))
1130 return PyErr_Format(error_obj, "sequence expected");
1131
1132 len = PySequence_Length(seq);
1133 if (len < 0)
1134 return NULL;
1135
1136 /* Join all fields in internal buffer.
1137 */
1138 join_reset(self);
1139 for (i = 0; i < len; i++) {
1140 PyObject *field;
1141 int append_ok;
1142 int quoted;
1143
1144 field = PySequence_GetItem(seq, i);
1145 if (field == NULL)
1146 return NULL;
1147
1148 quoted = 0;
1149 if (dialect->quoting == QUOTE_NONNUMERIC) {
1150 PyObject *num;
1151
1152 num = PyNumber_Float(field);
1153 if (num == NULL) {
1154 quoted = 1;
1155 PyErr_Clear();
1156 }
1157 else {
1158 Py_DECREF(num);
1159 }
1160 }
1161
1162 if (PyString_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001163 append_ok = join_append(self,
1164 PyString_AS_STRING(field),
Skip Montanarob4a04172003-03-20 23:29:12 +00001165 &quoted, len == 1);
1166 Py_DECREF(field);
1167 }
1168 else if (field == Py_None) {
1169 append_ok = join_append(self, "", &quoted, len == 1);
1170 Py_DECREF(field);
1171 }
1172 else {
1173 PyObject *str;
1174
1175 str = PyObject_Str(field);
1176 Py_DECREF(field);
1177 if (str == NULL)
1178 return NULL;
1179
Skip Montanaro577c7a72003-04-12 19:17:14 +00001180 append_ok = join_append(self, PyString_AS_STRING(str),
Skip Montanarob4a04172003-03-20 23:29:12 +00001181 &quoted, len == 1);
1182 Py_DECREF(str);
1183 }
1184 if (!append_ok)
1185 return NULL;
1186 }
1187
1188 /* Add line terminator.
1189 */
1190 if (!join_append_lineterminator(self))
1191 return 0;
1192
1193 return PyObject_CallFunction(self->writeline,
1194 "(s#)", self->rec, self->rec_len);
1195}
1196
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001197PyDoc_STRVAR(csv_writerows_doc,
1198"writerows(sequence of sequences)\n"
1199"\n"
1200"Construct and write a series of sequences to a csv file. Non-string\n"
1201"elements will be converted to string.");
1202
Skip Montanarob4a04172003-03-20 23:29:12 +00001203static PyObject *
1204csv_writerows(WriterObj *self, PyObject *seqseq)
1205{
1206 PyObject *row_iter, *row_obj, *result;
1207
1208 row_iter = PyObject_GetIter(seqseq);
1209 if (row_iter == NULL) {
1210 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001211 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001212 return NULL;
1213 }
1214 while ((row_obj = PyIter_Next(row_iter))) {
1215 result = csv_writerow(self, row_obj);
1216 Py_DECREF(row_obj);
1217 if (!result) {
1218 Py_DECREF(row_iter);
1219 return NULL;
1220 }
1221 else
1222 Py_DECREF(result);
1223 }
1224 Py_DECREF(row_iter);
1225 if (PyErr_Occurred())
1226 return NULL;
1227 Py_INCREF(Py_None);
1228 return Py_None;
1229}
1230
1231static struct PyMethodDef Writer_methods[] = {
1232 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001233 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001234 { NULL, NULL }
1235};
1236
1237#define W_OFF(x) offsetof(WriterObj, x)
1238
1239static struct PyMemberDef Writer_memberlist[] = {
1240 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1241 { NULL }
1242};
1243
1244static void
1245Writer_dealloc(WriterObj *self)
1246{
Andrew McNamara77ead872005-01-10 02:09:41 +00001247 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001248 Py_XDECREF(self->dialect);
1249 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001250 if (self->rec != NULL)
1251 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001252 PyObject_GC_Del(self);
1253}
1254
1255static int
1256Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1257{
1258 int err;
1259#define VISIT(SLOT) \
1260 if (SLOT) { \
1261 err = visit((PyObject *)(SLOT), arg); \
1262 if (err) \
1263 return err; \
1264 }
1265 VISIT(self->dialect);
1266 VISIT(self->writeline);
1267 return 0;
1268}
1269
1270static int
1271Writer_clear(WriterObj *self)
1272{
1273 Py_XDECREF(self->dialect);
1274 Py_XDECREF(self->writeline);
1275 self->dialect = NULL;
1276 self->writeline = NULL;
1277 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001278}
1279
1280PyDoc_STRVAR(Writer_Type_doc,
1281"CSV writer\n"
1282"\n"
1283"Writer objects are responsible for generating tabular data\n"
1284"in CSV format from sequence input.\n"
1285);
1286
1287static PyTypeObject Writer_Type = {
1288 PyObject_HEAD_INIT(NULL)
1289 0, /*ob_size*/
1290 "_csv.writer", /*tp_name*/
1291 sizeof(WriterObj), /*tp_basicsize*/
1292 0, /*tp_itemsize*/
1293 /* methods */
1294 (destructor)Writer_dealloc, /*tp_dealloc*/
1295 (printfunc)0, /*tp_print*/
1296 (getattrfunc)0, /*tp_getattr*/
1297 (setattrfunc)0, /*tp_setattr*/
1298 (cmpfunc)0, /*tp_compare*/
1299 (reprfunc)0, /*tp_repr*/
1300 0, /*tp_as_number*/
1301 0, /*tp_as_sequence*/
1302 0, /*tp_as_mapping*/
1303 (hashfunc)0, /*tp_hash*/
1304 (ternaryfunc)0, /*tp_call*/
1305 (reprfunc)0, /*tp_str*/
1306 0, /*tp_getattro*/
1307 0, /*tp_setattro*/
1308 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001309 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1310 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001311 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001312 (traverseproc)Writer_traverse, /*tp_traverse*/
1313 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001314 0, /*tp_richcompare*/
1315 0, /*tp_weaklistoffset*/
1316 (getiterfunc)0, /*tp_iter*/
1317 (getiterfunc)0, /*tp_iternext*/
1318 Writer_methods, /*tp_methods*/
1319 Writer_memberlist, /*tp_members*/
1320 0, /*tp_getset*/
1321};
1322
1323static PyObject *
1324csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1325{
Andrew McNamara91b97462005-01-11 01:07:23 +00001326 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001327 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001328
1329 if (!self)
1330 return NULL;
1331
1332 self->dialect = NULL;
1333 self->writeline = NULL;
1334
1335 self->rec = NULL;
1336 self->rec_size = 0;
1337 self->rec_len = 0;
1338 self->num_fields = 0;
1339
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001340 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001341 Py_DECREF(self);
1342 return NULL;
1343 }
1344 self->writeline = PyObject_GetAttrString(output_file, "write");
1345 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1346 PyErr_SetString(PyExc_TypeError,
1347 "argument 1 must be an instance with a write method");
1348 Py_DECREF(self);
1349 return NULL;
1350 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001351 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001352 if (self->dialect == NULL) {
1353 Py_DECREF(self);
1354 return NULL;
1355 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001356 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001357 return (PyObject *)self;
1358}
1359
1360/*
1361 * DIALECT REGISTRY
1362 */
1363static PyObject *
1364csv_list_dialects(PyObject *module, PyObject *args)
1365{
1366 return PyDict_Keys(dialects);
1367}
1368
1369static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001370csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001371{
Andrew McNamara86625972005-01-11 01:28:33 +00001372 PyObject *name_obj, *dialect_obj = NULL;
1373 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001374
Andrew McNamara86625972005-01-11 01:28:33 +00001375 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001376 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001377 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001378 PyErr_SetString(PyExc_TypeError,
1379 "dialect name must be a string or unicode");
1380 return NULL;
1381 }
Andrew McNamara86625972005-01-11 01:28:33 +00001382 dialect = _call_dialect(dialect_obj, kwargs);
1383 if (dialect == NULL)
1384 return NULL;
1385 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1386 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001387 return NULL;
1388 }
Andrew McNamara86625972005-01-11 01:28:33 +00001389 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001390 Py_INCREF(Py_None);
1391 return Py_None;
1392}
1393
1394static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001395csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001396{
Skip Montanarob4a04172003-03-20 23:29:12 +00001397 if (PyDict_DelItem(dialects, name_obj) < 0)
1398 return PyErr_Format(error_obj, "unknown dialect");
1399 Py_INCREF(Py_None);
1400 return Py_None;
1401}
1402
1403static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001404csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001405{
Skip Montanarob4a04172003-03-20 23:29:12 +00001406 return get_dialect_from_registry(name_obj);
1407}
1408
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001409static PyObject *
1410csv_set_field_limit(PyObject *module, PyObject *args)
1411{
1412 PyObject *new_limit = NULL;
1413 long old_limit = field_limit;
1414
1415 if (!PyArg_UnpackTuple(args, "set_field_limit", 0, 1, &new_limit))
1416 return NULL;
1417 if (new_limit != NULL) {
1418 if (!PyInt_Check(new_limit)) {
1419 PyErr_Format(PyExc_TypeError,
1420 "limit must be an integer");
1421 return NULL;
1422 }
1423 field_limit = PyInt_AsLong(new_limit);
1424 }
1425 return PyInt_FromLong(old_limit);
1426}
1427
Skip Montanarob4a04172003-03-20 23:29:12 +00001428/*
1429 * MODULE
1430 */
1431
/* Docstring of the _csv module itself; init_csv() passes it to
 * Py_InitModule3().  It summarises dialects and the individual dialect
 * settings.
 */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1489
/* Docstring attached to the module-level "reader" entry in csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001504
1505PyDoc_STRVAR(csv_writer_doc,
1506" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1507" [optional keyword args])\n"
1508" for row in csv_writer:\n"
1509" csv_writer.writerow(row)\n"
1510"\n"
1511" [or]\n"
1512"\n"
1513" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1514" [optional keyword args])\n"
1515" csv_writer.writerows(rows)\n"
1516"\n"
1517"The \"fileobj\" argument can be any object that supports the file API.\n");
1518
1519PyDoc_STRVAR(csv_list_dialects_doc,
1520"Return a list of all know dialect names.\n"
1521" names = csv.list_dialects()");
1522
1523PyDoc_STRVAR(csv_get_dialect_doc,
1524"Return the dialect instance associated with name.\n"
1525" dialect = csv.get_dialect(name)");
1526
1527PyDoc_STRVAR(csv_register_dialect_doc,
1528"Create a mapping from a string name to a dialect class.\n"
1529" dialect = csv.register_dialect(name, dialect)");
1530
1531PyDoc_STRVAR(csv_unregister_dialect_doc,
1532"Delete the name/dialect mapping associated with a string name.\n"
1533" csv.unregister_dialect(name)");
1534
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001535PyDoc_STRVAR(csv_set_field_limit_doc,
1536"Sets an upper limit on parsed fields.\n"
1537" csv.set_field_limit([limit])\n"
1538"\n"
1539"Returns old limit. If limit is not given, no new limit is set and\n"
1540"the old limit is returned");
1541
Skip Montanarob4a04172003-03-20 23:29:12 +00001542static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001543 { "reader", (PyCFunction)csv_reader,
1544 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1545 { "writer", (PyCFunction)csv_writer,
1546 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1547 { "list_dialects", (PyCFunction)csv_list_dialects,
1548 METH_NOARGS, csv_list_dialects_doc},
1549 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001550 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001551 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1552 METH_O, csv_unregister_dialect_doc},
1553 { "get_dialect", (PyCFunction)csv_get_dialect,
1554 METH_O, csv_get_dialect_doc},
1555 { "set_field_limit", (PyCFunction)csv_set_field_limit,
1556 METH_VARARGS, csv_set_field_limit_doc},
1557 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001558};
1559
1560PyMODINIT_FUNC
1561init_csv(void)
1562{
1563 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001564 StyleDesc *style;
1565
1566 if (PyType_Ready(&Dialect_Type) < 0)
1567 return;
1568
1569 if (PyType_Ready(&Reader_Type) < 0)
1570 return;
1571
1572 if (PyType_Ready(&Writer_Type) < 0)
1573 return;
1574
1575 /* Create the module and add the functions */
1576 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1577 if (module == NULL)
1578 return;
1579
1580 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001581 if (PyModule_AddStringConstant(module, "__version__",
1582 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001583 return;
1584
1585 /* Add _dialects dictionary */
1586 dialects = PyDict_New();
1587 if (dialects == NULL)
1588 return;
1589 if (PyModule_AddObject(module, "_dialects", dialects))
1590 return;
1591
1592 /* Add quote styles into dictionary */
1593 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001594 if (PyModule_AddIntConstant(module, style->name,
1595 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001596 return;
1597 }
1598
1599 /* Add the Dialect type */
1600 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1601 return;
1602
1603 /* Add the CSV exception object to the module. */
1604 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1605 if (error_obj == NULL)
1606 return;
1607 PyModule_AddObject(module, "Error", error_obj);
1608}