blob: f9d39ae70f3fe8162c8415c89ed161388062e354 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */

/* Inline-documentation helpers, supplied only when Python.h lacks them. */
#ifndef PyDoc_STRVAR
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

/* Return type / linkage of the module init function. */
#ifndef PyMODINIT_FUNC
#ifdef __cplusplus
#define PyMODINIT_FUNC extern "C" void
#else
#define PyMODINIT_FUNC void
#endif
#endif /* ifndef PyMODINIT_FUNC */

/* Drop a reference and NULL the slot; the slot is cleared before the
 * decref so it never holds a pointer to an object being released. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *clear_tmp = (PyObject *)(op);     \
            (op) = NULL;                                \
            Py_DECREF(clear_tmp);                       \
        }                                               \
    } while (0)
#endif /* ifndef Py_CLEAR */

/* Visit one member from a tp_traverse function.  Relies on the enclosing
 * function having parameters named `visit` and `arg`. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                        \
    do {                                                    \
        if (op) {                                           \
            int visit_rc = visit((PyObject *)(op), arg);    \
            if (visit_rc)                                   \
                return visit_rc;                            \
        }                                                   \
    } while (0)
#endif /* ifndef Py_VISIT */

/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
65 PyObject_TypeCheck(o, &PyBaseString_Type)
66
Skip Montanarob4a04172003-03-20 23:29:12 +000067static PyObject *error_obj; /* CSV exception */
68static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;

/* Quoting policies; the numeric values are what Dialect.quoting holds. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of known quoting policies, terminated by an entry with a NULL name. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
93
94typedef struct {
95 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000096
Skip Montanarob4a04172003-03-20 23:29:12 +000097 int doublequote; /* is " represented by ""? */
Guido van Rossum46264582007-08-06 19:32:18 +000098 Py_UNICODE delimiter; /* field separator */
99 Py_UNICODE quotechar; /* quote character */
100 Py_UNICODE escapechar; /* escape character */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000103 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104
105 int strict; /* raise exception on bad CSV */
106} DialectObj;
107
Neal Norwitz227b5332006-03-22 09:28:35 +0000108static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
111 PyObject_HEAD
112
113 PyObject *input_iter; /* iterate over this for input lines */
114
115 DialectObj *dialect; /* parsing dialect */
116
117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
Guido van Rossum46264582007-08-06 19:32:18 +0000119 Py_UNICODE *field; /* build current field in here */
Skip Montanarob4a04172003-03-20 23:29:12 +0000120 int field_size; /* size of allocated buffer */
Guido van Rossum46264582007-08-06 19:32:18 +0000121 Py_ssize_t field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000122 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
Neal Norwitz227b5332006-03-22 09:28:35 +0000126static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000127
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000128#define ReaderObject_Check(v) (Py_Type(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
131 PyObject_HEAD
132
133 PyObject *writeline; /* write output lines to this file */
134
135 DialectObj *dialect; /* parsing dialect */
136
Guido van Rossum46264582007-08-06 19:32:18 +0000137 Py_UNICODE *rec; /* buffer for parser.join */
Skip Montanarob4a04172003-03-20 23:29:12 +0000138 int rec_size; /* size of allocated record */
Guido van Rossum46264582007-08-06 19:32:18 +0000139 Py_ssize_t rec_len; /* length of record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000140 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
Neal Norwitz227b5332006-03-22 09:28:35 +0000143static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000144
/*
 * DIALECT class
 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
152 PyObject *dialect_obj;
153
154 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 return dialect_obj;
162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
167 Py_XINCREF(str);
168 return str;
169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
Guido van Rossum46264582007-08-06 19:32:18 +0000179 return PyUnicode_DecodeASCII((char*)&c, 1, NULL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
185 return get_string(self->lineterminator);
186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
191 return get_nullchar_as_None(self->escapechar);
192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
203 return PyInt_FromLong(self->quoting);
204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000209 if (src == NULL)
210 *target = dflt;
211 else
212 *target = PyObject_IsTrue(src);
213 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000214}
215
Andrew McNamara1196cf12005-01-07 04:42:45 +0000216static int
217_set_int(const char *name, int *target, PyObject *src, int dflt)
218{
219 if (src == NULL)
220 *target = dflt;
221 else {
Guido van Rossumddefaf32007-01-14 03:31:43 +0000222 if (!PyInt_CheckExact(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000223 PyErr_Format(PyExc_TypeError,
224 "\"%s\" must be an integer", name);
225 return -1;
226 }
227 *target = PyInt_AsLong(src);
228 }
229 return 0;
230}
231
232static int
Guido van Rossum46264582007-08-06 19:32:18 +0000233_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000234{
235 if (src == NULL)
236 *target = dflt;
237 else {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000238 *target = '\0';
239 if (src != Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +0000240 Py_UNICODE *buf;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000241 Py_ssize_t len;
Guido van Rossum46264582007-08-06 19:32:18 +0000242 buf = PyUnicode_AsUnicode(src);
243 len = PyUnicode_GetSize(src);
244 if (buf == NULL || len > 1) {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000245 PyErr_Format(PyExc_TypeError,
246 "\"%s\" must be an 1-character string",
Guido van Rossum46264582007-08-06 19:32:18 +0000247 name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000248 return -1;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000249 }
250 if (len > 0)
251 *target = buf[0];
Andrew McNamara1196cf12005-01-07 04:42:45 +0000252 }
253 }
254 return 0;
255}
256
257static int
258_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259{
260 if (src == NULL)
Guido van Rossum46264582007-08-06 19:32:18 +0000261 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000262 else {
263 if (src == Py_None)
264 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000265 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000266 PyErr_Format(PyExc_TypeError,
267 "\"%s\" must be an string", name);
268 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000269 }
270 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000271 Py_XDECREF(*target);
272 Py_INCREF(src);
273 *target = src;
274 }
275 }
276 return 0;
277}
278
279static int
280dialect_check_quoting(int quoting)
281{
282 StyleDesc *qs = quote_styles;
283
284 for (qs = quote_styles; qs->name; qs++) {
285 if (qs->style == quoting)
286 return 0;
287 }
288 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
289 return -1;
290}
Skip Montanarob4a04172003-03-20 23:29:12 +0000291
292#define D_OFF(x) offsetof(DialectObj, x)
293
294static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
296 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
297 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
298 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000299 { NULL }
300};
301
302static PyGetSetDef Dialect_getsetlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000303 { "escapechar", (getter)Dialect_get_escapechar},
304 { "lineterminator", (getter)Dialect_get_lineterminator},
305 { "quotechar", (getter)Dialect_get_quotechar},
306 { "quoting", (getter)Dialect_get_quoting},
307 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000308};
309
310static void
311Dialect_dealloc(DialectObj *self)
312{
313 Py_XDECREF(self->lineterminator);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000314 Py_Type(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000315}
316
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000317static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000318 "dialect",
319 "delimiter",
320 "doublequote",
321 "escapechar",
322 "lineterminator",
323 "quotechar",
324 "quoting",
325 "skipinitialspace",
326 "strict",
327 NULL
328};
329
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000330static PyObject *
331dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000332{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000333 DialectObj *self;
334 PyObject *ret = NULL;
335 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000336 PyObject *delimiter = NULL;
337 PyObject *doublequote = NULL;
338 PyObject *escapechar = NULL;
339 PyObject *lineterminator = NULL;
340 PyObject *quotechar = NULL;
341 PyObject *quoting = NULL;
342 PyObject *skipinitialspace = NULL;
343 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000344
Andrew McNamara1196cf12005-01-07 04:42:45 +0000345 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
346 "|OOOOOOOOO", dialect_kws,
347 &dialect,
348 &delimiter,
349 &doublequote,
350 &escapechar,
351 &lineterminator,
352 &quotechar,
353 &quoting,
354 &skipinitialspace,
355 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000356 return NULL;
357
358 if (dialect != NULL) {
359 if (IS_BASESTRING(dialect)) {
360 dialect = get_dialect_from_registry(dialect);
361 if (dialect == NULL)
362 return NULL;
363 }
364 else
365 Py_INCREF(dialect);
366 /* Can we reuse this instance? */
367 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
368 delimiter == 0 &&
369 doublequote == 0 &&
370 escapechar == 0 &&
371 lineterminator == 0 &&
372 quotechar == 0 &&
373 quoting == 0 &&
374 skipinitialspace == 0 &&
375 strict == 0)
376 return dialect;
377 }
378
379 self = (DialectObj *)type->tp_alloc(type, 0);
380 if (self == NULL) {
381 Py_XDECREF(dialect);
382 return NULL;
383 }
384 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000385
Andrew McNamara1196cf12005-01-07 04:42:45 +0000386 Py_XINCREF(delimiter);
387 Py_XINCREF(doublequote);
388 Py_XINCREF(escapechar);
389 Py_XINCREF(lineterminator);
390 Py_XINCREF(quotechar);
391 Py_XINCREF(quoting);
392 Py_XINCREF(skipinitialspace);
393 Py_XINCREF(strict);
394 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000395#define DIALECT_GETATTR(v, n) \
396 if (v == NULL) \
397 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000398 DIALECT_GETATTR(delimiter, "delimiter");
399 DIALECT_GETATTR(doublequote, "doublequote");
400 DIALECT_GETATTR(escapechar, "escapechar");
401 DIALECT_GETATTR(lineterminator, "lineterminator");
402 DIALECT_GETATTR(quotechar, "quotechar");
403 DIALECT_GETATTR(quoting, "quoting");
404 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
405 DIALECT_GETATTR(strict, "strict");
406 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000407 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000408
Andrew McNamara1196cf12005-01-07 04:42:45 +0000409 /* check types and convert to C values */
410#define DIASET(meth, name, target, src, dflt) \
411 if (meth(name, target, src, dflt)) \
412 goto err
413 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
414 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
415 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
416 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
417 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
418 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
419 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
420 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000421
Andrew McNamara1196cf12005-01-07 04:42:45 +0000422 /* validate options */
423 if (dialect_check_quoting(self->quoting))
424 goto err;
425 if (self->delimiter == 0) {
426 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
427 goto err;
428 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000429 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000430 self->quoting = QUOTE_NONE;
431 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
432 PyErr_SetString(PyExc_TypeError,
433 "quotechar must be set if quoting enabled");
434 goto err;
435 }
436 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000437 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000438 goto err;
439 }
440
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000441 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000442 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000443err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000444 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000445 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000446 Py_XDECREF(delimiter);
447 Py_XDECREF(doublequote);
448 Py_XDECREF(escapechar);
449 Py_XDECREF(lineterminator);
450 Py_XDECREF(quotechar);
451 Py_XDECREF(quoting);
452 Py_XDECREF(skipinitialspace);
453 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000454 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000455}
456
457
458PyDoc_STRVAR(Dialect_Type_doc,
459"CSV dialect\n"
460"\n"
461"The Dialect type records CSV parsing and generation options.\n");
462
463static PyTypeObject Dialect_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000464 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000465 "_csv.Dialect", /* tp_name */
466 sizeof(DialectObj), /* tp_basicsize */
467 0, /* tp_itemsize */
468 /* methods */
469 (destructor)Dialect_dealloc, /* tp_dealloc */
470 (printfunc)0, /* tp_print */
471 (getattrfunc)0, /* tp_getattr */
472 (setattrfunc)0, /* tp_setattr */
473 (cmpfunc)0, /* tp_compare */
474 (reprfunc)0, /* tp_repr */
475 0, /* tp_as_number */
476 0, /* tp_as_sequence */
477 0, /* tp_as_mapping */
478 (hashfunc)0, /* tp_hash */
479 (ternaryfunc)0, /* tp_call */
480 (reprfunc)0, /* tp_str */
481 0, /* tp_getattro */
482 0, /* tp_setattro */
483 0, /* tp_as_buffer */
484 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
485 Dialect_Type_doc, /* tp_doc */
486 0, /* tp_traverse */
487 0, /* tp_clear */
488 0, /* tp_richcompare */
489 0, /* tp_weaklistoffset */
490 0, /* tp_iter */
491 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000492 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000493 Dialect_memberlist, /* tp_members */
494 Dialect_getsetlist, /* tp_getset */
495 0, /* tp_base */
496 0, /* tp_dict */
497 0, /* tp_descr_get */
498 0, /* tp_descr_set */
499 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000500 0, /* tp_init */
501 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000502 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000503 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000504};
505
Andrew McNamara91b97462005-01-11 01:07:23 +0000506/*
507 * Return an instance of the dialect type, given a Python instance or kwarg
508 * description of the dialect
509 */
510static PyObject *
511_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
512{
513 PyObject *ctor_args;
514 PyObject *dialect;
515
516 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
517 if (ctor_args == NULL)
518 return NULL;
519 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
520 Py_DECREF(ctor_args);
521 return dialect;
522}
523
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000527static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000528parse_save_field(ReaderObj *self)
529{
530 PyObject *field;
531
Guido van Rossum46264582007-08-06 19:32:18 +0000532 field = PyUnicode_FromUnicode(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000533 if (field == NULL)
534 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000535 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000536 if (self->numeric_field) {
537 PyObject *tmp;
538
539 self->numeric_field = 0;
540 tmp = PyNumber_Float(field);
541 if (tmp == NULL) {
542 Py_DECREF(field);
543 return -1;
544 }
545 Py_DECREF(field);
546 field = tmp;
547 }
548 PyList_Append(self->fields, field);
549 Py_DECREF(field);
550 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000551}
552
553static int
554parse_grow_buff(ReaderObj *self)
555{
556 if (self->field_size == 0) {
557 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000558 if (self->field != NULL)
559 PyMem_Free(self->field);
Guido van Rossum46264582007-08-06 19:32:18 +0000560 self->field = PyMem_New(Py_UNICODE, self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000561 }
562 else {
563 self->field_size *= 2;
Guido van Rossum46264582007-08-06 19:32:18 +0000564 self->field = PyMem_Resize(self->field, Py_UNICODE,
565 self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000566 }
567 if (self->field == NULL) {
568 PyErr_NoMemory();
569 return 0;
570 }
571 return 1;
572}
573
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000574static int
Guido van Rossum46264582007-08-06 19:32:18 +0000575parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000576{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000577 if (self->field_len >= field_limit) {
578 PyErr_Format(error_obj, "field larger than field limit (%ld)",
579 field_limit);
580 return -1;
581 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000582 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000583 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000584 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000585 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000586}
587
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000588static int
Guido van Rossum46264582007-08-06 19:32:18 +0000589parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000590{
591 DialectObj *dialect = self->dialect;
592
593 switch (self->state) {
594 case START_RECORD:
595 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000596 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000597 /* empty line - return [] */
598 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000599 else if (c == '\n' || c == '\r') {
600 self->state = EAT_CRNL;
601 break;
602 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000603 /* normal character - handle as START_FIELD */
604 self->state = START_FIELD;
605 /* fallthru */
606 case START_FIELD:
607 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000608 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000609 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000610 if (parse_save_field(self) < 0)
611 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000612 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000613 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000614 else if (c == dialect->quotechar &&
615 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000616 /* start quoted field */
617 self->state = IN_QUOTED_FIELD;
618 }
619 else if (c == dialect->escapechar) {
620 /* possible escaped character */
621 self->state = ESCAPED_CHAR;
622 }
623 else if (c == ' ' && dialect->skipinitialspace)
624 /* ignore space at start of field */
625 ;
626 else if (c == dialect->delimiter) {
627 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000628 if (parse_save_field(self) < 0)
629 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000630 }
631 else {
632 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000633 if (dialect->quoting == QUOTE_NONNUMERIC)
634 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000635 if (parse_add_char(self, c) < 0)
636 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000637 self->state = IN_FIELD;
638 }
639 break;
640
641 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000642 if (c == '\0')
643 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000644 if (parse_add_char(self, c) < 0)
645 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000646 self->state = IN_FIELD;
647 break;
648
649 case IN_FIELD:
650 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000651 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000652 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000653 if (parse_save_field(self) < 0)
654 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000655 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000656 }
657 else if (c == dialect->escapechar) {
658 /* possible escaped character */
659 self->state = ESCAPED_CHAR;
660 }
661 else if (c == dialect->delimiter) {
662 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000663 if (parse_save_field(self) < 0)
664 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000665 self->state = START_FIELD;
666 }
667 else {
668 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000669 if (parse_add_char(self, c) < 0)
670 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000671 }
672 break;
673
674 case IN_QUOTED_FIELD:
675 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000676 if (c == '\0')
677 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000678 else if (c == dialect->escapechar) {
679 /* Possible escape character */
680 self->state = ESCAPE_IN_QUOTED_FIELD;
681 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000682 else if (c == dialect->quotechar &&
683 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000684 if (dialect->doublequote) {
685 /* doublequote; " represented by "" */
686 self->state = QUOTE_IN_QUOTED_FIELD;
687 }
688 else {
689 /* end of quote part of field */
690 self->state = IN_FIELD;
691 }
692 }
693 else {
694 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000695 if (parse_add_char(self, c) < 0)
696 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000697 }
698 break;
699
700 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000701 if (c == '\0')
702 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000703 if (parse_add_char(self, c) < 0)
704 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000705 self->state = IN_QUOTED_FIELD;
706 break;
707
708 case QUOTE_IN_QUOTED_FIELD:
709 /* doublequote - seen a quote in an quoted field */
710 if (dialect->quoting != QUOTE_NONE &&
711 c == dialect->quotechar) {
712 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000713 if (parse_add_char(self, c) < 0)
714 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000715 self->state = IN_QUOTED_FIELD;
716 }
717 else if (c == dialect->delimiter) {
718 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000719 if (parse_save_field(self) < 0)
720 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000721 self->state = START_FIELD;
722 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000723 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000724 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000725 if (parse_save_field(self) < 0)
726 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000727 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000728 }
729 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000730 if (parse_add_char(self, c) < 0)
731 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000732 self->state = IN_FIELD;
733 }
734 else {
735 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000736 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000737 dialect->delimiter,
738 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000739 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000740 }
741 break;
742
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000743 case EAT_CRNL:
744 if (c == '\n' || c == '\r')
745 ;
746 else if (c == '\0')
747 self->state = START_RECORD;
748 else {
749 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
750 return -1;
751 }
752 break;
753
Skip Montanarob4a04172003-03-20 23:29:12 +0000754 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000755 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000756}
757
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000758static int
759parse_reset(ReaderObj *self)
760{
761 Py_XDECREF(self->fields);
762 self->fields = PyList_New(0);
763 if (self->fields == NULL)
764 return -1;
765 self->field_len = 0;
766 self->state = START_RECORD;
767 self->numeric_field = 0;
768 return 0;
769}
Skip Montanarob4a04172003-03-20 23:29:12 +0000770
771static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000772Reader_iternext(ReaderObj *self)
773{
774 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000775 PyObject *fields = NULL;
Guido van Rossum46264582007-08-06 19:32:18 +0000776 Py_UNICODE *line, c;
777 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000778
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000779 if (parse_reset(self) < 0)
780 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000781 do {
782 lineobj = PyIter_Next(self->input_iter);
783 if (lineobj == NULL) {
784 /* End of input OR exception */
785 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000786 PyErr_Format(error_obj,
787 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000788 return NULL;
789 }
Guido van Rossum46264582007-08-06 19:32:18 +0000790 ++self->line_num;
791 line = PyUnicode_AsUnicode(lineobj);
792 linelen = PyUnicode_GetSize(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000793 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000794 Py_DECREF(lineobj);
795 return NULL;
796 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000797 while (linelen--) {
798 c = *line++;
799 if (c == '\0') {
800 Py_DECREF(lineobj);
801 PyErr_Format(error_obj,
802 "line contains NULL byte");
803 goto err;
804 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000805 if (parse_process_char(self, c) < 0) {
806 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000807 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000808 }
809 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000810 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000811 if (parse_process_char(self, 0) < 0)
812 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000813 } while (self->state != START_RECORD);
814
815 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000816 self->fields = NULL;
817err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000818 return fields;
819}
820
821static void
822Reader_dealloc(ReaderObj *self)
823{
Andrew McNamara77ead872005-01-10 02:09:41 +0000824 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000825 Py_XDECREF(self->dialect);
826 Py_XDECREF(self->input_iter);
827 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000828 if (self->field != NULL)
829 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000830 PyObject_GC_Del(self);
831}
832
833static int
834Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
835{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000836 Py_VISIT(self->dialect);
837 Py_VISIT(self->input_iter);
838 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000839 return 0;
840}
841
842static int
843Reader_clear(ReaderObj *self)
844{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000845 Py_CLEAR(self->dialect);
846 Py_CLEAR(self->input_iter);
847 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000848 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000849}
850
851PyDoc_STRVAR(Reader_Type_doc,
852"CSV reader\n"
853"\n"
854"Reader objects are responsible for reading and parsing tabular data\n"
855"in CSV format.\n"
856);
857
858static struct PyMethodDef Reader_methods[] = {
859 { NULL, NULL }
860};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000861#define R_OFF(x) offsetof(ReaderObj, x)
862
863static struct PyMemberDef Reader_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +0000864 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
865 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000866 { NULL }
867};
868
Skip Montanarob4a04172003-03-20 23:29:12 +0000869
870static PyTypeObject Reader_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000871 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000872 "_csv.reader", /*tp_name*/
873 sizeof(ReaderObj), /*tp_basicsize*/
874 0, /*tp_itemsize*/
875 /* methods */
876 (destructor)Reader_dealloc, /*tp_dealloc*/
877 (printfunc)0, /*tp_print*/
878 (getattrfunc)0, /*tp_getattr*/
879 (setattrfunc)0, /*tp_setattr*/
880 (cmpfunc)0, /*tp_compare*/
881 (reprfunc)0, /*tp_repr*/
882 0, /*tp_as_number*/
883 0, /*tp_as_sequence*/
884 0, /*tp_as_mapping*/
885 (hashfunc)0, /*tp_hash*/
886 (ternaryfunc)0, /*tp_call*/
887 (reprfunc)0, /*tp_str*/
888 0, /*tp_getattro*/
889 0, /*tp_setattro*/
890 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000891 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
892 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000893 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000894 (traverseproc)Reader_traverse, /*tp_traverse*/
895 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000896 0, /*tp_richcompare*/
897 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000898 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000899 (getiterfunc)Reader_iternext, /*tp_iternext*/
900 Reader_methods, /*tp_methods*/
901 Reader_memberlist, /*tp_members*/
902 0, /*tp_getset*/
903
904};
905
906static PyObject *
907csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
908{
Andrew McNamara91b97462005-01-11 01:07:23 +0000909 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000910 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000911
912 if (!self)
913 return NULL;
914
915 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000916 self->fields = NULL;
917 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000918 self->field = NULL;
919 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000920 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000921
922 if (parse_reset(self) < 0) {
923 Py_DECREF(self);
924 return NULL;
925 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000926
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000927 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000928 Py_DECREF(self);
929 return NULL;
930 }
931 self->input_iter = PyObject_GetIter(iterator);
932 if (self->input_iter == NULL) {
933 PyErr_SetString(PyExc_TypeError,
934 "argument 1 must be an iterator");
935 Py_DECREF(self);
936 return NULL;
937 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000938 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000939 if (self->dialect == NULL) {
940 Py_DECREF(self);
941 return NULL;
942 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000943
Andrew McNamara77ead872005-01-10 02:09:41 +0000944 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000945 return (PyObject *)self;
946}
947
948/*
949 * WRITER
950 */
951/* ---------------------------------------------------------------- */
952static void
953join_reset(WriterObj *self)
954{
955 self->rec_len = 0;
956 self->num_fields = 0;
957}
958
959#define MEM_INCR 32768
960
961/* Calculate new record length or append field to record. Return new
962 * record length.
963 */
964static int
Guido van Rossum46264582007-08-06 19:32:18 +0000965join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
966 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000967{
968 DialectObj *dialect = self->dialect;
Guido van Rossum46264582007-08-06 19:32:18 +0000969 int i;
970 int rec_len;
971 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000972
973#define ADDCH(c) \
974 do {\
975 if (copy_phase) \
976 self->rec[rec_len] = c;\
977 rec_len++;\
978 } while(0)
979
Guido van Rossum46264582007-08-06 19:32:18 +0000980 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000981 if (lineterm == NULL)
982 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000983
984 rec_len = self->rec_len;
985
Andrew McNamarac89f2842005-01-12 07:44:42 +0000986 /* If this is not the first field we need a field separator */
987 if (self->num_fields > 0)
988 ADDCH(dialect->delimiter);
989
990 /* Handle preceding quote */
991 if (copy_phase && *quoted)
992 ADDCH(dialect->quotechar);
993
994 /* Copy/count field data */
Guido van Rossum46264582007-08-06 19:32:18 +0000995 /* If field is null just pass over */
996 for (i = 0; field; i++) {
997 Py_UNICODE c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +0000998 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000999
1000 if (c == '\0')
1001 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001002
Andrew McNamarac89f2842005-01-12 07:44:42 +00001003 if (c == dialect->delimiter ||
1004 c == dialect->escapechar ||
Guido van Rossum46264582007-08-06 19:32:18 +00001005 c == dialect->quotechar ||
1006 Py_UNICODE_strchr(lineterm, c)) {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001007 if (dialect->quoting == QUOTE_NONE)
1008 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001009 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001010 if (c == dialect->quotechar) {
1011 if (dialect->doublequote)
1012 ADDCH(dialect->quotechar);
1013 else
1014 want_escape = 1;
1015 }
1016 if (!want_escape)
1017 *quoted = 1;
1018 }
1019 if (want_escape) {
1020 if (!dialect->escapechar) {
1021 PyErr_Format(error_obj,
1022 "need to escape, but no escapechar set");
1023 return -1;
1024 }
1025 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001026 }
1027 }
1028 /* Copy field character into record buffer.
1029 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001030 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001031 }
1032
1033 /* If field is empty check if it needs to be quoted.
1034 */
1035 if (i == 0 && quote_empty) {
1036 if (dialect->quoting == QUOTE_NONE) {
1037 PyErr_Format(error_obj,
Guido van Rossum46264582007-08-06 19:32:18 +00001038 "single empty field record must be quoted");
Skip Montanarob4a04172003-03-20 23:29:12 +00001039 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001040 }
1041 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001042 *quoted = 1;
1043 }
1044
Skip Montanarob4a04172003-03-20 23:29:12 +00001045 if (*quoted) {
1046 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001047 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001048 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001049 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001050 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001051 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001052#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001053}
1054
1055static int
1056join_check_rec_size(WriterObj *self, int rec_len)
1057{
1058 if (rec_len > self->rec_size) {
1059 if (self->rec_size == 0) {
1060 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001061 if (self->rec != NULL)
1062 PyMem_Free(self->rec);
Guido van Rossum46264582007-08-06 19:32:18 +00001063 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001064 }
1065 else {
Guido van Rossum46264582007-08-06 19:32:18 +00001066 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001067
1068 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Guido van Rossum46264582007-08-06 19:32:18 +00001069 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1070 self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001071 if (self->rec == NULL)
1072 PyMem_Free(old_rec);
1073 }
1074 if (self->rec == NULL) {
1075 PyErr_NoMemory();
1076 return 0;
1077 }
1078 }
1079 return 1;
1080}
1081
1082static int
Guido van Rossum46264582007-08-06 19:32:18 +00001083join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001084{
1085 int rec_len;
1086
1087 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1088 if (rec_len < 0)
1089 return 0;
1090
1091 /* grow record buffer if necessary */
1092 if (!join_check_rec_size(self, rec_len))
1093 return 0;
1094
1095 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1096 self->num_fields++;
1097
1098 return 1;
1099}
1100
1101static int
1102join_append_lineterminator(WriterObj *self)
1103{
1104 int terminator_len;
Guido van Rossum46264582007-08-06 19:32:18 +00001105 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001106
Guido van Rossum46264582007-08-06 19:32:18 +00001107 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001108 if (terminator_len == -1)
1109 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001110
1111 /* grow record buffer if necessary */
1112 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1113 return 0;
1114
Guido van Rossum46264582007-08-06 19:32:18 +00001115 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001116 if (terminator == NULL)
1117 return 0;
Guido van Rossum46264582007-08-06 19:32:18 +00001118 memmove(self->rec + self->rec_len, terminator,
1119 sizeof(Py_UNICODE)*terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001120 self->rec_len += terminator_len;
1121
1122 return 1;
1123}
1124
1125PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001126"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001127"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001128"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001129"elements will be converted to string.");
1130
1131static PyObject *
1132csv_writerow(WriterObj *self, PyObject *seq)
1133{
1134 DialectObj *dialect = self->dialect;
1135 int len, i;
1136
1137 if (!PySequence_Check(seq))
1138 return PyErr_Format(error_obj, "sequence expected");
1139
1140 len = PySequence_Length(seq);
1141 if (len < 0)
1142 return NULL;
1143
1144 /* Join all fields in internal buffer.
1145 */
1146 join_reset(self);
1147 for (i = 0; i < len; i++) {
1148 PyObject *field;
1149 int append_ok;
1150 int quoted;
1151
1152 field = PySequence_GetItem(seq, i);
1153 if (field == NULL)
1154 return NULL;
1155
Andrew McNamarac89f2842005-01-12 07:44:42 +00001156 switch (dialect->quoting) {
1157 case QUOTE_NONNUMERIC:
1158 quoted = !PyNumber_Check(field);
1159 break;
1160 case QUOTE_ALL:
1161 quoted = 1;
1162 break;
1163 default:
1164 quoted = 0;
1165 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166 }
1167
Guido van Rossum46264582007-08-06 19:32:18 +00001168 if (PyUnicode_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001169 append_ok = join_append(self,
Guido van Rossum46264582007-08-06 19:32:18 +00001170 PyUnicode_AS_UNICODE(field),
1171 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001172 Py_DECREF(field);
1173 }
1174 else if (field == Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +00001175 append_ok = join_append(self, NULL,
1176 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001177 Py_DECREF(field);
1178 }
1179 else {
1180 PyObject *str;
1181
Guido van Rossum46264582007-08-06 19:32:18 +00001182 str = PyObject_Unicode(field);
1183 Py_DECREF(field);
Skip Montanarob4a04172003-03-20 23:29:12 +00001184 if (str == NULL)
1185 return NULL;
Guido van Rossum46264582007-08-06 19:32:18 +00001186 append_ok = join_append(self,
1187 PyUnicode_AS_UNICODE(str),
1188 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001189 Py_DECREF(str);
1190 }
1191 if (!append_ok)
1192 return NULL;
1193 }
1194
1195 /* Add line terminator.
1196 */
1197 if (!join_append_lineterminator(self))
1198 return 0;
1199
Guido van Rossum46264582007-08-06 19:32:18 +00001200 return PyObject_CallFunction(self->writeline,
1201 "(u#)", self->rec,
1202 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001203}
1204
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001205PyDoc_STRVAR(csv_writerows_doc,
1206"writerows(sequence of sequences)\n"
1207"\n"
1208"Construct and write a series of sequences to a csv file. Non-string\n"
1209"elements will be converted to string.");
1210
Skip Montanarob4a04172003-03-20 23:29:12 +00001211static PyObject *
1212csv_writerows(WriterObj *self, PyObject *seqseq)
1213{
1214 PyObject *row_iter, *row_obj, *result;
1215
1216 row_iter = PyObject_GetIter(seqseq);
1217 if (row_iter == NULL) {
1218 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001219 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001220 return NULL;
1221 }
1222 while ((row_obj = PyIter_Next(row_iter))) {
1223 result = csv_writerow(self, row_obj);
1224 Py_DECREF(row_obj);
1225 if (!result) {
1226 Py_DECREF(row_iter);
1227 return NULL;
1228 }
1229 else
1230 Py_DECREF(result);
1231 }
1232 Py_DECREF(row_iter);
1233 if (PyErr_Occurred())
1234 return NULL;
1235 Py_INCREF(Py_None);
1236 return Py_None;
1237}
1238
1239static struct PyMethodDef Writer_methods[] = {
1240 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001241 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001242 { NULL, NULL }
1243};
1244
1245#define W_OFF(x) offsetof(WriterObj, x)
1246
1247static struct PyMemberDef Writer_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001248 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +00001249 { NULL }
1250};
1251
1252static void
1253Writer_dealloc(WriterObj *self)
1254{
Andrew McNamara77ead872005-01-10 02:09:41 +00001255 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001256 Py_XDECREF(self->dialect);
1257 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001258 if (self->rec != NULL)
1259 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001260 PyObject_GC_Del(self);
1261}
1262
1263static int
1264Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1265{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001266 Py_VISIT(self->dialect);
1267 Py_VISIT(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001268 return 0;
1269}
1270
1271static int
1272Writer_clear(WriterObj *self)
1273{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001274 Py_CLEAR(self->dialect);
1275 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001276 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001277}
1278
1279PyDoc_STRVAR(Writer_Type_doc,
1280"CSV writer\n"
1281"\n"
1282"Writer objects are responsible for generating tabular data\n"
1283"in CSV format from sequence input.\n"
1284);
1285
1286static PyTypeObject Writer_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001287 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001288 "_csv.writer", /*tp_name*/
1289 sizeof(WriterObj), /*tp_basicsize*/
1290 0, /*tp_itemsize*/
1291 /* methods */
1292 (destructor)Writer_dealloc, /*tp_dealloc*/
1293 (printfunc)0, /*tp_print*/
1294 (getattrfunc)0, /*tp_getattr*/
1295 (setattrfunc)0, /*tp_setattr*/
1296 (cmpfunc)0, /*tp_compare*/
1297 (reprfunc)0, /*tp_repr*/
1298 0, /*tp_as_number*/
1299 0, /*tp_as_sequence*/
1300 0, /*tp_as_mapping*/
1301 (hashfunc)0, /*tp_hash*/
1302 (ternaryfunc)0, /*tp_call*/
1303 (reprfunc)0, /*tp_str*/
1304 0, /*tp_getattro*/
1305 0, /*tp_setattro*/
1306 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001307 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1308 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001309 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001310 (traverseproc)Writer_traverse, /*tp_traverse*/
1311 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001312 0, /*tp_richcompare*/
1313 0, /*tp_weaklistoffset*/
1314 (getiterfunc)0, /*tp_iter*/
1315 (getiterfunc)0, /*tp_iternext*/
1316 Writer_methods, /*tp_methods*/
1317 Writer_memberlist, /*tp_members*/
1318 0, /*tp_getset*/
1319};
1320
1321static PyObject *
1322csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1323{
Andrew McNamara91b97462005-01-11 01:07:23 +00001324 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001325 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001326
1327 if (!self)
1328 return NULL;
1329
1330 self->dialect = NULL;
1331 self->writeline = NULL;
1332
1333 self->rec = NULL;
1334 self->rec_size = 0;
1335 self->rec_len = 0;
1336 self->num_fields = 0;
1337
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001338 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001339 Py_DECREF(self);
1340 return NULL;
1341 }
1342 self->writeline = PyObject_GetAttrString(output_file, "write");
1343 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1344 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001345 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001346 Py_DECREF(self);
1347 return NULL;
1348 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001349 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001350 if (self->dialect == NULL) {
1351 Py_DECREF(self);
1352 return NULL;
1353 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001354 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001355 return (PyObject *)self;
1356}
1357
1358/*
1359 * DIALECT REGISTRY
1360 */
1361static PyObject *
1362csv_list_dialects(PyObject *module, PyObject *args)
1363{
1364 return PyDict_Keys(dialects);
1365}
1366
1367static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001368csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001369{
Andrew McNamara86625972005-01-11 01:28:33 +00001370 PyObject *name_obj, *dialect_obj = NULL;
1371 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001372
Andrew McNamara86625972005-01-11 01:28:33 +00001373 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001374 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001375 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001376 PyErr_SetString(PyExc_TypeError,
1377 "dialect name must be a string or unicode");
1378 return NULL;
1379 }
Andrew McNamara86625972005-01-11 01:28:33 +00001380 dialect = _call_dialect(dialect_obj, kwargs);
1381 if (dialect == NULL)
1382 return NULL;
1383 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1384 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001385 return NULL;
1386 }
Andrew McNamara86625972005-01-11 01:28:33 +00001387 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001388 Py_INCREF(Py_None);
1389 return Py_None;
1390}
1391
1392static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001393csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001394{
Skip Montanarob4a04172003-03-20 23:29:12 +00001395 if (PyDict_DelItem(dialects, name_obj) < 0)
1396 return PyErr_Format(error_obj, "unknown dialect");
1397 Py_INCREF(Py_None);
1398 return Py_None;
1399}
1400
1401static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001402csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001403{
Skip Montanarob4a04172003-03-20 23:29:12 +00001404 return get_dialect_from_registry(name_obj);
1405}
1406
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001407static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001408csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001409{
1410 PyObject *new_limit = NULL;
1411 long old_limit = field_limit;
1412
Andrew McNamara31d88962005-01-12 03:45:10 +00001413 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001414 return NULL;
1415 if (new_limit != NULL) {
Guido van Rossumddefaf32007-01-14 03:31:43 +00001416 if (!PyInt_CheckExact(new_limit)) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001417 PyErr_Format(PyExc_TypeError,
1418 "limit must be an integer");
1419 return NULL;
1420 }
1421 field_limit = PyInt_AsLong(new_limit);
1422 }
1423 return PyInt_FromLong(old_limit);
1424}
1425
Skip Montanarob4a04172003-03-20 23:29:12 +00001426/*
1427 * MODULE
1428 */
1429
1430PyDoc_STRVAR(csv_module_doc,
1431"CSV parsing and writing.\n"
1432"\n"
1433"This module provides classes that assist in the reading and writing\n"
1434"of Comma Separated Value (CSV) files, and implements the interface\n"
1435"described by PEP 305. Although many CSV files are simple to parse,\n"
1436"the format is not formally defined by a stable specification and\n"
1437"is subtle enough that parsing lines of a CSV file with something\n"
1438"like line.split(\",\") is bound to fail. The module supports three\n"
1439"basic APIs: reading, writing, and registration of dialects.\n"
1440"\n"
1441"\n"
1442"DIALECT REGISTRATION:\n"
1443"\n"
1444"Readers and writers support a dialect argument, which is a convenient\n"
1445"handle on a group of settings. When the dialect argument is a string,\n"
1446"it identifies one of the dialects previously registered with the module.\n"
1447"If it is a class or instance, the attributes of the argument are used as\n"
1448"the settings for the reader or writer:\n"
1449"\n"
1450" class excel:\n"
1451" delimiter = ','\n"
1452" quotechar = '\"'\n"
1453" escapechar = None\n"
1454" doublequote = True\n"
1455" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001456" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001457" quoting = QUOTE_MINIMAL\n"
1458"\n"
1459"SETTINGS:\n"
1460"\n"
1461" * quotechar - specifies a one-character string to use as the \n"
1462" quoting character. It defaults to '\"'.\n"
1463" * delimiter - specifies a one-character string to use as the \n"
1464" field separator. It defaults to ','.\n"
1465" * skipinitialspace - specifies how to interpret whitespace which\n"
1466" immediately follows a delimiter. It defaults to False, which\n"
1467" means that whitespace immediately following a delimiter is part\n"
1468" of the following field.\n"
1469" * lineterminator - specifies the character sequence which should \n"
1470" terminate rows.\n"
1471" * quoting - controls when quotes should be generated by the writer.\n"
1472" It can take on any of the following module constants:\n"
1473"\n"
1474" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1475" field contains either the quotechar or the delimiter\n"
1476" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1477" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001478" fields which do not parse as integers or floating point\n"
1479" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001480" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1481" * escapechar - specifies a one-character string used to escape \n"
1482" the delimiter when quoting is set to QUOTE_NONE.\n"
1483" * doublequote - controls the handling of quotes inside fields. When\n"
1484" True, two consecutive quotes are interpreted as one during read,\n"
1485" and when writing, each quote character embedded in the data is\n"
1486" written as two quotes\n");
1487
1488PyDoc_STRVAR(csv_reader_doc,
1489" csv_reader = reader(iterable [, dialect='excel']\n"
1490" [optional keyword args])\n"
1491" for row in csv_reader:\n"
1492" process(row)\n"
1493"\n"
1494"The \"iterable\" argument can be any object that returns a line\n"
1495"of input for each iteration, such as a file object or a list. The\n"
1496"optional \"dialect\" parameter is discussed below. The function\n"
1497"also accepts optional keyword arguments which override settings\n"
1498"provided by the dialect.\n"
1499"\n"
1500"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001501"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001502
1503PyDoc_STRVAR(csv_writer_doc,
1504" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1505" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001506" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001507" csv_writer.writerow(row)\n"
1508"\n"
1509" [or]\n"
1510"\n"
1511" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1512" [optional keyword args])\n"
1513" csv_writer.writerows(rows)\n"
1514"\n"
1515"The \"fileobj\" argument can be any object that supports the file API.\n");
1516
1517PyDoc_STRVAR(csv_list_dialects_doc,
1518"Return a list of all know dialect names.\n"
1519" names = csv.list_dialects()");
1520
1521PyDoc_STRVAR(csv_get_dialect_doc,
1522"Return the dialect instance associated with name.\n"
1523" dialect = csv.get_dialect(name)");
1524
1525PyDoc_STRVAR(csv_register_dialect_doc,
1526"Create a mapping from a string name to a dialect class.\n"
1527" dialect = csv.register_dialect(name, dialect)");
1528
1529PyDoc_STRVAR(csv_unregister_dialect_doc,
1530"Delete the name/dialect mapping associated with a string name.\n"
1531" csv.unregister_dialect(name)");
1532
Andrew McNamara31d88962005-01-12 03:45:10 +00001533PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001534"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001535" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001536"\n"
1537"Returns old limit. If limit is not given, no new limit is set and\n"
1538"the old limit is returned");
1539
Skip Montanarob4a04172003-03-20 23:29:12 +00001540static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001541 { "reader", (PyCFunction)csv_reader,
1542 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1543 { "writer", (PyCFunction)csv_writer,
1544 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1545 { "list_dialects", (PyCFunction)csv_list_dialects,
1546 METH_NOARGS, csv_list_dialects_doc},
1547 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001548 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001549 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1550 METH_O, csv_unregister_dialect_doc},
1551 { "get_dialect", (PyCFunction)csv_get_dialect,
1552 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001553 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1554 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001555 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001556};
1557
1558PyMODINIT_FUNC
1559init_csv(void)
1560{
1561 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001562 StyleDesc *style;
1563
1564 if (PyType_Ready(&Dialect_Type) < 0)
1565 return;
1566
1567 if (PyType_Ready(&Reader_Type) < 0)
1568 return;
1569
1570 if (PyType_Ready(&Writer_Type) < 0)
1571 return;
1572
1573 /* Create the module and add the functions */
1574 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1575 if (module == NULL)
1576 return;
1577
1578 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001579 if (PyModule_AddStringConstant(module, "__version__",
1580 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001581 return;
1582
1583 /* Add _dialects dictionary */
1584 dialects = PyDict_New();
1585 if (dialects == NULL)
1586 return;
1587 if (PyModule_AddObject(module, "_dialects", dialects))
1588 return;
1589
1590 /* Add quote styles into dictionary */
1591 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001592 if (PyModule_AddIntConstant(module, style->name,
1593 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001594 return;
1595 }
1596
1597 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001598 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001599 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1600 return;
1601
1602 /* Add the CSV exception object to the module. */
1603 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1604 if (error_obj == NULL)
1605 return;
1606 PyModule_AddObject(module, "Error", error_obj);
1607}