blob: 9a72955736bb090e46349200571b1972b5210915 [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000020#define IS_BASESTRING(o) \
Guido van Rossum3172c5d2007-10-16 18:12:55 +000021 PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000022
Skip Montanarob4a04172003-03-20 23:29:12 +000023static PyObject *error_obj; /* CSV exception */
24static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000025static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000026
/* States of the character-at-a-time CSV parser. */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;

/* Quoting policies selectable through a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;   /* points at a string literal; never written to */
} StyleDesc;

/*
 * Table of all known quoting policies.  The list is terminated by an
 * entry whose name is NULL; iterate with "for (qs = ...; qs->name; qs++)".
 */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { QUOTE_MINIMAL,    NULL }      /* sentinel */
};
49
50typedef struct {
51 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000052
Skip Montanarob4a04172003-03-20 23:29:12 +000053 int doublequote; /* is " represented by ""? */
Guido van Rossum46264582007-08-06 19:32:18 +000054 Py_UNICODE delimiter; /* field separator */
55 Py_UNICODE quotechar; /* quote character */
56 Py_UNICODE escapechar; /* escape character */
Skip Montanarob4a04172003-03-20 23:29:12 +000057 int skipinitialspace; /* ignore spaces following delimiter? */
58 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000059 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000060
61 int strict; /* raise exception on bad CSV */
62} DialectObj;
63
Neal Norwitz227b5332006-03-22 09:28:35 +000064static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000065
66typedef struct {
67 PyObject_HEAD
68
69 PyObject *input_iter; /* iterate over this for input lines */
70
71 DialectObj *dialect; /* parsing dialect */
72
73 PyObject *fields; /* field list for current record */
74 ParserState state; /* current CSV parse state */
Guido van Rossum46264582007-08-06 19:32:18 +000075 Py_UNICODE *field; /* build current field in here */
Skip Montanarob4a04172003-03-20 23:29:12 +000076 int field_size; /* size of allocated buffer */
Guido van Rossum46264582007-08-06 19:32:18 +000077 Py_ssize_t field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +000078 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +000079 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +000080} ReaderObj;
81
Neal Norwitz227b5332006-03-22 09:28:35 +000082static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000083
Christian Heimes90aa7642007-12-19 02:45:37 +000084#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +000085
86typedef struct {
87 PyObject_HEAD
88
89 PyObject *writeline; /* write output lines to this file */
90
91 DialectObj *dialect; /* parsing dialect */
92
Guido van Rossum46264582007-08-06 19:32:18 +000093 Py_UNICODE *rec; /* buffer for parser.join */
Skip Montanarob4a04172003-03-20 23:29:12 +000094 int rec_size; /* size of allocated record */
Guido van Rossum46264582007-08-06 19:32:18 +000095 Py_ssize_t rec_len; /* length of record */
Skip Montanarob4a04172003-03-20 23:29:12 +000096 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +000097} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +000098
Neal Norwitz227b5332006-03-22 09:28:35 +000099static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000100
101/*
102 * DIALECT class
103 */
104
105static PyObject *
106get_dialect_from_registry(PyObject * name_obj)
107{
108 PyObject *dialect_obj;
109
110 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000111 if (dialect_obj == NULL) {
112 if (!PyErr_Occurred())
113 PyErr_Format(error_obj, "unknown dialect");
114 }
115 else
116 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000117 return dialect_obj;
118}
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120static PyObject *
121get_string(PyObject *str)
122{
123 Py_XINCREF(str);
124 return str;
125}
126
Skip Montanarob4a04172003-03-20 23:29:12 +0000127static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000128get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129{
130 if (c == '\0') {
131 Py_INCREF(Py_None);
132 return Py_None;
133 }
134 else
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000135 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000136}
137
Skip Montanarob4a04172003-03-20 23:29:12 +0000138static PyObject *
139Dialect_get_lineterminator(DialectObj *self)
140{
141 return get_string(self->lineterminator);
142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000145Dialect_get_delimiter(DialectObj *self)
146{
147 return get_nullchar_as_None(self->delimiter);
148}
149
150static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000151Dialect_get_escapechar(DialectObj *self)
152{
153 return get_nullchar_as_None(self->escapechar);
154}
155
Andrew McNamara1196cf12005-01-07 04:42:45 +0000156static PyObject *
157Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000158{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000159 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
162static PyObject *
163Dialect_get_quoting(DialectObj *self)
164{
Christian Heimes217cfd12007-12-02 14:31:20 +0000165 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
168static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000169_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000170{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000171 if (src == NULL)
172 *target = dflt;
173 else
174 *target = PyObject_IsTrue(src);
175 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178static int
179_set_int(const char *name, int *target, PyObject *src, int dflt)
180{
181 if (src == NULL)
182 *target = dflt;
183 else {
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000184 long value;
185 if (!PyLong_CheckExact(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000186 PyErr_Format(PyExc_TypeError,
187 "\"%s\" must be an integer", name);
188 return -1;
189 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000190 value = PyLong_AsLong(src);
191 if (value == -1 && PyErr_Occurred())
192 return -1;
193#if SIZEOF_LONG > SIZEOF_INT
194 if (value > INT_MAX || value < INT_MIN) {
195 PyErr_Format(PyExc_ValueError,
196 "integer out of range for \"%s\"", name);
197 return -1;
198 }
199#endif
200 *target = (int)value;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000201 }
202 return 0;
203}
204
205static int
Guido van Rossum46264582007-08-06 19:32:18 +0000206_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207{
208 if (src == NULL)
209 *target = dflt;
210 else {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000211 *target = '\0';
212 if (src != Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +0000213 Py_UNICODE *buf;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000214 Py_ssize_t len;
Guido van Rossum46264582007-08-06 19:32:18 +0000215 buf = PyUnicode_AsUnicode(src);
216 len = PyUnicode_GetSize(src);
217 if (buf == NULL || len > 1) {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an 1-character string",
Guido van Rossum46264582007-08-06 19:32:18 +0000220 name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000221 return -1;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000222 }
223 if (len > 0)
224 *target = buf[0];
Andrew McNamara1196cf12005-01-07 04:42:45 +0000225 }
226 }
227 return 0;
228}
229
230static int
231_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
232{
233 if (src == NULL)
Guido van Rossum46264582007-08-06 19:32:18 +0000234 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235 else {
236 if (src == Py_None)
237 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000238 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000239 PyErr_Format(PyExc_TypeError,
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000240 "\"%s\" must be a string", name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000241 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000242 }
243 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000244 Py_XDECREF(*target);
245 Py_INCREF(src);
246 *target = src;
247 }
248 }
249 return 0;
250}
251
252static int
253dialect_check_quoting(int quoting)
254{
255 StyleDesc *qs = quote_styles;
256
257 for (qs = quote_styles; qs->name; qs++) {
258 if (qs->style == quoting)
259 return 0;
260 }
261 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
262 return -1;
263}
Skip Montanarob4a04172003-03-20 23:29:12 +0000264
265#define D_OFF(x) offsetof(DialectObj, x)
266
267static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000268 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
269 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
270 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000271 { NULL }
272};
273
274static PyGetSetDef Dialect_getsetlist[] = {
Guido van Rossuma9769c22007-08-07 23:59:30 +0000275 { "delimiter", (getter)Dialect_get_delimiter},
Andrew McNamara1196cf12005-01-07 04:42:45 +0000276 { "escapechar", (getter)Dialect_get_escapechar},
277 { "lineterminator", (getter)Dialect_get_lineterminator},
278 { "quotechar", (getter)Dialect_get_quotechar},
279 { "quoting", (getter)Dialect_get_quoting},
280 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000281};
282
283static void
284Dialect_dealloc(DialectObj *self)
285{
286 Py_XDECREF(self->lineterminator);
Christian Heimes90aa7642007-12-19 02:45:37 +0000287 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000288}
289
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000290static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291 "dialect",
292 "delimiter",
293 "doublequote",
294 "escapechar",
295 "lineterminator",
296 "quotechar",
297 "quoting",
298 "skipinitialspace",
299 "strict",
300 NULL
301};
302
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000303static PyObject *
304dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000305{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000306 DialectObj *self;
307 PyObject *ret = NULL;
308 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000309 PyObject *delimiter = NULL;
310 PyObject *doublequote = NULL;
311 PyObject *escapechar = NULL;
312 PyObject *lineterminator = NULL;
313 PyObject *quotechar = NULL;
314 PyObject *quoting = NULL;
315 PyObject *skipinitialspace = NULL;
316 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000317
Andrew McNamara1196cf12005-01-07 04:42:45 +0000318 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
319 "|OOOOOOOOO", dialect_kws,
320 &dialect,
321 &delimiter,
322 &doublequote,
323 &escapechar,
324 &lineterminator,
325 &quotechar,
326 &quoting,
327 &skipinitialspace,
328 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000329 return NULL;
330
331 if (dialect != NULL) {
332 if (IS_BASESTRING(dialect)) {
333 dialect = get_dialect_from_registry(dialect);
334 if (dialect == NULL)
335 return NULL;
336 }
337 else
338 Py_INCREF(dialect);
339 /* Can we reuse this instance? */
340 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
341 delimiter == 0 &&
342 doublequote == 0 &&
343 escapechar == 0 &&
344 lineterminator == 0 &&
345 quotechar == 0 &&
346 quoting == 0 &&
347 skipinitialspace == 0 &&
348 strict == 0)
349 return dialect;
350 }
351
352 self = (DialectObj *)type->tp_alloc(type, 0);
353 if (self == NULL) {
354 Py_XDECREF(dialect);
355 return NULL;
356 }
357 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000358
Andrew McNamara1196cf12005-01-07 04:42:45 +0000359 Py_XINCREF(delimiter);
360 Py_XINCREF(doublequote);
361 Py_XINCREF(escapechar);
362 Py_XINCREF(lineterminator);
363 Py_XINCREF(quotechar);
364 Py_XINCREF(quoting);
365 Py_XINCREF(skipinitialspace);
366 Py_XINCREF(strict);
367 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000368#define DIALECT_GETATTR(v, n) \
369 if (v == NULL) \
370 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000371 DIALECT_GETATTR(delimiter, "delimiter");
372 DIALECT_GETATTR(doublequote, "doublequote");
373 DIALECT_GETATTR(escapechar, "escapechar");
374 DIALECT_GETATTR(lineterminator, "lineterminator");
375 DIALECT_GETATTR(quotechar, "quotechar");
376 DIALECT_GETATTR(quoting, "quoting");
377 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
378 DIALECT_GETATTR(strict, "strict");
379 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000380 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000381
Andrew McNamara1196cf12005-01-07 04:42:45 +0000382 /* check types and convert to C values */
383#define DIASET(meth, name, target, src, dflt) \
384 if (meth(name, target, src, dflt)) \
385 goto err
386 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
387 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
388 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
389 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
390 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
391 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
392 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
393 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000394
Andrew McNamara1196cf12005-01-07 04:42:45 +0000395 /* validate options */
396 if (dialect_check_quoting(self->quoting))
397 goto err;
398 if (self->delimiter == 0) {
399 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
400 goto err;
401 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000402 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000403 self->quoting = QUOTE_NONE;
404 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
405 PyErr_SetString(PyExc_TypeError,
406 "quotechar must be set if quoting enabled");
407 goto err;
408 }
409 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000410 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000411 goto err;
412 }
413
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000414 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000415 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000416err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000417 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000418 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000419 Py_XDECREF(delimiter);
420 Py_XDECREF(doublequote);
421 Py_XDECREF(escapechar);
422 Py_XDECREF(lineterminator);
423 Py_XDECREF(quotechar);
424 Py_XDECREF(quoting);
425 Py_XDECREF(skipinitialspace);
426 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000427 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000428}
429
430
431PyDoc_STRVAR(Dialect_Type_doc,
432"CSV dialect\n"
433"\n"
434"The Dialect type records CSV parsing and generation options.\n");
435
436static PyTypeObject Dialect_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000437 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000438 "_csv.Dialect", /* tp_name */
439 sizeof(DialectObj), /* tp_basicsize */
440 0, /* tp_itemsize */
441 /* methods */
442 (destructor)Dialect_dealloc, /* tp_dealloc */
443 (printfunc)0, /* tp_print */
444 (getattrfunc)0, /* tp_getattr */
445 (setattrfunc)0, /* tp_setattr */
446 (cmpfunc)0, /* tp_compare */
447 (reprfunc)0, /* tp_repr */
448 0, /* tp_as_number */
449 0, /* tp_as_sequence */
450 0, /* tp_as_mapping */
451 (hashfunc)0, /* tp_hash */
452 (ternaryfunc)0, /* tp_call */
453 (reprfunc)0, /* tp_str */
454 0, /* tp_getattro */
455 0, /* tp_setattro */
456 0, /* tp_as_buffer */
457 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
458 Dialect_Type_doc, /* tp_doc */
459 0, /* tp_traverse */
460 0, /* tp_clear */
461 0, /* tp_richcompare */
462 0, /* tp_weaklistoffset */
463 0, /* tp_iter */
464 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000465 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000466 Dialect_memberlist, /* tp_members */
467 Dialect_getsetlist, /* tp_getset */
468 0, /* tp_base */
469 0, /* tp_dict */
470 0, /* tp_descr_get */
471 0, /* tp_descr_set */
472 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000473 0, /* tp_init */
474 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000475 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000476 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000477};
478
Andrew McNamara91b97462005-01-11 01:07:23 +0000479/*
480 * Return an instance of the dialect type, given a Python instance or kwarg
481 * description of the dialect
482 */
483static PyObject *
484_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
485{
486 PyObject *ctor_args;
487 PyObject *dialect;
488
489 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
490 if (ctor_args == NULL)
491 return NULL;
492 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
493 Py_DECREF(ctor_args);
494 return dialect;
495}
496
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000497/*
498 * READER
499 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000500static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000501parse_save_field(ReaderObj *self)
502{
503 PyObject *field;
504
Guido van Rossum46264582007-08-06 19:32:18 +0000505 field = PyUnicode_FromUnicode(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000506 if (field == NULL)
507 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000508 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000509 if (self->numeric_field) {
510 PyObject *tmp;
511
512 self->numeric_field = 0;
513 tmp = PyNumber_Float(field);
514 if (tmp == NULL) {
515 Py_DECREF(field);
516 return -1;
517 }
518 Py_DECREF(field);
519 field = tmp;
520 }
521 PyList_Append(self->fields, field);
522 Py_DECREF(field);
523 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000524}
525
526static int
527parse_grow_buff(ReaderObj *self)
528{
529 if (self->field_size == 0) {
530 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000531 if (self->field != NULL)
532 PyMem_Free(self->field);
Guido van Rossum46264582007-08-06 19:32:18 +0000533 self->field = PyMem_New(Py_UNICODE, self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000534 }
535 else {
536 self->field_size *= 2;
Guido van Rossum46264582007-08-06 19:32:18 +0000537 self->field = PyMem_Resize(self->field, Py_UNICODE,
538 self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000539 }
540 if (self->field == NULL) {
541 PyErr_NoMemory();
542 return 0;
543 }
544 return 1;
545}
546
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000547static int
Guido van Rossum46264582007-08-06 19:32:18 +0000548parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000549{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000550 if (self->field_len >= field_limit) {
551 PyErr_Format(error_obj, "field larger than field limit (%ld)",
552 field_limit);
553 return -1;
554 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000555 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000556 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000557 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000558 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000559}
560
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000561static int
Guido van Rossum46264582007-08-06 19:32:18 +0000562parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000563{
564 DialectObj *dialect = self->dialect;
565
566 switch (self->state) {
567 case START_RECORD:
568 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000569 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000570 /* empty line - return [] */
571 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000572 else if (c == '\n' || c == '\r') {
573 self->state = EAT_CRNL;
574 break;
575 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000576 /* normal character - handle as START_FIELD */
577 self->state = START_FIELD;
578 /* fallthru */
579 case START_FIELD:
580 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000581 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000582 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000583 if (parse_save_field(self) < 0)
584 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000585 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000586 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000587 else if (c == dialect->quotechar &&
588 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000589 /* start quoted field */
590 self->state = IN_QUOTED_FIELD;
591 }
592 else if (c == dialect->escapechar) {
593 /* possible escaped character */
594 self->state = ESCAPED_CHAR;
595 }
596 else if (c == ' ' && dialect->skipinitialspace)
597 /* ignore space at start of field */
598 ;
599 else if (c == dialect->delimiter) {
600 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000601 if (parse_save_field(self) < 0)
602 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000603 }
604 else {
605 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000606 if (dialect->quoting == QUOTE_NONNUMERIC)
607 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000608 if (parse_add_char(self, c) < 0)
609 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000610 self->state = IN_FIELD;
611 }
612 break;
613
614 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000615 if (c == '\0')
616 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000617 if (parse_add_char(self, c) < 0)
618 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000619 self->state = IN_FIELD;
620 break;
621
622 case IN_FIELD:
623 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000624 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000625 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000626 if (parse_save_field(self) < 0)
627 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000628 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000629 }
630 else if (c == dialect->escapechar) {
631 /* possible escaped character */
632 self->state = ESCAPED_CHAR;
633 }
634 else if (c == dialect->delimiter) {
635 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000636 if (parse_save_field(self) < 0)
637 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000638 self->state = START_FIELD;
639 }
640 else {
641 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000642 if (parse_add_char(self, c) < 0)
643 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000644 }
645 break;
646
647 case IN_QUOTED_FIELD:
648 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000649 if (c == '\0')
650 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000651 else if (c == dialect->escapechar) {
652 /* Possible escape character */
653 self->state = ESCAPE_IN_QUOTED_FIELD;
654 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000655 else if (c == dialect->quotechar &&
656 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000657 if (dialect->doublequote) {
658 /* doublequote; " represented by "" */
659 self->state = QUOTE_IN_QUOTED_FIELD;
660 }
661 else {
662 /* end of quote part of field */
663 self->state = IN_FIELD;
664 }
665 }
666 else {
667 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000668 if (parse_add_char(self, c) < 0)
669 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000670 }
671 break;
672
673 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000674 if (c == '\0')
675 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000676 if (parse_add_char(self, c) < 0)
677 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000678 self->state = IN_QUOTED_FIELD;
679 break;
680
681 case QUOTE_IN_QUOTED_FIELD:
682 /* doublequote - seen a quote in an quoted field */
683 if (dialect->quoting != QUOTE_NONE &&
684 c == dialect->quotechar) {
685 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000686 if (parse_add_char(self, c) < 0)
687 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000688 self->state = IN_QUOTED_FIELD;
689 }
690 else if (c == dialect->delimiter) {
691 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000692 if (parse_save_field(self) < 0)
693 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000694 self->state = START_FIELD;
695 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000696 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000697 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000698 if (parse_save_field(self) < 0)
699 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000700 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000701 }
702 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000703 if (parse_add_char(self, c) < 0)
704 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000705 self->state = IN_FIELD;
706 }
707 else {
708 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000709 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000710 dialect->delimiter,
711 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000712 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000713 }
714 break;
715
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000716 case EAT_CRNL:
717 if (c == '\n' || c == '\r')
718 ;
719 else if (c == '\0')
720 self->state = START_RECORD;
721 else {
722 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
723 return -1;
724 }
725 break;
726
Skip Montanarob4a04172003-03-20 23:29:12 +0000727 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000728 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000729}
730
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000731static int
732parse_reset(ReaderObj *self)
733{
734 Py_XDECREF(self->fields);
735 self->fields = PyList_New(0);
736 if (self->fields == NULL)
737 return -1;
738 self->field_len = 0;
739 self->state = START_RECORD;
740 self->numeric_field = 0;
741 return 0;
742}
Skip Montanarob4a04172003-03-20 23:29:12 +0000743
744static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000745Reader_iternext(ReaderObj *self)
746{
747 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000748 PyObject *fields = NULL;
Guido van Rossum46264582007-08-06 19:32:18 +0000749 Py_UNICODE *line, c;
750 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000751
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000752 if (parse_reset(self) < 0)
753 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000754 do {
755 lineobj = PyIter_Next(self->input_iter);
756 if (lineobj == NULL) {
757 /* End of input OR exception */
758 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000759 PyErr_Format(error_obj,
760 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000761 return NULL;
762 }
Georg Brandlf5192612007-11-25 00:52:21 +0000763 if (!PyUnicode_Check(lineobj)) {
Georg Brandl1c280ab2007-11-27 20:40:22 +0000764 PyErr_Format(error_obj,
765 "iterator should return strings, "
766 "not %.200s "
767 "(did you open the file in text mode?)",
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000768 lineobj->ob_type->tp_name
769 );
Georg Brandlf5192612007-11-25 00:52:21 +0000770 Py_DECREF(lineobj);
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000771 return NULL;
772 }
Guido van Rossum46264582007-08-06 19:32:18 +0000773 ++self->line_num;
774 line = PyUnicode_AsUnicode(lineobj);
775 linelen = PyUnicode_GetSize(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000776 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000777 Py_DECREF(lineobj);
778 return NULL;
779 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000780 while (linelen--) {
781 c = *line++;
782 if (c == '\0') {
783 Py_DECREF(lineobj);
784 PyErr_Format(error_obj,
785 "line contains NULL byte");
786 goto err;
787 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000788 if (parse_process_char(self, c) < 0) {
789 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000790 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000791 }
792 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000793 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000794 if (parse_process_char(self, 0) < 0)
795 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000796 } while (self->state != START_RECORD);
797
798 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000799 self->fields = NULL;
800err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000801 return fields;
802}
803
804static void
805Reader_dealloc(ReaderObj *self)
806{
Andrew McNamara77ead872005-01-10 02:09:41 +0000807 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000808 Py_XDECREF(self->dialect);
809 Py_XDECREF(self->input_iter);
810 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000811 if (self->field != NULL)
812 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000813 PyObject_GC_Del(self);
814}
815
816static int
817Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
818{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000819 Py_VISIT(self->dialect);
820 Py_VISIT(self->input_iter);
821 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000822 return 0;
823}
824
825static int
826Reader_clear(ReaderObj *self)
827{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000828 Py_CLEAR(self->dialect);
829 Py_CLEAR(self->input_iter);
830 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000831 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000832}
833
834PyDoc_STRVAR(Reader_Type_doc,
835"CSV reader\n"
836"\n"
837"Reader objects are responsible for reading and parsing tabular data\n"
838"in CSV format.\n"
839);
840
841static struct PyMethodDef Reader_methods[] = {
842 { NULL, NULL }
843};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000844#define R_OFF(x) offsetof(ReaderObj, x)
845
846static struct PyMemberDef Reader_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +0000847 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
848 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000849 { NULL }
850};
851
Skip Montanarob4a04172003-03-20 23:29:12 +0000852
853static PyTypeObject Reader_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000854 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000855 "_csv.reader", /*tp_name*/
856 sizeof(ReaderObj), /*tp_basicsize*/
857 0, /*tp_itemsize*/
858 /* methods */
859 (destructor)Reader_dealloc, /*tp_dealloc*/
860 (printfunc)0, /*tp_print*/
861 (getattrfunc)0, /*tp_getattr*/
862 (setattrfunc)0, /*tp_setattr*/
863 (cmpfunc)0, /*tp_compare*/
864 (reprfunc)0, /*tp_repr*/
865 0, /*tp_as_number*/
866 0, /*tp_as_sequence*/
867 0, /*tp_as_mapping*/
868 (hashfunc)0, /*tp_hash*/
869 (ternaryfunc)0, /*tp_call*/
870 (reprfunc)0, /*tp_str*/
871 0, /*tp_getattro*/
872 0, /*tp_setattro*/
873 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000874 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
875 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000876 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000877 (traverseproc)Reader_traverse, /*tp_traverse*/
878 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000879 0, /*tp_richcompare*/
880 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000881 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000882 (getiterfunc)Reader_iternext, /*tp_iternext*/
883 Reader_methods, /*tp_methods*/
884 Reader_memberlist, /*tp_members*/
885 0, /*tp_getset*/
886
887};
888
889static PyObject *
890csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
891{
Andrew McNamara91b97462005-01-11 01:07:23 +0000892 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000893 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000894
895 if (!self)
896 return NULL;
897
898 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000899 self->fields = NULL;
900 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000901 self->field = NULL;
902 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000903 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000904
905 if (parse_reset(self) < 0) {
906 Py_DECREF(self);
907 return NULL;
908 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000909
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000910 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000911 Py_DECREF(self);
912 return NULL;
913 }
914 self->input_iter = PyObject_GetIter(iterator);
915 if (self->input_iter == NULL) {
916 PyErr_SetString(PyExc_TypeError,
917 "argument 1 must be an iterator");
918 Py_DECREF(self);
919 return NULL;
920 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000921 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000922 if (self->dialect == NULL) {
923 Py_DECREF(self);
924 return NULL;
925 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000926
Andrew McNamara77ead872005-01-10 02:09:41 +0000927 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000928 return (PyObject *)self;
929}
930
931/*
932 * WRITER
933 */
934/* ---------------------------------------------------------------- */
935static void
936join_reset(WriterObj *self)
937{
938 self->rec_len = 0;
939 self->num_fields = 0;
940}
941
/* Record buffer sizes are rounded up to a multiple of this many
 * characters (see join_check_rec_size). */
#define MEM_INCR (32 * 1024)
943
944/* Calculate new record length or append field to record. Return new
945 * record length.
946 */
947static int
Guido van Rossum46264582007-08-06 19:32:18 +0000948join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
949 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000950{
951 DialectObj *dialect = self->dialect;
Guido van Rossum46264582007-08-06 19:32:18 +0000952 int i;
953 int rec_len;
954 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000955
956#define ADDCH(c) \
957 do {\
958 if (copy_phase) \
959 self->rec[rec_len] = c;\
960 rec_len++;\
961 } while(0)
962
Guido van Rossum46264582007-08-06 19:32:18 +0000963 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000964 if (lineterm == NULL)
965 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000966
967 rec_len = self->rec_len;
968
Andrew McNamarac89f2842005-01-12 07:44:42 +0000969 /* If this is not the first field we need a field separator */
970 if (self->num_fields > 0)
971 ADDCH(dialect->delimiter);
972
973 /* Handle preceding quote */
974 if (copy_phase && *quoted)
975 ADDCH(dialect->quotechar);
976
977 /* Copy/count field data */
Guido van Rossum46264582007-08-06 19:32:18 +0000978 /* If field is null just pass over */
979 for (i = 0; field; i++) {
980 Py_UNICODE c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +0000981 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000982
983 if (c == '\0')
984 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000985
Andrew McNamarac89f2842005-01-12 07:44:42 +0000986 if (c == dialect->delimiter ||
987 c == dialect->escapechar ||
Guido van Rossum46264582007-08-06 19:32:18 +0000988 c == dialect->quotechar ||
989 Py_UNICODE_strchr(lineterm, c)) {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000990 if (dialect->quoting == QUOTE_NONE)
991 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000992 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000993 if (c == dialect->quotechar) {
994 if (dialect->doublequote)
995 ADDCH(dialect->quotechar);
996 else
997 want_escape = 1;
998 }
999 if (!want_escape)
1000 *quoted = 1;
1001 }
1002 if (want_escape) {
1003 if (!dialect->escapechar) {
1004 PyErr_Format(error_obj,
1005 "need to escape, but no escapechar set");
1006 return -1;
1007 }
1008 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001009 }
1010 }
1011 /* Copy field character into record buffer.
1012 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001013 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001014 }
1015
1016 /* If field is empty check if it needs to be quoted.
1017 */
1018 if (i == 0 && quote_empty) {
1019 if (dialect->quoting == QUOTE_NONE) {
1020 PyErr_Format(error_obj,
Guido van Rossum46264582007-08-06 19:32:18 +00001021 "single empty field record must be quoted");
Skip Montanarob4a04172003-03-20 23:29:12 +00001022 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001023 }
1024 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001025 *quoted = 1;
1026 }
1027
Skip Montanarob4a04172003-03-20 23:29:12 +00001028 if (*quoted) {
1029 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001030 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001031 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001032 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001033 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001034 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001035#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001036}
1037
1038static int
1039join_check_rec_size(WriterObj *self, int rec_len)
1040{
1041 if (rec_len > self->rec_size) {
1042 if (self->rec_size == 0) {
1043 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001044 if (self->rec != NULL)
1045 PyMem_Free(self->rec);
Guido van Rossum46264582007-08-06 19:32:18 +00001046 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001047 }
1048 else {
Guido van Rossum46264582007-08-06 19:32:18 +00001049 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001050
1051 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Guido van Rossum46264582007-08-06 19:32:18 +00001052 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1053 self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001054 if (self->rec == NULL)
1055 PyMem_Free(old_rec);
1056 }
1057 if (self->rec == NULL) {
1058 PyErr_NoMemory();
1059 return 0;
1060 }
1061 }
1062 return 1;
1063}
1064
1065static int
Guido van Rossum46264582007-08-06 19:32:18 +00001066join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001067{
1068 int rec_len;
1069
1070 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1071 if (rec_len < 0)
1072 return 0;
1073
1074 /* grow record buffer if necessary */
1075 if (!join_check_rec_size(self, rec_len))
1076 return 0;
1077
1078 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1079 self->num_fields++;
1080
1081 return 1;
1082}
1083
1084static int
1085join_append_lineterminator(WriterObj *self)
1086{
1087 int terminator_len;
Guido van Rossum46264582007-08-06 19:32:18 +00001088 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001089
Guido van Rossum46264582007-08-06 19:32:18 +00001090 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001091 if (terminator_len == -1)
1092 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001093
1094 /* grow record buffer if necessary */
1095 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1096 return 0;
1097
Guido van Rossum46264582007-08-06 19:32:18 +00001098 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001099 if (terminator == NULL)
1100 return 0;
Guido van Rossum46264582007-08-06 19:32:18 +00001101 memmove(self->rec + self->rec_len, terminator,
1102 sizeof(Py_UNICODE)*terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001103 self->rec_len += terminator_len;
1104
1105 return 1;
1106}
1107
1108PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001109"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001110"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001111"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001112"elements will be converted to string.");
1113
1114static PyObject *
1115csv_writerow(WriterObj *self, PyObject *seq)
1116{
1117 DialectObj *dialect = self->dialect;
1118 int len, i;
1119
1120 if (!PySequence_Check(seq))
1121 return PyErr_Format(error_obj, "sequence expected");
1122
1123 len = PySequence_Length(seq);
1124 if (len < 0)
1125 return NULL;
1126
1127 /* Join all fields in internal buffer.
1128 */
1129 join_reset(self);
1130 for (i = 0; i < len; i++) {
1131 PyObject *field;
1132 int append_ok;
1133 int quoted;
1134
1135 field = PySequence_GetItem(seq, i);
1136 if (field == NULL)
1137 return NULL;
1138
Andrew McNamarac89f2842005-01-12 07:44:42 +00001139 switch (dialect->quoting) {
1140 case QUOTE_NONNUMERIC:
1141 quoted = !PyNumber_Check(field);
1142 break;
1143 case QUOTE_ALL:
1144 quoted = 1;
1145 break;
1146 default:
1147 quoted = 0;
1148 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001149 }
1150
Guido van Rossum46264582007-08-06 19:32:18 +00001151 if (PyUnicode_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001152 append_ok = join_append(self,
Guido van Rossum46264582007-08-06 19:32:18 +00001153 PyUnicode_AS_UNICODE(field),
1154 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001155 Py_DECREF(field);
1156 }
1157 else if (field == Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +00001158 append_ok = join_append(self, NULL,
1159 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001160 Py_DECREF(field);
1161 }
1162 else {
1163 PyObject *str;
1164
Thomas Heller519a0422007-11-15 20:48:54 +00001165 str = PyObject_Str(field);
Guido van Rossum46264582007-08-06 19:32:18 +00001166 Py_DECREF(field);
Skip Montanarob4a04172003-03-20 23:29:12 +00001167 if (str == NULL)
1168 return NULL;
Guido van Rossum46264582007-08-06 19:32:18 +00001169 append_ok = join_append(self,
1170 PyUnicode_AS_UNICODE(str),
1171 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001172 Py_DECREF(str);
1173 }
1174 if (!append_ok)
1175 return NULL;
1176 }
1177
1178 /* Add line terminator.
1179 */
1180 if (!join_append_lineterminator(self))
1181 return 0;
1182
Guido van Rossum46264582007-08-06 19:32:18 +00001183 return PyObject_CallFunction(self->writeline,
1184 "(u#)", self->rec,
1185 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001186}
1187
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001188PyDoc_STRVAR(csv_writerows_doc,
1189"writerows(sequence of sequences)\n"
1190"\n"
1191"Construct and write a series of sequences to a csv file. Non-string\n"
1192"elements will be converted to string.");
1193
Skip Montanarob4a04172003-03-20 23:29:12 +00001194static PyObject *
1195csv_writerows(WriterObj *self, PyObject *seqseq)
1196{
1197 PyObject *row_iter, *row_obj, *result;
1198
1199 row_iter = PyObject_GetIter(seqseq);
1200 if (row_iter == NULL) {
1201 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001202 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001203 return NULL;
1204 }
1205 while ((row_obj = PyIter_Next(row_iter))) {
1206 result = csv_writerow(self, row_obj);
1207 Py_DECREF(row_obj);
1208 if (!result) {
1209 Py_DECREF(row_iter);
1210 return NULL;
1211 }
1212 else
1213 Py_DECREF(result);
1214 }
1215 Py_DECREF(row_iter);
1216 if (PyErr_Occurred())
1217 return NULL;
1218 Py_INCREF(Py_None);
1219 return Py_None;
1220}
1221
1222static struct PyMethodDef Writer_methods[] = {
1223 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001224 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001225 { NULL, NULL }
1226};
1227
1228#define W_OFF(x) offsetof(WriterObj, x)
1229
1230static struct PyMemberDef Writer_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001231 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +00001232 { NULL }
1233};
1234
1235static void
1236Writer_dealloc(WriterObj *self)
1237{
Andrew McNamara77ead872005-01-10 02:09:41 +00001238 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001239 Py_XDECREF(self->dialect);
1240 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001241 if (self->rec != NULL)
1242 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001243 PyObject_GC_Del(self);
1244}
1245
1246static int
1247Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1248{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001249 Py_VISIT(self->dialect);
1250 Py_VISIT(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001251 return 0;
1252}
1253
1254static int
1255Writer_clear(WriterObj *self)
1256{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001257 Py_CLEAR(self->dialect);
1258 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001259 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001260}
1261
1262PyDoc_STRVAR(Writer_Type_doc,
1263"CSV writer\n"
1264"\n"
1265"Writer objects are responsible for generating tabular data\n"
1266"in CSV format from sequence input.\n"
1267);
1268
1269static PyTypeObject Writer_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001270 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001271 "_csv.writer", /*tp_name*/
1272 sizeof(WriterObj), /*tp_basicsize*/
1273 0, /*tp_itemsize*/
1274 /* methods */
1275 (destructor)Writer_dealloc, /*tp_dealloc*/
1276 (printfunc)0, /*tp_print*/
1277 (getattrfunc)0, /*tp_getattr*/
1278 (setattrfunc)0, /*tp_setattr*/
1279 (cmpfunc)0, /*tp_compare*/
1280 (reprfunc)0, /*tp_repr*/
1281 0, /*tp_as_number*/
1282 0, /*tp_as_sequence*/
1283 0, /*tp_as_mapping*/
1284 (hashfunc)0, /*tp_hash*/
1285 (ternaryfunc)0, /*tp_call*/
1286 (reprfunc)0, /*tp_str*/
1287 0, /*tp_getattro*/
1288 0, /*tp_setattro*/
1289 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001290 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1291 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001292 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001293 (traverseproc)Writer_traverse, /*tp_traverse*/
1294 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001295 0, /*tp_richcompare*/
1296 0, /*tp_weaklistoffset*/
1297 (getiterfunc)0, /*tp_iter*/
1298 (getiterfunc)0, /*tp_iternext*/
1299 Writer_methods, /*tp_methods*/
1300 Writer_memberlist, /*tp_members*/
1301 0, /*tp_getset*/
1302};
1303
1304static PyObject *
1305csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1306{
Andrew McNamara91b97462005-01-11 01:07:23 +00001307 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001308 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001309
1310 if (!self)
1311 return NULL;
1312
1313 self->dialect = NULL;
1314 self->writeline = NULL;
1315
1316 self->rec = NULL;
1317 self->rec_size = 0;
1318 self->rec_len = 0;
1319 self->num_fields = 0;
1320
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001321 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001322 Py_DECREF(self);
1323 return NULL;
1324 }
1325 self->writeline = PyObject_GetAttrString(output_file, "write");
1326 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1327 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001328 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001329 Py_DECREF(self);
1330 return NULL;
1331 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001332 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001333 if (self->dialect == NULL) {
1334 Py_DECREF(self);
1335 return NULL;
1336 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001337 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001338 return (PyObject *)self;
1339}
1340
1341/*
1342 * DIALECT REGISTRY
1343 */
1344static PyObject *
1345csv_list_dialects(PyObject *module, PyObject *args)
1346{
1347 return PyDict_Keys(dialects);
1348}
1349
1350static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001351csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001352{
Andrew McNamara86625972005-01-11 01:28:33 +00001353 PyObject *name_obj, *dialect_obj = NULL;
1354 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001355
Andrew McNamara86625972005-01-11 01:28:33 +00001356 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001357 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001358 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001359 PyErr_SetString(PyExc_TypeError,
1360 "dialect name must be a string or unicode");
1361 return NULL;
1362 }
Andrew McNamara86625972005-01-11 01:28:33 +00001363 dialect = _call_dialect(dialect_obj, kwargs);
1364 if (dialect == NULL)
1365 return NULL;
1366 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1367 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001368 return NULL;
1369 }
Andrew McNamara86625972005-01-11 01:28:33 +00001370 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001371 Py_INCREF(Py_None);
1372 return Py_None;
1373}
1374
1375static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001376csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001377{
Skip Montanarob4a04172003-03-20 23:29:12 +00001378 if (PyDict_DelItem(dialects, name_obj) < 0)
1379 return PyErr_Format(error_obj, "unknown dialect");
1380 Py_INCREF(Py_None);
1381 return Py_None;
1382}
1383
1384static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001385csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001386{
Skip Montanarob4a04172003-03-20 23:29:12 +00001387 return get_dialect_from_registry(name_obj);
1388}
1389
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001390static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001391csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001392{
1393 PyObject *new_limit = NULL;
1394 long old_limit = field_limit;
1395
Andrew McNamara31d88962005-01-12 03:45:10 +00001396 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001397 return NULL;
1398 if (new_limit != NULL) {
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +00001399 if (!PyLong_CheckExact(new_limit)) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001400 PyErr_Format(PyExc_TypeError,
1401 "limit must be an integer");
1402 return NULL;
1403 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001404 field_limit = PyLong_AsLong(new_limit);
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +00001405 if (field_limit == -1 && PyErr_Occurred()) {
1406 field_limit = old_limit;
1407 return NULL;
1408 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001409 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001410 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001411}
1412
Skip Montanarob4a04172003-03-20 23:29:12 +00001413/*
1414 * MODULE
1415 */
1416
1417PyDoc_STRVAR(csv_module_doc,
1418"CSV parsing and writing.\n"
1419"\n"
1420"This module provides classes that assist in the reading and writing\n"
1421"of Comma Separated Value (CSV) files, and implements the interface\n"
1422"described by PEP 305. Although many CSV files are simple to parse,\n"
1423"the format is not formally defined by a stable specification and\n"
1424"is subtle enough that parsing lines of a CSV file with something\n"
1425"like line.split(\",\") is bound to fail. The module supports three\n"
1426"basic APIs: reading, writing, and registration of dialects.\n"
1427"\n"
1428"\n"
1429"DIALECT REGISTRATION:\n"
1430"\n"
1431"Readers and writers support a dialect argument, which is a convenient\n"
1432"handle on a group of settings. When the dialect argument is a string,\n"
1433"it identifies one of the dialects previously registered with the module.\n"
1434"If it is a class or instance, the attributes of the argument are used as\n"
1435"the settings for the reader or writer:\n"
1436"\n"
1437" class excel:\n"
1438" delimiter = ','\n"
1439" quotechar = '\"'\n"
1440" escapechar = None\n"
1441" doublequote = True\n"
1442" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001443" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001444" quoting = QUOTE_MINIMAL\n"
1445"\n"
1446"SETTINGS:\n"
1447"\n"
1448" * quotechar - specifies a one-character string to use as the \n"
1449" quoting character. It defaults to '\"'.\n"
1450" * delimiter - specifies a one-character string to use as the \n"
1451" field separator. It defaults to ','.\n"
1452" * skipinitialspace - specifies how to interpret whitespace which\n"
1453" immediately follows a delimiter. It defaults to False, which\n"
1454" means that whitespace immediately following a delimiter is part\n"
1455" of the following field.\n"
1456" * lineterminator - specifies the character sequence which should \n"
1457" terminate rows.\n"
1458" * quoting - controls when quotes should be generated by the writer.\n"
1459" It can take on any of the following module constants:\n"
1460"\n"
1461" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1462" field contains either the quotechar or the delimiter\n"
1463" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1464" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001465" fields which do not parse as integers or floating point\n"
1466" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001467" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1468" * escapechar - specifies a one-character string used to escape \n"
1469" the delimiter when quoting is set to QUOTE_NONE.\n"
1470" * doublequote - controls the handling of quotes inside fields. When\n"
1471" True, two consecutive quotes are interpreted as one during read,\n"
1472" and when writing, each quote character embedded in the data is\n"
1473" written as two quotes\n");
1474
1475PyDoc_STRVAR(csv_reader_doc,
1476" csv_reader = reader(iterable [, dialect='excel']\n"
1477" [optional keyword args])\n"
1478" for row in csv_reader:\n"
1479" process(row)\n"
1480"\n"
1481"The \"iterable\" argument can be any object that returns a line\n"
1482"of input for each iteration, such as a file object or a list. The\n"
1483"optional \"dialect\" parameter is discussed below. The function\n"
1484"also accepts optional keyword arguments which override settings\n"
1485"provided by the dialect.\n"
1486"\n"
1487"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001488"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001489
1490PyDoc_STRVAR(csv_writer_doc,
1491" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1492" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001493" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001494" csv_writer.writerow(row)\n"
1495"\n"
1496" [or]\n"
1497"\n"
1498" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1499" [optional keyword args])\n"
1500" csv_writer.writerows(rows)\n"
1501"\n"
1502"The \"fileobj\" argument can be any object that supports the file API.\n");
1503
1504PyDoc_STRVAR(csv_list_dialects_doc,
1505"Return a list of all know dialect names.\n"
1506" names = csv.list_dialects()");
1507
1508PyDoc_STRVAR(csv_get_dialect_doc,
1509"Return the dialect instance associated with name.\n"
1510" dialect = csv.get_dialect(name)");
1511
1512PyDoc_STRVAR(csv_register_dialect_doc,
1513"Create a mapping from a string name to a dialect class.\n"
1514" dialect = csv.register_dialect(name, dialect)");
1515
1516PyDoc_STRVAR(csv_unregister_dialect_doc,
1517"Delete the name/dialect mapping associated with a string name.\n"
1518" csv.unregister_dialect(name)");
1519
Andrew McNamara31d88962005-01-12 03:45:10 +00001520PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001521"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001522" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001523"\n"
1524"Returns old limit. If limit is not given, no new limit is set and\n"
1525"the old limit is returned");
1526
Skip Montanarob4a04172003-03-20 23:29:12 +00001527static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001528 { "reader", (PyCFunction)csv_reader,
1529 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1530 { "writer", (PyCFunction)csv_writer,
1531 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1532 { "list_dialects", (PyCFunction)csv_list_dialects,
1533 METH_NOARGS, csv_list_dialects_doc},
1534 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001535 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001536 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1537 METH_O, csv_unregister_dialect_doc},
1538 { "get_dialect", (PyCFunction)csv_get_dialect,
1539 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001540 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1541 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001542 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001543};
1544
1545PyMODINIT_FUNC
1546init_csv(void)
1547{
1548 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001549 StyleDesc *style;
1550
1551 if (PyType_Ready(&Dialect_Type) < 0)
1552 return;
1553
1554 if (PyType_Ready(&Reader_Type) < 0)
1555 return;
1556
1557 if (PyType_Ready(&Writer_Type) < 0)
1558 return;
1559
1560 /* Create the module and add the functions */
1561 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1562 if (module == NULL)
1563 return;
1564
1565 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001566 if (PyModule_AddStringConstant(module, "__version__",
1567 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001568 return;
1569
1570 /* Add _dialects dictionary */
1571 dialects = PyDict_New();
1572 if (dialects == NULL)
1573 return;
1574 if (PyModule_AddObject(module, "_dialects", dialects))
1575 return;
1576
1577 /* Add quote styles into dictionary */
1578 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001579 if (PyModule_AddIntConstant(module, style->name,
1580 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001581 return;
1582 }
1583
1584 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001585 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001586 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1587 return;
1588
1589 /* Add the CSV exception object to the module. */
1590 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1591 if (error_obj == NULL)
1592 return;
1593 PyModule_AddObject(module, "Error", error_obj);
1594}