blob: 430ccdc5af003395c81ee17da3ae24bcd53221c7 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
/* Version string of this extension module. */
#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
/* True when `o` is a str object (or an instance of a str subclass). */
#define IS_BASESTRING(o) PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000022
Skip Montanarob4a04172003-03-20 23:29:12 +000023static PyObject *error_obj; /* CSV exception */
24static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000025static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000026
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
32
/* Recognised values of a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
36
37typedef struct {
38 QuoteStyle style;
39 char *name;
40} StyleDesc;
41
42static StyleDesc quote_styles[] = {
43 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
44 { QUOTE_ALL, "QUOTE_ALL" },
45 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
46 { QUOTE_NONE, "QUOTE_NONE" },
47 { 0 }
48};
49
50typedef struct {
51 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000052
Skip Montanarob4a04172003-03-20 23:29:12 +000053 int doublequote; /* is " represented by ""? */
Guido van Rossum46264582007-08-06 19:32:18 +000054 Py_UNICODE delimiter; /* field separator */
55 Py_UNICODE quotechar; /* quote character */
56 Py_UNICODE escapechar; /* escape character */
Skip Montanarob4a04172003-03-20 23:29:12 +000057 int skipinitialspace; /* ignore spaces following delimiter? */
58 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000059 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000060
61 int strict; /* raise exception on bad CSV */
62} DialectObj;
63
Neal Norwitz227b5332006-03-22 09:28:35 +000064static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000065
66typedef struct {
67 PyObject_HEAD
68
69 PyObject *input_iter; /* iterate over this for input lines */
70
71 DialectObj *dialect; /* parsing dialect */
72
73 PyObject *fields; /* field list for current record */
74 ParserState state; /* current CSV parse state */
Guido van Rossum46264582007-08-06 19:32:18 +000075 Py_UNICODE *field; /* build current field in here */
Skip Montanarob4a04172003-03-20 23:29:12 +000076 int field_size; /* size of allocated buffer */
Guido van Rossum46264582007-08-06 19:32:18 +000077 Py_ssize_t field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +000078 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +000079 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +000080} ReaderObj;
81
Neal Norwitz227b5332006-03-22 09:28:35 +000082static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000083
Christian Heimes90aa7642007-12-19 02:45:37 +000084#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +000085
86typedef struct {
87 PyObject_HEAD
88
89 PyObject *writeline; /* write output lines to this file */
90
91 DialectObj *dialect; /* parsing dialect */
92
Guido van Rossum46264582007-08-06 19:32:18 +000093 Py_UNICODE *rec; /* buffer for parser.join */
Skip Montanarob4a04172003-03-20 23:29:12 +000094 int rec_size; /* size of allocated record */
Guido van Rossum46264582007-08-06 19:32:18 +000095 Py_ssize_t rec_len; /* length of record */
Skip Montanarob4a04172003-03-20 23:29:12 +000096 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +000097} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +000098
Neal Norwitz227b5332006-03-22 09:28:35 +000099static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000100
101/*
102 * DIALECT class
103 */
104
105static PyObject *
106get_dialect_from_registry(PyObject * name_obj)
107{
108 PyObject *dialect_obj;
109
110 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000111 if (dialect_obj == NULL) {
112 if (!PyErr_Occurred())
113 PyErr_Format(error_obj, "unknown dialect");
114 }
115 else
116 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000117 return dialect_obj;
118}
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120static PyObject *
121get_string(PyObject *str)
122{
123 Py_XINCREF(str);
124 return str;
125}
126
Skip Montanarob4a04172003-03-20 23:29:12 +0000127static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000128get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129{
130 if (c == '\0') {
131 Py_INCREF(Py_None);
132 return Py_None;
133 }
134 else
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000135 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000136}
137
Skip Montanarob4a04172003-03-20 23:29:12 +0000138static PyObject *
139Dialect_get_lineterminator(DialectObj *self)
140{
141 return get_string(self->lineterminator);
142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000145Dialect_get_delimiter(DialectObj *self)
146{
147 return get_nullchar_as_None(self->delimiter);
148}
149
150static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000151Dialect_get_escapechar(DialectObj *self)
152{
153 return get_nullchar_as_None(self->escapechar);
154}
155
Andrew McNamara1196cf12005-01-07 04:42:45 +0000156static PyObject *
157Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000158{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000159 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
162static PyObject *
163Dialect_get_quoting(DialectObj *self)
164{
Christian Heimes217cfd12007-12-02 14:31:20 +0000165 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
168static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000169_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000170{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000171 if (src == NULL)
172 *target = dflt;
173 else
174 *target = PyObject_IsTrue(src);
175 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178static int
179_set_int(const char *name, int *target, PyObject *src, int dflt)
180{
181 if (src == NULL)
182 *target = dflt;
183 else {
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000184 long value;
185 if (!PyLong_CheckExact(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000186 PyErr_Format(PyExc_TypeError,
187 "\"%s\" must be an integer", name);
188 return -1;
189 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000190 value = PyLong_AsLong(src);
191 if (value == -1 && PyErr_Occurred())
192 return -1;
193#if SIZEOF_LONG > SIZEOF_INT
194 if (value > INT_MAX || value < INT_MIN) {
195 PyErr_Format(PyExc_ValueError,
196 "integer out of range for \"%s\"", name);
197 return -1;
198 }
199#endif
200 *target = (int)value;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000201 }
202 return 0;
203}
204
205static int
Guido van Rossum46264582007-08-06 19:32:18 +0000206_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207{
208 if (src == NULL)
209 *target = dflt;
210 else {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000211 *target = '\0';
212 if (src != Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +0000213 Py_UNICODE *buf;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000214 Py_ssize_t len;
Guido van Rossum46264582007-08-06 19:32:18 +0000215 buf = PyUnicode_AsUnicode(src);
216 len = PyUnicode_GetSize(src);
217 if (buf == NULL || len > 1) {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an 1-character string",
Guido van Rossum46264582007-08-06 19:32:18 +0000220 name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000221 return -1;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000222 }
223 if (len > 0)
224 *target = buf[0];
Andrew McNamara1196cf12005-01-07 04:42:45 +0000225 }
226 }
227 return 0;
228}
229
230static int
231_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
232{
233 if (src == NULL)
Guido van Rossum46264582007-08-06 19:32:18 +0000234 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235 else {
236 if (src == Py_None)
237 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000238 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000239 PyErr_Format(PyExc_TypeError,
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000240 "\"%s\" must be a string", name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000241 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000242 }
243 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000244 Py_XDECREF(*target);
245 Py_INCREF(src);
246 *target = src;
247 }
248 }
249 return 0;
250}
251
252static int
253dialect_check_quoting(int quoting)
254{
255 StyleDesc *qs = quote_styles;
256
257 for (qs = quote_styles; qs->name; qs++) {
258 if (qs->style == quoting)
259 return 0;
260 }
261 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
262 return -1;
263}
Skip Montanarob4a04172003-03-20 23:29:12 +0000264
265#define D_OFF(x) offsetof(DialectObj, x)
266
267static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000268 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
269 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
270 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000271 { NULL }
272};
273
274static PyGetSetDef Dialect_getsetlist[] = {
Guido van Rossuma9769c22007-08-07 23:59:30 +0000275 { "delimiter", (getter)Dialect_get_delimiter},
Andrew McNamara1196cf12005-01-07 04:42:45 +0000276 { "escapechar", (getter)Dialect_get_escapechar},
277 { "lineterminator", (getter)Dialect_get_lineterminator},
278 { "quotechar", (getter)Dialect_get_quotechar},
279 { "quoting", (getter)Dialect_get_quoting},
280 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000281};
282
283static void
284Dialect_dealloc(DialectObj *self)
285{
286 Py_XDECREF(self->lineterminator);
Christian Heimes90aa7642007-12-19 02:45:37 +0000287 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000288}
289
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000290static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291 "dialect",
292 "delimiter",
293 "doublequote",
294 "escapechar",
295 "lineterminator",
296 "quotechar",
297 "quoting",
298 "skipinitialspace",
299 "strict",
300 NULL
301};
302
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000303static PyObject *
304dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000305{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000306 DialectObj *self;
307 PyObject *ret = NULL;
308 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000309 PyObject *delimiter = NULL;
310 PyObject *doublequote = NULL;
311 PyObject *escapechar = NULL;
312 PyObject *lineterminator = NULL;
313 PyObject *quotechar = NULL;
314 PyObject *quoting = NULL;
315 PyObject *skipinitialspace = NULL;
316 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000317
Andrew McNamara1196cf12005-01-07 04:42:45 +0000318 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
319 "|OOOOOOOOO", dialect_kws,
320 &dialect,
321 &delimiter,
322 &doublequote,
323 &escapechar,
324 &lineterminator,
325 &quotechar,
326 &quoting,
327 &skipinitialspace,
328 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000329 return NULL;
330
331 if (dialect != NULL) {
332 if (IS_BASESTRING(dialect)) {
333 dialect = get_dialect_from_registry(dialect);
334 if (dialect == NULL)
335 return NULL;
336 }
337 else
338 Py_INCREF(dialect);
339 /* Can we reuse this instance? */
340 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
341 delimiter == 0 &&
342 doublequote == 0 &&
343 escapechar == 0 &&
344 lineterminator == 0 &&
345 quotechar == 0 &&
346 quoting == 0 &&
347 skipinitialspace == 0 &&
348 strict == 0)
349 return dialect;
350 }
351
352 self = (DialectObj *)type->tp_alloc(type, 0);
353 if (self == NULL) {
354 Py_XDECREF(dialect);
355 return NULL;
356 }
357 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000358
Andrew McNamara1196cf12005-01-07 04:42:45 +0000359 Py_XINCREF(delimiter);
360 Py_XINCREF(doublequote);
361 Py_XINCREF(escapechar);
362 Py_XINCREF(lineterminator);
363 Py_XINCREF(quotechar);
364 Py_XINCREF(quoting);
365 Py_XINCREF(skipinitialspace);
366 Py_XINCREF(strict);
367 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000368#define DIALECT_GETATTR(v, n) \
369 if (v == NULL) \
370 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000371 DIALECT_GETATTR(delimiter, "delimiter");
372 DIALECT_GETATTR(doublequote, "doublequote");
373 DIALECT_GETATTR(escapechar, "escapechar");
374 DIALECT_GETATTR(lineterminator, "lineterminator");
375 DIALECT_GETATTR(quotechar, "quotechar");
376 DIALECT_GETATTR(quoting, "quoting");
377 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
378 DIALECT_GETATTR(strict, "strict");
379 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000380 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000381
Andrew McNamara1196cf12005-01-07 04:42:45 +0000382 /* check types and convert to C values */
383#define DIASET(meth, name, target, src, dflt) \
384 if (meth(name, target, src, dflt)) \
385 goto err
386 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
387 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
388 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
389 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
390 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
391 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
392 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
393 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000394
Andrew McNamara1196cf12005-01-07 04:42:45 +0000395 /* validate options */
396 if (dialect_check_quoting(self->quoting))
397 goto err;
398 if (self->delimiter == 0) {
399 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
400 goto err;
401 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000402 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000403 self->quoting = QUOTE_NONE;
404 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
405 PyErr_SetString(PyExc_TypeError,
406 "quotechar must be set if quoting enabled");
407 goto err;
408 }
409 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000410 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000411 goto err;
412 }
413
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000414 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000415 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000416err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000417 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000418 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000419 Py_XDECREF(delimiter);
420 Py_XDECREF(doublequote);
421 Py_XDECREF(escapechar);
422 Py_XDECREF(lineterminator);
423 Py_XDECREF(quotechar);
424 Py_XDECREF(quoting);
425 Py_XDECREF(skipinitialspace);
426 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000427 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000428}
429
430
431PyDoc_STRVAR(Dialect_Type_doc,
432"CSV dialect\n"
433"\n"
434"The Dialect type records CSV parsing and generation options.\n");
435
436static PyTypeObject Dialect_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000437 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000438 "_csv.Dialect", /* tp_name */
439 sizeof(DialectObj), /* tp_basicsize */
440 0, /* tp_itemsize */
441 /* methods */
442 (destructor)Dialect_dealloc, /* tp_dealloc */
443 (printfunc)0, /* tp_print */
444 (getattrfunc)0, /* tp_getattr */
445 (setattrfunc)0, /* tp_setattr */
446 (cmpfunc)0, /* tp_compare */
447 (reprfunc)0, /* tp_repr */
448 0, /* tp_as_number */
449 0, /* tp_as_sequence */
450 0, /* tp_as_mapping */
451 (hashfunc)0, /* tp_hash */
452 (ternaryfunc)0, /* tp_call */
453 (reprfunc)0, /* tp_str */
454 0, /* tp_getattro */
455 0, /* tp_setattro */
456 0, /* tp_as_buffer */
457 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
458 Dialect_Type_doc, /* tp_doc */
459 0, /* tp_traverse */
460 0, /* tp_clear */
461 0, /* tp_richcompare */
462 0, /* tp_weaklistoffset */
463 0, /* tp_iter */
464 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000465 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000466 Dialect_memberlist, /* tp_members */
467 Dialect_getsetlist, /* tp_getset */
468 0, /* tp_base */
469 0, /* tp_dict */
470 0, /* tp_descr_get */
471 0, /* tp_descr_set */
472 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000473 0, /* tp_init */
474 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000475 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000476 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000477};
478
Andrew McNamara91b97462005-01-11 01:07:23 +0000479/*
480 * Return an instance of the dialect type, given a Python instance or kwarg
481 * description of the dialect
482 */
483static PyObject *
484_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
485{
486 PyObject *ctor_args;
487 PyObject *dialect;
488
489 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
490 if (ctor_args == NULL)
491 return NULL;
492 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
493 Py_DECREF(ctor_args);
494 return dialect;
495}
496
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000497/*
498 * READER
499 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000500static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000501parse_save_field(ReaderObj *self)
502{
503 PyObject *field;
504
Guido van Rossum46264582007-08-06 19:32:18 +0000505 field = PyUnicode_FromUnicode(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000506 if (field == NULL)
507 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000508 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000509 if (self->numeric_field) {
510 PyObject *tmp;
511
512 self->numeric_field = 0;
513 tmp = PyNumber_Float(field);
514 if (tmp == NULL) {
515 Py_DECREF(field);
516 return -1;
517 }
518 Py_DECREF(field);
519 field = tmp;
520 }
521 PyList_Append(self->fields, field);
522 Py_DECREF(field);
523 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000524}
525
526static int
527parse_grow_buff(ReaderObj *self)
528{
529 if (self->field_size == 0) {
530 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000531 if (self->field != NULL)
532 PyMem_Free(self->field);
Guido van Rossum46264582007-08-06 19:32:18 +0000533 self->field = PyMem_New(Py_UNICODE, self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000534 }
535 else {
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +0000536 if (self->field_size > INT_MAX / 2) {
537 PyErr_NoMemory();
538 return 0;
539 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000540 self->field_size *= 2;
Guido van Rossum46264582007-08-06 19:32:18 +0000541 self->field = PyMem_Resize(self->field, Py_UNICODE,
542 self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000543 }
544 if (self->field == NULL) {
545 PyErr_NoMemory();
546 return 0;
547 }
548 return 1;
549}
550
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000551static int
Guido van Rossum46264582007-08-06 19:32:18 +0000552parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000553{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000554 if (self->field_len >= field_limit) {
555 PyErr_Format(error_obj, "field larger than field limit (%ld)",
556 field_limit);
557 return -1;
558 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000559 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000560 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000561 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000562 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000563}
564
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000565static int
Guido van Rossum46264582007-08-06 19:32:18 +0000566parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000567{
568 DialectObj *dialect = self->dialect;
569
570 switch (self->state) {
571 case START_RECORD:
572 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000573 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000574 /* empty line - return [] */
575 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000576 else if (c == '\n' || c == '\r') {
577 self->state = EAT_CRNL;
578 break;
579 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000580 /* normal character - handle as START_FIELD */
581 self->state = START_FIELD;
582 /* fallthru */
583 case START_FIELD:
584 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000585 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000586 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000587 if (parse_save_field(self) < 0)
588 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000589 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000590 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000591 else if (c == dialect->quotechar &&
592 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000593 /* start quoted field */
594 self->state = IN_QUOTED_FIELD;
595 }
596 else if (c == dialect->escapechar) {
597 /* possible escaped character */
598 self->state = ESCAPED_CHAR;
599 }
600 else if (c == ' ' && dialect->skipinitialspace)
601 /* ignore space at start of field */
602 ;
603 else if (c == dialect->delimiter) {
604 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000605 if (parse_save_field(self) < 0)
606 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000607 }
608 else {
609 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000610 if (dialect->quoting == QUOTE_NONNUMERIC)
611 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000612 if (parse_add_char(self, c) < 0)
613 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000614 self->state = IN_FIELD;
615 }
616 break;
617
618 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000619 if (c == '\0')
620 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000621 if (parse_add_char(self, c) < 0)
622 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000623 self->state = IN_FIELD;
624 break;
625
626 case IN_FIELD:
627 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000628 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000629 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000630 if (parse_save_field(self) < 0)
631 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000632 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == dialect->delimiter) {
639 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000640 if (parse_save_field(self) < 0)
641 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000642 self->state = START_FIELD;
643 }
644 else {
645 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000646 if (parse_add_char(self, c) < 0)
647 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000648 }
649 break;
650
651 case IN_QUOTED_FIELD:
652 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000653 if (c == '\0')
654 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000655 else if (c == dialect->escapechar) {
656 /* Possible escape character */
657 self->state = ESCAPE_IN_QUOTED_FIELD;
658 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000659 else if (c == dialect->quotechar &&
660 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000661 if (dialect->doublequote) {
662 /* doublequote; " represented by "" */
663 self->state = QUOTE_IN_QUOTED_FIELD;
664 }
665 else {
666 /* end of quote part of field */
667 self->state = IN_FIELD;
668 }
669 }
670 else {
671 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000672 if (parse_add_char(self, c) < 0)
673 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000674 }
675 break;
676
677 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000678 if (c == '\0')
679 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000680 if (parse_add_char(self, c) < 0)
681 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000682 self->state = IN_QUOTED_FIELD;
683 break;
684
685 case QUOTE_IN_QUOTED_FIELD:
686 /* doublequote - seen a quote in an quoted field */
687 if (dialect->quoting != QUOTE_NONE &&
688 c == dialect->quotechar) {
689 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000690 if (parse_add_char(self, c) < 0)
691 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000692 self->state = IN_QUOTED_FIELD;
693 }
694 else if (c == dialect->delimiter) {
695 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000696 if (parse_save_field(self) < 0)
697 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000698 self->state = START_FIELD;
699 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000700 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000701 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000702 if (parse_save_field(self) < 0)
703 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000704 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000705 }
706 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000707 if (parse_add_char(self, c) < 0)
708 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000709 self->state = IN_FIELD;
710 }
711 else {
712 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000713 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000714 dialect->delimiter,
715 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000716 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000717 }
718 break;
719
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000720 case EAT_CRNL:
721 if (c == '\n' || c == '\r')
722 ;
723 else if (c == '\0')
724 self->state = START_RECORD;
725 else {
726 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
727 return -1;
728 }
729 break;
730
Skip Montanarob4a04172003-03-20 23:29:12 +0000731 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000732 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000733}
734
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000735static int
736parse_reset(ReaderObj *self)
737{
738 Py_XDECREF(self->fields);
739 self->fields = PyList_New(0);
740 if (self->fields == NULL)
741 return -1;
742 self->field_len = 0;
743 self->state = START_RECORD;
744 self->numeric_field = 0;
745 return 0;
746}
Skip Montanarob4a04172003-03-20 23:29:12 +0000747
748static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000749Reader_iternext(ReaderObj *self)
750{
751 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000752 PyObject *fields = NULL;
Guido van Rossum46264582007-08-06 19:32:18 +0000753 Py_UNICODE *line, c;
754 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000756 if (parse_reset(self) < 0)
757 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000758 do {
759 lineobj = PyIter_Next(self->input_iter);
760 if (lineobj == NULL) {
761 /* End of input OR exception */
762 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000763 PyErr_Format(error_obj,
764 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000765 return NULL;
766 }
Georg Brandlf5192612007-11-25 00:52:21 +0000767 if (!PyUnicode_Check(lineobj)) {
Georg Brandl1c280ab2007-11-27 20:40:22 +0000768 PyErr_Format(error_obj,
769 "iterator should return strings, "
770 "not %.200s "
771 "(did you open the file in text mode?)",
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000772 lineobj->ob_type->tp_name
773 );
Georg Brandlf5192612007-11-25 00:52:21 +0000774 Py_DECREF(lineobj);
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000775 return NULL;
776 }
Guido van Rossum46264582007-08-06 19:32:18 +0000777 ++self->line_num;
778 line = PyUnicode_AsUnicode(lineobj);
779 linelen = PyUnicode_GetSize(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000780 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000781 Py_DECREF(lineobj);
782 return NULL;
783 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000784 while (linelen--) {
785 c = *line++;
786 if (c == '\0') {
787 Py_DECREF(lineobj);
788 PyErr_Format(error_obj,
789 "line contains NULL byte");
790 goto err;
791 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000792 if (parse_process_char(self, c) < 0) {
793 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000794 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000795 }
796 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000797 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000798 if (parse_process_char(self, 0) < 0)
799 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000800 } while (self->state != START_RECORD);
801
802 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000803 self->fields = NULL;
804err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000805 return fields;
806}
807
808static void
809Reader_dealloc(ReaderObj *self)
810{
Andrew McNamara77ead872005-01-10 02:09:41 +0000811 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000812 Py_XDECREF(self->dialect);
813 Py_XDECREF(self->input_iter);
814 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000815 if (self->field != NULL)
816 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000817 PyObject_GC_Del(self);
818}
819
820static int
821Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
822{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000823 Py_VISIT(self->dialect);
824 Py_VISIT(self->input_iter);
825 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000826 return 0;
827}
828
829static int
830Reader_clear(ReaderObj *self)
831{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000832 Py_CLEAR(self->dialect);
833 Py_CLEAR(self->input_iter);
834 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000835 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000836}
837
838PyDoc_STRVAR(Reader_Type_doc,
839"CSV reader\n"
840"\n"
841"Reader objects are responsible for reading and parsing tabular data\n"
842"in CSV format.\n"
843);
844
845static struct PyMethodDef Reader_methods[] = {
846 { NULL, NULL }
847};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000848#define R_OFF(x) offsetof(ReaderObj, x)
849
850static struct PyMemberDef Reader_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +0000851 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
852 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000853 { NULL }
854};
855
Skip Montanarob4a04172003-03-20 23:29:12 +0000856
857static PyTypeObject Reader_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000858 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000859 "_csv.reader", /*tp_name*/
860 sizeof(ReaderObj), /*tp_basicsize*/
861 0, /*tp_itemsize*/
862 /* methods */
863 (destructor)Reader_dealloc, /*tp_dealloc*/
864 (printfunc)0, /*tp_print*/
865 (getattrfunc)0, /*tp_getattr*/
866 (setattrfunc)0, /*tp_setattr*/
867 (cmpfunc)0, /*tp_compare*/
868 (reprfunc)0, /*tp_repr*/
869 0, /*tp_as_number*/
870 0, /*tp_as_sequence*/
871 0, /*tp_as_mapping*/
872 (hashfunc)0, /*tp_hash*/
873 (ternaryfunc)0, /*tp_call*/
874 (reprfunc)0, /*tp_str*/
875 0, /*tp_getattro*/
876 0, /*tp_setattro*/
877 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000878 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
879 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000880 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000881 (traverseproc)Reader_traverse, /*tp_traverse*/
882 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000883 0, /*tp_richcompare*/
884 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000885 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000886 (getiterfunc)Reader_iternext, /*tp_iternext*/
887 Reader_methods, /*tp_methods*/
888 Reader_memberlist, /*tp_members*/
889 0, /*tp_getset*/
890
891};
892
893static PyObject *
894csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
895{
Andrew McNamara91b97462005-01-11 01:07:23 +0000896 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000897 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000898
899 if (!self)
900 return NULL;
901
902 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000903 self->fields = NULL;
904 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000905 self->field = NULL;
906 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000907 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000908
909 if (parse_reset(self) < 0) {
910 Py_DECREF(self);
911 return NULL;
912 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000913
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000914 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000915 Py_DECREF(self);
916 return NULL;
917 }
918 self->input_iter = PyObject_GetIter(iterator);
919 if (self->input_iter == NULL) {
920 PyErr_SetString(PyExc_TypeError,
921 "argument 1 must be an iterator");
922 Py_DECREF(self);
923 return NULL;
924 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000925 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000926 if (self->dialect == NULL) {
927 Py_DECREF(self);
928 return NULL;
929 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
Andrew McNamara77ead872005-01-10 02:09:41 +0000931 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000932 return (PyObject *)self;
933}
934
935/*
936 * WRITER
937 */
938/* ---------------------------------------------------------------- */
939static void
940join_reset(WriterObj *self)
941{
942 self->rec_len = 0;
943 self->num_fields = 0;
944}
945
946#define MEM_INCR 32768
947
948/* Calculate new record length or append field to record. Return new
949 * record length.
950 */
951static int
Guido van Rossum46264582007-08-06 19:32:18 +0000952join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
953 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000954{
955 DialectObj *dialect = self->dialect;
Guido van Rossum46264582007-08-06 19:32:18 +0000956 int i;
957 int rec_len;
958 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000959
960#define ADDCH(c) \
961 do {\
962 if (copy_phase) \
963 self->rec[rec_len] = c;\
964 rec_len++;\
965 } while(0)
966
Guido van Rossum46264582007-08-06 19:32:18 +0000967 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000968 if (lineterm == NULL)
969 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000970
971 rec_len = self->rec_len;
972
Andrew McNamarac89f2842005-01-12 07:44:42 +0000973 /* If this is not the first field we need a field separator */
974 if (self->num_fields > 0)
975 ADDCH(dialect->delimiter);
976
977 /* Handle preceding quote */
978 if (copy_phase && *quoted)
979 ADDCH(dialect->quotechar);
980
981 /* Copy/count field data */
Guido van Rossum46264582007-08-06 19:32:18 +0000982 /* If field is null just pass over */
983 for (i = 0; field; i++) {
984 Py_UNICODE c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +0000985 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000986
987 if (c == '\0')
988 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000989
Andrew McNamarac89f2842005-01-12 07:44:42 +0000990 if (c == dialect->delimiter ||
991 c == dialect->escapechar ||
Guido van Rossum46264582007-08-06 19:32:18 +0000992 c == dialect->quotechar ||
993 Py_UNICODE_strchr(lineterm, c)) {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000994 if (dialect->quoting == QUOTE_NONE)
995 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000996 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000997 if (c == dialect->quotechar) {
998 if (dialect->doublequote)
999 ADDCH(dialect->quotechar);
1000 else
1001 want_escape = 1;
1002 }
1003 if (!want_escape)
1004 *quoted = 1;
1005 }
1006 if (want_escape) {
1007 if (!dialect->escapechar) {
1008 PyErr_Format(error_obj,
1009 "need to escape, but no escapechar set");
1010 return -1;
1011 }
1012 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001013 }
1014 }
1015 /* Copy field character into record buffer.
1016 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001017 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001018 }
1019
1020 /* If field is empty check if it needs to be quoted.
1021 */
1022 if (i == 0 && quote_empty) {
1023 if (dialect->quoting == QUOTE_NONE) {
1024 PyErr_Format(error_obj,
Guido van Rossum46264582007-08-06 19:32:18 +00001025 "single empty field record must be quoted");
Skip Montanarob4a04172003-03-20 23:29:12 +00001026 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001027 }
1028 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001029 *quoted = 1;
1030 }
1031
Skip Montanarob4a04172003-03-20 23:29:12 +00001032 if (*quoted) {
1033 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001034 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001035 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001036 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001037 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001038 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001039#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001040}
1041
1042static int
1043join_check_rec_size(WriterObj *self, int rec_len)
1044{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001045
1046 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1047 PyErr_NoMemory();
1048 return 0;
1049 }
1050
Skip Montanarob4a04172003-03-20 23:29:12 +00001051 if (rec_len > self->rec_size) {
1052 if (self->rec_size == 0) {
1053 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001054 if (self->rec != NULL)
1055 PyMem_Free(self->rec);
Guido van Rossum46264582007-08-06 19:32:18 +00001056 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001057 }
1058 else {
Guido van Rossum46264582007-08-06 19:32:18 +00001059 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001060
1061 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Guido van Rossum46264582007-08-06 19:32:18 +00001062 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1063 self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001064 if (self->rec == NULL)
1065 PyMem_Free(old_rec);
1066 }
1067 if (self->rec == NULL) {
1068 PyErr_NoMemory();
1069 return 0;
1070 }
1071 }
1072 return 1;
1073}
1074
1075static int
Guido van Rossum46264582007-08-06 19:32:18 +00001076join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001077{
1078 int rec_len;
1079
1080 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1081 if (rec_len < 0)
1082 return 0;
1083
1084 /* grow record buffer if necessary */
1085 if (!join_check_rec_size(self, rec_len))
1086 return 0;
1087
1088 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1089 self->num_fields++;
1090
1091 return 1;
1092}
1093
1094static int
1095join_append_lineterminator(WriterObj *self)
1096{
1097 int terminator_len;
Guido van Rossum46264582007-08-06 19:32:18 +00001098 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001099
Guido van Rossum46264582007-08-06 19:32:18 +00001100 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001101 if (terminator_len == -1)
1102 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001103
1104 /* grow record buffer if necessary */
1105 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1106 return 0;
1107
Guido van Rossum46264582007-08-06 19:32:18 +00001108 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001109 if (terminator == NULL)
1110 return 0;
Guido van Rossum46264582007-08-06 19:32:18 +00001111 memmove(self->rec + self->rec_len, terminator,
1112 sizeof(Py_UNICODE)*terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001113 self->rec_len += terminator_len;
1114
1115 return 1;
1116}
1117
1118PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001119"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001120"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001121"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001122"elements will be converted to string.");
1123
1124static PyObject *
1125csv_writerow(WriterObj *self, PyObject *seq)
1126{
1127 DialectObj *dialect = self->dialect;
1128 int len, i;
1129
1130 if (!PySequence_Check(seq))
1131 return PyErr_Format(error_obj, "sequence expected");
1132
1133 len = PySequence_Length(seq);
1134 if (len < 0)
1135 return NULL;
1136
1137 /* Join all fields in internal buffer.
1138 */
1139 join_reset(self);
1140 for (i = 0; i < len; i++) {
1141 PyObject *field;
1142 int append_ok;
1143 int quoted;
1144
1145 field = PySequence_GetItem(seq, i);
1146 if (field == NULL)
1147 return NULL;
1148
Andrew McNamarac89f2842005-01-12 07:44:42 +00001149 switch (dialect->quoting) {
1150 case QUOTE_NONNUMERIC:
1151 quoted = !PyNumber_Check(field);
1152 break;
1153 case QUOTE_ALL:
1154 quoted = 1;
1155 break;
1156 default:
1157 quoted = 0;
1158 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001159 }
1160
Guido van Rossum46264582007-08-06 19:32:18 +00001161 if (PyUnicode_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001162 append_ok = join_append(self,
Guido van Rossum46264582007-08-06 19:32:18 +00001163 PyUnicode_AS_UNICODE(field),
1164 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001165 Py_DECREF(field);
1166 }
1167 else if (field == Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +00001168 append_ok = join_append(self, NULL,
1169 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001170 Py_DECREF(field);
1171 }
1172 else {
1173 PyObject *str;
1174
Thomas Heller519a0422007-11-15 20:48:54 +00001175 str = PyObject_Str(field);
Guido van Rossum46264582007-08-06 19:32:18 +00001176 Py_DECREF(field);
Skip Montanarob4a04172003-03-20 23:29:12 +00001177 if (str == NULL)
1178 return NULL;
Guido van Rossum46264582007-08-06 19:32:18 +00001179 append_ok = join_append(self,
1180 PyUnicode_AS_UNICODE(str),
1181 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001182 Py_DECREF(str);
1183 }
1184 if (!append_ok)
1185 return NULL;
1186 }
1187
1188 /* Add line terminator.
1189 */
1190 if (!join_append_lineterminator(self))
1191 return 0;
1192
Guido van Rossum46264582007-08-06 19:32:18 +00001193 return PyObject_CallFunction(self->writeline,
1194 "(u#)", self->rec,
1195 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001196}
1197
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001198PyDoc_STRVAR(csv_writerows_doc,
1199"writerows(sequence of sequences)\n"
1200"\n"
1201"Construct and write a series of sequences to a csv file. Non-string\n"
1202"elements will be converted to string.");
1203
Skip Montanarob4a04172003-03-20 23:29:12 +00001204static PyObject *
1205csv_writerows(WriterObj *self, PyObject *seqseq)
1206{
1207 PyObject *row_iter, *row_obj, *result;
1208
1209 row_iter = PyObject_GetIter(seqseq);
1210 if (row_iter == NULL) {
1211 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001212 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001213 return NULL;
1214 }
1215 while ((row_obj = PyIter_Next(row_iter))) {
1216 result = csv_writerow(self, row_obj);
1217 Py_DECREF(row_obj);
1218 if (!result) {
1219 Py_DECREF(row_iter);
1220 return NULL;
1221 }
1222 else
1223 Py_DECREF(result);
1224 }
1225 Py_DECREF(row_iter);
1226 if (PyErr_Occurred())
1227 return NULL;
1228 Py_INCREF(Py_None);
1229 return Py_None;
1230}
1231
1232static struct PyMethodDef Writer_methods[] = {
1233 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001234 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001235 { NULL, NULL }
1236};
1237
1238#define W_OFF(x) offsetof(WriterObj, x)
1239
1240static struct PyMemberDef Writer_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001241 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +00001242 { NULL }
1243};
1244
1245static void
1246Writer_dealloc(WriterObj *self)
1247{
Andrew McNamara77ead872005-01-10 02:09:41 +00001248 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001249 Py_XDECREF(self->dialect);
1250 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001251 if (self->rec != NULL)
1252 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001253 PyObject_GC_Del(self);
1254}
1255
1256static int
1257Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1258{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001259 Py_VISIT(self->dialect);
1260 Py_VISIT(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001261 return 0;
1262}
1263
1264static int
1265Writer_clear(WriterObj *self)
1266{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001267 Py_CLEAR(self->dialect);
1268 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001269 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001270}
1271
1272PyDoc_STRVAR(Writer_Type_doc,
1273"CSV writer\n"
1274"\n"
1275"Writer objects are responsible for generating tabular data\n"
1276"in CSV format from sequence input.\n"
1277);
1278
1279static PyTypeObject Writer_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001280 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001281 "_csv.writer", /*tp_name*/
1282 sizeof(WriterObj), /*tp_basicsize*/
1283 0, /*tp_itemsize*/
1284 /* methods */
1285 (destructor)Writer_dealloc, /*tp_dealloc*/
1286 (printfunc)0, /*tp_print*/
1287 (getattrfunc)0, /*tp_getattr*/
1288 (setattrfunc)0, /*tp_setattr*/
1289 (cmpfunc)0, /*tp_compare*/
1290 (reprfunc)0, /*tp_repr*/
1291 0, /*tp_as_number*/
1292 0, /*tp_as_sequence*/
1293 0, /*tp_as_mapping*/
1294 (hashfunc)0, /*tp_hash*/
1295 (ternaryfunc)0, /*tp_call*/
1296 (reprfunc)0, /*tp_str*/
1297 0, /*tp_getattro*/
1298 0, /*tp_setattro*/
1299 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001300 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1301 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001302 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001303 (traverseproc)Writer_traverse, /*tp_traverse*/
1304 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001305 0, /*tp_richcompare*/
1306 0, /*tp_weaklistoffset*/
1307 (getiterfunc)0, /*tp_iter*/
1308 (getiterfunc)0, /*tp_iternext*/
1309 Writer_methods, /*tp_methods*/
1310 Writer_memberlist, /*tp_members*/
1311 0, /*tp_getset*/
1312};
1313
1314static PyObject *
1315csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1316{
Andrew McNamara91b97462005-01-11 01:07:23 +00001317 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001318 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001319
1320 if (!self)
1321 return NULL;
1322
1323 self->dialect = NULL;
1324 self->writeline = NULL;
1325
1326 self->rec = NULL;
1327 self->rec_size = 0;
1328 self->rec_len = 0;
1329 self->num_fields = 0;
1330
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001331 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001332 Py_DECREF(self);
1333 return NULL;
1334 }
1335 self->writeline = PyObject_GetAttrString(output_file, "write");
1336 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1337 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001338 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001339 Py_DECREF(self);
1340 return NULL;
1341 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001342 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001343 if (self->dialect == NULL) {
1344 Py_DECREF(self);
1345 return NULL;
1346 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001347 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001348 return (PyObject *)self;
1349}
1350
1351/*
1352 * DIALECT REGISTRY
1353 */
1354static PyObject *
1355csv_list_dialects(PyObject *module, PyObject *args)
1356{
1357 return PyDict_Keys(dialects);
1358}
1359
1360static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001361csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001362{
Andrew McNamara86625972005-01-11 01:28:33 +00001363 PyObject *name_obj, *dialect_obj = NULL;
1364 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001365
Andrew McNamara86625972005-01-11 01:28:33 +00001366 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001367 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001368 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001369 PyErr_SetString(PyExc_TypeError,
1370 "dialect name must be a string or unicode");
1371 return NULL;
1372 }
Andrew McNamara86625972005-01-11 01:28:33 +00001373 dialect = _call_dialect(dialect_obj, kwargs);
1374 if (dialect == NULL)
1375 return NULL;
1376 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1377 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001378 return NULL;
1379 }
Andrew McNamara86625972005-01-11 01:28:33 +00001380 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001381 Py_INCREF(Py_None);
1382 return Py_None;
1383}
1384
1385static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001386csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001387{
Skip Montanarob4a04172003-03-20 23:29:12 +00001388 if (PyDict_DelItem(dialects, name_obj) < 0)
1389 return PyErr_Format(error_obj, "unknown dialect");
1390 Py_INCREF(Py_None);
1391 return Py_None;
1392}
1393
1394static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001395csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001396{
Skip Montanarob4a04172003-03-20 23:29:12 +00001397 return get_dialect_from_registry(name_obj);
1398}
1399
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001400static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001401csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001402{
1403 PyObject *new_limit = NULL;
1404 long old_limit = field_limit;
1405
Andrew McNamara31d88962005-01-12 03:45:10 +00001406 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001407 return NULL;
1408 if (new_limit != NULL) {
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +00001409 if (!PyLong_CheckExact(new_limit)) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001410 PyErr_Format(PyExc_TypeError,
1411 "limit must be an integer");
1412 return NULL;
1413 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001414 field_limit = PyLong_AsLong(new_limit);
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +00001415 if (field_limit == -1 && PyErr_Occurred()) {
1416 field_limit = old_limit;
1417 return NULL;
1418 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001419 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001420 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001421}
1422
Skip Montanarob4a04172003-03-20 23:29:12 +00001423/*
1424 * MODULE
1425 */
1426
1427PyDoc_STRVAR(csv_module_doc,
1428"CSV parsing and writing.\n"
1429"\n"
1430"This module provides classes that assist in the reading and writing\n"
1431"of Comma Separated Value (CSV) files, and implements the interface\n"
1432"described by PEP 305. Although many CSV files are simple to parse,\n"
1433"the format is not formally defined by a stable specification and\n"
1434"is subtle enough that parsing lines of a CSV file with something\n"
1435"like line.split(\",\") is bound to fail. The module supports three\n"
1436"basic APIs: reading, writing, and registration of dialects.\n"
1437"\n"
1438"\n"
1439"DIALECT REGISTRATION:\n"
1440"\n"
1441"Readers and writers support a dialect argument, which is a convenient\n"
1442"handle on a group of settings. When the dialect argument is a string,\n"
1443"it identifies one of the dialects previously registered with the module.\n"
1444"If it is a class or instance, the attributes of the argument are used as\n"
1445"the settings for the reader or writer:\n"
1446"\n"
1447" class excel:\n"
1448" delimiter = ','\n"
1449" quotechar = '\"'\n"
1450" escapechar = None\n"
1451" doublequote = True\n"
1452" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001453" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001454" quoting = QUOTE_MINIMAL\n"
1455"\n"
1456"SETTINGS:\n"
1457"\n"
1458" * quotechar - specifies a one-character string to use as the \n"
1459" quoting character. It defaults to '\"'.\n"
1460" * delimiter - specifies a one-character string to use as the \n"
1461" field separator. It defaults to ','.\n"
1462" * skipinitialspace - specifies how to interpret whitespace which\n"
1463" immediately follows a delimiter. It defaults to False, which\n"
1464" means that whitespace immediately following a delimiter is part\n"
1465" of the following field.\n"
1466" * lineterminator - specifies the character sequence which should \n"
1467" terminate rows.\n"
1468" * quoting - controls when quotes should be generated by the writer.\n"
1469" It can take on any of the following module constants:\n"
1470"\n"
1471" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1472" field contains either the quotechar or the delimiter\n"
1473" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1474" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001475" fields which do not parse as integers or floating point\n"
1476" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001477" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1478" * escapechar - specifies a one-character string used to escape \n"
1479" the delimiter when quoting is set to QUOTE_NONE.\n"
1480" * doublequote - controls the handling of quotes inside fields. When\n"
1481" True, two consecutive quotes are interpreted as one during read,\n"
1482" and when writing, each quote character embedded in the data is\n"
1483" written as two quotes\n");
1484
1485PyDoc_STRVAR(csv_reader_doc,
1486" csv_reader = reader(iterable [, dialect='excel']\n"
1487" [optional keyword args])\n"
1488" for row in csv_reader:\n"
1489" process(row)\n"
1490"\n"
1491"The \"iterable\" argument can be any object that returns a line\n"
1492"of input for each iteration, such as a file object or a list. The\n"
1493"optional \"dialect\" parameter is discussed below. The function\n"
1494"also accepts optional keyword arguments which override settings\n"
1495"provided by the dialect.\n"
1496"\n"
1497"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001498"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001499
1500PyDoc_STRVAR(csv_writer_doc,
1501" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1502" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001503" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001504" csv_writer.writerow(row)\n"
1505"\n"
1506" [or]\n"
1507"\n"
1508" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509" [optional keyword args])\n"
1510" csv_writer.writerows(rows)\n"
1511"\n"
1512"The \"fileobj\" argument can be any object that supports the file API.\n");
1513
1514PyDoc_STRVAR(csv_list_dialects_doc,
1515"Return a list of all know dialect names.\n"
1516" names = csv.list_dialects()");
1517
1518PyDoc_STRVAR(csv_get_dialect_doc,
1519"Return the dialect instance associated with name.\n"
1520" dialect = csv.get_dialect(name)");
1521
1522PyDoc_STRVAR(csv_register_dialect_doc,
1523"Create a mapping from a string name to a dialect class.\n"
1524" dialect = csv.register_dialect(name, dialect)");
1525
1526PyDoc_STRVAR(csv_unregister_dialect_doc,
1527"Delete the name/dialect mapping associated with a string name.\n"
1528" csv.unregister_dialect(name)");
1529
Andrew McNamara31d88962005-01-12 03:45:10 +00001530PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001531"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001532" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001533"\n"
1534"Returns old limit. If limit is not given, no new limit is set and\n"
1535"the old limit is returned");
1536
Skip Montanarob4a04172003-03-20 23:29:12 +00001537static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001538 { "reader", (PyCFunction)csv_reader,
1539 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1540 { "writer", (PyCFunction)csv_writer,
1541 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1542 { "list_dialects", (PyCFunction)csv_list_dialects,
1543 METH_NOARGS, csv_list_dialects_doc},
1544 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001545 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001546 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1547 METH_O, csv_unregister_dialect_doc},
1548 { "get_dialect", (PyCFunction)csv_get_dialect,
1549 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001550 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1551 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001552 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001553};
1554
Martin v. Löwis1a214512008-06-11 05:26:20 +00001555
1556static struct PyModuleDef _csvmodule = {
1557 PyModuleDef_HEAD_INIT,
1558 "_csv",
1559 csv_module_doc,
1560 -1,
1561 csv_methods,
1562 NULL,
1563 NULL,
1564 NULL,
1565 NULL
1566};
1567
Skip Montanarob4a04172003-03-20 23:29:12 +00001568PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001569PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001570{
1571 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001572 StyleDesc *style;
1573
1574 if (PyType_Ready(&Dialect_Type) < 0)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001575 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001576
1577 if (PyType_Ready(&Reader_Type) < 0)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001578 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001579
1580 if (PyType_Ready(&Writer_Type) < 0)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001581 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001582
1583 /* Create the module and add the functions */
Martin v. Löwis1a214512008-06-11 05:26:20 +00001584 module = PyModule_Create(&_csvmodule);
Skip Montanarob4a04172003-03-20 23:29:12 +00001585 if (module == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001586 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001587
1588 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001589 if (PyModule_AddStringConstant(module, "__version__",
1590 MODULE_VERSION) == -1)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001591 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001592
1593 /* Add _dialects dictionary */
1594 dialects = PyDict_New();
1595 if (dialects == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001596 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001597 if (PyModule_AddObject(module, "_dialects", dialects))
Martin v. Löwis1a214512008-06-11 05:26:20 +00001598 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001599
1600 /* Add quote styles into dictionary */
1601 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001602 if (PyModule_AddIntConstant(module, style->name,
1603 style->style) == -1)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001604 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001605 }
1606
1607 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001608 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001609 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
Martin v. Löwis1a214512008-06-11 05:26:20 +00001610 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001611
1612 /* Add the CSV exception object to the module. */
1613 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1614 if (error_obj == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00001615 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001616 PyModule_AddObject(module, "Error", error_obj);
Martin v. Löwis1a214512008-06-11 05:26:20 +00001617 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001618}