blob: c30cea98d93b33f1459e547ed8753d8088d41f60 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000020#define IS_BASESTRING(o) \
Guido van Rossum3172c5d2007-10-16 18:12:55 +000021 PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000022
Skip Montanarob4a04172003-03-20 23:29:12 +000023static PyObject *error_obj; /* CSV exception */
24static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000025static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000026
/*
 * States of the reader's character-by-character parser
 * (see parse_process_char).  The enumerator order is preserved so the
 * numeric values stay the same.
 */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting a field */
    ESCAPED_CHAR,               /* escape character seen in unquoted text */
    IN_FIELD,                   /* in an unquoted field */
    IN_QUOTED_FIELD,            /* in a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen in a quoted field */
    QUOTE_IN_QUOTED_FIELD,      /* quote character seen in a quoted field */
    EAT_CRNL                    /* skipping end-of-line characters */
} ParserState;
32
/*
 * Quoting styles a dialect may select.  The enumerator order is
 * preserved so the numeric values stay the same.
 */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
36
37typedef struct {
38 QuoteStyle style;
39 char *name;
40} StyleDesc;
41
42static StyleDesc quote_styles[] = {
43 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
44 { QUOTE_ALL, "QUOTE_ALL" },
45 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
46 { QUOTE_NONE, "QUOTE_NONE" },
47 { 0 }
48};
49
50typedef struct {
51 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000052
Skip Montanarob4a04172003-03-20 23:29:12 +000053 int doublequote; /* is " represented by ""? */
Guido van Rossum46264582007-08-06 19:32:18 +000054 Py_UNICODE delimiter; /* field separator */
55 Py_UNICODE quotechar; /* quote character */
56 Py_UNICODE escapechar; /* escape character */
Skip Montanarob4a04172003-03-20 23:29:12 +000057 int skipinitialspace; /* ignore spaces following delimiter? */
58 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +000059 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000060
61 int strict; /* raise exception on bad CSV */
62} DialectObj;
63
Neal Norwitz227b5332006-03-22 09:28:35 +000064static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000065
66typedef struct {
67 PyObject_HEAD
68
69 PyObject *input_iter; /* iterate over this for input lines */
70
71 DialectObj *dialect; /* parsing dialect */
72
73 PyObject *fields; /* field list for current record */
74 ParserState state; /* current CSV parse state */
Guido van Rossum46264582007-08-06 19:32:18 +000075 Py_UNICODE *field; /* build current field in here */
Skip Montanarob4a04172003-03-20 23:29:12 +000076 int field_size; /* size of allocated buffer */
Guido van Rossum46264582007-08-06 19:32:18 +000077 Py_ssize_t field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +000078 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +000079 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +000080} ReaderObj;
81
Neal Norwitz227b5332006-03-22 09:28:35 +000082static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000083
Martin v. Löwis9f2e3462007-07-21 17:22:18 +000084#define ReaderObject_Check(v) (Py_Type(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +000085
86typedef struct {
87 PyObject_HEAD
88
89 PyObject *writeline; /* write output lines to this file */
90
91 DialectObj *dialect; /* parsing dialect */
92
Guido van Rossum46264582007-08-06 19:32:18 +000093 Py_UNICODE *rec; /* buffer for parser.join */
Skip Montanarob4a04172003-03-20 23:29:12 +000094 int rec_size; /* size of allocated record */
Guido van Rossum46264582007-08-06 19:32:18 +000095 Py_ssize_t rec_len; /* length of record */
Skip Montanarob4a04172003-03-20 23:29:12 +000096 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +000097} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +000098
Neal Norwitz227b5332006-03-22 09:28:35 +000099static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000100
/*
 * DIALECT class
 */
104
105static PyObject *
106get_dialect_from_registry(PyObject * name_obj)
107{
108 PyObject *dialect_obj;
109
110 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000111 if (dialect_obj == NULL) {
112 if (!PyErr_Occurred())
113 PyErr_Format(error_obj, "unknown dialect");
114 }
115 else
116 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000117 return dialect_obj;
118}
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120static PyObject *
121get_string(PyObject *str)
122{
123 Py_XINCREF(str);
124 return str;
125}
126
Skip Montanarob4a04172003-03-20 23:29:12 +0000127static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000128get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129{
130 if (c == '\0') {
131 Py_INCREF(Py_None);
132 return Py_None;
133 }
134 else
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000135 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000136}
137
Skip Montanarob4a04172003-03-20 23:29:12 +0000138static PyObject *
139Dialect_get_lineterminator(DialectObj *self)
140{
141 return get_string(self->lineterminator);
142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000145Dialect_get_delimiter(DialectObj *self)
146{
147 return get_nullchar_as_None(self->delimiter);
148}
149
150static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000151Dialect_get_escapechar(DialectObj *self)
152{
153 return get_nullchar_as_None(self->escapechar);
154}
155
Andrew McNamara1196cf12005-01-07 04:42:45 +0000156static PyObject *
157Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000158{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000159 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
162static PyObject *
163Dialect_get_quoting(DialectObj *self)
164{
Christian Heimes217cfd12007-12-02 14:31:20 +0000165 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
168static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000169_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000170{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000171 if (src == NULL)
172 *target = dflt;
173 else
174 *target = PyObject_IsTrue(src);
175 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178static int
179_set_int(const char *name, int *target, PyObject *src, int dflt)
180{
181 if (src == NULL)
182 *target = dflt;
183 else {
Guido van Rossumddefaf32007-01-14 03:31:43 +0000184 if (!PyInt_CheckExact(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000185 PyErr_Format(PyExc_TypeError,
186 "\"%s\" must be an integer", name);
187 return -1;
188 }
Christian Heimes217cfd12007-12-02 14:31:20 +0000189 *target = PyLong_AsLong(src);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000190 }
191 return 0;
192}
193
194static int
Guido van Rossum46264582007-08-06 19:32:18 +0000195_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000196{
197 if (src == NULL)
198 *target = dflt;
199 else {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000200 *target = '\0';
201 if (src != Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +0000202 Py_UNICODE *buf;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000203 Py_ssize_t len;
Guido van Rossum46264582007-08-06 19:32:18 +0000204 buf = PyUnicode_AsUnicode(src);
205 len = PyUnicode_GetSize(src);
206 if (buf == NULL || len > 1) {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000207 PyErr_Format(PyExc_TypeError,
208 "\"%s\" must be an 1-character string",
Guido van Rossum46264582007-08-06 19:32:18 +0000209 name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210 return -1;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000211 }
212 if (len > 0)
213 *target = buf[0];
Andrew McNamara1196cf12005-01-07 04:42:45 +0000214 }
215 }
216 return 0;
217}
218
219static int
220_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
221{
222 if (src == NULL)
Guido van Rossum46264582007-08-06 19:32:18 +0000223 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000224 else {
225 if (src == Py_None)
226 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000227 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000228 PyErr_Format(PyExc_TypeError,
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000229 "\"%s\" must be a string", name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000230 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000231 }
232 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233 Py_XDECREF(*target);
234 Py_INCREF(src);
235 *target = src;
236 }
237 }
238 return 0;
239}
240
241static int
242dialect_check_quoting(int quoting)
243{
244 StyleDesc *qs = quote_styles;
245
246 for (qs = quote_styles; qs->name; qs++) {
247 if (qs->style == quoting)
248 return 0;
249 }
250 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
251 return -1;
252}
Skip Montanarob4a04172003-03-20 23:29:12 +0000253
254#define D_OFF(x) offsetof(DialectObj, x)
255
256static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000257 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
258 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
259 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000260 { NULL }
261};
262
263static PyGetSetDef Dialect_getsetlist[] = {
Guido van Rossuma9769c22007-08-07 23:59:30 +0000264 { "delimiter", (getter)Dialect_get_delimiter},
Andrew McNamara1196cf12005-01-07 04:42:45 +0000265 { "escapechar", (getter)Dialect_get_escapechar},
266 { "lineterminator", (getter)Dialect_get_lineterminator},
267 { "quotechar", (getter)Dialect_get_quotechar},
268 { "quoting", (getter)Dialect_get_quoting},
269 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000270};
271
272static void
273Dialect_dealloc(DialectObj *self)
274{
275 Py_XDECREF(self->lineterminator);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000276 Py_Type(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000277}
278
/*
 * Keyword names accepted by dialect_new, in the order its argument
 * parser fills the corresponding variables.  NULL-terminated.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
291
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000292static PyObject *
293dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000294{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000295 DialectObj *self;
296 PyObject *ret = NULL;
297 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000298 PyObject *delimiter = NULL;
299 PyObject *doublequote = NULL;
300 PyObject *escapechar = NULL;
301 PyObject *lineterminator = NULL;
302 PyObject *quotechar = NULL;
303 PyObject *quoting = NULL;
304 PyObject *skipinitialspace = NULL;
305 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000306
Andrew McNamara1196cf12005-01-07 04:42:45 +0000307 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
308 "|OOOOOOOOO", dialect_kws,
309 &dialect,
310 &delimiter,
311 &doublequote,
312 &escapechar,
313 &lineterminator,
314 &quotechar,
315 &quoting,
316 &skipinitialspace,
317 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000318 return NULL;
319
320 if (dialect != NULL) {
321 if (IS_BASESTRING(dialect)) {
322 dialect = get_dialect_from_registry(dialect);
323 if (dialect == NULL)
324 return NULL;
325 }
326 else
327 Py_INCREF(dialect);
328 /* Can we reuse this instance? */
329 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
330 delimiter == 0 &&
331 doublequote == 0 &&
332 escapechar == 0 &&
333 lineterminator == 0 &&
334 quotechar == 0 &&
335 quoting == 0 &&
336 skipinitialspace == 0 &&
337 strict == 0)
338 return dialect;
339 }
340
341 self = (DialectObj *)type->tp_alloc(type, 0);
342 if (self == NULL) {
343 Py_XDECREF(dialect);
344 return NULL;
345 }
346 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000347
Andrew McNamara1196cf12005-01-07 04:42:45 +0000348 Py_XINCREF(delimiter);
349 Py_XINCREF(doublequote);
350 Py_XINCREF(escapechar);
351 Py_XINCREF(lineterminator);
352 Py_XINCREF(quotechar);
353 Py_XINCREF(quoting);
354 Py_XINCREF(skipinitialspace);
355 Py_XINCREF(strict);
356 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000357#define DIALECT_GETATTR(v, n) \
358 if (v == NULL) \
359 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000360 DIALECT_GETATTR(delimiter, "delimiter");
361 DIALECT_GETATTR(doublequote, "doublequote");
362 DIALECT_GETATTR(escapechar, "escapechar");
363 DIALECT_GETATTR(lineterminator, "lineterminator");
364 DIALECT_GETATTR(quotechar, "quotechar");
365 DIALECT_GETATTR(quoting, "quoting");
366 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
367 DIALECT_GETATTR(strict, "strict");
368 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000369 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000370
Andrew McNamara1196cf12005-01-07 04:42:45 +0000371 /* check types and convert to C values */
372#define DIASET(meth, name, target, src, dflt) \
373 if (meth(name, target, src, dflt)) \
374 goto err
375 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
376 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
377 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
378 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
379 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
380 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
381 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
382 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000383
Andrew McNamara1196cf12005-01-07 04:42:45 +0000384 /* validate options */
385 if (dialect_check_quoting(self->quoting))
386 goto err;
387 if (self->delimiter == 0) {
388 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
389 goto err;
390 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000391 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000392 self->quoting = QUOTE_NONE;
393 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
394 PyErr_SetString(PyExc_TypeError,
395 "quotechar must be set if quoting enabled");
396 goto err;
397 }
398 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000399 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000400 goto err;
401 }
402
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000403 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000404 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000405err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000406 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000407 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000408 Py_XDECREF(delimiter);
409 Py_XDECREF(doublequote);
410 Py_XDECREF(escapechar);
411 Py_XDECREF(lineterminator);
412 Py_XDECREF(quotechar);
413 Py_XDECREF(quoting);
414 Py_XDECREF(skipinitialspace);
415 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000416 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000417}
418
419
420PyDoc_STRVAR(Dialect_Type_doc,
421"CSV dialect\n"
422"\n"
423"The Dialect type records CSV parsing and generation options.\n");
424
425static PyTypeObject Dialect_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000426 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000427 "_csv.Dialect", /* tp_name */
428 sizeof(DialectObj), /* tp_basicsize */
429 0, /* tp_itemsize */
430 /* methods */
431 (destructor)Dialect_dealloc, /* tp_dealloc */
432 (printfunc)0, /* tp_print */
433 (getattrfunc)0, /* tp_getattr */
434 (setattrfunc)0, /* tp_setattr */
435 (cmpfunc)0, /* tp_compare */
436 (reprfunc)0, /* tp_repr */
437 0, /* tp_as_number */
438 0, /* tp_as_sequence */
439 0, /* tp_as_mapping */
440 (hashfunc)0, /* tp_hash */
441 (ternaryfunc)0, /* tp_call */
442 (reprfunc)0, /* tp_str */
443 0, /* tp_getattro */
444 0, /* tp_setattro */
445 0, /* tp_as_buffer */
446 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
447 Dialect_Type_doc, /* tp_doc */
448 0, /* tp_traverse */
449 0, /* tp_clear */
450 0, /* tp_richcompare */
451 0, /* tp_weaklistoffset */
452 0, /* tp_iter */
453 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000454 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000455 Dialect_memberlist, /* tp_members */
456 Dialect_getsetlist, /* tp_getset */
457 0, /* tp_base */
458 0, /* tp_dict */
459 0, /* tp_descr_get */
460 0, /* tp_descr_set */
461 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000462 0, /* tp_init */
463 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000464 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000465 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000466};
467
Andrew McNamara91b97462005-01-11 01:07:23 +0000468/*
469 * Return an instance of the dialect type, given a Python instance or kwarg
470 * description of the dialect
471 */
472static PyObject *
473_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
474{
475 PyObject *ctor_args;
476 PyObject *dialect;
477
478 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
479 if (ctor_args == NULL)
480 return NULL;
481 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
482 Py_DECREF(ctor_args);
483 return dialect;
484}
485
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000489static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000490parse_save_field(ReaderObj *self)
491{
492 PyObject *field;
493
Guido van Rossum46264582007-08-06 19:32:18 +0000494 field = PyUnicode_FromUnicode(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000495 if (field == NULL)
496 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000497 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000498 if (self->numeric_field) {
499 PyObject *tmp;
500
501 self->numeric_field = 0;
502 tmp = PyNumber_Float(field);
503 if (tmp == NULL) {
504 Py_DECREF(field);
505 return -1;
506 }
507 Py_DECREF(field);
508 field = tmp;
509 }
510 PyList_Append(self->fields, field);
511 Py_DECREF(field);
512 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000513}
514
515static int
516parse_grow_buff(ReaderObj *self)
517{
518 if (self->field_size == 0) {
519 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000520 if (self->field != NULL)
521 PyMem_Free(self->field);
Guido van Rossum46264582007-08-06 19:32:18 +0000522 self->field = PyMem_New(Py_UNICODE, self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000523 }
524 else {
525 self->field_size *= 2;
Guido van Rossum46264582007-08-06 19:32:18 +0000526 self->field = PyMem_Resize(self->field, Py_UNICODE,
527 self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000528 }
529 if (self->field == NULL) {
530 PyErr_NoMemory();
531 return 0;
532 }
533 return 1;
534}
535
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000536static int
Guido van Rossum46264582007-08-06 19:32:18 +0000537parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000538{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000539 if (self->field_len >= field_limit) {
540 PyErr_Format(error_obj, "field larger than field limit (%ld)",
541 field_limit);
542 return -1;
543 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000544 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000545 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000546 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000547 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000548}
549
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000550static int
Guido van Rossum46264582007-08-06 19:32:18 +0000551parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000552{
553 DialectObj *dialect = self->dialect;
554
555 switch (self->state) {
556 case START_RECORD:
557 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000558 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000559 /* empty line - return [] */
560 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000561 else if (c == '\n' || c == '\r') {
562 self->state = EAT_CRNL;
563 break;
564 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000565 /* normal character - handle as START_FIELD */
566 self->state = START_FIELD;
567 /* fallthru */
568 case START_FIELD:
569 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000570 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000571 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000572 if (parse_save_field(self) < 0)
573 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000574 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000575 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000576 else if (c == dialect->quotechar &&
577 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000578 /* start quoted field */
579 self->state = IN_QUOTED_FIELD;
580 }
581 else if (c == dialect->escapechar) {
582 /* possible escaped character */
583 self->state = ESCAPED_CHAR;
584 }
585 else if (c == ' ' && dialect->skipinitialspace)
586 /* ignore space at start of field */
587 ;
588 else if (c == dialect->delimiter) {
589 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000590 if (parse_save_field(self) < 0)
591 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592 }
593 else {
594 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000595 if (dialect->quoting == QUOTE_NONNUMERIC)
596 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000597 if (parse_add_char(self, c) < 0)
598 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000599 self->state = IN_FIELD;
600 }
601 break;
602
603 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000604 if (c == '\0')
605 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000606 if (parse_add_char(self, c) < 0)
607 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000608 self->state = IN_FIELD;
609 break;
610
611 case IN_FIELD:
612 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000613 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000614 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000615 if (parse_save_field(self) < 0)
616 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000617 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000618 }
619 else if (c == dialect->escapechar) {
620 /* possible escaped character */
621 self->state = ESCAPED_CHAR;
622 }
623 else if (c == dialect->delimiter) {
624 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000625 if (parse_save_field(self) < 0)
626 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000627 self->state = START_FIELD;
628 }
629 else {
630 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000631 if (parse_add_char(self, c) < 0)
632 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000633 }
634 break;
635
636 case IN_QUOTED_FIELD:
637 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000638 if (c == '\0')
639 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000640 else if (c == dialect->escapechar) {
641 /* Possible escape character */
642 self->state = ESCAPE_IN_QUOTED_FIELD;
643 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000644 else if (c == dialect->quotechar &&
645 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000646 if (dialect->doublequote) {
647 /* doublequote; " represented by "" */
648 self->state = QUOTE_IN_QUOTED_FIELD;
649 }
650 else {
651 /* end of quote part of field */
652 self->state = IN_FIELD;
653 }
654 }
655 else {
656 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000657 if (parse_add_char(self, c) < 0)
658 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000659 }
660 break;
661
662 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000663 if (c == '\0')
664 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000665 if (parse_add_char(self, c) < 0)
666 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000667 self->state = IN_QUOTED_FIELD;
668 break;
669
670 case QUOTE_IN_QUOTED_FIELD:
671 /* doublequote - seen a quote in an quoted field */
672 if (dialect->quoting != QUOTE_NONE &&
673 c == dialect->quotechar) {
674 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000675 if (parse_add_char(self, c) < 0)
676 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000677 self->state = IN_QUOTED_FIELD;
678 }
679 else if (c == dialect->delimiter) {
680 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000681 if (parse_save_field(self) < 0)
682 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000683 self->state = START_FIELD;
684 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000685 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000686 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000687 if (parse_save_field(self) < 0)
688 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000689 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000690 }
691 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000692 if (parse_add_char(self, c) < 0)
693 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000694 self->state = IN_FIELD;
695 }
696 else {
697 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000698 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000699 dialect->delimiter,
700 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000701 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000702 }
703 break;
704
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000705 case EAT_CRNL:
706 if (c == '\n' || c == '\r')
707 ;
708 else if (c == '\0')
709 self->state = START_RECORD;
710 else {
711 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
712 return -1;
713 }
714 break;
715
Skip Montanarob4a04172003-03-20 23:29:12 +0000716 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000717 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000718}
719
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000720static int
721parse_reset(ReaderObj *self)
722{
723 Py_XDECREF(self->fields);
724 self->fields = PyList_New(0);
725 if (self->fields == NULL)
726 return -1;
727 self->field_len = 0;
728 self->state = START_RECORD;
729 self->numeric_field = 0;
730 return 0;
731}
Skip Montanarob4a04172003-03-20 23:29:12 +0000732
733static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000734Reader_iternext(ReaderObj *self)
735{
736 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000737 PyObject *fields = NULL;
Guido van Rossum46264582007-08-06 19:32:18 +0000738 Py_UNICODE *line, c;
739 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000740
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000741 if (parse_reset(self) < 0)
742 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000743 do {
744 lineobj = PyIter_Next(self->input_iter);
745 if (lineobj == NULL) {
746 /* End of input OR exception */
747 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000748 PyErr_Format(error_obj,
749 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000750 return NULL;
751 }
Georg Brandlf5192612007-11-25 00:52:21 +0000752 if (!PyUnicode_Check(lineobj)) {
Georg Brandl1c280ab2007-11-27 20:40:22 +0000753 PyErr_Format(error_obj,
754 "iterator should return strings, "
755 "not %.200s "
756 "(did you open the file in text mode?)",
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000757 lineobj->ob_type->tp_name
758 );
Georg Brandlf5192612007-11-25 00:52:21 +0000759 Py_DECREF(lineobj);
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000760 return NULL;
761 }
Guido van Rossum46264582007-08-06 19:32:18 +0000762 ++self->line_num;
763 line = PyUnicode_AsUnicode(lineobj);
764 linelen = PyUnicode_GetSize(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000765 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000766 Py_DECREF(lineobj);
767 return NULL;
768 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000769 while (linelen--) {
770 c = *line++;
771 if (c == '\0') {
772 Py_DECREF(lineobj);
773 PyErr_Format(error_obj,
774 "line contains NULL byte");
775 goto err;
776 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000777 if (parse_process_char(self, c) < 0) {
778 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000779 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000780 }
781 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000782 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000783 if (parse_process_char(self, 0) < 0)
784 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000785 } while (self->state != START_RECORD);
786
787 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000788 self->fields = NULL;
789err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000790 return fields;
791}
792
793static void
794Reader_dealloc(ReaderObj *self)
795{
Andrew McNamara77ead872005-01-10 02:09:41 +0000796 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000797 Py_XDECREF(self->dialect);
798 Py_XDECREF(self->input_iter);
799 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000800 if (self->field != NULL)
801 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000802 PyObject_GC_Del(self);
803}
804
805static int
806Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
807{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000808 Py_VISIT(self->dialect);
809 Py_VISIT(self->input_iter);
810 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000811 return 0;
812}
813
/* GC clear for reader objects (installed as tp_clear).
 *
 * Releases the references held in dialect, input_iter and fields and
 * leaves each member set to NULL (Py_CLEAR).  Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
822
/* Docstring used for the tp_doc slot of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
829
/* Reader objects define no Python-level methods: the table contains
 * only the terminating sentinel entry. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Offset of field x inside ReaderObj, for the member table below. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Attributes exposed on reader objects; both are flagged READONLY. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
840
Skip Montanarob4a04172003-03-20 23:29:12 +0000841
842static PyTypeObject Reader_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000843 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000844 "_csv.reader", /*tp_name*/
845 sizeof(ReaderObj), /*tp_basicsize*/
846 0, /*tp_itemsize*/
847 /* methods */
848 (destructor)Reader_dealloc, /*tp_dealloc*/
849 (printfunc)0, /*tp_print*/
850 (getattrfunc)0, /*tp_getattr*/
851 (setattrfunc)0, /*tp_setattr*/
852 (cmpfunc)0, /*tp_compare*/
853 (reprfunc)0, /*tp_repr*/
854 0, /*tp_as_number*/
855 0, /*tp_as_sequence*/
856 0, /*tp_as_mapping*/
857 (hashfunc)0, /*tp_hash*/
858 (ternaryfunc)0, /*tp_call*/
859 (reprfunc)0, /*tp_str*/
860 0, /*tp_getattro*/
861 0, /*tp_setattro*/
862 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000863 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
864 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000865 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000866 (traverseproc)Reader_traverse, /*tp_traverse*/
867 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000868 0, /*tp_richcompare*/
869 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000870 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000871 (getiterfunc)Reader_iternext, /*tp_iternext*/
872 Reader_methods, /*tp_methods*/
873 Reader_memberlist, /*tp_members*/
874 0, /*tp_getset*/
875
876};
877
878static PyObject *
879csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
880{
Andrew McNamara91b97462005-01-11 01:07:23 +0000881 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000882 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000883
884 if (!self)
885 return NULL;
886
887 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000888 self->fields = NULL;
889 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000890 self->field = NULL;
891 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000892 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000893
894 if (parse_reset(self) < 0) {
895 Py_DECREF(self);
896 return NULL;
897 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000898
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000899 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000900 Py_DECREF(self);
901 return NULL;
902 }
903 self->input_iter = PyObject_GetIter(iterator);
904 if (self->input_iter == NULL) {
905 PyErr_SetString(PyExc_TypeError,
906 "argument 1 must be an iterator");
907 Py_DECREF(self);
908 return NULL;
909 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000910 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000911 if (self->dialect == NULL) {
912 Py_DECREF(self);
913 return NULL;
914 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000915
Andrew McNamara77ead872005-01-10 02:09:41 +0000916 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000917 return (PyObject *)self;
918}
919
920/*
921 * WRITER
922 */
923/* ---------------------------------------------------------------- */
924static void
925join_reset(WriterObj *self)
926{
927 self->rec_len = 0;
928 self->num_fields = 0;
929}
930
/* Granularity, in Py_UNICODE elements, to which join_check_rec_size()
 * rounds up the size of the writer's record buffer. */
#define MEM_INCR 32768
932
933/* Calculate new record length or append field to record. Return new
934 * record length.
935 */
936static int
Guido van Rossum46264582007-08-06 19:32:18 +0000937join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
938 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000939{
940 DialectObj *dialect = self->dialect;
Guido van Rossum46264582007-08-06 19:32:18 +0000941 int i;
942 int rec_len;
943 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000944
945#define ADDCH(c) \
946 do {\
947 if (copy_phase) \
948 self->rec[rec_len] = c;\
949 rec_len++;\
950 } while(0)
951
Guido van Rossum46264582007-08-06 19:32:18 +0000952 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000953 if (lineterm == NULL)
954 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000955
956 rec_len = self->rec_len;
957
Andrew McNamarac89f2842005-01-12 07:44:42 +0000958 /* If this is not the first field we need a field separator */
959 if (self->num_fields > 0)
960 ADDCH(dialect->delimiter);
961
962 /* Handle preceding quote */
963 if (copy_phase && *quoted)
964 ADDCH(dialect->quotechar);
965
966 /* Copy/count field data */
Guido van Rossum46264582007-08-06 19:32:18 +0000967 /* If field is null just pass over */
968 for (i = 0; field; i++) {
969 Py_UNICODE c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +0000970 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000971
972 if (c == '\0')
973 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000974
Andrew McNamarac89f2842005-01-12 07:44:42 +0000975 if (c == dialect->delimiter ||
976 c == dialect->escapechar ||
Guido van Rossum46264582007-08-06 19:32:18 +0000977 c == dialect->quotechar ||
978 Py_UNICODE_strchr(lineterm, c)) {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000979 if (dialect->quoting == QUOTE_NONE)
980 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000981 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +0000982 if (c == dialect->quotechar) {
983 if (dialect->doublequote)
984 ADDCH(dialect->quotechar);
985 else
986 want_escape = 1;
987 }
988 if (!want_escape)
989 *quoted = 1;
990 }
991 if (want_escape) {
992 if (!dialect->escapechar) {
993 PyErr_Format(error_obj,
994 "need to escape, but no escapechar set");
995 return -1;
996 }
997 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000998 }
999 }
1000 /* Copy field character into record buffer.
1001 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001002 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001003 }
1004
1005 /* If field is empty check if it needs to be quoted.
1006 */
1007 if (i == 0 && quote_empty) {
1008 if (dialect->quoting == QUOTE_NONE) {
1009 PyErr_Format(error_obj,
Guido van Rossum46264582007-08-06 19:32:18 +00001010 "single empty field record must be quoted");
Skip Montanarob4a04172003-03-20 23:29:12 +00001011 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001012 }
1013 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001014 *quoted = 1;
1015 }
1016
Skip Montanarob4a04172003-03-20 23:29:12 +00001017 if (*quoted) {
1018 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001019 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001020 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001021 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001022 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001023 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001024#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001025}
1026
1027static int
1028join_check_rec_size(WriterObj *self, int rec_len)
1029{
1030 if (rec_len > self->rec_size) {
1031 if (self->rec_size == 0) {
1032 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001033 if (self->rec != NULL)
1034 PyMem_Free(self->rec);
Guido van Rossum46264582007-08-06 19:32:18 +00001035 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001036 }
1037 else {
Guido van Rossum46264582007-08-06 19:32:18 +00001038 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001039
1040 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Guido van Rossum46264582007-08-06 19:32:18 +00001041 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1042 self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001043 if (self->rec == NULL)
1044 PyMem_Free(old_rec);
1045 }
1046 if (self->rec == NULL) {
1047 PyErr_NoMemory();
1048 return 0;
1049 }
1050 }
1051 return 1;
1052}
1053
1054static int
Guido van Rossum46264582007-08-06 19:32:18 +00001055join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001056{
1057 int rec_len;
1058
1059 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1060 if (rec_len < 0)
1061 return 0;
1062
1063 /* grow record buffer if necessary */
1064 if (!join_check_rec_size(self, rec_len))
1065 return 0;
1066
1067 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1068 self->num_fields++;
1069
1070 return 1;
1071}
1072
1073static int
1074join_append_lineterminator(WriterObj *self)
1075{
1076 int terminator_len;
Guido van Rossum46264582007-08-06 19:32:18 +00001077 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001078
Guido van Rossum46264582007-08-06 19:32:18 +00001079 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001080 if (terminator_len == -1)
1081 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001082
1083 /* grow record buffer if necessary */
1084 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1085 return 0;
1086
Guido van Rossum46264582007-08-06 19:32:18 +00001087 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001088 if (terminator == NULL)
1089 return 0;
Guido van Rossum46264582007-08-06 19:32:18 +00001090 memmove(self->rec + self->rec_len, terminator,
1091 sizeof(Py_UNICODE)*terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001092 self->rec_len += terminator_len;
1093
1094 return 1;
1095}
1096
/* Docstring of the writer method "writerow" (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(sequence)\n"
"\n"
"Construct and write a CSV record from a sequence of fields. Non-string\n"
"elements will be converted to string.");
1102
1103static PyObject *
1104csv_writerow(WriterObj *self, PyObject *seq)
1105{
1106 DialectObj *dialect = self->dialect;
1107 int len, i;
1108
1109 if (!PySequence_Check(seq))
1110 return PyErr_Format(error_obj, "sequence expected");
1111
1112 len = PySequence_Length(seq);
1113 if (len < 0)
1114 return NULL;
1115
1116 /* Join all fields in internal buffer.
1117 */
1118 join_reset(self);
1119 for (i = 0; i < len; i++) {
1120 PyObject *field;
1121 int append_ok;
1122 int quoted;
1123
1124 field = PySequence_GetItem(seq, i);
1125 if (field == NULL)
1126 return NULL;
1127
Andrew McNamarac89f2842005-01-12 07:44:42 +00001128 switch (dialect->quoting) {
1129 case QUOTE_NONNUMERIC:
1130 quoted = !PyNumber_Check(field);
1131 break;
1132 case QUOTE_ALL:
1133 quoted = 1;
1134 break;
1135 default:
1136 quoted = 0;
1137 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001138 }
1139
Guido van Rossum46264582007-08-06 19:32:18 +00001140 if (PyUnicode_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001141 append_ok = join_append(self,
Guido van Rossum46264582007-08-06 19:32:18 +00001142 PyUnicode_AS_UNICODE(field),
1143 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001144 Py_DECREF(field);
1145 }
1146 else if (field == Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +00001147 append_ok = join_append(self, NULL,
1148 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001149 Py_DECREF(field);
1150 }
1151 else {
1152 PyObject *str;
1153
Thomas Heller519a0422007-11-15 20:48:54 +00001154 str = PyObject_Str(field);
Guido van Rossum46264582007-08-06 19:32:18 +00001155 Py_DECREF(field);
Skip Montanarob4a04172003-03-20 23:29:12 +00001156 if (str == NULL)
1157 return NULL;
Guido van Rossum46264582007-08-06 19:32:18 +00001158 append_ok = join_append(self,
1159 PyUnicode_AS_UNICODE(str),
1160 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001161 Py_DECREF(str);
1162 }
1163 if (!append_ok)
1164 return NULL;
1165 }
1166
1167 /* Add line terminator.
1168 */
1169 if (!join_append_lineterminator(self))
1170 return 0;
1171
Guido van Rossum46264582007-08-06 19:32:18 +00001172 return PyObject_CallFunction(self->writeline,
1173 "(u#)", self->rec,
1174 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001175}
1176
/* Docstring of the writer method "writerows" (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(sequence of sequences)\n"
"\n"
"Construct and write a series of sequences to a csv file. Non-string\n"
"elements will be converted to string.");
1182
Skip Montanarob4a04172003-03-20 23:29:12 +00001183static PyObject *
1184csv_writerows(WriterObj *self, PyObject *seqseq)
1185{
1186 PyObject *row_iter, *row_obj, *result;
1187
1188 row_iter = PyObject_GetIter(seqseq);
1189 if (row_iter == NULL) {
1190 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001191 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001192 return NULL;
1193 }
1194 while ((row_obj = PyIter_Next(row_iter))) {
1195 result = csv_writerow(self, row_obj);
1196 Py_DECREF(row_obj);
1197 if (!result) {
1198 Py_DECREF(row_iter);
1199 return NULL;
1200 }
1201 else
1202 Py_DECREF(result);
1203 }
1204 Py_DECREF(row_iter);
1205 if (PyErr_Occurred())
1206 return NULL;
1207 Py_INCREF(Py_None);
1208 return Py_None;
1209}
1210
/* Methods of writer objects; both take exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};

/* Offset of field x inside WriterObj, for the member table below. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Attributes exposed on writer objects; "dialect" is flagged READONLY. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }
};
1223
1224static void
1225Writer_dealloc(WriterObj *self)
1226{
Andrew McNamara77ead872005-01-10 02:09:41 +00001227 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001228 Py_XDECREF(self->dialect);
1229 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001230 if (self->rec != NULL)
1231 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001232 PyObject_GC_Del(self);
1233}
1234
/* GC traversal for writer objects (installed as tp_traverse).
 *
 * Reports the two PyObject members of the writer -- dialect and
 * writeline -- to the visit callback.  Py_VISIT returns from this
 * function early when the callback yields a non-zero value; otherwise
 * 0 is returned.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1242
/* GC clear for writer objects (installed as tp_clear).
 *
 * Releases the references held in dialect and writeline and leaves
 * each member set to NULL (Py_CLEAR).  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1250
/* Docstring used for the tp_doc slot of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1257
1258static PyTypeObject Writer_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001259 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001260 "_csv.writer", /*tp_name*/
1261 sizeof(WriterObj), /*tp_basicsize*/
1262 0, /*tp_itemsize*/
1263 /* methods */
1264 (destructor)Writer_dealloc, /*tp_dealloc*/
1265 (printfunc)0, /*tp_print*/
1266 (getattrfunc)0, /*tp_getattr*/
1267 (setattrfunc)0, /*tp_setattr*/
1268 (cmpfunc)0, /*tp_compare*/
1269 (reprfunc)0, /*tp_repr*/
1270 0, /*tp_as_number*/
1271 0, /*tp_as_sequence*/
1272 0, /*tp_as_mapping*/
1273 (hashfunc)0, /*tp_hash*/
1274 (ternaryfunc)0, /*tp_call*/
1275 (reprfunc)0, /*tp_str*/
1276 0, /*tp_getattro*/
1277 0, /*tp_setattro*/
1278 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001279 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1280 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001281 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001282 (traverseproc)Writer_traverse, /*tp_traverse*/
1283 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001284 0, /*tp_richcompare*/
1285 0, /*tp_weaklistoffset*/
1286 (getiterfunc)0, /*tp_iter*/
1287 (getiterfunc)0, /*tp_iternext*/
1288 Writer_methods, /*tp_methods*/
1289 Writer_memberlist, /*tp_members*/
1290 0, /*tp_getset*/
1291};
1292
1293static PyObject *
1294csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1295{
Andrew McNamara91b97462005-01-11 01:07:23 +00001296 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001297 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001298
1299 if (!self)
1300 return NULL;
1301
1302 self->dialect = NULL;
1303 self->writeline = NULL;
1304
1305 self->rec = NULL;
1306 self->rec_size = 0;
1307 self->rec_len = 0;
1308 self->num_fields = 0;
1309
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001310 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001311 Py_DECREF(self);
1312 return NULL;
1313 }
1314 self->writeline = PyObject_GetAttrString(output_file, "write");
1315 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1316 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001317 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001318 Py_DECREF(self);
1319 return NULL;
1320 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001321 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001322 if (self->dialect == NULL) {
1323 Py_DECREF(self);
1324 return NULL;
1325 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001326 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001327 return (PyObject *)self;
1328}
1329
1330/*
1331 * DIALECT REGISTRY
1332 */
1333static PyObject *
1334csv_list_dialects(PyObject *module, PyObject *args)
1335{
1336 return PyDict_Keys(dialects);
1337}
1338
1339static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001340csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001341{
Andrew McNamara86625972005-01-11 01:28:33 +00001342 PyObject *name_obj, *dialect_obj = NULL;
1343 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001344
Andrew McNamara86625972005-01-11 01:28:33 +00001345 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001346 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001347 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001348 PyErr_SetString(PyExc_TypeError,
1349 "dialect name must be a string or unicode");
1350 return NULL;
1351 }
Andrew McNamara86625972005-01-11 01:28:33 +00001352 dialect = _call_dialect(dialect_obj, kwargs);
1353 if (dialect == NULL)
1354 return NULL;
1355 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1356 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001357 return NULL;
1358 }
Andrew McNamara86625972005-01-11 01:28:33 +00001359 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001360 Py_INCREF(Py_None);
1361 return Py_None;
1362}
1363
1364static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001365csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001366{
Skip Montanarob4a04172003-03-20 23:29:12 +00001367 if (PyDict_DelItem(dialects, name_obj) < 0)
1368 return PyErr_Format(error_obj, "unknown dialect");
1369 Py_INCREF(Py_None);
1370 return Py_None;
1371}
1372
1373static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001374csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001375{
Skip Montanarob4a04172003-03-20 23:29:12 +00001376 return get_dialect_from_registry(name_obj);
1377}
1378
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001379static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001380csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001381{
1382 PyObject *new_limit = NULL;
1383 long old_limit = field_limit;
1384
Andrew McNamara31d88962005-01-12 03:45:10 +00001385 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001386 return NULL;
1387 if (new_limit != NULL) {
Guido van Rossumddefaf32007-01-14 03:31:43 +00001388 if (!PyInt_CheckExact(new_limit)) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001389 PyErr_Format(PyExc_TypeError,
1390 "limit must be an integer");
1391 return NULL;
1392 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001393 field_limit = PyLong_AsLong(new_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001394 }
Christian Heimes217cfd12007-12-02 14:31:20 +00001395 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001396}
1397
Skip Montanarob4a04172003-03-20 23:29:12 +00001398/*
1399 * MODULE
1400 */
1401
/* Module docstring, passed to Py_InitModule3() in init_csv().
 * NOTE(review): the example class and the SETTINGS list carry a single
 * leading space per line, so their nesting is not visible in the
 * rendered text -- confirm the intended indentation against upstream. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1459
/* Docstring of the module-level function "reader" (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001474
/* Docstring of the module-level function "writer" (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1488
1489PyDoc_STRVAR(csv_list_dialects_doc,
1490"Return a list of all know dialect names.\n"
1491" names = csv.list_dialects()");
1492
/* Docstring of the module-level function "get_dialect". */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1496
1497PyDoc_STRVAR(csv_register_dialect_doc,
1498"Create a mapping from a string name to a dialect class.\n"
1499" dialect = csv.register_dialect(name, dialect)");
1500
/* Docstring of the module-level function "unregister_dialect". */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1504
/* Docstring of the module-level function "field_size_limit". */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1511
/* Module-level function table handed to Py_InitModule3() in init_csv().
 * reader, writer and register_dialect accept keyword arguments;
 * unregister_dialect and get_dialect take exactly one argument. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }
};
1529
1530PyMODINIT_FUNC
1531init_csv(void)
1532{
1533 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001534 StyleDesc *style;
1535
1536 if (PyType_Ready(&Dialect_Type) < 0)
1537 return;
1538
1539 if (PyType_Ready(&Reader_Type) < 0)
1540 return;
1541
1542 if (PyType_Ready(&Writer_Type) < 0)
1543 return;
1544
1545 /* Create the module and add the functions */
1546 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1547 if (module == NULL)
1548 return;
1549
1550 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001551 if (PyModule_AddStringConstant(module, "__version__",
1552 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001553 return;
1554
1555 /* Add _dialects dictionary */
1556 dialects = PyDict_New();
1557 if (dialects == NULL)
1558 return;
1559 if (PyModule_AddObject(module, "_dialects", dialects))
1560 return;
1561
1562 /* Add quote styles into dictionary */
1563 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001564 if (PyModule_AddIntConstant(module, style->name,
1565 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001566 return;
1567 }
1568
1569 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001570 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001571 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1572 return;
1573
1574 /* Add the CSV exception object to the module. */
1575 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1576 if (error_obj == NULL)
1577 return;
1578 PyModule_AddObject(module, "Error", error_obj);
1579}